Init gcc-9.3.0

eastb233 2020-05-14 10:48:46 +08:00
parent b89da782cf
commit c491376c0c
43 changed files with 5055 additions and 8992 deletions


@@ -1,126 +0,0 @@
From 900ccfa89dda3ab5f7e44a0dd4d1e9d108b5dc8b Mon Sep 17 00:00:00 2001
From: rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue, 26 Mar 2019 13:18:23 +0000
Subject: [PATCH] 2019-02-26 Richard Biener <rguenther@suse.de>
Backport from mainline
2019-02-12 Richard Biener <rguenther@suse.de>
PR tree-optimization/89253
* tree-ssa-loop-split.c (tree_ssa_split_loops): Check we can
duplicate the loop.
* gfortran.dg/pr89253.f: New testcase.
2019-02-08 Richard Biener <rguenther@suse.de>
PR middle-end/89223
* tree-data-ref.c (initialize_matrix_A): Fail if constant
doesn't fit in HWI.
(analyze_subscript_affine_affine): Handle failure from
initialize_matrix_A.
* gcc.dg/torture/pr89223.c: New testcase.
2019-01-28 Richard Biener <rguenther@suse.de>
PR tree-optimization/88739
* tree-ssa-sccvn.c (vn_reference_lookup_3): Avoid generating
BIT_FIELD_REFs of non-mode-precision integral operands.
* gcc.c-torture/execute/pr88739.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-7-branch@269942 138bc75d-0d04-0410-961f-82ee72b054a4
---
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 2480f4e..a349e3e 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -2118,6 +2118,8 @@ initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
switch (TREE_CODE (chrec))
{
case POLYNOMIAL_CHREC:
+ if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
+ return chrec_dont_know;
A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
@@ -2499,7 +2501,7 @@ analyze_subscript_affine_affine (tree chrec_a,
tree *last_conflicts)
{
unsigned nb_vars_a, nb_vars_b, dim;
- HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
+ HOST_WIDE_INT gamma, gcd_alpha_beta;
lambda_matrix A, U, S;
struct obstack scratch_obstack;
@@ -2536,9 +2538,20 @@ analyze_subscript_affine_affine (tree chrec_a,
A = lambda_matrix_new (dim, 1, &scratch_obstack);
S = lambda_matrix_new (dim, 1, &scratch_obstack);
- init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
- init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
- gamma = init_b - init_a;
+ tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
+ tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
+ if (init_a == chrec_dont_know
+ || init_b == chrec_dont_know)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "affine-affine test failed: "
+ "representation issue.\n");
+ *overlaps_a = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
+ *last_conflicts = chrec_dont_know;
+ goto end_analyze_subs_aa;
+ }
+ gamma = int_cst_value (init_b) - int_cst_value (init_a);
/* Don't do all the hard work of solving the Diophantine equation
when we already know the solution: for example,
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index fd97213..3992597 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -649,7 +649,8 @@ tree_ssa_split_loops (void)
false, true)
&& niter.cmp != ERROR_MARK
/* We can't yet handle loops controlled by a != predicate. */
- && niter.cmp != NE_EXPR)
+ && niter.cmp != NE_EXPR
+ && can_duplicate_loop_p (loop))
{
if (split_loop (loop, &niter))
{
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index c93f1f2..a2e3ce2 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -2029,6 +2029,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
&offset2, &size2, &maxsize2,
&reverse);
+ tree def_rhs = gimple_assign_rhs1 (def_stmt);
if (!reverse
&& maxsize2 != -1
&& maxsize2 == size2
@@ -2041,11 +2042,14 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
according to endianness. */
&& (! INTEGRAL_TYPE_P (vr->type)
|| ref->size == TYPE_PRECISION (vr->type))
- && ref->size % BITS_PER_UNIT == 0)
+ && ref->size % BITS_PER_UNIT == 0
+ && (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
+ || (TYPE_PRECISION (TREE_TYPE (def_rhs))
+ == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (def_rhs))))))
{
code_helper rcode = BIT_FIELD_REF;
tree ops[3];
- ops[0] = SSA_VAL (gimple_assign_rhs1 (def_stmt));
+ ops[0] = SSA_VAL (def_rhs);
ops[1] = bitsize_int (ref->size);
ops[2] = bitsize_int (offset - offset2);
tree val = vn_nary_build_or_lookup (rcode, vr->type, ops);
--
2.9.3
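
A note on the PR middle-end/89223 hunk above: initialize_matrix_A now returns chrec_dont_know when the per-iteration step of an access evolution is a constant that does not fit in a signed HOST_WIDE_INT, instead of letting int_cst_value truncate it. The snippet below is only a hypothetical illustration of that situation, not the actual gcc.dg/torture/pr89223.c testcase:

/* Hypothetical example (NOT the real pr89223.c): the byte stride
   0x8000000000000000 is a valid unsigned 64-bit constant but does not
   fit in a signed HOST_WIDE_INT, which is the kind of value the new
   cst_and_fits_in_hwi check in initialize_matrix_A rejects.  */
extern unsigned char a[];

void
f (unsigned long n)
{
  for (unsigned long i = 0; i < n; i++)
    a[i * 0x8000000000000000ul] = a[i * 0x8000000000000000ul + 1];
}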


@@ -1,655 +0,0 @@
diff -urpN a/gcc/cfgexpand.c b/gcc/cfgexpand.c
--- a/gcc/cfgexpand.c 2019-05-30 16:58:45.350508770 +0800
+++ b/gcc/cfgexpand.c 2019-05-30 11:53:13.315156625 +0800
@@ -6094,6 +6094,23 @@ stack_protect_prologue (void)
rtx x, y;
x = expand_normal (crtl->stack_protect_guard);
+
+ if (targetm.have_stack_protect_combined_set () && guard_decl)
+ {
+ gcc_assert (DECL_P (guard_decl));
+ y = DECL_RTL (guard_decl);
+
+ /* Allow the target to compute the address of Y and copy it to X without
+ leaking Y into a register. This combined address + copy pattern
+ allows the target to prevent spilling of any intermediate results by
+ splitting it after the register allocator. */
+ if (rtx_insn *insn = targetm.gen_stack_protect_combined_set (x, y))
+ {
+ emit_insn (insn);
+ return;
+ }
+ }
+
if (guard_decl)
y = expand_normal (guard_decl);
else
diff -urpN a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
--- a/gcc/config/arm/arm.c 2019-05-30 16:58:45.354508770 +0800
+++ b/gcc/config/arm/arm.c 2019-05-30 16:59:05.058508073 +0800
@@ -7236,21 +7236,34 @@ legitimate_pic_operand_p (rtx x)
return 1;
}
-/* Record that the current function needs a PIC register. Initialize
- cfun->machine->pic_reg if we have not already done so. */
+/* Record that the current function needs a PIC register. If PIC_REG is null,
+ a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
+ both cases cfun->machine->pic_reg is initialized if we have not already done
+ so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
+ the PIC register is reloaded at the current position in the instruction stream
+ regardless of whether it was loaded before. Otherwise, it is only loaded if
+ this has not already been done (crtl->uses_pic_offset_table is null). Note that
+ a nonnull PIC_REG is only supported iff COMPUTE_NOW is true and a null PIC_REG
+ is only supported iff COMPUTE_NOW is false. */
static void
-require_pic_register (void)
+require_pic_register (rtx pic_reg, bool compute_now)
{
+ gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
/* A lot of the logic here is made obscure by the fact that this
routine gets called as part of the rtx cost estimation process.
We don't want those calls to affect any assumptions about the real
function; and further, we can't call entry_of_function() until we
start the real expansion process. */
- if (!crtl->uses_pic_offset_table)
+ if (!crtl->uses_pic_offset_table || compute_now)
{
- gcc_assert (can_create_pseudo_p ());
+ gcc_assert (can_create_pseudo_p ()
+ || (pic_reg != NULL_RTX
+ && REG_P (pic_reg)
+ && GET_MODE (pic_reg) == Pmode));
if (arm_pic_register != INVALID_REGNUM
+ && !compute_now
&& !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
{
if (!cfun->machine->pic_reg)
@@ -7266,8 +7279,19 @@ require_pic_register (void)
{
rtx_insn *seq, *insn;
- if (!cfun->machine->pic_reg)
- cfun->machine->pic_reg = gen_reg_rtx (Pmode);
+ if (pic_reg == NULL_RTX && cfun->machine->pic_reg == NULL_RTX)
+ {
+ pic_reg = gen_reg_rtx (Pmode);
+ cfun->machine->pic_reg = pic_reg;
+ }
+ else if (pic_reg == NULL_RTX)
+ {
+ pic_reg = cfun->machine->pic_reg;
+ }
+ else if (cfun->machine->pic_reg == NULL_RTX)
+ {
+ cfun->machine->pic_reg = pic_reg;
+ }
/* Play games to avoid marking the function as needing pic
if we are being called as part of the cost-estimation
@@ -7278,11 +7306,12 @@ require_pic_register (void)
start_sequence ();
if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
- && arm_pic_register > LAST_LO_REGNUM)
+ && arm_pic_register > LAST_LO_REGNUM
+ && !compute_now)
emit_move_insn (cfun->machine->pic_reg,
gen_rtx_REG (Pmode, arm_pic_register));
else
- arm_load_pic_register (0UL);
+ arm_load_pic_register (0UL, pic_reg);
seq = get_insns ();
end_sequence ();
@@ -7295,16 +7324,33 @@ require_pic_register (void)
we can't yet emit instructions directly in the final
insn stream. Queue the insns on the entry edge, they will
be committed after everything else is expanded. */
- insert_insn_on_edge (seq,
- single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+ if (currently_expanding_to_rtl)
+ insert_insn_on_edge (seq,
+ single_succ_edge
+ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+ else
+ emit_insn (seq);
}
}
}
}
+/* Legitimize a PIC load of ORIG into REG. If REG is NULL, a new pseudo is
+ created to hold the result of the load. If not NULL, PIC_REG indicates
+ which register to use as the PIC register, otherwise it is decided by the
+ register allocator. COMPUTE_NOW forces the PIC register to be loaded at the
+ current location in the instruction stream, regardless of whether it was
+ loaded previously. Note that a nonnull PIC_REG is only supported iff
+ COMPUTE_NOW is true and a null PIC_REG is only supported iff COMPUTE_NOW is false.
+
+ Returns the register REG into which the PIC load is performed. */
+
rtx
-legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
+legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
+ bool compute_now)
{
+ gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
if (GET_CODE (orig) == SYMBOL_REF
|| GET_CODE (orig) == LABEL_REF)
{
@@ -7337,9 +7383,12 @@ legitimize_pic_address (rtx orig, machin
rtx mem;
/* If this function doesn't have a pic register, create one now. */
- require_pic_register ();
+ require_pic_register (pic_reg, compute_now);
+
+ if (pic_reg == NULL_RTX)
+ pic_reg = cfun->machine->pic_reg;
- pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
+ pat = gen_calculate_pic_address (reg, pic_reg, orig);
/* Make the MEM as close to a constant as possible. */
mem = SET_SRC (pat);
@@ -7388,9 +7437,11 @@ legitimize_pic_address (rtx orig, machin
gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
- base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+ base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
+ pic_reg, compute_now);
offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
- base == reg ? 0 : reg);
+ base == reg ? 0 : reg, pic_reg,
+ compute_now);
if (CONST_INT_P (offset))
{
@@ -7490,16 +7541,17 @@ static GTY(()) int pic_labelno;
low register. */
void
-arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
+arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
- rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
+ rtx l1, labelno, pic_tmp, pic_rtx;
if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
return;
gcc_assert (flag_pic);
- pic_reg = cfun->machine->pic_reg;
+ if (pic_reg == NULL_RTX)
+ pic_reg = cfun->machine->pic_reg;
if (TARGET_VXWORKS_RTP)
{
pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
@@ -8558,7 +8610,8 @@ arm_legitimize_address (rtx x, rtx orig_
{
/* We need to find and carefully transform any SYMBOL and LABEL
references; so go back to the original address expression. */
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+ false /*compute_now*/);
if (new_x != orig_x)
x = new_x;
@@ -8626,7 +8679,8 @@ thumb_legitimize_address (rtx x, rtx ori
{
/* We need to find and carefully transform any SYMBOL and LABEL
references; so go back to the original address expression. */
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+ false /*compute_now*/);
if (new_x != orig_x)
x = new_x;
@@ -17800,7 +17854,7 @@ arm_emit_call_insn (rtx pat, rtx addr, b
? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
: !SYMBOL_REF_LOCAL_P (addr)))
{
- require_pic_register ();
+ require_pic_register (NULL_RTX, false /*compute_now*/);
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
}
@@ -21706,7 +21760,7 @@ arm_expand_prologue (void)
mask &= THUMB2_WORK_REGS;
if (!IS_NESTED (func_type))
mask |= (1 << IP_REGNUM);
- arm_load_pic_register (mask);
+ arm_load_pic_register (mask, NULL_RTX);
}
/* If we are profiling, make sure no instructions are scheduled before
@@ -24909,7 +24963,7 @@ thumb1_expand_prologue (void)
/* Load the pic register before setting the frame pointer,
so we can use r7 as a temporary work register. */
if (flag_pic && arm_pic_register != INVALID_REGNUM)
- arm_load_pic_register (live_regs_mask);
+ arm_load_pic_register (live_regs_mask, NULL_RTX);
if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
diff -urpN a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
--- a/gcc/config/arm/arm.md 2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/arm.md 2019-05-30 11:52:58.491157149 +0800
@@ -6051,7 +6051,8 @@
operands[1] = legitimize_pic_address (operands[1], SImode,
(!can_create_pseudo_p ()
? operands[0]
- : 0));
+ : NULL_RTX), NULL_RTX,
+ false /*compute_now*/);
}
"
)
@@ -6340,7 +6341,7 @@
/* r3 is clobbered by set/longjmp, so we can use it as a scratch
register. */
if (arm_pic_register != INVALID_REGNUM)
- arm_load_pic_register (1UL << 3);
+ arm_load_pic_register (1UL << 3, NULL_RTX);
DONE;
}")
@@ -8666,6 +8667,164 @@
(set_attr "conds" "clob")]
)
+;; Named patterns for stack smashing protection.
+(define_expand "stack_protect_combined_set"
+ [(parallel
+ [(set (match_operand:SI 0 "memory_operand" "")
+ (unspec:SI [(match_operand:SI 1 "guard_operand" "")]
+ UNSPEC_SP_SET))
+ (clobber (match_scratch:SI 2 ""))
+ (clobber (match_scratch:SI 3 ""))])]
+ ""
+ ""
+)
+
+;; Use a separate insn from the above expand to be able to have the mem outside
+;; operand #1 when register allocation happens. This is needed to keep LRA
+;; from trying to reload the guard, since we need to control how the PIC access
+;; is done in the -fpic/-fPIC case (see the COMPUTE_NOW parameter when calling
+;; legitimize_pic_address ()).
+(define_insn_and_split "*stack_protect_combined_set_insn"
+ [(set (match_operand:SI 0 "memory_operand" "=m,m")
+ (unspec:SI [(mem:SI (match_operand:SI 1 "guard_addr_operand" "X,X"))]
+ UNSPEC_SP_SET))
+ (clobber (match_scratch:SI 2 "=&l,&r"))
+ (clobber (match_scratch:SI 3 "=&l,&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:SI [(mem:SI (match_dup 2))]
+ UNSPEC_SP_SET))
+ (clobber (match_dup 2))])]
+ "
+{
+ if (flag_pic)
+ {
+ /* Forces recomputing of GOT base now. */
+ legitimize_pic_address (operands[1], SImode, operands[2], operands[3],
+ true /*compute_now*/);
+ }
+ else
+ {
+ if (address_operand (operands[1], SImode))
+ operands[2] = operands[1];
+ else
+ {
+ rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
+ emit_move_insn (operands[2], mem);
+ }
+ }
+}"
+ [(set_attr "arch" "t1,32")]
+)
+
+(define_insn "*stack_protect_set_insn"
+ [(set (match_operand:SI 0 "memory_operand" "=m,m")
+ (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "+&l,&r"))]
+ UNSPEC_SP_SET))
+ (clobber (match_dup 1))]
+ ""
+ "@
+ ldr\\t%1, [%1]\;str\\t%1, %0\;movs\t%1,#0
+ ldr\\t%1, [%1]\;str\\t%1, %0\;mov\t%1,#0"
+ [(set_attr "length" "8,12")
+ (set_attr "conds" "clob,nocond")
+ (set_attr "type" "multiple")
+ (set_attr "arch" "t1,32")]
+)
+
+(define_expand "stack_protect_combined_test"
+ [(parallel
+ [(set (pc)
+ (if_then_else
+ (eq (match_operand:SI 0 "memory_operand" "")
+ (unspec:SI [(match_operand:SI 1 "guard_operand" "")]
+ UNSPEC_SP_TEST))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ ""
+)
+
+;; Use a separate insn from the above expand to be able to have the mem outside
+;; operand #1 when register allocation happens. This is needed to keep LRA
+;; from trying to reload the guard, since we need to control how the PIC access
+;; is done in the -fpic/-fPIC case (see the COMPUTE_NOW parameter when calling
+;; legitimize_pic_address ()).
+(define_insn_and_split "*stack_protect_combined_test_insn"
+ [(set (pc)
+ (if_then_else
+ (eq (match_operand:SI 0 "memory_operand" "m,m")
+ (unspec:SI [(mem:SI (match_operand:SI 1 "guard_addr_operand" "X,X"))]
+ UNSPEC_SP_TEST))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (match_scratch:SI 3 "=&l,&r"))
+ (clobber (match_scratch:SI 4 "=&l,&r"))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx eq;
+
+ if (flag_pic)
+ {
+ /* Forces recomputing of GOT base now. */
+ legitimize_pic_address (operands[1], SImode, operands[3], operands[4],
+ true /*compute_now*/);
+ }
+ else
+ {
+ if (address_operand (operands[1], SImode))
+ operands[3] = operands[1];
+ else
+ {
+ rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
+ emit_move_insn (operands[3], mem);
+ }
+ }
+ if (TARGET_32BIT)
+ {
+ emit_insn (gen_arm_stack_protect_test_insn (operands[4], operands[0],
+ operands[3]));
+ rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ eq = gen_rtx_EQ (CC_Zmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_arm_cond_branch (operands[2], eq, cc_reg));
+ }
+ else
+ {
+ emit_insn (gen_thumb1_stack_protect_test_insn (operands[4], operands[0],
+ operands[3]));
+ eq = gen_rtx_EQ (VOIDmode, operands[4], const0_rtx);
+ emit_jump_insn (gen_cbranchsi4 (eq, operands[4], const0_rtx,
+ operands[2]));
+ }
+ DONE;
+}
+ [(set_attr "arch" "t1,32")]
+)
+
+(define_insn "arm_stack_protect_test_insn"
+ [(set (reg:CC_Z CC_REGNUM)
+ (compare:CC_Z (unspec:SI [(match_operand:SI 1 "memory_operand" "m,m")
+ (mem:SI (match_operand:SI 2 "register_operand" "+l,r"))]
+ UNSPEC_SP_TEST)
+ (const_int 0)))
+ (clobber (match_operand:SI 0 "register_operand" "=&l,&r"))
+ (clobber (match_dup 2))]
+ "TARGET_32BIT"
+ "ldr\t%0, [%2]\;ldr\t%2, %1\;eors\t%0, %2, %0"
+ [(set_attr "length" "8,12")
+ (set_attr "conds" "set")
+ (set_attr "type" "multiple")
+ (set_attr "arch" "t,32")]
+)
+
(define_expand "casesi"
[(match_operand:SI 0 "s_register_operand" "") ; index to jump on
(match_operand:SI 1 "const_int_operand" "") ; lower bound
diff -urpN a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
--- a/gcc/config/arm/arm-protos.h 2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/arm-protos.h 2019-05-30 11:52:58.491157149 +0800
@@ -28,7 +28,7 @@ extern enum unwind_info_type arm_except_
extern int use_return_insn (int, rtx);
extern bool use_simple_return_p (void);
extern enum reg_class arm_regno_class (int);
-extern void arm_load_pic_register (unsigned long);
+extern void arm_load_pic_register (unsigned long, rtx);
extern int arm_volatile_func (void);
extern void arm_expand_prologue (void);
extern void arm_expand_epilogue (bool);
@@ -69,7 +69,7 @@ extern int const_ok_for_dimode_op (HOST_
extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int);
extern int legitimate_pic_operand_p (rtx);
-extern rtx legitimize_pic_address (rtx, machine_mode, rtx);
+extern rtx legitimize_pic_address (rtx, machine_mode, rtx, rtx, bool);
extern rtx legitimize_tls_address (rtx, rtx);
extern bool arm_legitimate_address_p (machine_mode, rtx, bool);
extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int);
diff -urpN a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
--- a/gcc/config/arm/predicates.md 2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/predicates.md 2019-05-30 11:52:58.491157149 +0800
@@ -31,6 +31,23 @@
|| REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
})
+; Predicate for stack protector guard's address in
+; stack_protect_combined_set_insn and stack_protect_combined_test_insn patterns
+(define_predicate "guard_addr_operand"
+ (match_test "true")
+{
+ return (CONSTANT_ADDRESS_P (op)
+ || !targetm.cannot_force_const_mem (mode, op));
+})
+
+; Predicate for stack protector guard in stack_protect_combined_set and
+; stack_protect_combined_test patterns
+(define_predicate "guard_operand"
+ (match_code "mem")
+{
+ return guard_addr_operand (XEXP (op, 0), mode);
+})
+
(define_predicate "imm_for_neon_inv_logic_operand"
(match_code "const_vector")
{
diff -urpN a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
--- a/gcc/config/arm/thumb1.md 2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/thumb1.md 2019-05-30 11:52:58.491157149 +0800
@@ -1964,4 +1964,17 @@
}"
[(set_attr "type" "mov_reg")]
)
+
+(define_insn "thumb1_stack_protect_test_insn"
+ [(set (match_operand:SI 0 "register_operand" "=&l")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")
+ (mem:SI (match_operand:SI 2 "register_operand" "+l"))]
+ UNSPEC_SP_TEST))
+ (clobber (match_dup 2))]
+ "TARGET_THUMB1"
+ "ldr\t%0, [%2]\;ldr\t%2, %1\;eors\t%0, %2, %0"
+ [(set_attr "length" "8")
+ (set_attr "conds" "set")
+ (set_attr "type" "multiple")]
+)
diff -urpN a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
--- a/gcc/config/arm/unspecs.md 2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/unspecs.md 2019-05-30 11:52:58.491157149 +0800
@@ -86,6 +86,9 @@
UNSPEC_PROBE_STACK ; Probe stack memory reference
UNSPEC_NONSECURE_MEM ; Represent non-secure memory in ARMv8-M with
; security extension
+ UNSPEC_SP_SET ; Represent the setting of stack protector's canary
+ UNSPEC_SP_TEST ; Represent the testing of stack protector's canary
+ ; against the guard.
])
(define_c_enum "unspec" [
diff -urpN a/gcc/doc/md.texi b/gcc/doc/md.texi
--- a/gcc/doc/md.texi 2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/doc/md.texi 2019-05-30 11:52:58.491157149 +0800
@@ -6955,22 +6955,61 @@ builtins.
The get/set patterns have a single output/input operand respectively,
with @var{mode} intended to be @code{Pmode}.
+@cindex @code{stack_protect_combined_set} instruction pattern
+@item @samp{stack_protect_combined_set}
+This pattern, if defined, moves a @code{ptr_mode} value from an address
+whose declaration RTX is given in operand 1 to the memory in operand 0
+without leaving the value in a register afterward. If several
+instructions are needed by the target to perform the operation (eg. to
+load the address from a GOT entry then load the @code{ptr_mode} value
+and finally store it), it is the backend's responsibility to ensure no
+intermediate result gets spilled. This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+If this pattern is not defined, then the address declaration is
+expanded first in the standard way and a @code{stack_protect_set}
+pattern is then generated to move the value from that address to the
+address in operand 0.
+
@cindex @code{stack_protect_set} instruction pattern
@item @samp{stack_protect_set}
-This pattern, if defined, moves a @code{ptr_mode} value from the memory
-in operand 1 to the memory in operand 0 without leaving the value in
-a register afterward. This is to avoid leaking the value some place
-that an attacker might use to rewrite the stack guard slot after
-having clobbered it.
+This pattern, if defined, moves a @code{ptr_mode} value from the valid
+memory location in operand 1 to the memory in operand 0 without leaving
+the value in a register afterward. This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+Note: on targets whose addressing modes do not allow loading directly
+from the stack guard address, the address is first expanded in the
+standard way, which could cause some spills.
If this pattern is not defined, then a plain move pattern is generated.
+@cindex @code{stack_protect_combined_test} instruction pattern
+@item @samp{stack_protect_combined_test}
+This pattern, if defined, compares a @code{ptr_mode} value from an
+address whose declaration RTX is given in operand 1 with the memory in
+operand 0 without leaving the value in a register afterward and
+branches to operand 2 if the values were equal. If several
+instructions are needed by the target to perform the operation (eg. to
+load the address from a GOT entry then load the @code{ptr_mode} value
+and finally store it), it is the backend's responsibility to ensure no
+intermediate result gets spilled. This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+If this pattern is not defined, then the address declaration is
+expanded first in the standard way and a @code{stack_protect_test}
+pattern is then generated to compare the value from that address to the
+value at the memory in operand 0.
+
@cindex @code{stack_protect_test} instruction pattern
@item @samp{stack_protect_test}
This pattern, if defined, compares a @code{ptr_mode} value from the
-memory in operand 1 with the memory in operand 0 without leaving the
-value in a register afterward and branches to operand 2 if the values
-were equal.
+valid memory location in operand 1 with the memory in operand 0 without
+leaving the value in a register afterward and branches to operand 2 if
+the values were equal.
If this pattern is not defined, then a plain compare pattern and
conditional branch pattern is used.
diff -urpN a/gcc/function.c b/gcc/function.c
--- a/gcc/function.c 2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/function.c 2019-05-30 11:53:14.071156599 +0800
@@ -5065,18 +5065,34 @@ stack_protect_epilogue (void)
tree guard_decl = targetm.stack_protect_guard ();
rtx_code_label *label = gen_label_rtx ();
rtx x, y;
- rtx_insn *seq;
+ rtx_insn *seq = NULL;
x = expand_normal (crtl->stack_protect_guard);
- if (guard_decl)
- y = expand_normal (guard_decl);
+
+ if (targetm.have_stack_protect_combined_test () && guard_decl)
+ {
+ gcc_assert (DECL_P (guard_decl));
+ y = DECL_RTL (guard_decl);
+ /* Allow the target to compute the address of Y and compare it with X without
+ leaking Y into a register. This combined address + compare pattern
+ allows the target to prevent spilling of any intermediate results by
+ splitting it after the register allocator. */
+ seq = targetm.gen_stack_protect_combined_test (x, y, label);
+ }
else
- y = const0_rtx;
+ {
+ if (guard_decl)
+ y = expand_normal (guard_decl);
+ else
+ y = const0_rtx;
+
+ /* Allow the target to compare Y with X without leaking either into
+ a register. */
+ if (targetm.have_stack_protect_test ())
+ seq = targetm.gen_stack_protect_test (x, y, label);
+ }
- /* Allow the target to compare Y with X without leaking either into
- a register. */
- if (targetm.have_stack_protect_test ()
- && ((seq = targetm.gen_stack_protect_test (x, y, label)) != NULL_RTX))
+ if (seq)
emit_insn (seq);
else
emit_cmp_and_jump_insns (x, y, EQ, NULL_RTX, ptr_mode, 1, label);
diff -urpN a/gcc/genpreds.c b/gcc/genpreds.c
--- a/gcc/genpreds.c 2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/genpreds.c 2019-05-30 11:53:14.163156595 +0800
@@ -1581,7 +1581,8 @@ write_insn_preds_c (void)
#include \"reload.h\"\n\
#include \"regs.h\"\n\
#include \"emit-rtl.h\"\n\
-#include \"tm-constrs.h\"\n");
+#include \"tm-constrs.h\"\n\
+#include \"target.h\"\n");
FOR_ALL_PREDICATES (p)
write_one_predicate_function (p);
diff -urpN a/gcc/target-insns.def b/gcc/target-insns.def
--- a/gcc/target-insns.def 2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/target-insns.def 2019-05-30 11:52:58.495157149 +0800
@@ -96,7 +96,9 @@ DEF_TARGET_INSN (sibcall_value, (rtx x0,
DEF_TARGET_INSN (simple_return, (void))
DEF_TARGET_INSN (split_stack_prologue, (void))
DEF_TARGET_INSN (split_stack_space_check, (rtx x0, rtx x1))
+DEF_TARGET_INSN (stack_protect_combined_set, (rtx x0, rtx x1))
DEF_TARGET_INSN (stack_protect_set, (rtx x0, rtx x1))
+DEF_TARGET_INSN (stack_protect_combined_test, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (stack_protect_test, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (store_multiple, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (tablejump, (rtx x0, rtx x1))
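
For context, a minimal sketch of what the new combined patterns protect (this example is not part of the patch): any function with a local buffer, built with -fstack-protector-all -fPIC for an arm-linux target, gets a canary store in its prologue and a canary check in its epilogue. The stack_protect_combined_set/test expanders above keep the GOT-based address computation of the guard and the copy/compare together in one insn until after register allocation, so no intermediate holding the guard's address or value can be spilled to the very stack frame an overflow of buf could corrupt.

/* Illustration only -- built with: gcc -O2 -fstack-protector-all -fPIC
   The prologue copies the canary from the global guard into the frame
   via the combined set pattern; the epilogue re-checks it via the
   combined test pattern before returning.  */
unsigned int
copy_and_hash (const char *s)
{
  char buf[64];
  __builtin_strcpy (buf, s);        /* potential overflow of buf */
  unsigned int h = 0;
  for (const char *p = buf; *p; p++)
    h = h * 31u + (unsigned char) *p;
  return h;
}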


@@ -1,51 +0,0 @@
diff -urpN a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
--- a/gcc/config/rs6000/altivec.md 2018-01-15 01:47:30.483964000 +0800
+++ b/gcc/config/rs6000/altivec.md 2019-09-09 00:01:25.770835633 +0800
@@ -74,9 +74,6 @@
UNSPEC_VUNPACK_LO_SIGN_DIRECT
UNSPEC_VUPKHPX
UNSPEC_VUPKLPX
- UNSPEC_DARN
- UNSPEC_DARN_32
- UNSPEC_DARN_RAW
UNSPEC_DST
UNSPEC_DSTT
UNSPEC_DSTST
@@ -3770,21 +3767,21 @@
(define_insn "darn_32"
[(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(const_int 0)] UNSPEC_DARN_32))]
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_DARN_32))]
"TARGET_P9_MISC"
"darn %0,0"
[(set_attr "type" "integer")])
(define_insn "darn_raw"
[(set (match_operand:DI 0 "register_operand" "=r")
- (unspec:DI [(const_int 0)] UNSPEC_DARN_RAW))]
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_DARN_RAW))]
"TARGET_P9_MISC && TARGET_64BIT"
"darn %0,2"
[(set_attr "type" "integer")])
(define_insn "darn"
[(set (match_operand:DI 0 "register_operand" "=r")
- (unspec:DI [(const_int 0)] UNSPEC_DARN))]
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_DARN))]
"TARGET_P9_MISC && TARGET_64BIT"
"darn %0,1"
[(set_attr "type" "integer")])
diff -urpN a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
--- a/gcc/config/rs6000/rs6000.md 2018-01-21 21:32:58.843504000 +0800
+++ b/gcc/config/rs6000/rs6000.md 2019-09-08 23:53:13.122859153 +0800
@@ -163,6 +163,9 @@
UNSPECV_EH_RR ; eh_reg_restore
UNSPECV_ISYNC ; isync instruction
UNSPECV_MFTB ; move from time base
+ UNSPECV_DARN ; darn 1 (deliver a random number)
+ UNSPECV_DARN_32 ; darn 2
+ UNSPECV_DARN_RAW ; darn 0
UNSPECV_NLGR ; non-local goto receiver
UNSPECV_MFFS ; Move from FPSCR
UNSPECV_MTFSF ; Move to FPSCR Fields
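
The point of switching the darn patterns from unspec to unspec_volatile is that every executed darn must deliver a fresh random number, so the compiler may not merge or delete "identical" occurrences. A small sketch, assuming the PowerPC __builtin_darn () built-in (enabled with -mcpu=power9) keeps its documented unsigned long long signature:

/* Sketch only: with a plain unspec, CSE could fold the two calls below
   into a single darn instruction and return the same "random" value
   twice; unspec_volatile keeps both instructions in the output.  */
unsigned long long
two_randoms (unsigned long long *first)
{
  unsigned long long a = __builtin_darn ();
  unsigned long long b = __builtin_darn ();
  *first = a;
  return b;
}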


@@ -1,24 +0,0 @@
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2018-10-09 11:49:19.000000000 +0800
+++ b/gcc/config/aarch64/aarch64.c 2018-10-09 13:42:15.000000000 +0800
@@ -1619,7 +1619,7 @@ aarch64_load_symref_appropriately (rtx d
case SYMBOL_SMALL_TLSDESC:
{
machine_mode mode = GET_MODE (dest);
- rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
+ rtx x0 = gen_rtx_REG (ptr_mode, R0_REGNUM);
rtx tp;
gcc_assert (mode == Pmode || mode == ptr_mode);
@@ -1635,6 +1635,11 @@ aarch64_load_symref_appropriately (rtx d
if (mode != Pmode)
tp = gen_lowpart (mode, tp);
+ if (mode != ptr_mode)
+ {
+ x0 = force_reg (mode, gen_rtx_SIGN_EXTEND (mode, x0));
+ }
+
emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
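
A hedged illustration of the code path the aarch64 TLSDESC hunk changes (not taken from the patch or its testsuite): under -mabi=ilp32, ptr_mode is narrower than Pmode, so the descriptor call returns the TLS offset as a 32-bit value in w0; per the hunk above it is now sign-extended to the wider mode before being added to the thread pointer rather than being treated as if it were already Pmode-sized.

/* Illustration only: an access to an extern TLS variable compiled with
   -mabi=ilp32 -fpic typically uses the global-dynamic (TLS descriptor)
   model and thus exercises the SYMBOL_SMALL_TLSDESC path patched
   above.  */
extern __thread int counter;

int
bump_counter (void)
{
  return ++counter;
}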


@@ -1,31 +0,0 @@
diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md 2018-10-09 11:30:50.000000000 +0800
+++ b/gcc/config/aarch64/aarch64.md 2018-10-09 11:52:54.000000000 +0800
@@ -857,6 +857,13 @@
: !REG_P (callee))
XEXP (operands[0], 0) = force_reg (Pmode, callee);
+ if (TARGET_ILP32
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+ && GET_MODE (XEXP (operands[0], 0)) == SImode)
+ XEXP (operands[0], 0) = convert_memory_address (DImode,
+ XEXP (operands[0], 0));
+
+
if (operands[2] == NULL_RTX)
operands[2] = const0_rtx;
@@ -889,6 +896,13 @@
: !REG_P (callee))
XEXP (operands[1], 0) = force_reg (Pmode, callee);
+ if (TARGET_ILP32
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && GET_MODE (XEXP (operands[1], 0)) == SImode)
+ XEXP (operands[1], 0) = convert_memory_address (DImode,
+ XEXP (operands[1], 0));
+
+
if (operands[3] == NULL_RTX)
operands[3] = const0_rtx;


@@ -1,780 +0,0 @@
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2019-04-15 14:50:25.866378665 +0800
+++ b/gcc/config/aarch64/aarch64.c 2019-04-15 14:49:21.986376983 +0800
@@ -554,6 +554,31 @@ static const struct tune_params generic_
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
};
+static const struct tune_params tsv110_tunings =
+{
+ &cortexa57_extra_costs,
+ &generic_addrcost_table,
+ &generic_regmove_cost,
+ &generic_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ 4, /* memmov_cost */
+ 4, /* issue_rate */
+ AARCH64_FUSE_NOTHING, /* fusible_ops */
+ 16, /* function_align. */
+ 16, /* jump_align. */
+ 8, /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+};
+
static const struct tune_params cortexa35_tunings =
{
&cortexa53_extra_costs,
diff -urpN a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
--- a/gcc/config/aarch64/aarch64-cores.def 2017-02-15 08:09:28.845771000 +0800
+++ b/gcc/config/aarch64/aarch64-cores.def 2019-04-15 14:49:21.986376983 +0800
@@ -78,6 +78,8 @@ AARCH64_CORE("xgene1", xgene1, x
AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, tsv110, 0x48, 0xd01, -1)
+
/* V8 big.LITTLE implementations. */
AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md 2019-04-15 14:50:25.870378665 +0800
+++ b/gcc/config/aarch64/aarch64.md 2019-04-15 14:49:21.986376983 +0800
@@ -226,6 +226,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff -urpN a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
--- a/gcc/config/aarch64/aarch64-tune.md 2017-02-15 08:09:28.845771000 +0800
+++ b/gcc/config/aarch64/aarch64-tune.md 2019-04-15 14:49:21.986376983 +0800
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
+ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,tsv110,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff -urpN a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
--- a/gcc/config/aarch64/tsv110.md 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/config/aarch64/tsv110.md 2019-04-15 14:55:30.420081420 +0800
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as a multi-issue pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipelines for branch operations, shared with ALU2 and ALU3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take one of two issue slots (ALU2 or ALU3).
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "tsv110_fsu1")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "tsv110_fsu1")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "tsv110_fsu1")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store. We don't attempt to model that
+;; precisely; instead, treat them as blocking execution for two cycles
+;; when issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_fp_sqrts" 24
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "tsv110_fsu2")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha256_fast"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")

View File

@ -0,0 +1,57 @@
diff -Nurp a/gcc/testsuite/gcc.dg/pr94269.c b/gcc/testsuite/gcc.dg/pr94269.c
--- a/gcc/testsuite/gcc.dg/pr94269.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/pr94269.c 2020-04-17 17:04:50.608000000 +0800
@@ -0,0 +1,26 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-options "-O2 -ftree-loop-vectorize -funsafe-math-optimizations -march=armv8.2-a+sve -msve-vector-bits=256" } */
+
+float
+foo(long n, float *x, int inc_x,
+ float *y, int inc_y)
+{
+ float dot = 0.0;
+ int ix = 0, iy = 0;
+
+ if (n < 0) {
+ return dot;
+ }
+
+ int i = 0;
+ while (i < n) {
+ dot += y[iy] * x[ix];
+ ix += inc_x;
+ iy += inc_y;
+ i++;
+ }
+
+ return dot;
+}
+
+/* { dg-final { scan-assembler-not "smaddl" { target aarch64*-*-* } } } */
diff -Nurp a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
--- a/gcc/tree-ssa-math-opts.c 2020-04-17 16:43:59.540000000 +0800
+++ b/gcc/tree-ssa-math-opts.c 2020-04-17 16:48:34.072036000 +0800
@@ -2721,11 +2721,14 @@ convert_plusminus_to_widen (gimple_stmt_
multiply-and-accumulate instructions.
If the widened-multiplication result has more than one uses, it is
- probably wiser not to do the conversion. */
+ probably wiser not to do the conversion. Also restrict this operation
+ to single basic block to avoid moving the multiply to a different block
+ with a higher execution frequency. */
if (code == PLUS_EXPR
&& (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
{
if (!has_single_use (rhs1)
+ || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
|| !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
&type2, &mult_rhs2))
return false;
@@ -2735,6 +2738,7 @@ convert_plusminus_to_widen (gimple_stmt_
else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
{
if (!has_single_use (rhs2)
+ || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
|| !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
&type2, &mult_rhs2))
return false;
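
For reference, a minimal sketch of the kind of code this pass still targets (an illustration of mine, not part of the patch or its testsuite): here the widening multiply and the accumulating add sit in the same basic block, so the added gimple_bb check does not block the conversion and the pair can still be fused into a single widening multiply-accumulate such as AArch64 smaddl.

/* Illustrative only: single-basic-block widening multiply-accumulate.
   The function name and loop shape are assumptions, not taken from GCC.  */
long long
mac (const int *a, const int *b, int n)
{
  long long sum = 0;
  for (int i = 0; i < n; i++)
    sum += (long long) a[i] * b[i];
  return sum;
}

Each multiply here feeds an add in the same block, which is exactly the case the new gimple_bb (rhs1_stmt) != gimple_bb (stmt) test leaves untouched.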

View File

@ -1,60 +0,0 @@
diff -urp a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
--- a/gcc/config/arm/arm.c 2019-01-18 11:25:20.840179114 +0800
+++ b/gcc/config/arm/arm.c 2019-01-18 11:25:47.548179817 +0800
@@ -14306,18 +14306,36 @@ gen_movmem_ldrd_strd (rtx *operands)
emit_move_insn (reg0, src);
else
{
- emit_insn (gen_unaligned_loadsi (low_reg, src));
- src = next_consecutive_mem (src);
- emit_insn (gen_unaligned_loadsi (hi_reg, src));
+ if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN)
+ {
+ emit_insn (gen_unaligned_loadsi (hi_reg, src));
+ src = next_consecutive_mem (src);
+ emit_insn (gen_unaligned_loadsi (low_reg, src));
+ }
+ else
+ {
+ emit_insn (gen_unaligned_loadsi (low_reg, src));
+ src = next_consecutive_mem (src);
+ emit_insn (gen_unaligned_loadsi (hi_reg, src));
+ }
}
if (dst_aligned)
emit_move_insn (dst, reg0);
else
{
- emit_insn (gen_unaligned_storesi (dst, low_reg));
- dst = next_consecutive_mem (dst);
- emit_insn (gen_unaligned_storesi (dst, hi_reg));
+ if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN)
+ {
+ emit_insn (gen_unaligned_storesi (dst, hi_reg));
+ dst = next_consecutive_mem (dst);
+ emit_insn (gen_unaligned_storesi (dst, low_reg));
+ }
+ else
+ {
+ emit_insn (gen_unaligned_storesi (dst, low_reg));
+ dst = next_consecutive_mem (dst);
+ emit_insn (gen_unaligned_storesi (dst, hi_reg));
+ }
}
src = next_consecutive_mem (src);
diff -urp a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
--- a/gcc/config/arm/arm.opt 2019-01-18 11:25:20.840179114 +0800
+++ b/gcc/config/arm/arm.opt 2019-01-18 11:28:51.744184666 +0800
@@ -274,6 +274,10 @@ masm-syntax-unified
Target Report Var(inline_asm_unified) Init(0) Save
Assume unified syntax for inline assembly code.
+mlsrd-be-adjust
+Target Report Var(flag_lsrd_be_adjust) Init(1)
+Adjust ldrd/strd splitting order when it's big-endian.
+
mpure-code
Target Report Var(target_pure_code) Init(0)
Do not allow constant data to be placed in code sections.

View File

@ -1,19 +0,0 @@
diff -urpN gcc-7.3.0-bak/gcc/config/arm/arm.c gcc-7.3.0/gcc/config/arm/arm.c
--- gcc-7.3.0-bak/gcc/config/arm/arm.c 2018-11-13 14:23:21.362347728 +0800
+++ gcc-7.3.0/gcc/config/arm/arm.c 2018-11-13 14:31:15.722360215 +0800
@@ -26853,7 +26853,14 @@ static bool
arm_array_mode_supported_p (machine_mode mode,
unsigned HOST_WIDE_INT nelems)
{
- if (TARGET_NEON
+
+
+ /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
+ for now, as the lane-swapping logic needs to be extended in the expanders.
+ See PR target/82518. */
+
+
+ if (TARGET_NEON && !BYTES_BIG_ENDIAN
&& (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
&& (nelems >= 2 && nelems <= 4))
return true;

View File

@ -1,25 +0,0 @@
diff -Nurp a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
--- a/gcc/config/arm/arm.md 2019-08-10 00:21:12.658523444 +0800
+++ b/gcc/config/arm/arm.md 2019-08-10 00:21:53.478521496 +0800
@@ -5337,7 +5337,9 @@
#
ldrh%?\\t%0, %1"
[(set_attr "type" "alu_shift_reg,load_byte")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,256")
+ (set_attr "neg_pool_range" "*,244")]
)
(define_insn "*arm_zero_extendhisi2_v6"
@@ -5348,7 +5350,9 @@
uxth%?\\t%0, %1
ldrh%?\\t%0, %1"
[(set_attr "predicable" "yes")
- (set_attr "type" "extend,load_byte")]
+ (set_attr "type" "extend,load_byte")
+ (set_attr "pool_range" "*,256")
+ (set_attr "neg_pool_range" "*,244")]
)
(define_insn "*arm_zero_extendhisi2addsi"

Binary file not shown.

View File

@ -1,21 +0,0 @@
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2018-11-16 18:02:11.000000000 +0800
+++ b/gcc/config/aarch64/aarch64.c 2018-11-16 18:07:39.000000000 +0800
@@ -6102,7 +6102,7 @@ aarch64_elf_asm_constructor (rtx symbol,
-Wformat-truncation false positive, use a larger size. */
char buf[23];
snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
- s = get_section (buf, SECTION_WRITE, NULL);
+ s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
switch_to_section (s);
assemble_align (POINTER_SIZE);
assemble_aligned_integer (POINTER_BYTES, symbol);
@@ -6122,7 +6122,7 @@ aarch64_elf_asm_destructor (rtx symbol,
-Wformat-truncation false positive, use a larger size. */
char buf[23];
snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
- s = get_section (buf, SECTION_WRITE, NULL);
+ s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
switch_to_section (s);
assemble_align (POINTER_SIZE);
assemble_aligned_integer (POINTER_BYTES, symbol);

View File

@ -0,0 +1,52 @@
diff -uprN a/gcc/testsuite/gcc.dg/pr91195.c b/gcc/testsuite/gcc.dg/pr91195.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr91195.c
@@ -0,0 +1,25 @@
+/* PR middle-end/91195 */
+/* { dg-do compile } */
+/* { dg-options "-Wmaybe-uninitialized -O2" } */
+
+int bar (char*);
+
+void
+foo (char *x, char *y)
+{
+ char *a[2];
+ int b = 0;
+
+ if (x)
+ a[b++] = x; /* { dg-bogus "may be used uninitialized in this function" } */
+ if (y)
+ a[b++] = y;
+
+ for (int j = 0; j < 4; j++)
+ switch (j)
+ {
+ case 0:
+ if (b == 0 || bar (a[0]))
+ break;
+ }
+}
diff -uprN a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -2269,6 +2269,10 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
new_stmt = gimple_build_assign (name, lhs);
gimple_set_location (new_stmt, locus);
+ lhs = unshare_expr (lhs);
+ /* Set TREE_NO_WARNING on the rhs of the load to avoid uninit
+ warnings. */
+ TREE_NO_WARNING (gimple_assign_rhs1 (new_stmt)) = 1;
gsi_insert_on_edge (e1, new_stmt);
/* 3) Create a PHI node at the join block, with one argument
@@ -2279,7 +2283,6 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
add_phi_arg (newphi, rhs, e0, locus);
add_phi_arg (newphi, name, e1, locus);
- lhs = unshare_expr (lhs);
new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));
/* 4) Insert that PHI node. */

div-opti.patch Normal file
View File

@ -0,0 +1,69 @@
From dbf3dc75888623e9d4bb7cc5e9c30caa9b24ffe7 Mon Sep 17 00:00:00 2001
From: Bu Le <bule1@huawei.com>
Date: Thu, 12 Mar 2020 22:39:12 +0000
Subject: [PATCH] aarch64: Add --params to control the number of recip steps
[PR94154]
-mlow-precision-div hard-coded the number of iterations to 2 for double
and 1 for float. This patch adds a --param to control the number.
2020-03-13 Bu Le <bule1@huawei.com>
gcc/
PR target/94154
* config/aarch64/aarch64.opt (-param=aarch64-float-recp-precision=)
(-param=aarch64-double-recp-precision=): New options.
* doc/invoke.texi: Document them.
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Use them
instead of hard-coding the choice of 1 for float and 2 for double.
---
gcc/ChangeLog | 9 +++++++++
gcc/config/aarch64/aarch64.c | 8 +++++---
gcc/config/aarch64/aarch64.opt | 9 +++++++++
gcc/doc/invoke.texi | 11 +++++++++++
4 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c320d5ba51d..2c81f86dd2a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -12911,10 +12911,12 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
/* Iterate over the series twice for SF and thrice for DF. */
int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
- /* Optionally iterate over the series once less for faster performance,
- while sacrificing the accuracy. */
+ /* Optionally iterate over the series less for faster performance,
+ while sacrificing the accuracy. The default is 2 for DF and 1 for SF. */
if (flag_mlow_precision_div)
- iterations--;
+ iterations = (GET_MODE_INNER (mode) == DFmode
+ ? PARAM_VALUE (PARAM_AARCH64_DOUBLE_RECP_PRECISION)
+ : PARAM_VALUE (PARAM_AARCH64_FLOAT_RECP_PRECISION));
/* Iterate over the series to calculate the approximate reciprocal. */
rtx xtmp = gen_reg_rtx (mode);
--- a/gcc/params.def 2020-04-15 17:24:31.984000000 +0800
+++ b/gcc/params.def 2020-04-15 16:59:21.752000000 +0800
@@ -1420,6 +1414,17 @@ DEFPARAM(PARAM_SSA_NAME_DEF_CHAIN_LIMIT,
"a value.",
512, 0, 0)
+DEFPARAM(PARAM_AARCH64_FLOAT_RECP_PRECISION,
+ "aarch64-float-recp-precision",
+ "The number of Newton iterations for calculating the reciprocal "
+ "for float type. ",
+ 1, 1, 5)
+
+DEFPARAM(PARAM_AARCH64_DOUBLE_RECP_PRECISION,
+ "aarch64-double-recp-precision",
+ "The number of Newton iterations for calculating the reciprocal "
+ "for double type.",
+ 2, 1, 5)
/*
Local variables:
--
2.18.2
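
As background (my own illustration, not part of the patch): FRECPE produces a low-precision reciprocal estimate and each FRECPS step performs one Newton-Raphson refinement, roughly doubling the number of correct bits, which is why one step is normally enough for float and two for double. A scalar C sketch of the refinement the new params count:

/* Illustrative only: ESTIMATE stands in for the FRECPE hardware
   approximation of 1/DEN; each loop iteration models one FRECPS step,
   i.e. one Newton-Raphson refinement x = x * (2 - den * x).  */
static double
refine_recip (double den, double estimate, int iterations)
{
  double x = estimate;
  for (int i = 0; i < iterations; i++)
    x = x * (2.0 - den * x);
  return x;
}

With the patch, when -mlow-precision-div is given the iteration count comes from --param aarch64-float-recp-precision (default 1) or --param aarch64-double-recp-precision (default 2) instead of being hard-coded.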

View File

@ -0,0 +1,19 @@
diff --git a/gcc/combine.c b/gcc/combine.c
index 4de759a8e6b..ce7aeecb5c2 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -5909,14 +5909,6 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, int in_dest,
mode, VOIDmode,
cond, cop1),
mode);
- else
- return gen_rtx_IF_THEN_ELSE (mode,
- simplify_gen_relational (cond_code,
- mode,
- VOIDmode,
- cond,
- cop1),
- true_rtx, false_rtx);
code = GET_CODE (x);
op0_mode = VOIDmode;

View File

@ -0,0 +1,460 @@
diff -urpN a/libquadmath/Makefile.in b/libquadmath/Makefile.in
--- a/libquadmath/Makefile.in 2020-03-31 09:51:59.000000000 +0800
+++ b/libquadmath/Makefile.in 2020-04-06 11:52:45.650793256 +0800
@@ -90,7 +90,7 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
-@BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES =
+#libquadmath_la_DEPENDENCIES =
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -146,68 +146,68 @@ am__installdirs = "$(DESTDIR)$(toolexecl
"$(DESTDIR)$(libsubincludedir)"
LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
am__dirstamp = $(am__leading_dot)dirstamp
-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/acoshq.lo math/fmodq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/acosq.lo math/frexpq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.lo math/asinhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/hypotq.lo math/remainderq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/asinq.lo math/rintq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/atan2q.lo math/isinfq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/roundq.lo math/atanhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/isnanq.lo math/scalblnq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/atanq.lo math/j0q.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/scalbnq.lo math/cbrtq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/j1q.lo math/signbitq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/ceilq.lo math/jnq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/sincos_table.lo math/complex.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/ldexpq.lo math/sincosq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/copysignq.lo math/lgammaq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/sincosq_kernel.lo math/coshq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/llroundq.lo math/sinhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/cosq.lo math/log10q.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/sinq.lo math/cosq_kernel.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/log1pq.lo math/sinq_kernel.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/erfq.lo math/logq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/sqrtq.lo math/expm1q.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/lroundq.lo math/tanhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/expq.lo math/modfq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/tanq.lo math/fabsq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/nanq.lo math/tgammaq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/finiteq.lo math/nextafterq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/truncq.lo math/floorq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/powq.lo math/fmaq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/logbq.lo math/exp2q.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/issignalingq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_neg.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_product.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/tgammaq_product.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/casinhq_kernel.lo math/cacoshq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/cacosq.lo math/casinhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/casinq.lo math/catanhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/catanq.lo math/cimagq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/conjq.lo math/cprojq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/crealq.lo math/fdimq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/fmaxq.lo math/fminq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/ilogbq.lo math/llrintq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/log2q.lo math/lrintq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/nearbyintq.lo math/remquoq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.lo math/cexpq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/clog10q.lo math/clogq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/csinq.lo math/csinhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/csqrtq.lo math/ctanq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/ctanhq.lo printf/addmul_1.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/add_n.lo printf/cmp.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/divrem.lo printf/flt1282mpn.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/fpioconst.lo printf/lshift.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/mul_1.lo printf/mul_n.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/mul.lo printf/printf_fphex.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/printf_fp.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/rshift.lo printf/submul_1.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/sub_n.lo strtod/strtoflt128.lo \
-@BUILD_LIBQUADMATH_TRUE@ strtod/mpn2flt128.lo \
-@BUILD_LIBQUADMATH_TRUE@ strtod/tens_in_limb.lo
+am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \
+ math/acoshq.lo math/fmodq.lo \
+ math/acosq.lo math/frexpq.lo \
+ math/rem_pio2q.lo math/asinhq.lo \
+ math/hypotq.lo math/remainderq.lo \
+ math/asinq.lo math/rintq.lo \
+ math/atan2q.lo math/isinfq.lo \
+ math/roundq.lo math/atanhq.lo \
+ math/isnanq.lo math/scalblnq.lo \
+ math/atanq.lo math/j0q.lo \
+ math/scalbnq.lo math/cbrtq.lo \
+ math/j1q.lo math/signbitq.lo \
+ math/ceilq.lo math/jnq.lo \
+ math/sincos_table.lo math/complex.lo \
+ math/ldexpq.lo math/sincosq.lo \
+ math/copysignq.lo math/lgammaq.lo \
+ math/sincosq_kernel.lo math/coshq.lo \
+ math/llroundq.lo math/sinhq.lo \
+ math/cosq.lo math/log10q.lo \
+ math/sinq.lo math/cosq_kernel.lo \
+ math/log1pq.lo math/sinq_kernel.lo \
+ math/erfq.lo math/logq.lo \
+ math/sqrtq.lo math/expm1q.lo \
+ math/lroundq.lo math/tanhq.lo \
+ math/expq.lo math/modfq.lo \
+ math/tanq.lo math/fabsq.lo \
+ math/nanq.lo math/tgammaq.lo \
+ math/finiteq.lo math/nextafterq.lo \
+ math/truncq.lo math/floorq.lo \
+ math/powq.lo math/fmaq.lo \
+ math/logbq.lo math/exp2q.lo \
+ math/issignalingq.lo \
+ math/lgammaq_neg.lo \
+ math/lgammaq_product.lo \
+ math/tanq_kernel.lo \
+ math/tgammaq_product.lo \
+ math/casinhq_kernel.lo math/cacoshq.lo \
+ math/cacosq.lo math/casinhq.lo \
+ math/casinq.lo math/catanhq.lo \
+ math/catanq.lo math/cimagq.lo \
+ math/conjq.lo math/cprojq.lo \
+ math/crealq.lo math/fdimq.lo \
+ math/fmaxq.lo math/fminq.lo \
+ math/ilogbq.lo math/llrintq.lo \
+ math/log2q.lo math/lrintq.lo \
+ math/nearbyintq.lo math/remquoq.lo \
+ math/ccoshq.lo math/cexpq.lo \
+ math/clog10q.lo math/clogq.lo \
+ math/csinq.lo math/csinhq.lo \
+ math/csqrtq.lo math/ctanq.lo \
+ math/ctanhq.lo printf/addmul_1.lo \
+ printf/add_n.lo printf/cmp.lo \
+ printf/divrem.lo printf/flt1282mpn.lo \
+ printf/fpioconst.lo printf/lshift.lo \
+ printf/mul_1.lo printf/mul_n.lo \
+ printf/mul.lo printf/printf_fphex.lo \
+ printf/printf_fp.lo \
+ printf/quadmath-printf.lo \
+ printf/rshift.lo printf/submul_1.lo \
+ printf/sub_n.lo strtod/strtoflt128.lo \
+ strtod/mpn2flt128.lo \
+ strtod/tens_in_limb.lo
libquadmath_la_OBJECTS = $(am_libquadmath_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -217,8 +217,8 @@ libquadmath_la_LINK = $(LIBTOOL) $(AM_V_
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
$(AM_CFLAGS) $(CFLAGS) $(libquadmath_la_LDFLAGS) $(LDFLAGS) -o \
$@
-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_rpath = -rpath \
-@BUILD_LIBQUADMATH_TRUE@ $(toolexeclibdir)
+am_libquadmath_la_rpath = -rpath \
+ $(toolexeclibdir)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
@@ -336,7 +336,7 @@ CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
-DEFS = @DEFS@
+DEFS = @DEFS@ -D__float128="long double"
DEPDIR = @DEPDIR@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
@@ -408,7 +408,7 @@ datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
-enable_shared = @enable_shared@
+enable_shared = yes
enable_static = @enable_static@
exec_prefix = @exec_prefix@
get_gcc_base_ver = @get_gcc_base_ver@
@@ -450,109 +450,109 @@ top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = foreign info-in-builddir
-@BUILD_LIBQUADMATH_TRUE@ACLOCAL_AMFLAGS = -I .. -I ../config
-@BUILD_LIBQUADMATH_TRUE@AM_CPPFLAGS = -I $(top_srcdir)/../include
-@BUILD_LIBQUADMATH_TRUE@AM_CFLAGS = $(XCFLAGS)
-@BUILD_LIBQUADMATH_TRUE@gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_arg =
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_dep =
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun
-@BUILD_LIBQUADMATH_TRUE@toolexeclib_LTLIBRARIES = libquadmath.la
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LIBADD =
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
-@BUILD_LIBQUADMATH_TRUE@ $(version_arg) $(lt_host_flags) -lm
-
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD)
-@BUILD_LIBQUADMATH_TRUE@nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h
-@BUILD_LIBQUADMATH_TRUE@libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_SOURCES = \
-@BUILD_LIBQUADMATH_TRUE@ math/x2y2m1q.c math/acoshq.c math/fmodq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/acosq.c math/frexpq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \
-@BUILD_LIBQUADMATH_TRUE@ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \
-@BUILD_LIBQUADMATH_TRUE@ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \
-@BUILD_LIBQUADMATH_TRUE@ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \
-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \
-@BUILD_LIBQUADMATH_TRUE@ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \
-@BUILD_LIBQUADMATH_TRUE@ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \
-@BUILD_LIBQUADMATH_TRUE@ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \
-@BUILD_LIBQUADMATH_TRUE@ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \
-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \
-@BUILD_LIBQUADMATH_TRUE@ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c
+ACLOCAL_AMFLAGS = -I .. -I ../config
+AM_CPPFLAGS = -I $(top_srcdir)/../include
+AM_CFLAGS = $(XCFLAGS)
+gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
+@LIBQUAD_USE_SYMVER_FALSE@version_arg =
+@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun
+@LIBQUAD_USE_SYMVER_FALSE@version_dep =
+@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun
+toolexeclib_LTLIBRARIES = libquadmath.la
+libquadmath_la_LIBADD =
+libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
+ $(version_arg) $(lt_host_flags) -lm
+
+libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD)
+nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h
+libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
+libquadmath_la_SOURCES = \
+ math/x2y2m1q.c math/acoshq.c math/fmodq.c \
+ math/acosq.c math/frexpq.c \
+ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \
+ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \
+ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \
+ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \
+ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \
+ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \
+ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \
+ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \
+ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \
+ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \
+ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \
+ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \
+ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \
+ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \
+ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \
+ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \
+ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \
+ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \
+ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \
+ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \
+ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \
+ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \
+ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \
+ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \
+ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c
# Work around what appears to be a GNU make bug handling MAKEFLAGS
# values defined in terms of make variables, as is the case for CC and
# friends when we are called from the top level Makefile.
-@BUILD_LIBQUADMATH_TRUE@AM_MAKEFLAGS = \
-@BUILD_LIBQUADMATH_TRUE@ "AR_FLAGS=$(AR_FLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS=$(CFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "CXXFLAGS=$(CXXFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL=$(INSTALL)" \
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_DATA=$(INSTALL_DATA)" \
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \
-@BUILD_LIBQUADMATH_TRUE@ "JC1FLAGS=$(JC1FLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "LDFLAGS=$(LDFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \
-@BUILD_LIBQUADMATH_TRUE@ "MAKE=$(MAKE)" \
-@BUILD_LIBQUADMATH_TRUE@ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \
-@BUILD_LIBQUADMATH_TRUE@ "SHELL=$(SHELL)" \
-@BUILD_LIBQUADMATH_TRUE@ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "exec_prefix=$(exec_prefix)" \
-@BUILD_LIBQUADMATH_TRUE@ "infodir=$(infodir)" \
-@BUILD_LIBQUADMATH_TRUE@ "libdir=$(libdir)" \
-@BUILD_LIBQUADMATH_TRUE@ "prefix=$(prefix)" \
-@BUILD_LIBQUADMATH_TRUE@ "includedir=$(includedir)" \
-@BUILD_LIBQUADMATH_TRUE@ "AR=$(AR)" \
-@BUILD_LIBQUADMATH_TRUE@ "AS=$(AS)" \
-@BUILD_LIBQUADMATH_TRUE@ "CC=$(CC)" \
-@BUILD_LIBQUADMATH_TRUE@ "CXX=$(CXX)" \
-@BUILD_LIBQUADMATH_TRUE@ "LD=$(LD)" \
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "NM=$(NM)" \
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \
-@BUILD_LIBQUADMATH_TRUE@ "RANLIB=$(RANLIB)" \
-@BUILD_LIBQUADMATH_TRUE@ "DESTDIR=$(DESTDIR)"
+AM_MAKEFLAGS = \
+ "AR_FLAGS=$(AR_FLAGS)" \
+ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \
+ "CFLAGS=$(CFLAGS)" \
+ "CXXFLAGS=$(CXXFLAGS)" \
+ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
+ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \
+ "INSTALL=$(INSTALL)" \
+ "INSTALL_DATA=$(INSTALL_DATA)" \
+ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \
+ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \
+ "JC1FLAGS=$(JC1FLAGS)" \
+ "LDFLAGS=$(LDFLAGS)" \
+ "LIBCFLAGS=$(LIBCFLAGS)" \
+ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \
+ "MAKE=$(MAKE)" \
+ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \
+ "PICFLAG=$(PICFLAG)" \
+ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \
+ "SHELL=$(SHELL)" \
+ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \
+ "exec_prefix=$(exec_prefix)" \
+ "infodir=$(infodir)" \
+ "libdir=$(libdir)" \
+ "prefix=$(prefix)" \
+ "includedir=$(includedir)" \
+ "AR=$(AR)" \
+ "AS=$(AS)" \
+ "CC=$(CC)" \
+ "CXX=$(CXX)" \
+ "LD=$(LD)" \
+ "LIBCFLAGS=$(LIBCFLAGS)" \
+ "NM=$(NM)" \
+ "PICFLAG=$(PICFLAG)" \
+ "RANLIB=$(RANLIB)" \
+ "DESTDIR=$(DESTDIR)"
# Subdir rules rely on $(FLAGS_TO_PASS)
-@BUILD_LIBQUADMATH_TRUE@FLAGS_TO_PASS = $(AM_MAKEFLAGS)
-@BUILD_LIBQUADMATH_TRUE@MAKEOVERRIDES =
-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_FALSE@STAMP_GENINSRC =
+FLAGS_TO_PASS = $(AM_MAKEFLAGS)
+MAKEOVERRIDES =
+@GENINSRC_FALSE@STAMP_GENINSRC =
# AM_CONDITIONAL on configure option --generated-files-in-srcdir
-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc
-@BUILD_LIBQUADMATH_TRUE@ALL_LOCAL_DEPS = $(STAMP_GENINSRC)
-@BUILD_INFO_FALSE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO =
+@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc
+ALL_LOCAL_DEPS = $(STAMP_GENINSRC)
+@BUILD_INFO_FALSE@STAMP_BUILD_INFO =
# AM_CONDITIONAL on configure check ACX_CHECK_PROG_VER([MAKEINFO])
-@BUILD_INFO_TRUE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = stamp-build-info
-@BUILD_LIBQUADMATH_TRUE@CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO)
-@BUILD_LIBQUADMATH_TRUE@MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info
+@BUILD_INFO_TRUE@STAMP_BUILD_INFO = stamp-build-info
+CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO)
+MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info
# Automake Documentation:
# If your package has Texinfo files in many directories, you can use the
@@ -563,8 +563,8 @@ TEXINFO_TEX = ../gcc/doc/include/texinfo
# Defines info, dvi, pdf and html targets
MAKEINFOFLAGS = -I $(srcdir)/../gcc/doc/include
-@BUILD_LIBQUADMATH_FALSE@info_TEXINFOS =
-@BUILD_LIBQUADMATH_TRUE@info_TEXINFOS = libquadmath.texi
+info_TEXINFOS =
+info_TEXINFOS = libquadmath.texi
libquadmath_TEXINFOS = libquadmath-vers.texi
MULTISRCTOP =
MULTIBUILDTOP =
@@ -1186,6 +1186,7 @@ distclean-tags:
-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
check-am: all-am
check: check-am
+#all-local
all-am: Makefile $(INFO_DEPS) $(LTLIBRARIES) $(HEADERS) config.h \
all-local
installdirs:
@@ -1424,22 +1425,22 @@ uninstall-am: uninstall-dvi-am uninstall
.PRECIOUS: Makefile
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD)
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1)
-
-@BUILD_LIBQUADMATH_TRUE@stamp-geninsrc: libquadmath.info
-@BUILD_LIBQUADMATH_TRUE@ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info
-@BUILD_LIBQUADMATH_TRUE@ @touch $@
-
-@BUILD_LIBQUADMATH_TRUE@stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS)
-@BUILD_LIBQUADMATH_TRUE@ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi
-@BUILD_LIBQUADMATH_TRUE@ @touch $@
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD)
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1)
+
+stamp-geninsrc: libquadmath.info
+ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info
+ @touch $@
+
+stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS)
+ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi
+ @touch $@
all-local: $(ALL_LOCAL_DEPS)
diff -Nurp a/libquadmath/quadmath.h b/libquadmath/quadmath.h
--- a/libquadmath/quadmath.h 2020-03-31 09:51:59.000000000 +0800
+++ b/libquadmath/quadmath.h 2020-04-06 11:52:45.650793256 +0800
@@ -27,6 +27,9 @@ Boston, MA 02110-1301, USA. */
extern "C" {
#endif
+#ifdef AARCH64_QUADMATH
+typedef long double __float128;
+#endif
/* Define the complex type corresponding to __float128
("_Complex __float128" is not allowed) */
#if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
diff -Nurp a/libquadmath/quadmath.h b/libquadmath/quadmath.h
--- a/libquadmath/quadmath.h 2015-08-09 16:46:52.541904000 +0800
+++ b/libquadmath/quadmath.h 2019-08-17 18:25:51.923399149 +0800
@@ -154,10 +154,9 @@ extern int quadmath_snprintf (char *str,
#define FLT128_MAX_10_EXP 4932
-#define HUGE_VALQ __builtin_huge_valq()
/* The following alternative is valid, but brings the warning:
(floating constant exceeds range of __float128) */
-/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */
+ #define HUGE_VALQ (__extension__ 0x1.0p32767Q)
#define M_Eq 2.718281828459045235360287471352662498Q /* e */
#define M_LOG2Eq 1.442695040888963407359924681001892137Q /* log_2 e */
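
For context, a minimal usage sketch of what this change enables (my own example, not part of the patch): with __float128 mapped onto the IEEE binary128 long double, aarch64 programs can call the documented libquadmath entry points. It assumes the patched quadmath.h is consumed with AARCH64_QUADMATH defined and the program is linked with -lquadmath.

/* Hypothetical example, not from the patch: prints sqrt(2) in quad
   precision using documented libquadmath functions.  */
#define AARCH64_QUADMATH 1
#include <stdio.h>
#include <quadmath.h>

int
main (void)
{
  __float128 x = strtoflt128 ("2.0", NULL);
  char buf[64];

  quadmath_snprintf (buf, sizeof buf, "%.33Qg", sqrtq (x));
  puts (buf);
  return 0;
}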

View File

@ -0,0 +1,32 @@
diff -uprN a/gcc/testsuite/gcc.dg/pr94574.c b/gcc/testsuite/gcc.dg/pr94574.c
--- a/gcc/testsuite/gcc.dg/pr94574.c 1970-01-01 00:00:00.000000000 +0000
+++ b/gcc/testsuite/gcc.dg/pr94574.c 2020-04-15 21:08:48.972000000 +0000
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef unsigned int v4si __attribute__((vector_size(16)));
+typedef unsigned int v2si __attribute__((vector_size(8)));
+
+/* The aliasing is somewhat dubious here, but it must compile. */
+
+v2si
+foo (v4si v)
+{
+ v2si res;
+ *(v4si *) &res = v;
+ return res;
+}
diff -uprN a/gcc/tree-ssa.c b/gcc/tree-ssa.c
--- a/gcc/tree-ssa.c 2020-03-31 01:51:30.000000000 +0000
+++ b/gcc/tree-ssa.c 2020-04-15 21:26:09.828000000 +0000
@@ -1528,7 +1528,9 @@ non_rewritable_lvalue_p (tree lhs)
&& known_gt (wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))),
mem_ref_offset (lhs))
&& multiple_of_p (sizetype, TREE_OPERAND (lhs, 1),
- TYPE_SIZE_UNIT (TREE_TYPE (lhs))))
+ TYPE_SIZE_UNIT (TREE_TYPE (lhs)))
+ && known_ge (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (decl))),
+ wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (lhs)))))
return false;
}

View File

@ -0,0 +1,65 @@
diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr94398.c b/gcc/testsuite/gcc.target/aarch64/pr94398.c
--- a/gcc/testsuite/gcc.target/aarch64/pr94398.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.target/aarch64/pr94398.c 2020-04-17 17:15:58.176000000 +0800
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-vectorize -funsafe-math-optimizations -march=armv8.2-a+sve -mstrict-align" } */
+
+float
+foo(long n, float *x, int inc_x,
+ float *y, int inc_y)
+{
+ float dot = 0.0;
+ int ix = 0, iy = 0;
+
+ if (n < 0) {
+ return dot;
+ }
+
+ int i = 0;
+ while (i < n) {
+ dot += y[iy] * x[ix];
+ ix += inc_x;
+ iy += inc_y;
+ i++;
+ }
+
+ return dot;
+}
diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
--- a/gcc/tree-vect-stmts.c 2020-04-17 17:10:14.796000000 +0800
+++ b/gcc/tree-vect-stmts.c 2020-04-17 17:15:08.611850850 +0800
@@ -7025,8 +7025,14 @@ vectorizable_store (stmt_vec_info stmt_i
auto_vec<tree> dr_chain (group_size);
oprnds.create (group_size);
- alignment_support_scheme
- = vect_supportable_dr_alignment (first_dr_info, false);
+ /* Gather-scatter accesses perform only component accesses, alignment
+ is irrelevant for them. */
+ if (memory_access_type == VMAT_GATHER_SCATTER)
+ alignment_support_scheme = dr_unaligned_supported;
+ else
+ alignment_support_scheme
+ = vect_supportable_dr_alignment (first_dr_info, false);
+
gcc_assert (alignment_support_scheme);
vec_loop_masks *loop_masks
= (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
@@ -8162,8 +8168,14 @@ vectorizable_load (stmt_vec_info stmt_in
ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
}
- alignment_support_scheme
- = vect_supportable_dr_alignment (first_dr_info, false);
+ /* Gather-scatter accesses perform only component accesses, alignment
+ is irrelevant for them. */
+ if (memory_access_type == VMAT_GATHER_SCATTER)
+ alignment_support_scheme = dr_unaligned_supported;
+ else
+ alignment_support_scheme
+ = vect_supportable_dr_alignment (first_dr_info, false);
+
gcc_assert (alignment_support_scheme);
vec_loop_masks *loop_masks
= (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)

View File

@ -0,0 +1,81 @@
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b0cbb6e2d55..58d38f74bde 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,8 +2739,21 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
}
case SYMBOL_TINY_GOT:
- emit_insn (gen_ldr_got_tiny (dest, imm));
- return;
+ {
+ rtx insn;
+ machine_mode mode = GET_MODE (dest);
+
+ if (mode == ptr_mode)
+ insn = gen_ldr_got_tiny (mode, dest, imm);
+ else
+ {
+ gcc_assert (mode == Pmode);
+ insn = gen_ldr_got_tiny_sidi (dest, imm);
+ }
+
+ emit_insn (insn);
+ return;
+ }
case SYMBOL_TINY_TLSIE:
{
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7ad4e918578..c7c4d1dd519 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6766,13 +6766,23 @@
[(set_attr "type" "load_4")]
)
-(define_insn "ldr_got_tiny"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")]
- UNSPEC_GOTTINYPIC))]
+(define_insn "@ldr_got_tiny_<mode>"
+ [(set (match_operand:PTR 0 "register_operand" "=r")
+ (unspec:PTR [(match_operand:PTR 1 "aarch64_valid_symref" "S")]
+ UNSPEC_GOTTINYPIC))]
""
- "ldr\\t%0, %L1"
- [(set_attr "type" "load_8")]
+ "ldr\t%<w>0, %L1"
+ [(set_attr "type" "load_<ldst_sz>")]
+)
+
+(define_insn "ldr_got_tiny_sidi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (unspec:SI [(match_operand:DI 1 "aarch64_valid_symref" "S")]
+ UNSPEC_GOTTINYPIC)))]
+ "TARGET_ILP32"
+ "ldr\t%w0, %L1"
+ [(set_attr "type" "load_4")]
)
(define_insn "aarch64_load_tp_hard"
diff --git a/gcc/testsuite/gcc.target/aarch64/pr94201.c b/gcc/testsuite/gcc.target/aarch64/pr94201.c
new file mode 100644
index 00000000000..69176169186
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr94201.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmodel=tiny -mabi=ilp32 -fPIC" } */
+
+extern int bar (void *);
+extern long long a;
+
+int
+foo (void)
+{
+ a = 1;
+ return bar ((void *)bar);
+}
+

fix-cost-of-plus.patch Normal file
View File

@ -0,0 +1,13 @@
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 56a4a47db73..71d44de1d0a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -10753,7 +10753,7 @@ cost_plus:
}
if (GET_MODE_CLASS (mode) == MODE_INT
- && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
+ && (aarch64_plus_immediate (op1, mode)
|| aarch64_sve_addvl_addpl_immediate (op1, mode)))
{
*cost += rtx_cost (op0, mode, PLUS, 0, speed);

View File

@ -1,155 +0,0 @@
diff -N -urp a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
--- a/gcc/config/i386/sse.md 2019-10-30 10:02:45.894920908 +0800
+++ b/gcc/config/i386/sse.md 2019-10-30 10:17:39.682887612 +0800
@@ -16012,9 +16012,11 @@
switch (INTVAL (operands[4]))
{
case 3:
- return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+ gas changed what it requires incompatibly. */
+ return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
case 2:
- return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
default:
gcc_unreachable ();
}
@@ -16057,9 +16059,11 @@
switch (INTVAL (operands[4]))
{
case 3:
- return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+ gas changed what it requires incompatibly. */
+ return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
case 2:
- return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
default:
gcc_unreachable ();
}
@@ -16103,10 +16107,12 @@
{
case 3:
case 7:
- return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+ gas changed what it requires incompatibly. */
+ return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
case 2:
case 6:
- return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
default:
gcc_unreachable ();
}
@@ -16150,10 +16156,12 @@
{
case 3:
case 7:
- return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+ gas changed what it requires incompatibly. */
+ return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
case 2:
case 6:
- return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
default:
gcc_unreachable ();
}
@@ -19153,12 +19161,6 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-;; Memory operand override for -masm=intel of the v*gatherq* patterns.
-(define_mode_attr gatherq_mode
- [(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x")
- (V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t")
- (V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")])
-
(define_expand "<avx512>_gathersi<mode>"
[(parallel [(set (match_operand:VI48F 0 "register_operand")
(unspec:VI48F
@@ -19192,7 +19194,9 @@
UNSPEC_GATHER))
(clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
"TARGET_AVX512F"
- "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}"
+;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -19211,7 +19215,9 @@
UNSPEC_GATHER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
"TARGET_AVX512F"
- "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}"
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -19250,9 +19256,9 @@
UNSPEC_GATHER))
(clobber (match_scratch:QI 2 "=&Yk"))]
"TARGET_AVX512F"
-{
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}";
-}
+;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+ "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -19272,14 +19278,16 @@
(clobber (match_scratch:QI 1 "=&Yk"))]
"TARGET_AVX512F"
{
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+ gas changed what it requires incompatibly. */
if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
{
if (<MODE_SIZE> != 64)
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
else
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
}
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -19316,7 +19324,9 @@
UNSPEC_SCATTER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
"TARGET_AVX512F"
- "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+ "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -19352,11 +19362,9 @@
UNSPEC_SCATTER))
(clobber (match_scratch:QI 1 "=&Yk"))]
"TARGET_AVX512F"
-{
- if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
- return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}";
- return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}";
-}
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+ "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])

View File

@ -0,0 +1,12 @@
diff -Nurp a/gcc/lra-assigns.c b/gcc/lra-assigns.c
--- a/gcc/lra-assigns.c 2020-04-17 16:27:46.192000000 +0800
+++ b/gcc/lra-assigns.c 2020-04-17 16:29:37.125688580 +0800
@@ -968,6 +968,8 @@ spill_for (int regno, bitmap spilled_pse
bitmap_clear (&spill_pseudos_bitmap);
for (j = hard_regno_nregs (hard_regno, mode) - 1; j >= 0; j--)
{
+ if (hard_regno + j >= FIRST_PSEUDO_REGISTER)
+ break;
if (try_hard_reg_pseudos_check[hard_regno + j] != curr_pseudo_check)
continue;
lra_assert (!bitmap_empty_p (&try_hard_reg_pseudos[hard_regno + j]));

File diff suppressed because it is too large

View File

@ -1,905 +0,0 @@
diff -N -urp a/gcc/Makefile.in b/gcc/Makefile.in
--- a/gcc/Makefile.in 2018-11-07 11:37:24.615223860 +0800
+++ b/gcc/Makefile.in 2018-11-07 11:38:26.155223860 +0800
@@ -1292,6 +1292,7 @@ OBJS = \
gimple-iterator.o \
gimple-fold.o \
gimple-laddress.o \
+ gimple-loop-jam.o \
gimple-low.o \
gimple-pretty-print.o \
gimple-ssa-backprop.o \
diff -N -urp a/gcc/cfgloop.c b/gcc/cfgloop.c
--- a/gcc/cfgloop.c 2018-11-07 11:37:24.947223860 +0800
+++ b/gcc/cfgloop.c 2018-11-07 11:38:26.155223860 +0800
@@ -296,13 +296,25 @@ establish_preds (struct loop *loop, stru
/* Add LOOP to the loop hierarchy tree where FATHER is father of the
added loop. If LOOP has some children, take care of that their
- pred field will be initialized correctly. */
+ pred field will be initialized correctly. If AFTER is non-null
+ then it's expected it's a pointer into FATHERs inner sibling
+ list and LOOP is added behind AFTER, otherwise it's added in front
+ of FATHERs siblings. */
void
-flow_loop_tree_node_add (struct loop *father, struct loop *loop)
+flow_loop_tree_node_add (struct loop *father, struct loop *loop,
+ struct loop *after)
{
- loop->next = father->inner;
- father->inner = loop;
+ if (after)
+ {
+ loop->next = after->next;
+ after->next = loop;
+ }
+ else
+ {
+ loop->next = father->inner;
+ father->inner = loop;
+ }
establish_preds (loop, father);
}
diff -N -urp a/gcc/cfgloop.h b/gcc/cfgloop.h
--- a/gcc/cfgloop.h 2018-11-07 11:37:24.331223860 +0800
+++ b/gcc/cfgloop.h 2018-11-07 11:38:26.155223860 +0800
@@ -324,7 +324,8 @@ void record_loop_exits (void);
void rescan_loop_exit (edge, bool, bool);
/* Loop data structure manipulation/querying. */
-extern void flow_loop_tree_node_add (struct loop *, struct loop *);
+extern void flow_loop_tree_node_add (struct loop *, struct loop *,
+ struct loop * = NULL);
extern void flow_loop_tree_node_remove (struct loop *);
extern bool flow_loop_nested_p (const struct loop *, const struct loop *);
extern bool flow_bb_inside_loop_p (const struct loop *, const_basic_block);
diff -N -urp a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
--- a/gcc/cfgloopmanip.c 2018-11-07 11:37:24.847223860 +0800
+++ b/gcc/cfgloopmanip.c 2018-11-07 11:38:26.155223860 +0800
@@ -1026,9 +1026,11 @@ copy_loop_info (struct loop *loop, struc
}
/* Copies copy of LOOP as subloop of TARGET loop, placing newly
- created loop into loops structure. */
+ created loop into loops structure. If AFTER is non-null
+ the new loop is added at AFTER->next, otherwise in front of TARGETs
+ sibling list. */
struct loop *
-duplicate_loop (struct loop *loop, struct loop *target)
+duplicate_loop (struct loop *loop, struct loop *target, struct loop *after)
{
struct loop *cloop;
cloop = alloc_loop ();
@@ -1040,36 +1042,46 @@ duplicate_loop (struct loop *loop, struc
set_loop_copy (loop, cloop);
/* Add it to target. */
- flow_loop_tree_node_add (target, cloop);
+ flow_loop_tree_node_add (target, cloop, after);
return cloop;
}
/* Copies structure of subloops of LOOP into TARGET loop, placing
- newly created loops into loop tree. */
+ newly created loops into loop tree at the end of TARGETs sibling
+ list in the original order. */
void
duplicate_subloops (struct loop *loop, struct loop *target)
{
- struct loop *aloop, *cloop;
+ struct loop *aloop, *cloop, *tail;
+ for (tail = target->inner; tail && tail->next; tail = tail->next)
+ ;
for (aloop = loop->inner; aloop; aloop = aloop->next)
{
- cloop = duplicate_loop (aloop, target);
+ cloop = duplicate_loop (aloop, target, tail);
+ tail = cloop;
+ gcc_assert (!tail->next);
duplicate_subloops (aloop, cloop);
}
}
/* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
- into TARGET loop, placing newly created loops into loop tree. */
+ into TARGET loop, placing newly created loops into loop tree adding
+ them to TARGETs sibling list at the end in order. */
static void
copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
{
- struct loop *aloop;
+ struct loop *aloop, *tail;
int i;
+ for (tail = target->inner; tail && tail->next; tail = tail->next)
+ ;
for (i = 0; i < n; i++)
{
- aloop = duplicate_loop (copied_loops[i], target);
+ aloop = duplicate_loop (copied_loops[i], target, tail);
+ tail = aloop;
+ gcc_assert (!tail->next);
duplicate_subloops (copied_loops[i], aloop);
}
}
@@ -1133,14 +1145,15 @@ set_zero_probability (edge e)
}
/* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
- loop structure and dominators. E's destination must be LOOP header for
- this to work, i.e. it must be entry or latch edge of this loop; these are
- unique, as the loops must have preheaders for this function to work
- correctly (in case E is latch, the function unrolls the loop, if E is entry
- edge, it peels the loop). Store edges created by copying ORIG edge from
- copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
- original LOOP body, the other copies are numbered in order given by control
- flow through them) into TO_REMOVE array. Returns false if duplication is
+ loop structure and dominators (order of inner subloops is retained).
+ E's destination must be LOOP header for this to work, i.e. it must be entry
+ or latch edge of this loop; these are unique, as the loops must have
+ preheaders for this function to work correctly (in case E is latch, the
+ function unrolls the loop, if E is entry edge, it peels the loop). Store
+ edges created by copying ORIG edge from copies corresponding to set bits in
+ WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
+ are numbered in order given by control flow through them) into TO_REMOVE
+ array. Returns false if duplication is
impossible. */
bool
diff -N -urp a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h
--- a/gcc/cfgloopmanip.h 2018-11-07 11:37:24.939223860 +0800
+++ b/gcc/cfgloopmanip.h 2018-11-07 11:38:26.155223860 +0800
@@ -47,7 +47,8 @@ extern struct loop *loopify (edge, edge,
unsigned, unsigned);
extern void unloop (struct loop *, bool *, bitmap);
extern void copy_loop_info (struct loop *loop, struct loop *target);
-extern struct loop * duplicate_loop (struct loop *, struct loop *);
+extern struct loop * duplicate_loop (struct loop *, struct loop *,
+ struct loop * = NULL);
extern void duplicate_subloops (struct loop *, struct loop *);
extern bool can_duplicate_loop_p (const struct loop *loop);
extern bool duplicate_loop_to_header_edge (struct loop *, edge,
diff -N -urp a/gcc/common.opt b/gcc/common.opt
--- a/gcc/common.opt 2018-11-07 11:37:24.859223860 +0800
+++ b/gcc/common.opt 2018-11-07 11:38:26.159223860 +0800
@@ -1496,8 +1496,8 @@ Common Alias(floop-nest-optimize)
Enable loop nest transforms. Same as -floop-nest-optimize.
floop-unroll-and-jam
-Common Alias(floop-nest-optimize)
-Enable loop nest transforms. Same as -floop-nest-optimize.
+Common Report Var(flag_unroll_jam) Optimization
+Perform unroll-and-jam on loops.
fgnu-tm
Common Report Var(flag_tm)
diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
--- a/gcc/doc/invoke.texi 2018-11-07 11:37:24.915223860 +0800
+++ b/gcc/doc/invoke.texi 2018-11-07 11:39:49.031223860 +0800
@@ -7120,7 +7120,8 @@ Optimize yet more. @option{-O3} turns o
by @option{-O2} and also turns on the @option{-finline-functions},
@option{-funswitch-loops}, @option{-fpredictive-commoning},
@option{-fgcse-after-reload}, @option{-ftree-loop-vectorize},
-@option{-ftree-loop-distribute-patterns}, @option{-fsplit-paths}
+@option{-ftree-loop-distribute-patterns}, @option{-fsplit-paths},
+@option{-floop-unroll-and-jam},
@option{-ftree-slp-vectorize}, @option{-fvect-cost-model},
@option{-ftree-partial-pre}, @option{-fpeel-loops}
and @option{-fipa-cp-clone} options.
@@ -8226,12 +8227,10 @@ at @option{-O} and higher.
@itemx -floop-interchange
@itemx -floop-strip-mine
@itemx -floop-block
-@itemx -floop-unroll-and-jam
@opindex ftree-loop-linear
@opindex floop-interchange
@opindex floop-strip-mine
@opindex floop-block
-@opindex floop-unroll-and-jam
Perform loop nest optimizations. Same as
@option{-floop-nest-optimize}. To use this code transformation, GCC has
to be configured with @option{--with-isl} to enable the Graphite loop
@@ -8323,6 +8322,12 @@ ENDDO
@end smallexample
and the initialization loop is transformed into a call to memset zero.
+@item -floop-unroll-and-jam
+@opindex floop-unroll-and-jam
+Apply unroll and jam transformations on feasible loops. In a loop
+nest this unrolls the outer loop by some factor and fuses the resulting
+multiple inner loops. This flag is enabled by default at @option{-O3}.
+
@item -ftree-loop-im
@opindex ftree-loop-im
Perform loop invariant motion on trees. This pass moves only invariants that
@@ -10353,13 +10358,13 @@ loop in the loop nest by a given number
length can be changed using the @option{loop-block-tile-size}
parameter. The default value is 51 iterations.
-@item loop-unroll-jam-size
-Specify the unroll factor for the @option{-floop-unroll-and-jam} option. The
-default value is 4.
-
-@item loop-unroll-jam-depth
-Specify the dimension to be unrolled (counting from the most inner loop)
-for the @option{-floop-unroll-and-jam}. The default value is 2.
+@item unroll-jam-min-percent
+The minimum percentage of memory references that must be optimized
+away for the unroll-and-jam transformation to be considered profitable.
+
+@item unroll-jam-max-unroll
+The maximum number of times the outer loop should be unrolled by
+the unroll-and-jam transformation.
@item ipa-cp-value-list-size
IPA-CP attempts to track all possible values and types passed to a function's
diff -N -urp a/gcc/gimple-loop-jam.c b/gcc/gimple-loop-jam.c
--- a/gcc/gimple-loop-jam.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/gimple-loop-jam.c 2018-11-07 11:38:26.167223860 +0800
@@ -0,0 +1,598 @@
+/* Loop unroll-and-jam.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "params.h"
+#include "tree-pass.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "ssa.h"
+#include "fold-const.h"
+#include "tree-cfg.h"
+#include "tree-ssa.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
+#include "tree-ssa-loop-manip.h"
+#include "cfgloop.h"
+#include "tree-scalar-evolution.h"
+#include "gimple-iterator.h"
+#include "cfghooks.h"
+#include "tree-data-ref.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-vectorizer.h"
+
+/* Unroll and Jam transformation
+
+ This is a combination of two transformations, where the second
+ is not always valid. It is applicable if a loop nest has redundancies
+ over the iterations of an outer loop that it does not have within
+ the inner loop.
+
+ Given this nest:
+ for (i) {
+ for (j) {
+ B (i,j)
+ }
+ }
+
+ first unroll:
+ for (i by 2) {
+ for (j) {
+ B (i,j)
+ }
+ for (j) {
+ B (i+1,j)
+ }
+ }
+
+ then fuse the two adjacent inner loops resulting from that:
+ for (i by 2) {
+ for (j) {
+ B (i,j)
+ B (i+1,j)
+ }
+ }
+
+ As the order of evaluations of the body B changes this is valid
+ only in certain situations: all distance vectors need to be forward.
+ Additionally, if there are induction variables other than just
+ a counting control IV (j above) we can also deal with some situations.
+
+ The validity is checked by unroll_jam_possible_p, and the data-dep
+ testing below.
+
+ A trivial example where the fusion is wrong would be when
+ B (i,j) == x[j-1] = x[j];
+ for (i by 2) {
+ for (j) {
+ x[j-1] = x[j];
+ }
+ for (j) {
+ x[j-1] = x[j];
+ }
+ } effect: move content to front by two elements
+ -->
+ for (i by 2) {
+ for (j) {
+ x[j-1] = x[j];
+ x[j-1] = x[j];
+ }
+ } effect: move content to front by one element
+*/
+
+/* Modify the loop tree for the fact that all code once belonging
+ to the OLD loop or the outer loop of OLD is now inside LOOP. */
+
+static void
+merge_loop_tree (struct loop *loop, struct loop *old)
+{
+ basic_block *bbs;
+ int i, n;
+ struct loop *subloop;
+ edge e;
+ edge_iterator ei;
+
+ /* Find its nodes. */
+ bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+ n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
+
+ for (i = 0; i < n; i++)
+ {
+ /* If the block was direct child of OLD loop it's now part
+ of LOOP. If it was outside OLD, then it moved into LOOP
+ as well. This avoids changing the loop father for BBs
+ in inner loops of OLD. */
+ if (bbs[i]->loop_father == old
+ || loop_depth (bbs[i]->loop_father) < loop_depth (old))
+ {
+ remove_bb_from_loops (bbs[i]);
+ add_bb_to_loop (bbs[i], loop);
+ continue;
+ }
+
+ /* If we find a direct subloop of OLD, move it to LOOP. */
+ subloop = bbs[i]->loop_father;
+ if (loop_outer (subloop) == old && subloop->header == bbs[i])
+ {
+ flow_loop_tree_node_remove (subloop);
+ flow_loop_tree_node_add (loop, subloop);
+ }
+ }
+
+ /* Update the information about loop exit edges. */
+ for (i = 0; i < n; i++)
+ {
+ FOR_EACH_EDGE (e, ei, bbs[i]->succs)
+ {
+ rescan_loop_exit (e, false, false);
+ }
+ }
+
+ loop->num_nodes = n;
+
+ free (bbs);
+}
+
+/* BB is part of the outer loop of an unroll-and-jam situation.
+ Check if any statements therein would prevent the transformation. */
+
+static bool
+bb_prevents_fusion_p (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+ /* BB is duplicated by outer unrolling and then the first N-1 copies
+ move into the body of the fused inner loop. If BB exits the outer loop
+ the last copy still does so, and the first N-1 copies are cancelled
+ by loop unrolling, so also after fusion it's the exit block.
+ But there might be other reasons that prevent fusion:
+ * stores or unknown side-effects prevent fusion
+ * loads don't
+ * computations into SSA names: these aren't problematic. Their
+ result will be unused on the exit edges of the first N-1 copies
+ (those aren't taken after unrolling). If they are used on the
+ other edge (the one leading to the outer latch block) they are
+ loop-carried (on the outer loop) and the Nth copy of BB will
+ compute them again (i.e. the first N-1 copies will be dead). */
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *g = gsi_stmt (gsi);
+ if (gimple_vdef (g) || gimple_has_side_effects (g))
+ return true;
+ }
+ return false;
+}
+
+/* Given an inner loop LOOP (of some OUTER loop) determine if
+ we can safely fuse copies of it (generated by outer unrolling).
+ If so return true, otherwise return false. */
+
+static bool
+unroll_jam_possible_p (struct loop *outer, struct loop *loop)
+{
+ basic_block *bbs;
+ int i, n;
+ struct tree_niter_desc niter;
+
+ /* When fusing the loops we skip the latch block
+ of the first one, so it mustn't have any effects to
+ preserve. */
+ if (!empty_block_p (loop->latch))
+ return false;
+
+ if (!single_exit (loop))
+ return false;
+
+ /* We need a perfect nest. Quick check for adjacent inner loops. */
+ if (outer->inner != loop || loop->next)
+ return false;
+
+ /* Prevent head-controlled inner loops, which we usually have.
+ The guard block would need to be accepted
+ (invariant condition either entering or skipping the loop),
+ without also accepting arbitrary control flow. When unswitching
+ ran before us (as with -O3) this won't be a problem because its
+ outer loop unswitching will have moved out the invariant condition.
+
+ If we do that we need to extend fuse_loops () to cope with this
+ by threading through the (still invariant) copied condition
+ between the two loop copies. */
+ if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header))
+ return false;
+
+ /* The number of iterations of the inner loop must be loop invariant
+ with respect to the outer loop. */
+ if (!number_of_iterations_exit (loop, single_exit (loop), &niter,
+ false, true)
+ || niter.cmp == ERROR_MARK
+ || !integer_zerop (niter.may_be_zero)
+ || !expr_invariant_in_loop_p (outer, niter.niter))
+ return false;
+
+ /* If the inner loop produces any values that are used inside the
+ outer loop (except the virtual op) then it can flow
+ back (perhaps indirectly) into the inner loop. This prevents
+ fusion: without fusion the value at the last iteration is used,
+ with fusion the value after the initial iteration is used.
+
+ If all uses are outside the outer loop this doesn't prevent fusion;
+ the value of the last iteration is still used (and the values from
+ all intermediate iterations are dead). */
+ gphi_iterator psi;
+ for (psi = gsi_start_phis (single_exit (loop)->dest);
+ !gsi_end_p (psi); gsi_next (&psi))
+ {
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ tree op = gimple_phi_result (psi.phi ());
+ if (virtual_operand_p (op))
+ continue;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (!is_gimple_debug (use_stmt)
+ && flow_bb_inside_loop_p (outer, gimple_bb (use_stmt)))
+ return false;
+ }
+ }
+
+ /* And check blocks belonging to just outer loop. */
+ bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+ n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun));
+
+ for (i = 0; i < n; i++)
+ if (bbs[i]->loop_father == outer && bb_prevents_fusion_p (bbs[i]))
+ break;
+ free (bbs);
+ if (i != n)
+ return false;
+
+ /* For now we can safely fuse copies of LOOP only if all
+ loop carried variables are inductions (or the virtual op).
+
+ We could handle reductions as well (the initial value in the second
+ body would be the after-iter value of the first body) if it's over
+ an associative and commutative operation. We wouldn't
+ be able to handle unknown cycles. */
+ for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
+ {
+ affine_iv iv;
+ tree op = gimple_phi_result (psi.phi ());
+
+ if (virtual_operand_p (op))
+ continue;
+ if (!simple_iv (loop, loop, op, &iv, true))
+ return false;
+ /* The inductions must be regular, loop invariant step and initial
+ value. */
+ if (!expr_invariant_in_loop_p (outer, iv.step)
+ || !expr_invariant_in_loop_p (outer, iv.base))
+ return false;
+ /* XXX With more effort we could also be able to deal with inductions
+ where the initial value is loop variant but a simple IV in the
+ outer loop. The initial value for the second body would be
+ the original initial value plus iv.base.step. The next value
+ for the fused loop would be the original next value of the first
+ copy, _not_ the next value of the second body. */
+ }
+
+ return true;
+}
+
+/* Fuse LOOP with all further neighbors. The loops are expected to
+ be in appropriate form. */
+
+static void
+fuse_loops (struct loop *loop)
+{
+ struct loop *next = loop->next;
+
+ while (next)
+ {
+ edge e;
+
+ remove_branch (single_pred_edge (loop->latch));
+ /* Make delete_basic_block not fiddle with the loop structure. */
+ basic_block oldlatch = loop->latch;
+ loop->latch = NULL;
+ delete_basic_block (oldlatch);
+ e = redirect_edge_and_branch (loop_latch_edge (next),
+ loop->header);
+ loop->latch = e->src;
+ flush_pending_stmts (e);
+
+ gcc_assert (EDGE_COUNT (next->header->preds) == 1);
+
+ /* The PHI nodes of the second body (single-argument now)
+ need adjustments to use the right values: either directly
+ the value of the corresponding PHI in the first copy or
+ the one leaving the first body which unrolling did for us.
+
+ See also unroll_jam_possible_p () for further possibilities. */
+ gphi_iterator psi_first, psi_second;
+ e = single_pred_edge (next->header);
+ for (psi_first = gsi_start_phis (loop->header),
+ psi_second = gsi_start_phis (next->header);
+ !gsi_end_p (psi_first);
+ gsi_next (&psi_first), gsi_next (&psi_second))
+ {
+ gphi *phi_first = psi_first.phi ();
+ gphi *phi_second = psi_second.phi ();
+ tree firstop = gimple_phi_result (phi_first);
+ /* The virtual operand is correct already as it's
+ always live at exit, hence has a LCSSA node and outer
+ loop unrolling updated SSA form. */
+ if (virtual_operand_p (firstop))
+ continue;
+
+ /* Due to unroll_jam_possible_p () we know that this is
+ an induction. The second body goes over the same
+ iteration space. */
+ add_phi_arg (phi_second, firstop, e,
+ gimple_location (phi_first));
+ }
+ gcc_assert (gsi_end_p (psi_second));
+
+ merge_loop_tree (loop, next);
+ gcc_assert (!next->num_nodes);
+ struct loop *ln = next->next;
+ delete_loop (next);
+ next = ln;
+ }
+ rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
+}
+
+/* Returns true if the distance in DDR can be determined and adjusts
+ the unroll factor in *UNROLL to make unrolling valid for that distance.
+ Otherwise return false.
+
+ If this data dep can lead to a removed memory reference, increment
+ *REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
+ for this to happen. */
+
+static bool
+adjust_unroll_factor (struct data_dependence_relation *ddr,
+ unsigned *unroll, unsigned *profit_unroll,
+ unsigned *removed)
+{
+ bool ret = false;
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
+ {
+ if (DDR_NUM_DIST_VECTS (ddr) == 0)
+ return false;
+ unsigned i;
+ lambda_vector dist_v;
+ FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
+ {
+ /* A distance (a,b) is at worst transformed into (a/N,b) by the
+ unrolling (factor N), so the transformation is valid if
+ a >= N, or b > 0, or b is zero and a > 0. Otherwise the unroll
+ factor needs to be limited so that the first condition holds.
+ That may limit the factor down to zero in the worst case. */
+ int dist = dist_v[0];
+ if (dist < 0)
+ gcc_unreachable ();
+ else if ((unsigned)dist >= *unroll)
+ ;
+ else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
+ || (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
+ && dist > 0))
+ ;
+ else
+ *unroll = dist;
+
+ /* With a distance (a,0) it's always profitable to unroll-and-jam
+ (by a+1), because one memory reference will go away. With
+ (a,b) and b != 0 that's less clear. We will increase the
+ number of streams without lowering the number of mem refs.
+ So for now only handle the first situation. */
+ if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
+ {
+ *profit_unroll = MAX (*profit_unroll, (unsigned)dist + 1);
+ (*removed)++;
+ }
+
+ ret = true;
+ }
+ }
+ return ret;
+}
+
+/* Main entry point for the unroll-and-jam transformation
+ described above. */
+
+static unsigned int
+tree_loop_unroll_and_jam (void)
+{
+ struct loop *loop;
+ bool changed = false;
+
+ gcc_assert (scev_initialized_p ());
+
+ /* Go through all innermost loops. */
+ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
+ {
+ struct loop *outer = loop_outer (loop);
+
+ if (loop_depth (loop) < 2
+ || optimize_loop_nest_for_size_p (outer))
+ continue;
+
+ if (!unroll_jam_possible_p (outer, loop))
+ continue;
+
+ vec<data_reference_p> datarefs;
+ vec<ddr_p> dependences;
+ unsigned unroll_factor, profit_unroll, removed;
+ struct tree_niter_desc desc;
+ bool unroll = false;
+
+ auto_vec<loop_p, 3> loop_nest;
+ dependences.create (10);
+ datarefs.create (10);
+ if (!compute_data_dependences_for_loop (outer, true, &loop_nest,
+ &datarefs, &dependences))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Cannot analyze data dependencies\n");
+ free_data_refs (datarefs);
+ free_dependence_relations (dependences);
+ return false;
+ }
+ if (!datarefs.length ())
+ continue;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ dump_data_dependence_relations (dump_file, dependences);
+
+ unroll_factor = (unsigned)-1;
+ profit_unroll = 1;
+ removed = 0;
+
+ /* Check all dependencies. */
+ unsigned i;
+ struct data_dependence_relation *ddr;
+ FOR_EACH_VEC_ELT (dependences, i, ddr)
+ {
+ struct data_reference *dra, *drb;
+
+ /* If the refs are independent there's nothing to do. */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
+ continue;
+ dra = DDR_A (ddr);
+ drb = DDR_B (ddr);
+ /* Nothing interesting for the self dependencies. */
+ if (dra == drb)
+ continue;
+
+ /* Now check the distance vector, for determining a sensible
+ outer unroll factor, and for validity of merging the inner
+ loop copies. */
+ if (!adjust_unroll_factor (ddr, &unroll_factor, &profit_unroll,
+ &removed))
+ {
+ /* Couldn't get the distance vector. For two reads that's
+ harmless (we assume we should unroll). For at least
+ one write this means we can't check the dependence direction
+ and hence can't determine safety. */
+
+ if (DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
+ {
+ unroll_factor = 0;
+ break;
+ }
+ }
+ }
+
+ /* We regard a user-specified minimum percentage of zero as a request
+ to ignore all profitability concerns and apply the transformation
+ always. */
+ if (!PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
+ profit_unroll = 2;
+ else if (removed * 100 / datarefs.length ()
+ < (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
+ profit_unroll = 1;
+ if (unroll_factor > profit_unroll)
+ unroll_factor = profit_unroll;
+ if (unroll_factor > (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MAX_UNROLL))
+ unroll_factor = PARAM_VALUE (PARAM_UNROLL_JAM_MAX_UNROLL);
+ unroll = (unroll_factor > 1
+ && can_unroll_loop_p (outer, unroll_factor, &desc));
+
+ if (unroll)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
+ find_loop_location (outer),
+ "applying unroll and jam with factor %d\n",
+ unroll_factor);
+ initialize_original_copy_tables ();
+ tree_unroll_loop (outer, unroll_factor, single_dom_exit (outer),
+ &desc);
+ free_original_copy_tables ();
+ fuse_loops (outer->inner);
+ changed = true;
+ }
+
+ loop_nest.release ();
+ free_dependence_relations (dependences);
+ free_data_refs (datarefs);
+ }
+
+ if (changed)
+ {
+ scev_reset ();
+ free_dominance_info (CDI_DOMINATORS);
+ return TODO_cleanup_cfg;
+ }
+ return 0;
+}
+
+/* Pass boilerplate. */
+
+namespace {
+
+const pass_data pass_data_loop_jam =
+{
+ GIMPLE_PASS, /* type. */
+ "unrolljam", /* name. */
+ OPTGROUP_LOOP, /* optinfo_flags. */
+ TV_LOOP_JAM, /* tv_id. */
+ PROP_cfg, /* properties_required. */
+ 0, /* properties_provided. */
+ 0, /* properties_destroyed. */
+ 0, /* todo_flags_start. */
+ 0, /* todo_flags_finish. */
+};
+
+class pass_loop_jam : public gimple_opt_pass
+{
+public:
+ pass_loop_jam (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_loop_jam, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return flag_unroll_jam != 0;
+ }
+ virtual unsigned int execute (function *);
+
+};
+
+unsigned int
+pass_loop_jam::execute (function *fun)
+{
+ if (number_of_loops (fun) <= 1)
+ return 0;
+
+ return tree_loop_unroll_and_jam ();
+}
+
+}
+
+gimple_opt_pass *
+make_pass_loop_jam (gcc::context *ctxt)
+{
+ return new pass_loop_jam (ctxt);
+}
+
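To make the profitability rule in adjust_unroll_factor above concrete, here is a hand-worked sketch. The loop nest and the array are made up for illustration; only the distance-vector arithmetic comes from the code in this patch.

/* Illustrative only: a loop nest whose single cross-iteration dependence
   has distance vector (2,0), the case adjust_unroll_factor counts as
   profitable.  */
void
example (int n, int m, int x[n][m])
{
  for (int i = 2; i < n; i++)
    for (int j = 0; j < m; j++)
      x[i][j] = x[i - 2][j] + 1;   /* write x[i][j], read x[i-2][j] */
}

/* The flow dependence from the store x[i][j] to the load x[i-2][j] two
   outer iterations later has distance (2,0).  For an (a,0) distance the
   pass raises *profit_unroll to a + 1 = 3 and counts one removed memory
   reference: after unrolling the outer loop by 3 and fusing the inner
   loops, the load in the third copy can reuse the value stored by the
   first copy of the same fused body.  Distances (a,b) with b != 0 are
   not counted, since fusing them adds memory streams without removing
   any reference.  */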
diff -N -urp a/gcc/opts.c b/gcc/opts.c
--- a/gcc/opts.c 2018-11-07 11:37:24.891223860 +0800
+++ b/gcc/opts.c 2018-11-07 11:38:26.171223860 +0800
@@ -534,6 +534,7 @@ static const struct default_options defa
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 },
+ { OPT_LEVELS_3_PLUS, OPT_floop_unroll_and_jam, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
diff -N -urp a/gcc/params.def b/gcc/params.def
--- a/gcc/params.def 2018-11-07 11:37:27.543223860 +0800
+++ b/gcc/params.def 2018-11-07 11:38:26.171223860 +0800
@@ -1280,6 +1280,16 @@ DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
"Enable loop epilogue vectorization using smaller vector size.",
0, 0, 1)
+DEFPARAM (PARAM_UNROLL_JAM_MIN_PERCENT,
+ "unroll-jam-min-percent",
+ "Minimum percentage of memrefs that must go away for unroll-and-jam to be considered profitable.",
+ 1, 0, 100)
+
+DEFPARAM (PARAM_UNROLL_JAM_MAX_UNROLL,
+ "unroll-jam-max-unroll",
+ "Maximum unroll factor for the unroll-and-jam transformation.",
+ 4, 0, 0)
+
/*
Local variables:
diff -N -urp a/gcc/passes.def b/gcc/passes.def
--- a/gcc/passes.def 2018-11-07 11:37:24.859223860 +0800
+++ b/gcc/passes.def 2018-11-07 11:38:26.171223860 +0800
@@ -272,6 +272,7 @@ along with GCC; see the file COPYING3.
NEXT_PASS (pass_tree_unswitch);
NEXT_PASS (pass_scev_cprop);
NEXT_PASS (pass_loop_split);
+ NEXT_PASS (pass_loop_jam);
/* All unswitching, final value replacement and splitting can expose
empty loops. Remove them now. */
NEXT_PASS (pass_cd_dce);
diff -N -urp a/gcc/timevar.def b/gcc/timevar.def
--- a/gcc/timevar.def 2018-11-07 11:37:24.935223860 +0800
+++ b/gcc/timevar.def 2018-11-07 11:38:26.175223860 +0800
@@ -186,6 +186,7 @@ DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "
DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop")
DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
DEFTIMEVAR (TV_LOOP_SPLIT , "loop splitting")
+DEFTIMEVAR (TV_LOOP_JAM , "unroll and jam")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
diff -N -urp a/gcc/tree-pass.h b/gcc/tree-pass.h
--- a/gcc/tree-pass.h 2018-11-07 11:37:24.887223860 +0800
+++ b/gcc/tree-pass.h 2018-11-07 11:38:26.175223860 +0800
@@ -369,6 +369,7 @@ extern gimple_opt_pass *make_pass_tree_l
extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_loop_jam (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_predcom (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
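The remaining hunks wire the new pass into the pipeline, register its timevar, and enable it by default at -O3. As a hedged sketch of how one might exercise it, the invocations below use only the option, parameter, and dump names introduced above; the source file name is hypothetical.

/* Hypothetical invocations, assuming test.c contains a suitable loop nest:

     gcc -O3 -fdump-tree-unrolljam-details -c test.c
     gcc -O2 -floop-unroll-and-jam --param unroll-jam-max-unroll=2 \
         --param unroll-jam-min-percent=0 -c test.c

   "unrolljam" is the dump name registered in pass_data_loop_jam, so the
   first command writes the pass's decisions to a test.c.*.unrolljam dump
   file.  In the second command a min-percent of 0 makes the pass skip the
   profitability check, and max-unroll caps the outer unroll factor at 2.  */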

(removed file, 768 lines; name not shown):
diff -N -urp a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c
--- a/gcc/combine-stack-adj.c 2017-01-20 08:05:30.925466000 +0800
+++ b/gcc/combine-stack-adj.c 2019-01-10 17:10:16.606528459 +0800
@@ -508,6 +508,8 @@ combine_stack_adjustments_for_block (bas
continue;
set = single_set_for_csa (insn);
+ if (set && find_reg_note (insn, REG_STACK_CHECK, NULL_RTX))
+ set = NULL_RTX;
if (set)
{
rtx dest = SET_DEST (set);
diff -N -urp a/gcc/common.opt b/gcc/common.opt
--- a/gcc/common.opt 2019-01-10 13:33:20.926185828 +0800
+++ b/gcc/common.opt 2019-01-10 16:37:35.238476827 +0800
@@ -2336,13 +2336,18 @@ Common Report Var(flag_variable_expansio
Apply variable expansion when loops are unrolled.
fstack-check=
-Common Report RejectNegative Joined
+Common Report RejectNegative Joined Optimization
-fstack-check=[no|generic|specific] Insert stack checking code into the program.
fstack-check
Common Alias(fstack-check=, specific, no)
Insert stack checking code into the program. Same as -fstack-check=specific.
+fstack-clash-protection
+Common Report Var(flag_stack_clash_protection) Optimization
+Insert code to probe each page of stack space as it is allocated to protect
+from stack-clash style attacks.
+
fstack-limit
Common Var(common_deferred_options) Defer
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2019-01-10 13:33:20.914185828 +0800
+++ b/gcc/config/aarch64/aarch64.c 2019-01-11 14:12:22.248521895 +0800
@@ -3881,12 +3881,14 @@ aarch64_expand_prologue (void)
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
- frame_size - STACK_CHECK_PROTECT);
+ if (frame_size > PROBE_INTERVAL
+ && frame_size > get_stack_check_protect ())
+ aarch64_emit_probe_stack_range (get_stack_check_protect (),
+ (frame_size
+ - get_stack_check_protect ()));
}
else if (frame_size > 0)
- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
+ aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
}
aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
diff -N -urp a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
--- a/gcc/config/i386/i386.c 2019-01-10 13:33:20.674185822 +0800
+++ b/gcc/config/i386/i386.c 2019-01-28 10:55:37.006876481 +0800
@@ -14396,7 +14396,7 @@ ix86_expand_prologue (void)
HOST_WIDE_INT size = allocate;
if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
- size = 0x80000000 - STACK_CHECK_PROTECT - 1;
+ size = 0x80000000 - get_stack_check_protect () - 1;
if (TARGET_STACK_PROBE)
{
@@ -14406,18 +14406,21 @@ ix86_expand_prologue (void)
ix86_emit_probe_stack_range (0, size);
}
else
- ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
+ ix86_emit_probe_stack_range (0,
+ size + get_stack_check_protect ());
}
else
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT);
+ if (size > PROBE_INTERVAL
+ && size > get_stack_check_protect ())
+ ix86_emit_probe_stack_range (get_stack_check_protect (),
+ (size
+ - get_stack_check_protect ()));
}
else
- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ ix86_emit_probe_stack_range (get_stack_check_protect (), size);
}
}
}
diff -N -urp a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
--- a/gcc/config/ia64/ia64.c 2017-01-01 20:07:43.905435000 +0800
+++ b/gcc/config/ia64/ia64.c 2019-01-28 10:58:37.582881234 +0800
@@ -3481,15 +3481,16 @@ ia64_expand_prologue (void)
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT,
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+ ia64_emit_probe_stack_range (get_stack_check_protect (),
+ size - get_stack_check_protect (),
bs_size);
- else if (size + bs_size > STACK_CHECK_PROTECT)
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
+ else if (size + bs_size > get_stack_check_protect ())
+ ia64_emit_probe_stack_range (get_stack_check_protect (),
+ 0, bs_size);
}
else if (size + bs_size > 0)
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
+ ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
}
if (dump_file)
diff -N -urp a/gcc/coretypes.h b/gcc/coretypes.h
--- a/gcc/coretypes.h 2017-01-01 20:07:43.905435000 +0800
+++ b/gcc/coretypes.h 2019-01-11 14:09:58.612518114 +0800
@@ -371,6 +371,7 @@ typedef unsigned char uchar;
#include "input.h"
#include "is-a.h"
#include "memory-block.h"
+#include "dumpfile.h"
#endif /* GENERATOR_FILE && !USED_FOR_TARGET */
#endif /* coretypes.h */
diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
--- a/gcc/doc/invoke.texi 2019-01-10 13:33:20.882185827 +0800
+++ b/gcc/doc/invoke.texi 2019-01-10 16:40:40.066481692 +0800
@@ -10050,6 +10050,21 @@ compilation without. The value for comp
needs to be more conservative (higher) in order to make tracer
effective.
+@item stack-clash-protection-guard-size
+Specify the size of the operating system provided stack guard as
+2 raised to @var{num} bytes. The default value is 12 (4096 bytes).
+Acceptable values are between 12 and 30. Higher values may reduce the
+number of explicit probes, but a value larger than the operating system
+provided guard will leave code vulnerable to stack clash style attacks.
+
+@item stack-clash-protection-probe-interval
+Stack clash protection involves probing stack space as it is allocated. This
+param controls the maximum distance between probes into the stack as 2 raised
+to @var{num} bytes. Acceptable values are between 10 and 16; the default
+is 12. Higher values may reduce the number of explicit probes, but a value
+larger than the operating system provided guard will leave code vulnerable to
+stack clash style attacks.
+
@item max-cse-path-length
The maximum number of basic blocks on path that CSE considers.
@@ -11248,7 +11263,8 @@ target support in the compiler but comes
@enumerate
@item
Modified allocation strategy for large objects: they are always
-allocated dynamically if their size exceeds a fixed threshold.
+allocated dynamically if their size exceeds a fixed threshold. Note this
+may change the semantics of some code.
@item
Fixed limit on the size of the static frame of functions: when it is
@@ -11263,6 +11279,25 @@ generic implementation, code performance
Note that old-style stack checking is also the fallback method for
@samp{specific} if no target support has been added in the compiler.
+@samp{-fstack-check=} is designed for Ada's needs to detect infinite recursion
+and stack overflows. @samp{specific} is an excellent choice when compiling
+Ada code. It is not generally sufficient to protect against stack-clash
+attacks. To protect against those you want @samp{-fstack-clash-protection}.
+
+@item -fstack-clash-protection
+@opindex fstack-clash-protection
+Generate code to prevent stack clash style attacks. When this option is
+enabled, the compiler will only allocate one page of stack space at a time
+and each page is accessed immediately after allocation. Thus, it prevents
+allocations from jumping over any stack guard page provided by the
+operating system.
+
+Most targets do not fully support stack clash protection. However, on
+those targets @option{-fstack-clash-protection} will protect dynamic stack
+allocations. @option{-fstack-clash-protection} may also provide limited
+protection for static stack allocations if the target supports
+@option{-fstack-check=specific}.
+
@item -fstack-limit-register=@var{reg}
@itemx -fstack-limit-symbol=@var{sym}
@itemx -fno-stack-limit
diff -N -urp a/gcc/doc/tm.texi b/gcc/doc/tm.texi
--- a/gcc/doc/tm.texi 2017-04-05 01:52:27.193766000 +0800
+++ b/gcc/doc/tm.texi 2019-01-10 16:50:44.006497591 +0800
@@ -3419,6 +3419,10 @@ GCC computed the default from the values
normally not need to override that default.
@end defmac
+@deftypefn {Target Hook} bool TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE (rtx @var{residual})
+Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero.
+@end deftypefn
+
@need 2000
@node Frame Registers
@subsection Registers That Address the Stack Frame
diff -N -urp a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
--- a/gcc/doc/tm.texi.in 2017-04-05 01:52:27.193766000 +0800
+++ b/gcc/doc/tm.texi.in 2019-01-10 16:51:41.530499105 +0800
@@ -2999,6 +2999,8 @@ GCC computed the default from the values
normally not need to override that default.
@end defmac
+@hook TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
+
@need 2000
@node Frame Registers
@subsection Registers That Address the Stack Frame
diff -N -urp a/gcc/explow.c b/gcc/explow.c
--- a/gcc/explow.c 2017-02-02 20:39:09.589196000 +0800
+++ b/gcc/explow.c 2019-01-10 16:56:07.454506105 +0800
@@ -39,8 +39,10 @@ along with GCC; see the file COPYING3.
#include "expr.h"
#include "common/common-target.h"
#include "output.h"
+#include "params.h"
static rtx break_out_memory_refs (rtx);
+static void anti_adjust_stack_and_probe_stack_clash (rtx);
/* Truncate and perhaps sign-extend C as appropriate for MODE. */
@@ -1271,6 +1273,29 @@ get_dynamic_stack_size (rtx *psize, unsi
*psize = size;
}
+/* Return the number of bytes to "protect" on the stack for -fstack-check.
+
+ "protect" in the context of -fstack-check means how many bytes we
+ should always ensure are available on the stack. More importantly
+ this is how many bytes are skipped when probing the stack.
+
+ On some targets we want to reuse the -fstack-check prologue support
+ to give a degree of protection against stack clashing style attacks.
+
+ In that scenario we do not want to skip bytes before probing as that
+ would render the stack clash protections useless.
+
+ So we never use STACK_CHECK_PROTECT directly. Instead we indirect through
+ this helper which allows us to provide different values for
+ -fstack-check and -fstack-clash-protection. */
+HOST_WIDE_INT
+get_stack_check_protect (void)
+{
+ if (flag_stack_clash_protection)
+ return 0;
+ return STACK_CHECK_PROTECT;
+}
+
/* Return an rtx representing the address of an area of memory dynamically
pushed on the stack.
@@ -1429,7 +1454,7 @@ allocate_dynamic_stack_space (rtx size,
probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
size);
else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
- probe_stack_range (STACK_CHECK_PROTECT, size);
+ probe_stack_range (get_stack_check_protect (), size);
/* Don't let anti_adjust_stack emit notes. */
suppress_reg_args_size = true;
@@ -1482,6 +1507,8 @@ allocate_dynamic_stack_space (rtx size,
if (flag_stack_check && STACK_CHECK_MOVING_SP)
anti_adjust_stack_and_probe (size, false);
+ else if (flag_stack_clash_protection)
+ anti_adjust_stack_and_probe_stack_clash (size);
else
anti_adjust_stack (size);
@@ -1757,6 +1784,237 @@ probe_stack_range (HOST_WIDE_INT first,
emit_insn (gen_blockage ());
}
+/* Compute parameters for stack clash probing a dynamic stack
+ allocation of SIZE bytes.
+
+ We compute ROUNDED_SIZE, LAST_ADDR, RESIDUAL and PROBE_INTERVAL.
+
+ Additionally we conditionally dump the type of probing that will
+ be needed given the values computed. */
+
+void
+compute_stack_clash_protection_loop_data (rtx *rounded_size, rtx *last_addr,
+ rtx *residual,
+ HOST_WIDE_INT *probe_interval,
+ rtx size)
+{
+ /* Round SIZE down to STACK_CLASH_PROTECTION_PROBE_INTERVAL. */
+ *probe_interval
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
+ *rounded_size = simplify_gen_binary (AND, Pmode, size,
+ GEN_INT (-*probe_interval));
+
+ /* Compute the value of the stack pointer for the last iteration.
+ It's just SP + ROUNDED_SIZE. */
+ rtx rounded_size_op = force_operand (*rounded_size, NULL_RTX);
+ *last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
+ stack_pointer_rtx,
+ rounded_size_op),
+ NULL_RTX);
+
+ /* Compute any residuals not allocated by the loop above. Residuals
+ are just the ROUNDED_SIZE - SIZE. */
+ *residual = simplify_gen_binary (MINUS, Pmode, size, *rounded_size);
+
+ /* Dump key information to make writing tests easy. */
+ if (dump_file)
+ {
+ if (*rounded_size == CONST0_RTX (Pmode))
+ fprintf (dump_file,
+ "Stack clash skipped dynamic allocation and probing loop.\n");
+ else if (CONST_INT_P (*rounded_size)
+ && INTVAL (*rounded_size) <= 4 * *probe_interval)
+ fprintf (dump_file,
+ "Stack clash dynamic allocation and probing inline.\n");
+ else if (CONST_INT_P (*rounded_size))
+ fprintf (dump_file,
+ "Stack clash dynamic allocation and probing in "
+ "rotated loop.\n");
+ else
+ fprintf (dump_file,
+ "Stack clash dynamic allocation and probing in loop.\n");
+
+ if (*residual != CONST0_RTX (Pmode))
+ fprintf (dump_file,
+ "Stack clash dynamic allocation and probing residuals.\n");
+ else
+ fprintf (dump_file,
+ "Stack clash skipped dynamic allocation and "
+ "probing residuals.\n");
+ }
+}
+
+/* Emit the start of an allocate/probe loop for stack
+ clash protection.
+
+ LOOP_LAB and END_LAB are returned for use when we emit the
+ end of the loop.
+
+ LAST addr is the value for SP which stops the loop. */
+void
+emit_stack_clash_protection_probe_loop_start (rtx *loop_lab,
+ rtx *end_lab,
+ rtx last_addr,
+ bool rotated)
+{
+ /* Essentially we want to emit any setup code, the top of loop
+ label and the comparison at the top of the loop. */
+ *loop_lab = gen_label_rtx ();
+ *end_lab = gen_label_rtx ();
+
+ emit_label (*loop_lab);
+ if (!rotated)
+ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, EQ, NULL_RTX,
+ Pmode, 1, *end_lab);
+}
+
+/* Emit the end of a stack clash probing loop.
+
+ This consists of just the jump back to LOOP_LAB and
+ emitting END_LOOP after the loop. */
+
+void
+emit_stack_clash_protection_probe_loop_end (rtx loop_lab, rtx end_loop,
+ rtx last_addr, bool rotated)
+{
+ if (rotated)
+ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, NE, NULL_RTX,
+ Pmode, 1, loop_lab);
+ else
+ emit_jump (loop_lab);
+
+ emit_label (end_loop);
+
+}
+
+/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
+ while probing it. This pushes when SIZE is positive. SIZE need not
+ be constant.
+
+ This is subtly different from anti_adjust_stack_and_probe, to try and
+ prevent stack-clash attacks
+
+ 1. It must assume no knowledge of the probing state, any allocation
+ must probe.
+
+ Consider the case of a 1 byte alloca in a loop. If the sum of the
+ allocations is large, then this could be used to jump the guard if
+ probes were not emitted.
+
+ 2. It never skips probes, whereas anti_adjust_stack_and_probe will
+ skip probes on the first couple PROBE_INTERVALs on the assumption
+ they're done elsewhere.
+
+ 3. It only allocates and probes SIZE bytes, it does not need to
+ allocate/probe beyond that because this probing style does not
+ guarantee signal handling capability if the guard is hit. */
+
+static void
+anti_adjust_stack_and_probe_stack_clash (rtx size)
+{
+ /* First ensure SIZE is Pmode. */
+ if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
+ size = convert_to_mode (Pmode, size, 1);
+
+ /* We can get here with a constant size on some targets. */
+ rtx rounded_size, last_addr, residual;
+ HOST_WIDE_INT probe_interval;
+ compute_stack_clash_protection_loop_data (&rounded_size, &last_addr,
+ &residual, &probe_interval, size);
+
+ if (rounded_size != CONST0_RTX (Pmode))
+ {
+ if (CONST_INT_P (rounded_size)
+ && INTVAL (rounded_size) <= 4 * probe_interval)
+ {
+ for (HOST_WIDE_INT i = 0;
+ i < INTVAL (rounded_size);
+ i += probe_interval)
+ {
+ anti_adjust_stack (GEN_INT (probe_interval));
+
+ /* The prologue does not probe residuals. Thus the offset
+ here is to probe just beyond what the prologue had already
+ allocated. */
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+ (probe_interval
+ - GET_MODE_SIZE (word_mode))));
+ emit_insn (gen_blockage ());
+ }
+ }
+ else
+ {
+ rtx loop_lab, end_loop;
+ bool rotate_loop = CONST_INT_P (rounded_size);
+ emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop,
+ last_addr, rotate_loop);
+
+ anti_adjust_stack (GEN_INT (probe_interval));
+
+ /* The prologue does not probe residuals. Thus the offset here
+ is to probe just beyond what the prologue had already allocated. */
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+ (probe_interval
+ - GET_MODE_SIZE (word_mode))));
+
+ emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop,
+ last_addr, rotate_loop);
+ emit_insn (gen_blockage ());
+ }
+ }
+
+ if (residual != CONST0_RTX (Pmode))
+ {
+ rtx label = NULL_RTX;
+ /* RESIDUAL could be zero at runtime and in that case *sp could
+ hold live data. Furthermore, we do not want to probe into the
+ red zone.
+
+ Go ahead and just guard the probe at *sp on RESIDUAL != 0 at
+ runtime if RESIDUAL is not a compile time constant. */
+ if (!CONST_INT_P (residual))
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (residual, CONST0_RTX (GET_MODE (residual)),
+ EQ, NULL_RTX, Pmode, 1, label);
+ }
+
+ rtx x = force_reg (Pmode, plus_constant (Pmode, residual,
+ -GET_MODE_SIZE (word_mode)));
+ anti_adjust_stack (residual);
+ emit_stack_probe (gen_rtx_PLUS (Pmode, stack_pointer_rtx, x));
+ emit_insn (gen_blockage ());
+ if (!CONST_INT_P (residual))
+ emit_label (label);
+ }
+
+ /* Some targets make optimistic assumptions in their prologues about
+ how the caller may have probed the stack. Make sure we honor
+ those assumptions when needed. */
+ if (size != CONST0_RTX (Pmode)
+ && targetm.stack_clash_protection_final_dynamic_probe (residual))
+ {
+ /* SIZE could be zero at runtime and in that case *sp could hold
+ live data. Furthermore, we don't want to probe into the red
+ zone.
+
+ Go ahead and just guard the probe at *sp on SIZE != 0 at runtime
+ if SIZE is not a compile time constant. */
+ rtx label = NULL_RTX;
+ if (!CONST_INT_P (size))
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (size, CONST0_RTX (GET_MODE (size)),
+ EQ, NULL_RTX, Pmode, 1, label);
+ }
+
+ emit_stack_probe (stack_pointer_rtx);
+ emit_insn (gen_blockage ());
+ if (!CONST_INT_P (size))
+ emit_label (label);
+ }
+}
+
/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
while probing it. This pushes when SIZE is positive. SIZE need not
be constant. If ADJUST_BACK is true, adjust back the stack pointer
diff -N -urp a/gcc/explow.h b/gcc/explow.h
--- a/gcc/explow.h 2017-01-01 20:07:43.905435000 +0800
+++ b/gcc/explow.h 2019-01-10 16:57:37.934508487 +0800
@@ -69,6 +69,15 @@ extern void anti_adjust_stack (rtx);
/* Add some bytes to the stack while probing it. An rtx says how many. */
extern void anti_adjust_stack_and_probe (rtx, bool);
+/* Support for building allocation/probing loops for stack-clash
+ protection of dynamically allocated stack space. */
+extern void compute_stack_clash_protection_loop_data (rtx *, rtx *, rtx *,
+ HOST_WIDE_INT *, rtx);
+extern void emit_stack_clash_protection_probe_loop_start (rtx *, rtx *,
+ rtx, bool);
+extern void emit_stack_clash_protection_probe_loop_end (rtx, rtx,
+ rtx, bool);
+
/* This enum is used for the following two functions. */
enum save_level {SAVE_BLOCK, SAVE_FUNCTION, SAVE_NONLOCAL};
diff -N -urp a/gcc/flag-types.h b/gcc/flag-types.h
--- a/gcc/flag-types.h 2017-01-01 20:07:43.905435000 +0800
+++ b/gcc/flag-types.h 2019-01-10 16:42:11.490484099 +0800
@@ -166,7 +166,14 @@ enum permitted_flt_eval_methods
PERMITTED_FLT_EVAL_METHODS_C11
};
-/* Type of stack check. */
+/* Type of stack check.
+
+ Stack checking is designed to detect infinite recursion and stack
+ overflows for Ada programs. Furthermore stack checking tries to ensure
+ in that scenario that enough stack space is left to run a signal handler.
+
+ -fstack-check= does not prevent stack-clash style attacks. For that
+ you want -fstack-clash-protection. */
enum stack_check_type
{
/* Do not check the stack. */
diff -N -urp a/gcc/function.c b/gcc/function.c
--- a/gcc/function.c 2017-08-08 21:21:12.755378000 +0800
+++ b/gcc/function.c 2019-01-10 17:07:17.414523742 +0800
@@ -5695,6 +5695,58 @@ get_arg_pointer_save_area (void)
return ret;
}
+
+/* If debugging dumps are requested, dump information about how the
+ target handled -fstack-check=clash for the prologue.
+
+ PROBES describes what if any probes were emitted.
+
+ RESIDUALS indicates if the prologue had any residual allocation
+ (i.e. total allocation was not a multiple of PROBE_INTERVAL). */
+
+void
+dump_stack_clash_frame_info (enum stack_clash_probes probes, bool residuals)
+{
+ if (!dump_file)
+ return;
+
+ switch (probes)
+ {
+ case NO_PROBE_NO_FRAME:
+ fprintf (dump_file,
+ "Stack clash no probe no stack adjustment in prologue.\n");
+ break;
+ case NO_PROBE_SMALL_FRAME:
+ fprintf (dump_file,
+ "Stack clash no probe small stack adjustment in prologue.\n");
+ break;
+ case PROBE_INLINE:
+ fprintf (dump_file, "Stack clash inline probes in prologue.\n");
+ break;
+ case PROBE_LOOP:
+ fprintf (dump_file, "Stack clash probe loop in prologue.\n");
+ break;
+ }
+
+ if (residuals)
+ fprintf (dump_file, "Stack clash residual allocation in prologue.\n");
+ else
+ fprintf (dump_file, "Stack clash no residual allocation in prologue.\n");
+
+ if (frame_pointer_needed)
+ fprintf (dump_file, "Stack clash frame pointer needed.\n");
+ else
+ fprintf (dump_file, "Stack clash no frame pointer needed.\n");
+
+ if (TREE_THIS_VOLATILE (cfun->decl))
+ fprintf (dump_file,
+ "Stack clash noreturn prologue, assuming no implicit"
+ " probes in caller.\n");
+ else
+ fprintf (dump_file,
+ "Stack clash not noreturn prologue.\n");
+}
+
/* Add a list of INSNS to the hash HASHP, possibly allocating HASHP
for the first time. */
diff -N -urp a/gcc/function.h b/gcc/function.h
--- a/gcc/function.h 2017-01-25 01:07:36.015431000 +0800
+++ b/gcc/function.h 2019-01-10 17:08:12.806525200 +0800
@@ -553,6 +553,14 @@ do { \
((TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn) \
? MAX (FUNCTION_BOUNDARY, 2 * BITS_PER_UNIT) : FUNCTION_BOUNDARY)
+enum stack_clash_probes {
+ NO_PROBE_NO_FRAME,
+ NO_PROBE_SMALL_FRAME,
+ PROBE_INLINE,
+ PROBE_LOOP
+};
+
+extern void dump_stack_clash_frame_info (enum stack_clash_probes, bool);
extern void push_function_context (void);
diff -N -urp a/gcc/params.def b/gcc/params.def
--- a/gcc/params.def 2019-01-10 13:33:20.894185827 +0800
+++ b/gcc/params.def 2019-01-10 16:43:15.414485782 +0800
@@ -213,6 +213,16 @@ DEFPARAM(PARAM_STACK_FRAME_GROWTH,
"Maximal stack frame growth due to inlining (in percent).",
1000, 0, 0)
+DEFPARAM(PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
+ "stack-clash-protection-guard-size",
+ "Size of the stack guard expressed as a power of two.",
+ 12, 12, 30)
+
+DEFPARAM(PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL,
+ "stack-clash-protection-probe-interval",
+ "Interval in which to probe the stack expressed as a power of two.",
+ 12, 10, 16)
+
/* The GCSE optimization will be disabled if it would require
significantly more memory than this value. */
DEFPARAM(PARAM_MAX_GCSE_MEMORY,
diff -N -urp a/gcc/reg-notes.def b/gcc/reg-notes.def
--- a/gcc/reg-notes.def 2017-03-28 05:00:35.674561000 +0800
+++ b/gcc/reg-notes.def 2019-01-10 17:12:11.678531488 +0800
@@ -223,6 +223,10 @@ REG_NOTE (ARGS_SIZE)
pseudo reg. */
REG_NOTE (RETURNED)
+/* Indicates the instruction is a stack check probe that should not
+ be combined with other stack adjustments. */
+REG_NOTE (STACK_CHECK)
+
/* Used to mark a call with the function decl called by the call.
The decl might not be available in the call due to splitting of the call
insn. This note is a SYMBOL_REF. */
diff -N -urp a/gcc/rtl.h b/gcc/rtl.h
--- a/gcc/rtl.h 2017-03-14 20:47:42.745690000 +0800
+++ b/gcc/rtl.h 2019-01-10 16:59:15.574511058 +0800
@@ -2707,6 +2707,7 @@ get_full_set_src_cost (rtx x, machine_mo
/* In explow.c */
extern HOST_WIDE_INT trunc_int_for_mode (HOST_WIDE_INT, machine_mode);
extern rtx plus_constant (machine_mode, rtx, HOST_WIDE_INT, bool = false);
+extern HOST_WIDE_INT get_stack_check_protect (void);
/* In rtl.c */
extern rtx rtx_alloc_stat (RTX_CODE MEM_STAT_DECL);
diff -N -urp a/gcc/sched-deps.c b/gcc/sched-deps.c
--- a/gcc/sched-deps.c 2017-01-01 20:07:43.905435000 +0800
+++ b/gcc/sched-deps.c 2019-01-10 17:13:37.470533746 +0800
@@ -4717,6 +4717,11 @@ parse_add_or_inc (struct mem_inc_info *m
if (RTX_FRAME_RELATED_P (insn) || !pat)
return false;
+ /* Do not allow breaking data dependencies for insns that are marked
+ with REG_STACK_CHECK. */
+ if (find_reg_note (insn, REG_STACK_CHECK, NULL))
+ return false;
+
/* Result must be single reg. */
if (!REG_P (SET_DEST (pat)))
return false;
diff -N -urp a/gcc/target.def b/gcc/target.def
--- a/gcc/target.def 2019-01-10 13:33:20.762185824 +0800
+++ b/gcc/target.def 2019-01-10 17:01:49.146515100 +0800
@@ -5490,6 +5490,12 @@ these registers when the target switches
void, (void),
hook_void_void)
+DEFHOOK
+(stack_clash_protection_final_dynamic_probe,
+ "Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero.",
+ bool, (rtx residual),
+ default_stack_clash_protection_final_dynamic_probe)
+
/* Functions specific to the C family of frontends. */
#undef HOOK_PREFIX
#define HOOK_PREFIX "TARGET_C_"
diff -N -urp a/gcc/targhooks.c b/gcc/targhooks.c
--- a/gcc/targhooks.c 2017-02-07 19:29:06.644837000 +0800
+++ b/gcc/targhooks.c 2019-01-10 17:03:23.818517592 +0800
@@ -2107,4 +2107,10 @@ default_excess_precision (enum excess_pr
return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
}
+bool
+default_stack_clash_protection_final_dynamic_probe (rtx residual ATTRIBUTE_UNUSED)
+{
+ return 0;
+}
+
#include "gt-targhooks.h"
diff -N -urp a/gcc/targhooks.h b/gcc/targhooks.h
--- a/gcc/targhooks.h 2017-04-05 01:52:27.193766000 +0800
+++ b/gcc/targhooks.h 2019-01-10 17:04:11.438518846 +0800
@@ -263,5 +263,6 @@ extern unsigned int default_min_arithmet
extern enum flt_eval_method
default_excess_precision (enum excess_precision_type ATTRIBUTE_UNUSED);
+extern bool default_stack_clash_protection_final_dynamic_probe (rtx);
#endif /* GCC_TARGHOOKS_H */
diff -N -urp a/gcc/toplev.c b/gcc/toplev.c
--- a/gcc/toplev.c 2017-09-15 16:18:34.015147000 +0800
+++ b/gcc/toplev.c 2019-01-10 16:45:33.626489420 +0800
@@ -1573,6 +1573,26 @@ process_options (void)
flag_associative_math = 0;
}
+ /* -fstack-clash-protection is not currently supported on targets
+ where the stack grows up. */
+ if (flag_stack_clash_protection && !STACK_GROWS_DOWNWARD)
+ {
+ warning_at (UNKNOWN_LOCATION, 0,
+ "%<-fstack-clash-protection%> is not supported on targets "
+ "where the stack grows from lower to higher addresses");
+ flag_stack_clash_protection = 0;
+ }
+
+ /* We cannot support -fstack-check= and -fstack-clash-protection at
+ the same time. */
+ if (flag_stack_check != NO_STACK_CHECK && flag_stack_clash_protection)
+ {
+ warning_at (UNKNOWN_LOCATION, 0,
+ "%<-fstack-check=%> and %<-fstack-clash_protection%> are "
+ "mutually exclusive. Disabling %<-fstack-check=%>");
+ flag_stack_check = NO_STACK_CHECK;
+ }
+
/* With -fcx-limited-range, we do cheap and quick complex arithmetic. */
if (flag_cx_limited_range)
flag_complex_method = 0;
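Putting the stack-clash pieces together, here is a hedged sketch of a dynamic allocation that goes through the new anti_adjust_stack_and_probe_stack_clash path. The function and the numbers are illustrative; the option and parameter names are the ones added above.

#include <string.h>

void consume (void *p);

/* Illustrative only; compile with e.g.
     gcc -O2 -fstack-clash-protection \
         --param stack-clash-protection-probe-interval=12 -c demo.c  */
void
demo (unsigned n)
{
  void *p = __builtin_alloca (n);   /* size unknown at compile time */
  memset (p, 0, n);
  consume (p);
}

/* With the default 2^12 = 4096-byte probe interval, a runtime size of
   n = 10000 is split by compute_stack_clash_protection_loop_data into
     rounded_size = 10000 & -4096 = 8192   (two full probe intervals)
     residual     = 10000 - 8192  = 1808
   The emitted loop allocates 4096 bytes per iteration and probes at
   sp + 4096 - wordsize after each adjustment; the 1808-byte residual is
   allocated and probed afterwards, guarded by a runtime residual != 0
   check because the size is not a compile-time constant.  */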

(removed file, 12 lines; name not shown):
diff --git a/gcc/graphite.h b/gcc/graphite.h
index 4e0e58c..be0a22b 100644
--- a/gcc/graphite.h
+++ b/gcc/graphite.h
@@ -37,6 +37,8 @@ along with GCC; see the file COPYING3. If not see
#include <isl/schedule.h>
#include <isl/ast_build.h>
#include <isl/schedule_node.h>
+#include <isl/id.h>
+#include <isl/space.h>
typedef struct poly_dr *poly_dr_p;

gcc.spec: diff suppressed because it is too large (3391 lines).

generate-csel.patch (new file, 171 lines):
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+unsigned test(unsigned k, unsigned b) {
+ unsigned a[2];
+ if (b < a[k]) {
+ a[k] = b;
+ }
+ return a[0]+a[1];
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+int c;
+unsigned test(unsigned k, unsigned b) {
+ unsigned a[2];
+ a[k] = c;
+ if (b < a[k]) {
+ a[k] = b;
+ }
+ return a[0]+a[1];
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-3.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+unsigned a[2];
+unsigned test(unsigned k, unsigned b) {
+ if (b < a[k]) {
+ a[k] = b;
+ }
+ return a[0]+a[1];
+}
+
+/* { dg-final { scan-tree-dump-not "Conditional store replacement" "cselim" } } */
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-4.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+int *p;
+unsigned test(unsigned k, unsigned b) {
+ unsigned a[2];
+ p = a;
+ if (b < a[k]) {
+ a[k] = b;
+ }
+ return a[0]+a[1];
+}
+
+/* { dg-final { scan-tree-dump-not "Conditional store replacement" "cselim" } } */
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+int test(int b, int k) {
+ struct {
+ int data[2];
+ } a;
+
+ if (b < a.data[k]) {
+ a.data[k] = b;
+ }
+
+ return a.data[0] + a.data[1];
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+int test(int b, int k) {
+ typedef struct {
+ int x;
+ } SS;
+ struct {
+ SS data[2];
+ } a;
+
+ if (b < a.data[k].x) {
+ a.data[k].x = b;
+ }
+
+ return a.data[0].x + a.data[1].x;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff -uprN a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -2196,7 +2196,8 @@ get_non_trapping (void)
We check that MIDDLE_BB contains only one store, that that store
doesn't trap (not via NOTRAP, but via checking if an access to the same
- memory location dominates us) and that the store has a "simple" RHS. */
+ memory location dominates us, or the store is to a local variable
+ whose address is not taken) and that the store has a "simple" RHS. */
static bool
cond_store_replacement (basic_block middle_bb, basic_block join_bb,
@@ -2218,8 +2219,9 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
locus = gimple_location (assign);
lhs = gimple_assign_lhs (assign);
rhs = gimple_assign_rhs1 (assign);
- if (TREE_CODE (lhs) != MEM_REF
- || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
+ if ((TREE_CODE (lhs) != MEM_REF
+ && TREE_CODE (lhs) != ARRAY_REF
+ && TREE_CODE (lhs) != COMPONENT_REF)
|| !is_gimple_reg_type (TREE_TYPE (lhs)))
return false;
@@ -2227,7 +2229,13 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
TREE_THIS_NOTRAP here, but in that case we also could move stores,
whose value is not available readily, which we want to avoid. */
if (!nontrap->contains (lhs))
- return false;
+ {
+ /* If LHS is a local variable whose address is not taken, we can
+ always safely move down the store. */
+ tree base = get_base_address (lhs);
+ if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
+ return false;
+ }
/* Now we've checked the constraints, so do the transformation:
1) Remove the single store. */
@@ -2280,6 +2288,14 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
else
gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nConditional store replacement happened!");
+ fprintf (dump_file, "\nReplaced the store with a load.");
+ fprintf (dump_file, "\nInserted a new PHI statement in joint block:\n");
+ print_gimple_stmt (dump_file, new_stmt, 0, TDF_VOPS|TDF_MEMSYMS);
+ }
+
return true;
}
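As a hedged sketch of what the relaxed check buys (the names and values are made up; the shape matches the pr89430 testcases above), conditional store replacement can now rewrite a guarded store into a local, non-address-taken array as follows.

/* Before: conditional store into the local array 'a'.  */
unsigned
before (unsigned k, unsigned b)
{
  unsigned a[2] = { 1, 2 };
  if (b < a[k])
    a[k] = b;
  return a[0] + a[1];
}

/* After cselim (conceptually): the load is speculated, which is safe
   because 'a' is local and its address is never taken, and the store
   becomes unconditional.  */
unsigned
after (unsigned k, unsigned b)
{
  unsigned a[2] = { 1, 2 };
  unsigned tmp = a[k];
  unsigned cstore = b < tmp ? b : tmp;
  a[k] = cstore;
  return a[0] + a[1];
}

Making the store unconditional removes the branch around it, which is what later lets the backend emit a conditional-select sequence, in line with this patch file's name.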

(binary file changed; contents not shown)

ivopts-1.patch (new file, 178 lines):
diff -urpN a/gcc/testsuite/gfortran.dg/graphite/pr90240.f b/gcc/testsuite/gfortran.dg/graphite/pr90240.f
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/graphite/pr90240.f
@@ -0,0 +1,18 @@
+! { dg-do compile }
+! { dg-options "-O1 -floop-nest-optimize" }
+
+ PARAMETER (n=1335, N2=1335)
+ COMMON a(n,N2), b(n,N2), c(n,N2),
+ * d(n,N2),
+ 2 e(n,N2), f(n,N2),
+ * g(n,N2), h(n,N2)
+ DO 200 j=1,i
+ DO 300 k=1,l
+ a(k,j) = c(k,j)*g(k,j)*f(k+1,m)+f(k,m)+f(k,j)
+ 2 +f(k+1,j)*h(k+1,j)
+ b(k,j+1) = d(k,j+1)*g(k,m)+g(k,j+1)
+ 1 *e(k,m)+e(k,j+1)+e(k,j)+e(k+1,j)
+ 2 *h(k,j+1)-h(k,j)
+ 300 ENDDO
+ 200 ENDDO
+ END
diff -urpN a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -4557,22 +4557,25 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
static comp_cost
get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
{
- int loop_freq = data->current_loop->header->count.to_frequency (cfun);
- int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
- if (loop_freq != 0)
- {
- gcc_assert (cost.scratch <= cost.cost);
- int scaled_cost
- = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
+ if (data->speed
+ && data->current_loop->header->count.to_frequency (cfun) > 0)
+ {
+ basic_block bb = gimple_bb (at);
+ gcc_assert (cost.scratch <= cost.cost);
+ int scale_factor = (int)(intptr_t) bb->aux;
+ if (scale_factor == 1)
+ return cost;
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Scaling cost based on bb prob "
- "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
- 1.0f * bb_freq / loop_freq, cost.cost,
- cost.scratch, scaled_cost, bb_freq, loop_freq);
+ int scaled_cost
+ = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
- cost.cost = scaled_cost;
- }
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Scaling cost based on bb prob "
+ "by %2.2f: %d (scratch: %d) -> %d\n",
+ 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
+
+ cost.cost = scaled_cost;
+ }
return cost;
}
@@ -6678,9 +6681,8 @@ try_improve_iv_set (struct ivopts_data *data,
}
iv_ca_delta_commit (data, ivs, best_delta, true);
- gcc_assert (best_cost == iv_ca_cost (ivs));
iv_ca_delta_free (&best_delta);
- return true;
+ return best_cost == iv_ca_cost (ivs);
}
/* Attempts to find the optimal set of induction variables. We do simple
@@ -6717,6 +6719,14 @@ find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
}
}
+ /* If the set has infinite_cost, it can't be optimal. */
+ if (iv_ca_cost (set).infinite_cost_p ())
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Overflow to infinite cost in try_improve_iv_set.\n");
+ iv_ca_free (&set);
+ }
return set;
}
@@ -7522,6 +7532,49 @@ loop_body_includes_call (basic_block *body, unsigned num_nodes)
return false;
}
+/* Determine cost scaling factor for basic blocks in loop. */
+#define COST_SCALING_FACTOR_BOUND (20)
+
+static void
+determine_scaling_factor (struct ivopts_data *data, basic_block *body)
+{
+ int lfreq = data->current_loop->header->count.to_frequency (cfun);
+ if (!data->speed || lfreq <= 0)
+ return;
+
+ int max_freq = lfreq;
+ for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
+ {
+ body[i]->aux = (void *)(intptr_t) 1;
+ if (max_freq < body[i]->count.to_frequency (cfun))
+ max_freq = body[i]->count.to_frequency (cfun);
+ }
+ if (max_freq > lfreq)
+ {
+ int divisor, factor;
+ /* Check if scaling factor itself needs to be scaled by the bound. This
+ is to avoid overflow when scaling cost according to profile info. */
+ if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
+ {
+ divisor = max_freq;
+ factor = COST_SCALING_FACTOR_BOUND;
+ }
+ else
+ {
+ divisor = lfreq;
+ factor = 1;
+ }
+ for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
+ {
+ int bfreq = body[i]->count.to_frequency (cfun);
+ if (bfreq <= lfreq)
+ continue;
+
+ body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
+ }
+ }
+}
+
/* Optimizes the LOOP. Returns true if anything changed. */
static bool
@@ -7560,7 +7613,6 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
body = get_loop_body (loop);
data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
- free (body);
data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
@@ -7574,6 +7626,9 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
goto finish;
+ /* Determine cost scaling factor for basic blocks in loop. */
+ determine_scaling_factor (data, body);
+
/* Finds candidates for the induction variables (item 2). */
find_iv_candidates (data);
@@ -7584,6 +7639,9 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
/* Find the optimal set of induction variables (item 3, part 2). */
iv_ca = find_optimal_iv_set (data);
+ /* Cleanup basic block aux field. */
+ for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
+ body[i]->aux = NULL;
if (!iv_ca)
goto finish;
changed = true;
@@ -7599,6 +7657,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
remove_unused_ivs (data, toremove);
finish:
+ free (body);
free_loop_data (data);
return changed;
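
A minimal sketch, not part of the patch, of the per-basic-block scale factor that determine_scaling_factor stores in bb->aux above; the helper name is made up and the logic mirrors the hunk (it assumes loop_freq > 0). Blocks no hotter than the loop header keep factor 1; hotter blocks are scaled by bfreq/lfreq, with the factor capped through COST_SCALING_FACTOR_BOUND so the later cost multiplication cannot overflow.

/* Illustrative only: scale_factor_for_bb is not a GCC function.  */
#define COST_SCALING_FACTOR_BOUND (20)

static int
scale_factor_for_bb (int bb_freq, int loop_freq, int max_freq)
{
  int divisor, factor;

  if (bb_freq <= loop_freq)
    return 1;
  if (max_freq / loop_freq > COST_SCALING_FACTOR_BOUND)
    {
      divisor = max_freq;
      factor = COST_SCALING_FACTOR_BOUND;
    }
  else
    {
      divisor = loop_freq;
      factor = 1;
    }
  return factor * bb_freq / divisor;
}

The scaled cost in get_scaled_computation_cost_at is then cost.scratch + (cost.cost - cost.scratch) * scale_factor.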

ivopts-2.patch (new file, 407 lines)
diff -urpN a/gcc/testsuite/g++.dg/tree-ssa/pr90078.C b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C
@@ -0,0 +1,199 @@
+// { dg-do compile }
+// { dg-options "-std=c++14 -O2 -ftemplate-depth=1000000" }
+
+template <class T, int Dim0, int Dim1, int Dim2> struct Tensor3;
+template <class A, class T, int Dim0, int Dim1, int Dim2, char i, char j,
+ char k>
+struct Tensor3_Expr;
+
+template <class T, int Dim0, int Dim1, int Dim2, int Dim3> struct Tensor4;
+template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i,
+ char j, char k, char l>
+struct Tensor4_Expr;
+
+template <char i, int Dim> struct Index
+{};
+template <const int N> struct Number
+{
+ Number(){};
+ operator int() const { return N; }
+};
+
+template <class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2>
+struct Tensor3
+{
+ T data[Tensor_Dim0][Tensor_Dim1][Tensor_Dim2];
+
+ T operator()(const int N1, const int N2, const int N3) const
+ {
+ return data[N1][N2][N3];
+ }
+
+ template <char i, char j, char k, int Dim0, int Dim1, int Dim2>
+ Tensor3_Expr<const Tensor3<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>, T,
+ Dim0, Dim1, Dim2, i, j, k>
+ operator()(const Index<i, Dim0>, const Index<j, Dim1>,
+ const Index<k, Dim2>) const
+ {
+ return Tensor3_Expr<const Tensor3<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>,
+ T, Dim0, Dim1, Dim2, i, j, k>(*this);
+ }
+};
+
+template <class A, class T, int Dim0, int Dim1, int Dim2, char i, char j,
+ char k>
+struct Tensor3_Expr
+{
+ A iter;
+
+ Tensor3_Expr(const A &a) : iter(a) {}
+ T operator()(const int N1, const int N2, const int N3) const
+ {
+ return iter(N1, N2, N3);
+ }
+};
+
+template <class A, class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2,
+ int Dim0, int Dim1, int Dim2, char i, char j, char k>
+struct Tensor3_Expr<Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>, T, Dim0,
+ Dim1, Dim2, i, j, k>
+{
+ Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2> &iter;
+
+ Tensor3_Expr(Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2> &a) : iter(a)
+ {}
+ T operator()(const int N1, const int N2, const int N3) const
+ {
+ return iter(N1, N2, N3);
+ }
+};
+
+template <class A, class B, class T, class U, int Dim0, int Dim1, int Dim23,
+ int Dim4, int Dim5, char i, char j, char k, char l, char m>
+struct Tensor3_times_Tensor3_21
+{
+ Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> iterA;
+ Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> iterB;
+
+ template <int CurrentDim>
+ T eval(const int N1, const int N2, const int N3, const int N4,
+ const Number<CurrentDim> &) const
+ {
+ return iterA(N1, N2, CurrentDim - 1) * iterB(CurrentDim - 1, N3, N4)
+ + eval(N1, N2, N3, N4, Number<CurrentDim - 1>());
+ }
+ T eval(const int N1, const int N2, const int N3, const int N4,
+ const Number<1> &) const
+ {
+ return iterA(N1, N2, 0) * iterB(0, N3, N4);
+ }
+
+ Tensor3_times_Tensor3_21(
+ const Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> &a,
+ const Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> &b)
+ : iterA(a), iterB(b)
+ {}
+ T operator()(const int &N1, const int &N2, const int &N3,
+ const int &N4) const
+ {
+ return eval(N1, N2, N3, N4, Number<Dim23>());
+ }
+};
+
+template <class A, class B, class T, class U, int Dim0, int Dim1, int Dim23,
+ int Dim4, int Dim5, char i, char j, char k, char l, char m>
+Tensor4_Expr<Tensor3_times_Tensor3_21<A, B, T, U, Dim0, Dim1, Dim23, Dim4,
+ Dim5, i, j, k, l, m>,
+ T, Dim0, Dim1, Dim4, Dim5, i, j, l, m>
+operator*(const Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> &a,
+ const Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> &b)
+{
+ using TensorExpr = Tensor3_times_Tensor3_21<A, B, T, U, Dim0, Dim1, Dim23,
+ Dim4, Dim5, i, j, k, l, m>;
+ return Tensor4_Expr<TensorExpr, T, Dim0, Dim1, Dim4, Dim5, i, j, l, m>(
+ TensorExpr(a, b));
+};
+
+template <class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2,
+ int Tensor_Dim3>
+struct Tensor4
+{
+ T data[Tensor_Dim0][Tensor_Dim1][Tensor_Dim2][Tensor_Dim3];
+
+ Tensor4() {}
+ T &operator()(const int N1, const int N2, const int N3, const int N4)
+ {
+ return data[N1][N2][N3][N4];
+ }
+
+ template <char i, char j, char k, char l, int Dim0, int Dim1, int Dim2,
+ int Dim3>
+ Tensor4_Expr<Tensor4<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2, Tensor_Dim3>,
+ T, Dim0, Dim1, Dim2, Dim3, i, j, k, l>
+ operator()(const Index<i, Dim0>, const Index<j, Dim1>, const Index<k, Dim2>,
+ const Index<l, Dim3>)
+ {
+ return Tensor4_Expr<
+ Tensor4<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2, Tensor_Dim3>, T, Dim0,
+ Dim1, Dim2, Dim3, i, j, k, l>(*this);
+ };
+};
+
+template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i,
+ char j, char k, char l>
+struct Tensor4_Expr
+{
+ A iter;
+
+ Tensor4_Expr(const A &a) : iter(a) {}
+ T operator()(const int N1, const int N2, const int N3, const int N4) const
+ {
+ return iter(N1, N2, N3, N4);
+ }
+};
+
+template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i,
+ char j, char k, char l>
+struct Tensor4_Expr<Tensor4<A, Dim0, Dim1, Dim2, Dim3>, T, Dim0, Dim1, Dim2,
+ Dim3, i, j, k, l>
+{
+ Tensor4<A, Dim0, Dim1, Dim2, Dim3> &iter;
+
+ Tensor4_Expr(Tensor4<A, Dim0, Dim1, Dim2, Dim3> &a) : iter(a) {}
+ T operator()(const int N1, const int N2, const int N3, const int N4) const
+ {
+ return iter(N1, N2, N3, N4);
+ }
+
+ template <class B, class U, int Dim1_0, int Dim1_1, int Dim1_2, int Dim1_3,
+ char i_1, char j_1, char k_1, char l_1>
+ auto &operator=(const Tensor4_Expr<B, U, Dim1_0, Dim1_1, Dim1_2, Dim1_3, i_1,
+ j_1, k_1, l_1> &rhs)
+ {
+ for(int ii = 0; ii < Dim0; ++ii)
+ for(int jj = 0; jj < Dim1; ++jj)
+ for(int kk = 0; kk < Dim2; ++kk)
+ for(int ll = 0; ll < Dim3; ++ll)
+ {
+ iter(ii, jj, kk, ll) = rhs(ii, jj, kk, ll);
+ }
+ return *this;
+ }
+};
+
+int main()
+{
+ Tensor3<float, 100, 100, 1000> t1;
+ Tensor3<float, 1000, 100, 100> t2;
+
+ Index<'l', 100> l;
+ Index<'m', 100> m;
+ Index<'k', 1000> k;
+ Index<'n', 100> n;
+ Index<'o', 100> o;
+
+ Tensor4<float, 100, 100, 100, 100> res;
+ res(l, m, n, o) = t1(l, m, k) * t2(k, n, o);
+ return 0;
+}
+
diff -urpN a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -114,7 +114,7 @@ along with GCC; see the file COPYING3. If not see
interface between the GIMPLE and RTL worlds. */
/* The infinite cost. */
-#define INFTY 10000000
+#define INFTY 1000000000
/* Returns the expected number of loop iterations for LOOP.
The average trip count is computed from profile data if it
@@ -180,7 +180,7 @@ struct comp_cost
comp_cost (): cost (0), complexity (0), scratch (0)
{}
- comp_cost (int cost, unsigned complexity, int scratch = 0)
+ comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
: cost (cost), complexity (complexity), scratch (scratch)
{}
@@ -220,16 +220,16 @@ struct comp_cost
/* Returns true if COST1 is smaller or equal than COST2. */
friend bool operator<= (comp_cost cost1, comp_cost cost2);
- int cost; /* The runtime cost. */
+ int64_t cost; /* The runtime cost. */
unsigned complexity; /* The estimate of the complexity of the code for
the computation (in no concrete units --
complexity field should be larger for more
complex expressions and addressing modes). */
- int scratch; /* Scratch used during cost computation. */
+ int64_t scratch; /* Scratch used during cost computation. */
};
static const comp_cost no_cost;
-static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
+static const comp_cost infinite_cost (INFTY, 0, INFTY);
bool
comp_cost::infinite_cost_p ()
@@ -243,6 +243,7 @@ operator+ (comp_cost cost1, comp_cost cost2)
if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
return infinite_cost;
+ gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
cost1.cost += cost2.cost;
cost1.complexity += cost2.complexity;
@@ -256,6 +257,7 @@ operator- (comp_cost cost1, comp_cost cost2)
return infinite_cost;
gcc_assert (!cost2.infinite_cost_p ());
+ gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
cost1.cost -= cost2.cost;
cost1.complexity -= cost2.complexity;
@@ -276,6 +278,7 @@ comp_cost::operator+= (HOST_WIDE_INT c)
if (infinite_cost_p ())
return *this;
+ gcc_assert (this->cost + c < infinite_cost.cost);
this->cost += c;
return *this;
@@ -287,6 +290,7 @@ comp_cost::operator-= (HOST_WIDE_INT c)
if (infinite_cost_p ())
return *this;
+ gcc_assert (this->cost - c < infinite_cost.cost);
this->cost -= c;
return *this;
@@ -295,6 +299,7 @@ comp_cost::operator-= (HOST_WIDE_INT c)
comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
+ gcc_assert (c != 0);
if (infinite_cost_p ())
return *this;
@@ -309,6 +314,7 @@ comp_cost::operator*= (HOST_WIDE_INT c)
if (infinite_cost_p ())
return *this;
+ gcc_assert (this->cost * c < infinite_cost.cost);
this->cost *= c;
return *this;
@@ -638,7 +644,7 @@ struct iv_ca
comp_cost cand_use_cost;
/* Total cost of candidates. */
- unsigned cand_cost;
+ int64_t cand_cost;
/* Number of times each invariant variable is used. */
unsigned *n_inv_var_uses;
@@ -4025,16 +4031,16 @@ get_computation_at (struct loop *loop, gimple *at,
if we're optimizing for speed, amortize it over the per-iteration cost.
If ROUND_UP_P is true, the result is round up rather than to zero when
optimizing for speed. */
-static unsigned
-adjust_setup_cost (struct ivopts_data *data, unsigned cost,
+static int64_t
+adjust_setup_cost (struct ivopts_data *data, int64_t cost,
bool round_up_p = false)
{
if (cost == INFTY)
return cost;
else if (optimize_loop_for_speed_p (data->current_loop))
{
- HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
- return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
+ int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
+ return (cost + (round_up_p ? niters - 1 : 0)) / niters;
}
else
return cost;
@@ -4305,7 +4311,7 @@ enum ainc_type
struct ainc_cost_data
{
- unsigned costs[AINC_NONE];
+ int64_t costs[AINC_NONE];
};
static comp_cost
@@ -4566,12 +4572,12 @@ get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
if (scale_factor == 1)
return cost;
- int scaled_cost
+ int64_t scaled_cost
= cost.scratch + (cost.cost - cost.scratch) * scale_factor;
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Scaling cost based on bb prob "
- "by %2.2f: %d (scratch: %d) -> %d\n",
+ fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
+ "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
cost.cost = scaled_cost;
@@ -5539,7 +5545,7 @@ determine_group_iv_costs (struct ivopts_data *data)
|| group->cost_map[j].cost.infinite_cost_p ())
continue;
- fprintf (dump_file, " %d\t%d\t%d\t",
+ fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
group->cost_map[j].cand->id,
group->cost_map[j].cost.cost,
group->cost_map[j].cost.complexity);
@@ -5569,7 +5575,7 @@ static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
comp_cost cost_base;
- unsigned cost, cost_step;
+ int64_t cost, cost_step;
tree base;
gcc_assert (cand->iv != NULL);
@@ -6139,11 +6145,11 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
unsigned i;
comp_cost cost = iv_ca_cost (ivs);
- fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
+ fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
cost.complexity);
- fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
- ivs->cand_cost, ivs->cand_use_cost.cost,
- ivs->cand_use_cost.complexity);
+ fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
+ "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
+ ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
bitmap_print (file, ivs->cands, " candidates: ","\n");
for (i = 0; i < ivs->upto; i++)
@@ -6151,9 +6157,9 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
struct iv_group *group = data->vgroups[i];
struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
if (cp)
- fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
- group->id, cp->cand->id, cp->cost.cost,
- cp->cost.complexity);
+ fprintf (file, " group:%d --> iv_cand:%d, cost=("
+ "%" PRId64 ",%d)\n", group->id, cp->cand->id,
+ cp->cost.cost, cp->cost.complexity);
else
fprintf (file, " group:%d --> ??\n", group->id);
}
@@ -6751,9 +6757,9 @@ find_optimal_iv_set (struct ivopts_data *data)
if (dump_file && (dump_flags & TDF_DETAILS))
{
- fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
+ fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
origcost.cost, origcost.complexity);
- fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
+ fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
cost.cost, cost.complexity);
}
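
For illustration only: why the comp_cost fields are widened to int64_t above. With the new INFTY of 1000000000, scaling a near-infinite cost by a basic-block factor (bounded by 20 in ivopts-1.patch) already leaves 32-bit range, so the added gcc_asserts can only catch the blow-up if the arithmetic is done in 64 bits. Hypothetical numbers:

/* Standalone sketch, not from a real compilation.  */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int64_t cost = 999999999;        /* just below the new INFTY */
  int64_t scaled = cost * 20;      /* 19999999980, far beyond INT32_MAX */
  printf ("scaled = %lld, INT32_MAX = %d\n", (long long) scaled, INT32_MAX);
  return 0;
}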

loop-finite-bugfix.patch (new file, 160 lines)
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 6b6c754ad86..58ba0948e79 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -989,6 +989,10 @@ c_common_post_options (const char **pfilename)
if (!global_options_set.x_flag_new_ttp)
flag_new_ttp = (cxx_dialect >= cxx17);
+ /* C++11 guarantees forward progress. */
+ if (!global_options_set.x_flag_finite_loops)
+ flag_finite_loops = (optimize >= 2 && cxx_dialect >= cxx11);
+
if (cxx_dialect >= cxx11)
{
/* If we're allowing C++0x constructs, don't warn about C++98
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 1c49a8b8c2d..18b404e292f 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -226,6 +226,10 @@ public:
/* True if the loop is part of an oacc kernels region. */
unsigned in_oacc_kernels_region : 1;
+ /* True if the loop is known to be finite. This is a localized
+ flag_finite_loops or similar pragmas state. */
+ unsigned finite_p : 1;
+
/* The number of times to unroll the loop. 0 means no information given,
just do what we always do. A value of 1 means do not unroll the loop.
A value of USHRT_MAX means unroll with no specific unrolling factor.
diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index c9375565f62..50c7267ec49 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -1023,6 +1023,7 @@ copy_loop_info (class loop *loop, class loop *target)
target->dont_vectorize = loop->dont_vectorize;
target->force_vectorize = loop->force_vectorize;
target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
+ target->finite_p = loop->finite_p;
target->unroll = loop->unroll;
target->owned_clique = loop->owned_clique;
}
diff --git a/gcc/common.opt b/gcc/common.opt
index 4368910cb54..bb2ea4c905d 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1490,7 +1490,7 @@ Common Report Var(flag_finite_math_only) Optimization SetByCombined
Assume no NaNs or infinities are generated.
ffinite-loops
-Common Report Var(flag_finite_loops) Optimization
+Common Report Var(flag_finite_loops) Optimization Init(0)
Assume that loops with an exit will terminate and not loop indefinitely.
ffixed-
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e9e1683e9a8..e3e652ff6c1 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10432,7 +10432,8 @@ Assume that a loop with an exit will eventually take the exit and not loop
indefinitely. This allows the compiler to remove loops that otherwise have
no side-effects, not considering eventual endless looping as such.
-This option is enabled by default at @option{-O2}.
+This option is enabled by default at @option{-O2} for C++ with -std=c++11
+or higher.
@item -ftree-dominator-opts
@opindex ftree-dominator-opts
diff --git a/gcc/lto-streamer-in.c b/gcc/lto-streamer-in.c
index 9566e5ee102..244f5b8aa5c 100644
--- a/gcc/lto-streamer-in.c
+++ b/gcc/lto-streamer-in.c
@@ -821,6 +821,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in,
loop->owned_clique = streamer_read_hwi (ib);
loop->dont_vectorize = streamer_read_hwi (ib);
loop->force_vectorize = streamer_read_hwi (ib);
+ loop->finite_p = streamer_read_hwi (ib);
loop->simduid = stream_read_tree (ib, data_in);
place_new_loop (fn, loop);
diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c
index a219c1d0dd1..52ef94718db 100644
--- a/gcc/lto-streamer-out.c
+++ b/gcc/lto-streamer-out.c
@@ -1950,6 +1950,7 @@ output_cfg (struct output_block *ob, struct function *fn)
streamer_write_hwi (ob, loop->owned_clique);
streamer_write_hwi (ob, loop->dont_vectorize);
streamer_write_hwi (ob, loop->force_vectorize);
+ streamer_write_hwi (ob, loop->finite_p);
stream_write_tree (ob, loop->simduid, true);
}
diff --git a/gcc/opts.c b/gcc/opts.c
index 5dc7d65dedd..d4df8627bf7 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -478,7 +478,6 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fexpensive_optimizations, NULL, 1 },
- { OPT_LEVELS_2_PLUS, OPT_ffinite_loops, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fgcse, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_findirect_inlining, NULL, 1 },
diff --git a/gcc/testsuite/gcc.dg/torture/pr94392.c b/gcc/testsuite/gcc.dg/torture/pr94392.c
new file mode 100644
index 00000000000..373f18ce983
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr94392.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-skip-if "finite loops" { *-*-* } { "-ffinite-loops" } } */
+/* { dg-skip-if "LTO optimizes the test" { *-*-* } { "-flto" } } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+int a, b;
+
+int
+main()
+{
+ while (1)
+ {
+ /* Try really hard. */
+ if (a != b)
+ return 1;
+ }
+ return 0;
+}
+
+/* ISO C does not guarantee forward progress like C++ does so we
+ cannot assume the loop is finite and optimize it to return 1. */
+/* { dg-final { scan-tree-dump "if" "optimized" } } */
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index f7b817d94e6..e99fb9ff5d1 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -324,6 +324,9 @@ replace_loop_annotate (void)
/* Then look into the latch, if any. */
if (loop->latch)
replace_loop_annotate_in_block (loop->latch, loop);
+
+ /* Push the global flag_finite_loops state down to individual loops. */
+ loop->finite_p = flag_finite_loops;
}
/* Remove IFN_ANNOTATE. Safeguard for the case loop->latch == NULL. */
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 6e6df0bfdb8..7d61ef080eb 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -2834,7 +2834,7 @@ finite_loop_p (class loop *loop)
return true;
}
- if (flag_finite_loops)
+ if (loop->finite_p)
{
unsigned i;
vec<edge> exits = get_loop_exit_edges (loop);

loop-finite.patch (new file, 367 lines)
diff --git a/gcc/common.opt b/gcc/common.opt
index e1404165feb..a1544d06824 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1437,6 +1437,10 @@ ffinite-math-only
Common Report Var(flag_finite_math_only) Optimization SetByCombined
Assume no NaNs or infinities are generated.
+ffinite-loops
+Common Report Var(flag_finite_loops) Optimization
+Assume that loops with an exit will terminate and not loop indefinitely.
+
ffixed-
Common Joined RejectNegative Var(common_deferred_options) Defer
-ffixed-<register> Mark <register> as being unavailable to the compiler.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 090d606b3ba..bf9da0f0a6e 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -413,6 +413,7 @@ Objective-C and Objective-C++ Dialects}.
-fdevirtualize-at-ltrans -fdse @gol
-fearly-inlining -fipa-sra -fexpensive-optimizations -ffat-lto-objects @gol
-ffast-math -ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol
+-ffinite-loops @gol
-fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol
-fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol
-fgcse-sm -fhoist-adjacent-loads -fif-conversion @gol
@@ -8303,6 +8304,7 @@ also turns on the following optimization flags:
-fdelete-null-pointer-checks @gol
-fdevirtualize -fdevirtualize-speculatively @gol
-fexpensive-optimizations @gol
+-ffinite-loops @gol
-fgcse -fgcse-lm @gol
-fhoist-adjacent-loads @gol
-finline-small-functions @gol
@@ -9524,6 +9526,15 @@ that may set @code{errno} but are otherwise free of side effects. This flag is
enabled by default at @option{-O2} and higher if @option{-Os} is not also
specified.
+@item -ffinite-loops
+@opindex ffinite-loops
+@opindex fno-finite-loops
+Assume that a loop with an exit will eventually take the exit and not loop
+indefinitely. This allows the compiler to remove loops that otherwise have
+no side-effects, not considering eventual endless looping as such.
+
+This option is enabled by default at @option{-O2}.
+
@item -ftree-dominator-opts
@opindex ftree-dominator-opts
Perform a variety of simple scalar cleanups (constant/copy
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 97ae47b3135..c8a281c6d28 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -300,7 +300,7 @@ oacc_xform_loop (gcall *call)
tree chunk_size = NULL_TREE;
unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
tree lhs = gimple_call_lhs (call);
- tree type = TREE_TYPE (lhs);
+ tree type = NULL_TREE;
tree diff_type = TREE_TYPE (range);
tree r = NULL_TREE;
gimple_seq seq = NULL;
@@ -308,6 +308,15 @@ oacc_xform_loop (gcall *call)
unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
+ /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
+ if (!lhs)
+ {
+ gsi_replace_with_seq (&gsi, seq, true);
+ return;
+ }
+
+ type = TREE_TYPE (lhs);
+
#ifdef ACCEL_COMPILER
chunk_size = gimple_call_arg (call, 4);
if (integer_minus_onep (chunk_size) /* Force static allocation. */
diff --git a/gcc/opts.c b/gcc/opts.c
index 64f94ac8ffd..b38bfb15a56 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -494,6 +494,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fexpensive_optimizations, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_ffinite_loops, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fgcse, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_findirect_inlining, NULL, 1 },
diff --git a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
new file mode 100644
index 00000000000..6b1e879e6a9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce2 -ffinite-loops" } */
+
+#include <string>
+#include <vector>
+#include <list>
+#include <set>
+#include <map>
+
+using namespace std;
+
+int foo (vector<string> &v, list<string> &l, set<string> &s, map<int, string> &m)
+{
+ for (vector<string>::iterator it = v.begin (); it != v.end (); ++it)
+ it->length();
+
+ for (list<string>::iterator it = l.begin (); it != l.end (); ++it)
+ it->length();
+
+ for (map<int, string>::iterator it = m.begin (); it != m.end (); ++it)
+ it->first + it->second.length();
+
+ for (set<string>::iterator it0 = s.begin (); it0 != s.end(); ++it0)
+ for (vector<string>::reverse_iterator it1 = v.rbegin(); it1 != v.rend(); ++it1)
+ {
+ it0->length();
+ it1->length();
+ }
+
+ return 0;
+}
+/* { dg-final { scan-tree-dump-not "if" "cddce2"} } */
+
diff --git a/gcc/testsuite/gcc.dg/const-1.c b/gcc/testsuite/gcc.dg/const-1.c
index a5b2b167728..2e95bd8e2ea 100644
--- a/gcc/testsuite/gcc.dg/const-1.c
+++ b/gcc/testsuite/gcc.dg/const-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target nonpic } } */
-/* { dg-options "-O2 -Wsuggest-attribute=const" } */
+/* { dg-options "-O2 -Wsuggest-attribute=const -fno-finite-loops" } */
extern int extern_const(int a) __attribute__ ((const));
diff --git a/gcc/testsuite/gcc.dg/graphite/graphite.exp b/gcc/testsuite/gcc.dg/graphite/graphite.exp
index ea6144607e2..523a955e82d 100644
--- a/gcc/testsuite/gcc.dg/graphite/graphite.exp
+++ b/gcc/testsuite/gcc.dg/graphite/graphite.exp
@@ -56,7 +56,7 @@ set vect_files [lsort [glob -nocomplain $srcdir/$subdir/vect-*.c ] ]
# Tests to be compiled.
set dg-do-what-default compile
-dg-runtest $scop_files "" "-O2 -fgraphite -fdump-tree-graphite-all"
+dg-runtest $scop_files "" "-O2 -fgraphite -fdump-tree-graphite-all -fno-finite-loops"
dg-runtest $id_files "" "-O2 -fgraphite-identity -ffast-math -fdump-tree-graphite-details"
# Tests to be run.
diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-1.c b/gcc/testsuite/gcc.dg/loop-unswitch-1.c
index f6fc41d6bcc..de2fb2c0e4b 100644
--- a/gcc/testsuite/gcc.dg/loop-unswitch-1.c
+++ b/gcc/testsuite/gcc.dg/loop-unswitch-1.c
@@ -1,6 +1,6 @@
/* For PR rtl-optimization/27735 */
/* { dg-do compile } */
-/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details" } */
+/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details -fno-finite-loops" } */
void set_color(void);
void xml_colorize_line(unsigned int *p, int state)
diff --git a/gcc/testsuite/gcc.dg/predict-9.c b/gcc/testsuite/gcc.dg/predict-9.c
index 7e5ba085ece..f491c511bd9 100644
--- a/gcc/testsuite/gcc.dg/predict-9.c
+++ b/gcc/testsuite/gcc.dg/predict-9.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-profile_estimate" } */
+/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-profile_estimate -fno-finite-loops" } */
extern int global;
extern int global2;
diff --git a/gcc/testsuite/gcc.dg/pure-2.c b/gcc/testsuite/gcc.dg/pure-2.c
index fe6e2bce695..318cfd18630 100644
--- a/gcc/testsuite/gcc.dg/pure-2.c
+++ b/gcc/testsuite/gcc.dg/pure-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Wsuggest-attribute=pure" } */
+/* { dg-options "-O2 -Wsuggest-attribute=pure -fno-finite-loops" } */
/* { dg-add-options bind_pic_locally } */
extern int extern_const(int a) __attribute__ ((pure));
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c
index d289e5d0f55..a9bdf26931a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-cddce2" } */
+/* { dg-options "-O2 -fdump-tree-cddce2 -fno-finite-loops" } */
struct rtx_def;
typedef struct rtx_def *rtx;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c b/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c
new file mode 100644
index 00000000000..18c1ddb819e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce1 -ffinite-loops" } */
+
+typedef struct list {
+ char pad[15];
+ struct list *next;
+} list;
+
+int data;
+
+list *head, *tail;
+
+int __attribute__((pure)) pfn (int);
+
+int foo (unsigned u, int s)
+{
+ unsigned i;
+ list *p;
+ int j;
+
+ for (i = 0; i < u; i += 2)
+ ;
+
+ for (p = head; p; p = p->next)
+ ;
+
+ for (j = data; j & s; j = pfn (j + 3))
+ ;
+
+ for (p = head; p != tail; p = p->next)
+ for (j = data + 1; j > s; j = pfn (j + 2))
+ ;
+
+ return 0;
+}
+/* { dg-final { scan-tree-dump-not "if" "cddce1"} } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c
index a29c9fb2501..3d05ad2d073 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized -fno-finite-loops" } */
/* { dg-require-effective-target int32plus } */
int bar (void);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
index e9b4f2628d5..187c08407d5 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w" } */
+/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w -fno-finite-loops" } */
struct __sFILE
{
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c
index d829b04d177..67526762f2c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details" } */
+/* { dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details -fno-finite-loops" } */
/* { dg-final { scan-tree-dump "FSM" "thread2" } } */
/* { dg-final { scan-tree-dump "FSM" "thread3" } } */
/* { dg-final { scan-tree-dump "FSM" "thread4" { xfail *-*-* } } } */
diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
index 2478219d873..a38899edd6c 100644
--- a/gcc/tree-ssa-dce.c
+++ b/gcc/tree-ssa-dce.c
@@ -245,6 +245,17 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive)
mark_stmt_necessary (stmt, true);
return;
}
+ /* IFN_GOACC_LOOP calls are necessary in that they represent the
+ parameters (i.e. step, bound) of a lowered OpenACC partitioned
+ loop. Such a partitioned loop has a loop exit and is assumed to
+ be finite, so it might not survive aggressive loop removal;
+ therefore we need to explicitly mark these calls.
+ (An example is libgomp.oacc-c-c++-common/pr84955.c) */
+ if (gimple_call_internal_p (stmt, IFN_GOACC_LOOP))
+ {
+ mark_stmt_necessary (stmt, true);
+ return;
+ }
if (!gimple_call_lhs (stmt))
return;
break;
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 84e6e313c85..f51385900ed 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -2830,6 +2830,27 @@ finite_loop_p (struct loop *loop)
loop->num);
return true;
}
+
+ if (flag_finite_loops)
+ {
+ unsigned i;
+ vec<edge> exits = get_loop_exit_edges (loop);
+ edge ex;
+
+ /* If the loop has a normal exit, we can assume it will terminate. */
+ FOR_EACH_VEC_ELT (exits, i, ex)
+ if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_FAKE)))
+ {
+ exits.release ();
+ if (dump_file)
+ fprintf (dump_file, "Assume loop %i to be finite: it has an exit "
+ "and -ffinite-loops is on.\n", loop->num);
+ return true;
+ }
+
+ exits.release ();
+ }
+
return false;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c
new file mode 100644
index 00000000000..44767cd27c3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cddce2 -ffinite-loops" } */
+
+int
+f1 (void)
+{
+ int i, j;
+
+#pragma acc parallel loop tile(2,3)
+ for (i = 1; i < 10; i++)
+ for (j = 1; j < 10; j++)
+ for (;;)
+ ;
+
+ return i + j;
+}
+
+int
+f2 (void)
+{
+ int i, j, k;
+
+#pragma acc parallel loop tile(2,3)
+ for (i = 1; i < 10; i++)
+ for (j = 1; j < 10; j++)
+ for (k = 1; k < 10; k++)
+ ;
+
+ return i + j;
+}
+/* { dg-final { scan-tree-dump-not "if" "cddce2"} } */

loop-split.patch (new file, 1276 lines; diff too large to display)

(deleted patch file, 13 lines)
diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md 2019-05-30 16:12:52.950606040 +0800
+++ b/gcc/config/aarch64/aarch64.md 2019-05-30 16:15:56.606599549 +0800
@@ -3110,7 +3110,8 @@
(define_insn_and_split "*compare_cstore<mode>_insn"
[(set (match_operand:GPI 0 "register_operand" "=r")
(EQL:GPI (match_operand:GPI 1 "register_operand" "r")
- (match_operand:GPI 2 "aarch64_imm24" "n")))]
+ (match_operand:GPI 2 "aarch64_imm24" "n")))
+ (clobber (reg:CC CC_REGNUM))]
"!aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)
&& !aarch64_plus_operand (operands[2], <MODE>mode)
&& !reload_completed"

(deleted patch file, 108 lines)
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2018-09-19 17:11:42.583520820 +0800
+++ b/gcc/config/aarch64/aarch64.c 2018-09-19 17:10:22.715520820 +0800
@@ -1260,29 +1260,32 @@ aarch64_is_long_call_p (rtx sym)
void
aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
- if (!TARGET_LONG_CALLS)
+ if (flag_fentry)
{
- fprintf (file, "\tmov\tx9, x30\n");
- fprintf (file, "\tbl\t__fentry__\n");
- fprintf (file, "\tmov\tx30, x9\n");
- }
- else
- {
- if (flag_pic)
+ if (!TARGET_LONG_CALLS)
{
fprintf (file, "\tmov\tx9, x30\n");
- fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
- fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
- fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tbl\t__fentry__\n");
fprintf (file, "\tmov\tx30, x9\n");
}
else
{
- fprintf (file, "\tmov\tx9, x30\n");
- fprintf (file, "\tadrp\tx10, __fentry__\n");
- fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
- fprintf (file, "\tblr\tx10\n");
- fprintf (file, "\tmov\tx30, x9\n");
+ if (flag_pic)
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ else
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, __fentry__\n");
+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
}
}
}
@@ -12020,6 +12023,15 @@ aarch64_emit_unlikely_jump (rtx insn)
add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
}
+/* Return true if profiling code should be emitted before the
+ prologue, and false otherwise.
+ Note: For x86 with "hotfix" it is sorried. */
+static bool
+aarch64_profile_before_prologue (void)
+{
+ return flag_fentry != 0;
+}
+
/* Expand a compare and swap pattern. */
void
@@ -14952,6 +14964,9 @@ aarch64_run_selftests (void)
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+#undef TARGET_PROFILE_BEFORE_PROLOGUE
+#define TARGET_PROFILE_BEFORE_PROLOGUE aarch64_profile_before_prologue
+
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
hook_bool_const_tree_hwi_hwi_const_tree_true
diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
--- a/gcc/config/aarch64/aarch64.h 2018-09-19 17:11:42.587520820 +0800
+++ b/gcc/config/aarch64/aarch64.h 2018-09-19 17:10:22.715520820 +0800
@@ -850,9 +850,12 @@ typedef struct
{ \
rtx fun, lr; \
const rtx_insn* tmp = get_insns (); \
- lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
- fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
- emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+ if (!flag_fentry) \
+ { \
+ lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
+ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
+ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+ } \
if (TARGET_LONG_CALLS) \
{ \
emit_insn (gen_blockage ()); \
diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
--- a/gcc/config/aarch64/aarch64.opt 2018-09-19 17:11:42.587520820 +0800
+++ b/gcc/config/aarch64/aarch64.opt 2018-09-19 17:10:22.715520820 +0800
@@ -192,3 +192,7 @@ single precision and to 32 bits for doub
mverbose-cost-dump
Common Undocumented Var(flag_aarch64_verbose_cost)
Enables verbose cost model dumping in the debug dump files.
+
+mfentry
+Target Report Var(flag_fentry) Init(0)
+Emit profiling counter call at function entry immediately before prologue.
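
A hedged usage sketch, not part of the patch: the option gives aarch64 an x86-style -mfentry. Combined with -pg, every instrumented function is expected to start with the sequence printed by aarch64_function_profiler above (mov x9, x30; bl __fentry__; mov x30, x9), keeping the caller's return address live in x9 across the call.

/* foo.c -- hypothetical example; compile with something like
     gcc -O2 -pg -mfentry foo.c
   and inspect the generated assembly for the __fentry__ call at
   the start of foo.  */
int
foo (int x)
{
  return x + 1;
}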

(deleted patch file, 362 lines)
diff -N -urp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
--- a/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:43:27.862079389 +0800
+++ b/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:44:34.930081154 +0800
@@ -353,6 +353,10 @@ bool aarch64_use_return_insn_p (void);
const char *aarch64_mangle_builtin_type (const_tree);
const char *aarch64_output_casesi (rtx *);
+extern void aarch64_pr_long_calls (struct cpp_reader *);
+extern void aarch64_pr_no_long_calls (struct cpp_reader *);
+extern void aarch64_pr_long_calls_off (struct cpp_reader *);
+
enum aarch64_symbol_type aarch64_classify_symbol (rtx, rtx);
enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
enum reg_class aarch64_regno_regclass (unsigned);
@@ -384,6 +388,7 @@ void aarch64_expand_epilogue (bool);
void aarch64_expand_mov_immediate (rtx, rtx);
void aarch64_expand_prologue (void);
void aarch64_expand_vector_init (rtx, rtx);
+void aarch64_function_profiler (FILE *, int);
void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
const_tree, unsigned);
void aarch64_init_expanders (void);
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2018-11-06 10:43:27.870079389 +0800
+++ b/gcc/config/aarch64/aarch64.c 2018-11-06 10:44:34.934081154 +0800
@@ -70,6 +70,9 @@
/* This file should be included last. */
#include "target-def.h"
+static void aarch64_set_default_type_attributes (tree);
+static int aarch64_comp_type_attributes (const_tree, const_tree);
+
/* Defined for convenience. */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
@@ -1092,12 +1095,163 @@ aarch64_hard_regno_caller_save_mode (uns
return choose_hard_reg_mode (regno, nregs, false);
}
+/* Table of machine attributes. */
+static const struct attribute_spec aarch64_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
+ affects_type_identity }. */
+ /* Function calls made to this symbol must be done indirectly, because
+ it may lie outside of the 26 bit addressing range of a normal function
+ call. */
+ { "long_call", 0, 0, false, true, true, NULL, false },
+ /* Whereas these functions are always known to reside within the 26 bit
+ addressing range. */
+ { "short_call", 0, 0, false, true, true, NULL, false },
+ { NULL, 0, 0, false, false, false, NULL, false }
+};
+
+/* Encode the current state of the #pragma[no_]long_calls. */
+typedef enum
+{
+ OFF, /* No #pragma[no_]long_calls is in effect. */
+ LONG, /* #pragma long_calls is in effect. */
+ SHORT /* #pragma no_long_calls is in effect. */
+} aarch64_pragma_enum;
+
+static aarch64_pragma_enum aarch64_pragma_long_calls = OFF;
+
+void
+aarch64_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ aarch64_pragma_long_calls = LONG;
+}
+
+void
+aarch64_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ aarch64_pragma_long_calls = SHORT;
+}
+
+void
+aarch64_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ aarch64_pragma_long_calls = OFF;
+}
+
+/* Return 0 if the attributes for two types are incompatible, 1 if they
+ are compatible. */
+static int
+aarch64_comp_type_attributes (const_tree type1, const_tree type2)
+{
+ int l1, l2, s1, s2;
+
+ /* Check for mismatch of non-default calling convention. */
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
+ return 1;
+
+ /* Check for mismatched call attributes. */
+ l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
+ s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
+
+ /* Only bother to check if an attribute is defined. */
+ if (l1 | l2 | s1 | s2)
+ {
+ /* If one type has an attribute, the other
+ must have the same attribute. */
+ if ((l1 != l2) || (s1 != s2))
+ {
+ return 0;
+ }
+
+ /* Disallow mixed attributes. */
+ if ((l1 && s2) || (l2 && s1))
+ {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/* Assigns default attributes to newly defined type. This is used to
+ set short_call/long_call attributes for function types of
+ functions defined inside corresponding #pragma scopes. */
+static void
+aarch64_set_default_type_attributes (tree type)
+{
+ /* Add __attribute__ ((long_call)) to all functions, when
+ inside #pragma long_calls or __attribute__ ((short_call)),
+ when inside #pragma no_long_calls. */
+ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
+ {
+ tree type_attr_list = NULL;
+ tree attr_name = NULL;
+ type_attr_list = TYPE_ATTRIBUTES (type);
+
+ if (aarch64_pragma_long_calls == LONG)
+ {
+ attr_name = get_identifier ("long_call");
+ }
+ else if (aarch64_pragma_long_calls == SHORT)
+ {
+ attr_name = get_identifier ("short_call");
+ }
+ else
+ {
+ return;
+ }
+
+ type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
+ TYPE_ATTRIBUTES (type) = type_attr_list;
+ }
+}
+
+/* Return true if DECL is known to be linked into section SECTION. */
+static bool
+aarch64_function_in_section_p (tree decl, section *section)
+{
+ /* We can only be certain about the prevailing symbol definition. */
+ if (!decl_binds_to_current_def_p (decl))
+ return false;
+
+ /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
+ if (!DECL_SECTION_NAME (decl))
+ {
+ /* Make sure that we will not create a unique section for DECL. */
+ if (flag_function_sections || DECL_COMDAT_GROUP (decl))
+ return false;
+ }
+
+ return function_section (decl) == section;
+}
+
/* Return true if calls to DECL should be treated as
long-calls (ie called via a register). */
static bool
-aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
+aarch64_decl_is_long_call_p (tree decl)
{
- return false;
+ tree attrs = NULL;
+
+ if (!decl)
+ return TARGET_LONG_CALLS;
+
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("short_call", attrs))
+ return false;
+
+ /* For "f", be conservative, and only cater for cases in which the
+ whole of the current function is placed in the same section. */
+ if (!flag_reorder_blocks_and_partition
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && aarch64_function_in_section_p (decl, current_function_section ()))
+ return false;
+
+ if (lookup_attribute ("long_call", attrs))
+ return true;
+
+ return TARGET_LONG_CALLS;
}
/* Return true if calls to symbol-ref SYM should be treated as
@@ -1108,6 +1257,36 @@ aarch64_is_long_call_p (rtx sym)
return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
+void
+aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
+{
+ if (!TARGET_LONG_CALLS)
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tbl\t__fentry__\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ else
+ {
+ if (flag_pic)
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ else
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, __fentry__\n");
+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ }
+}
+
/* Return true if calls to symbol-ref SYM should not go through
plt stubs. */
@@ -15099,6 +15278,15 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES aarch64_set_default_type_attributes
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
+
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
--- a/gcc/config/aarch64/aarch64.h 2018-11-06 10:43:27.870079389 +0800
+++ b/gcc/config/aarch64/aarch64.h 2018-11-06 10:49:29.574088911 +0800
@@ -28,7 +28,6 @@
-#define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas ()
/* Target machine storage layout. */
@@ -659,6 +658,14 @@ typedef struct
} CUMULATIVE_ARGS;
#endif
+/* Register the aarch64 long_calls pragmas together with the other target pragmas. */
+#define REGISTER_TARGET_PRAGMAS() do { \
+ c_register_pragma (0, "long_calls", aarch64_pr_long_calls); \
+ c_register_pragma (0, "no_long_calls", aarch64_pr_no_long_calls); \
+ c_register_pragma (0, "long_calls_off", aarch64_pr_long_calls_off); \
+ aarch64_register_pragmas (); \
+} while (0)
+
#define FUNCTION_ARG_PADDING(MODE, TYPE) \
(aarch64_pad_arg_upward (MODE, TYPE) ? upward : downward)
@@ -842,13 +849,20 @@ typedef struct
#define PROFILE_HOOK(LABEL) \
{ \
rtx fun, lr; \
+ const rtx_insn* tmp = get_insns (); \
lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+ if (TARGET_LONG_CALLS) \
+ { \
+ emit_insn (gen_blockage ()); \
+ emit_insn_after (gen_blockage (), NEXT_INSN (tmp)); \
+ } \
}
/* All the work done in PROFILE_HOOK, but still required. */
-#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0)
+#define FUNCTION_PROFILER(STREAM, LABELNO) \
+ aarch64_function_profiler (STREAM, LABELNO)
/* For some reason, the Linux headers think they know how to define
these macros. They don't!!! */
diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md 2018-11-06 10:43:27.874079389 +0800
+++ b/gcc/config/aarch64/aarch64.md 2018-11-06 10:44:34.934081154 +0800
@@ -850,9 +850,10 @@
{
rtx pat;
rtx callee = XEXP (operands[0], 0);
- if (!REG_P (callee)
- && ((GET_CODE (callee) != SYMBOL_REF)
- || aarch64_is_noplt_call_p (callee)))
+
+ if (GET_CODE (callee) == SYMBOL_REF
+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee))
+ : !REG_P (callee))
XEXP (operands[0], 0) = force_reg (Pmode, callee);
if (operands[2] == NULL_RTX)
@@ -881,9 +882,10 @@
{
rtx pat;
rtx callee = XEXP (operands[1], 0);
- if (!REG_P (callee)
- && ((GET_CODE (callee) != SYMBOL_REF)
- || aarch64_is_noplt_call_p (callee)))
+
+ if (GET_CODE (callee) == SYMBOL_REF
+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee))
+ : !REG_P (callee))
XEXP (operands[1], 0) = force_reg (Pmode, callee);
if (operands[3] == NULL_RTX)
diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
--- a/gcc/config/aarch64/aarch64.opt 2018-11-06 10:43:27.874079389 +0800
+++ b/gcc/config/aarch64/aarch64.opt 2018-11-06 10:44:34.934081154 +0800
@@ -80,6 +80,10 @@ mlittle-endian
Target Report RejectNegative InverseMask(BIG_END)
Assume target CPU is configured as little endian.
+mlong-calls
+Target Report Mask(LONG_CALLS)
+Generate call insns as indirect calls, if necessary.
+
mcmodel=
Target RejectNegative Joined Enum(cmodel) Var(aarch64_cmodel_var) Init(AARCH64_CMODEL_SMALL) Save
Specify the code model.
diff -N -urp a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
--- a/gcc/config/aarch64/predicates.md 2018-11-06 10:43:27.878079389 +0800
+++ b/gcc/config/aarch64/predicates.md 2018-11-06 10:44:34.938081154 +0800
@@ -27,8 +27,9 @@
)
(define_predicate "aarch64_call_insn_operand"
- (ior (match_code "symbol_ref")
- (match_operand 0 "register_operand")))
+ (ior (and (match_code "symbol_ref")
+ (match_test "!aarch64_is_long_call_p (op)"))
+ (match_operand 0 "register_operand")))
;; Return true if OP a (const_int 0) operand.
(define_predicate "const0_operand"

(new patch file, 321 lines)
diff -uprN a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11895,12 +11895,6 @@ of iterations of a loop known, it adds a bonus of
@option{ipa-cp-loop-hint-bonus} to the profitability score of
the candidate.
-@item ipa-cp-array-index-hint-bonus
-When IPA-CP determines that a cloning candidate would make the index of
-an array access known, it adds a bonus of
-@option{ipa-cp-array-index-hint-bonus} to the profitability
-score of the candidate.
-
@item ipa-max-aa-steps
During its analysis of function bodies, IPA-CP employs alias analysis
in order to track values pointed to by function parameters. In order
diff -uprN a/gcc/ipa-cp.c b/gcc/ipa-cp.c
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -2607,8 +2607,6 @@ hint_time_bonus (ipa_hints hints)
int result = 0;
if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
result += PARAM_VALUE (PARAM_IPA_CP_LOOP_HINT_BONUS);
- if (hints & INLINE_HINT_array_index)
- result += PARAM_VALUE (PARAM_IPA_CP_ARRAY_INDEX_HINT_BONUS);
return result;
}
diff -uprN a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -134,11 +134,6 @@ ipa_dump_hints (FILE *f, ipa_hints hints)
hints &= ~INLINE_HINT_declared_inline;
fprintf (f, " declared_inline");
}
- if (hints & INLINE_HINT_array_index)
- {
- hints &= ~INLINE_HINT_array_index;
- fprintf (f, " array_index");
- }
if (hints & INLINE_HINT_known_hot)
{
hints &= ~INLINE_HINT_known_hot;
@@ -549,8 +544,6 @@ ipa_fn_summary::~ipa_fn_summary ()
edge_predicate_pool.remove (loop_iterations);
if (loop_stride)
edge_predicate_pool.remove (loop_stride);
- if (array_index)
- edge_predicate_pool.remove (array_index);
vec_free (conds);
vec_free (size_time_table);
}
@@ -703,8 +696,6 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
possible_truths);
remap_hint_predicate_after_duplication (&info->loop_stride,
possible_truths);
- remap_hint_predicate_after_duplication (&info->array_index,
- possible_truths);
/* If inliner or someone after inliner will ever start producing
non-trivial clones, we will get trouble with lack of information
@@ -727,12 +718,6 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
info->loop_stride = NULL;
set_hint_predicate (&info->loop_stride, p);
}
- if (info->array_index)
- {
- predicate p = *info->array_index;
- info->array_index = NULL;
- set_hint_predicate (&info->array_index, p);
- }
}
if (!dst->global.inlined_to)
ipa_update_overall_fn_summary (dst);
@@ -894,11 +879,6 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
fprintf (f, " loop stride:");
s->loop_stride->dump (f, s->conds);
}
- if (s->array_index)
- {
- fprintf (f, " array index:");
- s->array_index->dump (f, s->conds);
- }
fprintf (f, " calls:\n");
dump_ipa_call_summary (f, 4, node, s);
fprintf (f, "\n");
@@ -1824,27 +1804,6 @@ predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}
-/* Return predicate specifying when array index in access OP becomes non-constant. */
-
-static predicate
-array_index_predicate (ipa_fn_summary *info,
- vec< predicate> nonconstant_names, tree op)
-{
- predicate p = false;
- while (handled_component_p (op))
- {
- if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
- {
- if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
- p = p.or_with (info->conds,
- nonconstant_names[SSA_NAME_VERSION
- (TREE_OPERAND (op, 1))]);
- }
- op = TREE_OPERAND (op, 0);
- }
- return p;
-}
-
/* For a typical usage of __builtin_expect (a<b, 1), we
may introduce an extra relation stmt:
With the builtin, we have
@@ -2001,7 +1960,6 @@ analyze_function_body (struct cgraph_node *node, bool early)
vec<predicate> nonconstant_names = vNULL;
int nblocks, n;
int *order;
- predicate array_index = true;
gimple *fix_builtin_expect_stmt;
gcc_assert (my_function && my_function->cfg);
@@ -2146,26 +2104,6 @@ analyze_function_body (struct cgraph_node *node, bool early)
this_time);
}
- if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
- {
- predicate this_array_index;
- this_array_index =
- array_index_predicate (info, nonconstant_names,
- gimple_assign_rhs1 (stmt));
- if (this_array_index != false)
- array_index &= this_array_index;
- }
- if (gimple_store_p (stmt) && nonconstant_names.exists ())
- {
- predicate this_array_index;
- this_array_index =
- array_index_predicate (info, nonconstant_names,
- gimple_get_lhs (stmt));
- if (this_array_index != false)
- array_index &= this_array_index;
- }
-
-
if (is_gimple_call (stmt)
&& !gimple_call_internal_p (stmt))
{
@@ -2273,14 +2211,40 @@ analyze_function_body (struct cgraph_node *node, bool early)
if (dump_file)
fprintf (dump_file, " fp_expression set\n");
}
+ }
- gcc_assert (time >= 0);
- gcc_assert (size >= 0);
+ /* Account cost of address calculations in the statements. */
+ for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
+ {
+ for (tree op = gimple_op (stmt, i);
+ op && handled_component_p (op);
+ op = TREE_OPERAND (op, 0))
+ if ((TREE_CODE (op) == ARRAY_REF
+ || TREE_CODE (op) == ARRAY_RANGE_REF)
+ && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
+ {
+ predicate p = bb_predicate;
+ if (fbi.info)
+ p = p & will_be_nonconstant_expr_predicate
+ (&fbi, info, TREE_OPERAND (op, 1),
+ nonconstant_names);
+ if (p != false)
+ {
+ time += freq;
+ size += 1;
+ if (dump_file)
+ fprintf (dump_file,
+ "\t\tAccounting address calculation.\n");
+ info->account_size_time (ipa_fn_summary::size_scale,
+ freq,
+ bb_predicate,
+ p);
+ }
+ }
}
+
}
}
- set_hint_predicate (&ipa_fn_summaries->get_create (node)->array_index,
- array_index);
free (order);
if (nonconstant_names.exists () && !early)
@@ -2783,9 +2747,6 @@ estimate_node_size_and_time (struct cgraph_node *node,
if (info->loop_stride
&& !info->loop_stride->evaluate (possible_truths))
hints |= INLINE_HINT_loop_stride;
- if (info->array_index
- && !info->array_index->evaluate (possible_truths))
- hints |= INLINE_HINT_array_index;
if (info->scc_no)
hints |= INLINE_HINT_in_scc;
if (DECL_DECLARED_INLINE_P (node->decl))
@@ -3106,9 +3067,6 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
remap_hint_predicate (info, callee_info,
&callee_info->loop_stride,
operand_map, offset_map, clause, &toplev_predicate);
- remap_hint_predicate (info, callee_info,
- &callee_info->array_index,
- operand_map, offset_map, clause, &toplev_predicate);
ipa_call_summary *s = ipa_call_summaries->get (edge);
inline_update_callee_summaries (edge->callee, s->loop_depth);
@@ -3366,9 +3324,6 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data,
p.stream_in (&ib);
if (info)
set_hint_predicate (&info->loop_stride, p);
- p.stream_in (&ib);
- if (info)
- set_hint_predicate (&info->array_index, p);
for (e = node->callees; e; e = e->next_callee)
read_ipa_call_summary (&ib, e, info != NULL);
for (e = node->indirect_calls; e; e = e->next_callee)
@@ -3517,10 +3472,6 @@ ipa_fn_summary_write (void)
info->loop_stride->stream_out (ob);
else
streamer_write_uhwi (ob, 0);
- if (info->array_index)
- info->array_index->stream_out (ob);
- else
- streamer_write_uhwi (ob, 0);
for (edge = cnode->callees; edge; edge = edge->next_callee)
write_ipa_call_summary (ob, edge);
for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
diff -uprN a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h
--- a/gcc/ipa-fnsummary.h
+++ b/gcc/ipa-fnsummary.h
@@ -48,11 +48,8 @@ enum ipa_hints_vals {
if functions are in different modules, inlining may not be so important.
Set by simple_edge_hints in ipa-inline-analysis.c. */
INLINE_HINT_cross_module = 64,
- /* If array indexes of loads/stores become known there may be room for
- further optimization. */
- INLINE_HINT_array_index = 128,
/* We know that the callee is hot by profile. */
- INLINE_HINT_known_hot = 256
+ INLINE_HINT_known_hot = 128
};
typedef int ipa_hints;
@@ -97,7 +94,7 @@ public:
fp_expressions (false), estimated_stack_size (false),
stack_frame_offset (false), time (0), size (0), conds (NULL),
size_time_table (NULL), loop_iterations (NULL), loop_stride (NULL),
- array_index (NULL), growth (0), scc_no (0)
+ growth (0), scc_no (0)
{
}
@@ -111,7 +108,7 @@ public:
stack_frame_offset (s.stack_frame_offset), time (s.time), size (s.size),
conds (s.conds), size_time_table (s.size_time_table),
loop_iterations (s.loop_iterations), loop_stride (s.loop_stride),
- array_index (s.array_index), growth (s.growth), scc_no (s.scc_no)
+ growth (s.growth), scc_no (s.scc_no)
{}
/* Default constructor. */
@@ -157,8 +154,6 @@ public:
/* Predicate on when some loop in the function becomes to have known
stride. */
predicate * GTY((skip)) loop_stride;
- /* Predicate on when some array indexes become constants. */
- predicate * GTY((skip)) array_index;
/* Estimated growth for inlining all copies of the function before start
of small functions inlining.
This value will get out of date as the callers are duplicated, but
diff -uprN a/gcc/ipa-inline.c b/gcc/ipa-inline.c
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -807,7 +807,6 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
|| (!(hints & (INLINE_HINT_indirect_call
| INLINE_HINT_known_hot
| INLINE_HINT_loop_iterations
- | INLINE_HINT_array_index
| INLINE_HINT_loop_stride))
&& !(big_speedup = big_speedup_p (e)))))
{
@@ -833,7 +832,6 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
&& !(hints & INLINE_HINT_known_hot)
&& growth >= ((hints & (INLINE_HINT_indirect_call
| INLINE_HINT_loop_iterations
- | INLINE_HINT_array_index
| INLINE_HINT_loop_stride))
? MAX (MAX_INLINE_INSNS_AUTO,
MAX_INLINE_INSNS_SINGLE)
@@ -1227,7 +1225,6 @@ edge_badness (struct cgraph_edge *edge, bool dump)
badness = badness.shift (badness > 0 ? 4 : -4);
if ((hints & (INLINE_HINT_indirect_call
| INLINE_HINT_loop_iterations
- | INLINE_HINT_array_index
| INLINE_HINT_loop_stride))
|| callee_info->growth <= 0)
badness = badness.shift (badness > 0 ? -2 : 2);
diff -uprN a/gcc/params.def b/gcc/params.def
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1109,12 +1109,6 @@ DEFPARAM (PARAM_IPA_CP_LOOP_HINT_BONUS,
"bounds or strides known.",
64, 0, 0)
-DEFPARAM (PARAM_IPA_CP_ARRAY_INDEX_HINT_BONUS,
- "ipa-cp-array-index-hint-bonus",
- "Compile-time bonus IPA-CP assigns to candidates which make an array "
- "index known.",
- 48, 0, 0)
-
DEFPARAM (PARAM_IPA_MAX_AA_STEPS,
"ipa-max-aa-steps",
"Maximum number of statements that will be visited by IPA formal "
@@ -1,33 +0,0 @@
diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
index 858bb21..de18e56 100644 (file)
--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
+++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
@@ -157,7 +157,6 @@ typedef struct user_fpregs elf_fpregset_t;
# include <sys/procfs.h>
#endif
#include <sys/user.h>
-#include <sys/ustat.h>
#include <linux/cyclades.h>
#include <linux/if_eql.h>
#include <linux/if_plip.h>
@@ -250,7 +249,19 @@ namespace __sanitizer {
#endif // SANITIZER_LINUX || SANITIZER_FREEBSD
#if SANITIZER_LINUX && !SANITIZER_ANDROID
- unsigned struct_ustat_sz = sizeof(struct ustat);
+ // Use pre-computed size of struct ustat to avoid <sys/ustat.h> which
+ // has been removed from glibc 2.28.
+#if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \
+ || defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \
+ || defined(__x86_64__)
+#define SIZEOF_STRUCT_USTAT 32
+#elif defined(__arm__) || defined(__i386__) || defined(__mips__) \
+ || defined(__powerpc__) || defined(__s390__)
+#define SIZEOF_STRUCT_USTAT 20
+#else
+#error Unknown size of struct ustat
+#endif
+ unsigned struct_ustat_sz = SIZEOF_STRUCT_USTAT;
unsigned struct_rlimit64_sz = sizeof(struct rlimit64);
unsigned struct_statvfs64_sz = sizeof(struct statvfs64);
#endif // SANITIZER_LINUX && !SANITIZER_ANDROID
@@ -0,0 +1,14 @@
diff -uprN a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -2078,8 +2078,8 @@ analyze_function_body (struct cgraph_node *node, bool early)
fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
- for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
- gsi_next (&bsi))
+ for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
+ !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
{
gimple *stmt = gsi_stmt (bsi);
int this_size = estimate_num_insns (stmt, &eni_size_weights);
@@ -1,11 +0,0 @@
--- a/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:05:43.841181211 +0800
+++ b/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:03:17.545185153 +0800
@@ -726,7 +726,7 @@ try_unroll_loop_completely (struct loop
edge_to_cancel = NULL;
}
- if (!n_unroll_found)
+ if (!n_unroll_found || SCEV_NOT_KNOWN == TREE_CODE (niter))
return false;
if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
@@ -1,25 +0,0 @@
diff -N -urp a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c
--- a/gcc/common/config/aarch64/aarch64-common.c 2019-07-02 09:28:49.798701181 +0800
+++ b/gcc/common/config/aarch64/aarch64-common.c 2019-07-02 09:30:15.436282799 +0800
@@ -51,6 +51,10 @@ static const struct default_options aarc
{ OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
/* Enable redundant extension instructions removal at -O2 and higher. */
{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
+#if (TARGET_DEFAULT_ASYNC_UNWIND_TABLES == 1)
+ { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
+ { OPT_LEVELS_ALL, OPT_funwind_tables, NULL, 1},
+#endif
{ OPT_LEVELS_NONE, 0, NULL, 0 }
};
diff -N -urp a/gcc/config.gcc b/gcc/config.gcc
--- a/gcc/config.gcc 2019-07-02 09:28:50.114701170 +0800
+++ b/gcc/config.gcc 2019-07-02 09:31:50.636196118 +0800
@@ -966,6 +966,7 @@ aarch64*-*-linux*)
tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h"
tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-linux.h"
tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-linux"
+ tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1"
case $target in
aarch64_be-*)
tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"