package init as gcc-7.3.0

eastb233 2020-01-19 17:29:40 +08:00
commit 523d5a7c93
25 changed files with 10216 additions and 0 deletions


@@ -0,0 +1,126 @@
From 900ccfa89dda3ab5f7e44a0dd4d1e9d108b5dc8b Mon Sep 17 00:00:00 2001
From: rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue, 26 Mar 2019 13:18:23 +0000
Subject: [PATCH] 2019-02-26 Richard Biener <rguenther@suse.de>
    Backport from mainline
    2019-02-12  Richard Biener  <rguenther@suse.de>

    PR tree-optimization/89253
    * tree-ssa-loop-split.c (tree_ssa_split_loops): Check we can
    duplicate the loop.
    * gfortran.dg/pr89253.f: New testcase.

    2019-02-08  Richard Biener  <rguenther@suse.de>

    PR middle-end/89223
    * tree-data-ref.c (initialize_matrix_A): Fail if constant
    doesn't fit in HWI.
    (analyze_subscript_affine_affine): Handle failure from
    initialize_matrix_A.
    * gcc.dg/torture/pr89223.c: New testcase.

    2019-01-28  Richard Biener  <rguenther@suse.de>

    PR tree-optimization/88739
    * tree-ssa-sccvn.c (vn_reference_lookup_3): Avoid generating
    BIT_FIELD_REFs of non-mode-precision integral operands.
    * gcc.c-torture/execute/pr88739.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-7-branch@269942 138bc75d-0d04-0410-961f-82ee72b054a4
---
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 2480f4e..a349e3e 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -2118,6 +2118,8 @@ initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
switch (TREE_CODE (chrec))
{
case POLYNOMIAL_CHREC:
+ if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
+ return chrec_dont_know;
A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
@@ -2499,7 +2501,7 @@ analyze_subscript_affine_affine (tree chrec_a,
tree *last_conflicts)
{
unsigned nb_vars_a, nb_vars_b, dim;
- HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
+ HOST_WIDE_INT gamma, gcd_alpha_beta;
lambda_matrix A, U, S;
struct obstack scratch_obstack;
@@ -2536,9 +2538,20 @@ analyze_subscript_affine_affine (tree chrec_a,
A = lambda_matrix_new (dim, 1, &scratch_obstack);
S = lambda_matrix_new (dim, 1, &scratch_obstack);
- init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
- init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
- gamma = init_b - init_a;
+ tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
+ tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
+ if (init_a == chrec_dont_know
+ || init_b == chrec_dont_know)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "affine-affine test failed: "
+ "representation issue.\n");
+ *overlaps_a = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
+ *last_conflicts = chrec_dont_know;
+ goto end_analyze_subs_aa;
+ }
+ gamma = int_cst_value (init_b) - int_cst_value (init_a);
/* Don't do all the hard work of solving the Diophantine equation
when we already know the solution: for example,
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index fd97213..3992597 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -649,7 +649,8 @@ tree_ssa_split_loops (void)
false, true)
&& niter.cmp != ERROR_MARK
/* We can't yet handle loops controlled by a != predicate. */
- && niter.cmp != NE_EXPR)
+ && niter.cmp != NE_EXPR
+ && can_duplicate_loop_p (loop))
{
if (split_loop (loop, &niter))
{
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index c93f1f2..a2e3ce2 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -2029,6 +2029,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
&offset2, &size2, &maxsize2,
&reverse);
+ tree def_rhs = gimple_assign_rhs1 (def_stmt);
if (!reverse
&& maxsize2 != -1
&& maxsize2 == size2
@@ -2041,11 +2042,14 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
according to endianness. */
&& (! INTEGRAL_TYPE_P (vr->type)
|| ref->size == TYPE_PRECISION (vr->type))
- && ref->size % BITS_PER_UNIT == 0)
+ && ref->size % BITS_PER_UNIT == 0
+ && (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
+ || (TYPE_PRECISION (TREE_TYPE (def_rhs))
+ == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (def_rhs))))))
{
code_helper rcode = BIT_FIELD_REF;
tree ops[3];
- ops[0] = SSA_VAL (gimple_assign_rhs1 (def_stmt));
+ ops[0] = SSA_VAL (def_rhs);
ops[1] = bitsize_int (ref->size);
ops[2] = bitsize_int (offset - offset2);
tree val = vn_nary_build_or_lookup (rcode, vr->type, ops);
--
2.9.3

CVE-2018-12886.patch

@@ -0,0 +1,655 @@
diff -urpN a/gcc/cfgexpand.c b/gcc/cfgexpand.c
--- a/gcc/cfgexpand.c 2019-05-30 16:58:45.350508770 +0800
+++ b/gcc/cfgexpand.c 2019-05-30 11:53:13.315156625 +0800
@@ -6094,6 +6094,23 @@ stack_protect_prologue (void)
rtx x, y;
x = expand_normal (crtl->stack_protect_guard);
+
+ if (targetm.have_stack_protect_combined_set () && guard_decl)
+ {
+ gcc_assert (DECL_P (guard_decl));
+ y = DECL_RTL (guard_decl);
+
+ /* Allow the target to compute address of Y and copy it to X without
+ leaking Y into a register. This combined address + copy pattern
+ allows the target to prevent spilling of any intermediate results by
+ splitting it after register allocator. */
+ if (rtx_insn *insn = targetm.gen_stack_protect_combined_set (x, y))
+ {
+ emit_insn (insn);
+ return;
+ }
+ }
+
if (guard_decl)
y = expand_normal (guard_decl);
else
diff -urpN a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
--- a/gcc/config/arm/arm.c 2019-05-30 16:58:45.354508770 +0800
+++ b/gcc/config/arm/arm.c 2019-05-30 16:59:05.058508073 +0800
@@ -7236,21 +7236,34 @@ legitimate_pic_operand_p (rtx x)
return 1;
}
-/* Record that the current function needs a PIC register. Initialize
- cfun->machine->pic_reg if we have not already done so. */
+/* Record that the current function needs a PIC register. If PIC_REG is null,
+ a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
+ both case cfun->machine->pic_reg is initialized if we have not already done
+ so. COMPUTE_NOW decide whether and where to set the PIC register. If true,
+ PIC register is reloaded in the current position of the instruction stream
+ irregardless of whether it was loaded before. Otherwise, it is only loaded
+ if not already done so (crtl->uses_pic_offset_table is null). Note that
+ nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
+ is only supported iff COMPUTE_NOW is false. */
static void
-require_pic_register (void)
+require_pic_register (rtx pic_reg, bool compute_now)
{
+ gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
/* A lot of the logic here is made obscure by the fact that this
routine gets called as part of the rtx cost estimation process.
We don't want those calls to affect any assumptions about the real
function; and further, we can't call entry_of_function() until we
start the real expansion process. */
- if (!crtl->uses_pic_offset_table)
+ if (!crtl->uses_pic_offset_table || compute_now)
{
- gcc_assert (can_create_pseudo_p ());
+ gcc_assert (can_create_pseudo_p ()
+ || (pic_reg != NULL_RTX
+ && REG_P (pic_reg)
+ && GET_MODE (pic_reg) == Pmode));
if (arm_pic_register != INVALID_REGNUM
+ && !compute_now
&& !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
{
if (!cfun->machine->pic_reg)
@@ -7266,8 +7279,19 @@ require_pic_register (void)
{
rtx_insn *seq, *insn;
- if (!cfun->machine->pic_reg)
- cfun->machine->pic_reg = gen_reg_rtx (Pmode);
+ if (pic_reg == NULL_RTX && cfun->machine->pic_reg == NULL_RTX)
+ {
+ pic_reg = gen_reg_rtx (Pmode);
+ cfun->machine->pic_reg = pic_reg;
+ }
+ else if (pic_reg == NULL_RTX)
+ {
+ pic_reg = cfun->machine->pic_reg;
+ }
+ else if (cfun->machine->pic_reg == NULL_RTX)
+ {
+ cfun->machine->pic_reg = pic_reg;
+ }
/* Play games to avoid marking the function as needing pic
if we are being called as part of the cost-estimation
@@ -7278,11 +7306,12 @@ require_pic_register (void)
start_sequence ();
if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
- && arm_pic_register > LAST_LO_REGNUM)
+ && arm_pic_register > LAST_LO_REGNUM
+ && !compute_now)
emit_move_insn (cfun->machine->pic_reg,
gen_rtx_REG (Pmode, arm_pic_register));
else
- arm_load_pic_register (0UL);
+ arm_load_pic_register (0UL, pic_reg);
seq = get_insns ();
end_sequence ();
@@ -7295,16 +7324,33 @@ require_pic_register (void)
we can't yet emit instructions directly in the final
insn stream. Queue the insns on the entry edge, they will
be committed after everything else is expanded. */
- insert_insn_on_edge (seq,
- single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+ if (currently_expanding_to_rtl)
+ insert_insn_on_edge (seq,
+ single_succ_edge
+ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+ else
+ emit_insn (seq);
}
}
}
}
+/* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
+ created to hold the result of the load. If not NULL, PIC_REG indicates
+ which register to use as PIC register, otherwise it is decided by register
+ allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
+ location in the instruction stream, irregardless of whether it was loaded
+ previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
+ true and null PIC_REG is only supported iff COMPUTE_NOW is false.
+
+ Returns the register REG into which the PIC load is performed. */
+
rtx
-legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
+legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
+ bool compute_now)
{
+ gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
if (GET_CODE (orig) == SYMBOL_REF
|| GET_CODE (orig) == LABEL_REF)
{
@@ -7337,9 +7383,12 @@ legitimize_pic_address (rtx orig, machin
rtx mem;
/* If this function doesn't have a pic register, create one now. */
- require_pic_register ();
+ require_pic_register (pic_reg, compute_now);
+
+ if (pic_reg == NULL_RTX)
+ pic_reg = cfun->machine->pic_reg;
- pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
+ pat = gen_calculate_pic_address (reg, pic_reg, orig);
/* Make the MEM as close to a constant as possible. */
mem = SET_SRC (pat);
@@ -7388,9 +7437,11 @@ legitimize_pic_address (rtx orig, machin
gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
- base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+ base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
+ pic_reg, compute_now);
offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
- base == reg ? 0 : reg);
+ base == reg ? 0 : reg, pic_reg,
+ compute_now);
if (CONST_INT_P (offset))
{
@@ -7490,16 +7541,17 @@ static GTY(()) int pic_labelno;
low register. */
void
-arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
+arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
- rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
+ rtx l1, labelno, pic_tmp, pic_rtx;
if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
return;
gcc_assert (flag_pic);
- pic_reg = cfun->machine->pic_reg;
+ if (pic_reg == NULL_RTX)
+ pic_reg = cfun->machine->pic_reg;
if (TARGET_VXWORKS_RTP)
{
pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
@@ -8558,7 +8610,8 @@ arm_legitimize_address (rtx x, rtx orig_
{
/* We need to find and carefully transform any SYMBOL and LABEL
references; so go back to the original address expression. */
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+ false /*compute_now*/);
if (new_x != orig_x)
x = new_x;
@@ -8626,7 +8679,8 @@ thumb_legitimize_address (rtx x, rtx ori
{
/* We need to find and carefully transform any SYMBOL and LABEL
references; so go back to the original address expression. */
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+ false /*compute_now*/);
if (new_x != orig_x)
x = new_x;
@@ -17800,7 +17854,7 @@ arm_emit_call_insn (rtx pat, rtx addr, b
? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
: !SYMBOL_REF_LOCAL_P (addr)))
{
- require_pic_register ();
+ require_pic_register (NULL_RTX, false /*compute_now*/);
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
}
@@ -21706,7 +21760,7 @@ arm_expand_prologue (void)
mask &= THUMB2_WORK_REGS;
if (!IS_NESTED (func_type))
mask |= (1 << IP_REGNUM);
- arm_load_pic_register (mask);
+ arm_load_pic_register (mask, NULL_RTX);
}
/* If we are profiling, make sure no instructions are scheduled before
@@ -24909,7 +24963,7 @@ thumb1_expand_prologue (void)
/* Load the pic register before setting the frame pointer,
so we can use r7 as a temporary work register. */
if (flag_pic && arm_pic_register != INVALID_REGNUM)
- arm_load_pic_register (live_regs_mask);
+ arm_load_pic_register (live_regs_mask, NULL_RTX);
if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
diff -urpN a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
--- a/gcc/config/arm/arm.md 2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/arm.md 2019-05-30 11:52:58.491157149 +0800
@@ -6051,7 +6051,8 @@
operands[1] = legitimize_pic_address (operands[1], SImode,
(!can_create_pseudo_p ()
? operands[0]
- : 0));
+ : NULL_RTX), NULL_RTX,
+ false /*compute_now*/);
}
"
)
@@ -6340,7 +6341,7 @@
/* r3 is clobbered by set/longjmp, so we can use it as a scratch
register. */
if (arm_pic_register != INVALID_REGNUM)
- arm_load_pic_register (1UL << 3);
+ arm_load_pic_register (1UL << 3, NULL_RTX);
DONE;
}")
@@ -8666,6 +8667,164 @@
(set_attr "conds" "clob")]
)
+;; Named patterns for stack smashing protection.
+(define_expand "stack_protect_combined_set"
+ [(parallel
+ [(set (match_operand:SI 0 "memory_operand" "")
+ (unspec:SI [(match_operand:SI 1 "guard_operand" "")]
+ UNSPEC_SP_SET))
+ (clobber (match_scratch:SI 2 ""))
+ (clobber (match_scratch:SI 3 ""))])]
+ ""
+ ""
+)
+
+;; Use a separate insn from the above expand to be able to have the mem outside
+;; the operand #1 when register allocation comes. This is needed to avoid LRA
+;; try to reload the guard since we need to control how PIC access is done in
+;; the -fpic/-fPIC case (see COMPUTE_NOW parameter when calling
+;; legitimize_pic_address ()).
+(define_insn_and_split "*stack_protect_combined_set_insn"
+ [(set (match_operand:SI 0 "memory_operand" "=m,m")
+ (unspec:SI [(mem:SI (match_operand:SI 1 "guard_addr_operand" "X,X"))]
+ UNSPEC_SP_SET))
+ (clobber (match_scratch:SI 2 "=&l,&r"))
+ (clobber (match_scratch:SI 3 "=&l,&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:SI [(mem:SI (match_dup 2))]
+ UNSPEC_SP_SET))
+ (clobber (match_dup 2))])]
+ "
+{
+ if (flag_pic)
+ {
+ /* Forces recomputing of GOT base now. */
+ legitimize_pic_address (operands[1], SImode, operands[2], operands[3],
+ true /*compute_now*/);
+ }
+ else
+ {
+ if (address_operand (operands[1], SImode))
+ operands[2] = operands[1];
+ else
+ {
+ rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
+ emit_move_insn (operands[2], mem);
+ }
+ }
+}"
+ [(set_attr "arch" "t1,32")]
+)
+
+(define_insn "*stack_protect_set_insn"
+ [(set (match_operand:SI 0 "memory_operand" "=m,m")
+ (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "+&l,&r"))]
+ UNSPEC_SP_SET))
+ (clobber (match_dup 1))]
+ ""
+ "@
+ ldr\\t%1, [%1]\;str\\t%1, %0\;movs\t%1,#0
+ ldr\\t%1, [%1]\;str\\t%1, %0\;mov\t%1,#0"
+ [(set_attr "length" "8,12")
+ (set_attr "conds" "clob,nocond")
+ (set_attr "type" "multiple")
+ (set_attr "arch" "t1,32")]
+)
+
+(define_expand "stack_protect_combined_test"
+ [(parallel
+ [(set (pc)
+ (if_then_else
+ (eq (match_operand:SI 0 "memory_operand" "")
+ (unspec:SI [(match_operand:SI 1 "guard_operand" "")]
+ UNSPEC_SP_TEST))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ ""
+)
+
+;; Use a separate insn from the above expand to be able to have the mem outside
+;; the operand #1 when register allocation comes. This is needed to avoid LRA
+;; try to reload the guard since we need to control how PIC access is done in
+;; the -fpic/-fPIC case (see COMPUTE_NOW parameter when calling
+;; legitimize_pic_address ()).
+(define_insn_and_split "*stack_protect_combined_test_insn"
+ [(set (pc)
+ (if_then_else
+ (eq (match_operand:SI 0 "memory_operand" "m,m")
+ (unspec:SI [(mem:SI (match_operand:SI 1 "guard_addr_operand" "X,X"))]
+ UNSPEC_SP_TEST))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (match_scratch:SI 3 "=&l,&r"))
+ (clobber (match_scratch:SI 4 "=&l,&r"))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx eq;
+
+ if (flag_pic)
+ {
+ /* Forces recomputing of GOT base now. */
+ legitimize_pic_address (operands[1], SImode, operands[3], operands[4],
+ true /*compute_now*/);
+ }
+ else
+ {
+ if (address_operand (operands[1], SImode))
+ operands[3] = operands[1];
+ else
+ {
+ rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
+ emit_move_insn (operands[3], mem);
+ }
+ }
+ if (TARGET_32BIT)
+ {
+ emit_insn (gen_arm_stack_protect_test_insn (operands[4], operands[0],
+ operands[3]));
+ rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ eq = gen_rtx_EQ (CC_Zmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_arm_cond_branch (operands[2], eq, cc_reg));
+ }
+ else
+ {
+ emit_insn (gen_thumb1_stack_protect_test_insn (operands[4], operands[0],
+ operands[3]));
+ eq = gen_rtx_EQ (VOIDmode, operands[4], const0_rtx);
+ emit_jump_insn (gen_cbranchsi4 (eq, operands[4], const0_rtx,
+ operands[2]));
+ }
+ DONE;
+}
+ [(set_attr "arch" "t1,32")]
+)
+
+(define_insn "arm_stack_protect_test_insn"
+ [(set (reg:CC_Z CC_REGNUM)
+ (compare:CC_Z (unspec:SI [(match_operand:SI 1 "memory_operand" "m,m")
+ (mem:SI (match_operand:SI 2 "register_operand" "+l,r"))]
+ UNSPEC_SP_TEST)
+ (const_int 0)))
+ (clobber (match_operand:SI 0 "register_operand" "=&l,&r"))
+ (clobber (match_dup 2))]
+ "TARGET_32BIT"
+ "ldr\t%0, [%2]\;ldr\t%2, %1\;eors\t%0, %2, %0"
+ [(set_attr "length" "8,12")
+ (set_attr "conds" "set")
+ (set_attr "type" "multiple")
+ (set_attr "arch" "t,32")]
+)
+
(define_expand "casesi"
[(match_operand:SI 0 "s_register_operand" "") ; index to jump on
(match_operand:SI 1 "const_int_operand" "") ; lower bound
diff -urpN a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
--- a/gcc/config/arm/arm-protos.h 2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/arm-protos.h 2019-05-30 11:52:58.491157149 +0800
@@ -28,7 +28,7 @@ extern enum unwind_info_type arm_except_
extern int use_return_insn (int, rtx);
extern bool use_simple_return_p (void);
extern enum reg_class arm_regno_class (int);
-extern void arm_load_pic_register (unsigned long);
+extern void arm_load_pic_register (unsigned long, rtx);
extern int arm_volatile_func (void);
extern void arm_expand_prologue (void);
extern void arm_expand_epilogue (bool);
@@ -69,7 +69,7 @@ extern int const_ok_for_dimode_op (HOST_
extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int);
extern int legitimate_pic_operand_p (rtx);
-extern rtx legitimize_pic_address (rtx, machine_mode, rtx);
+extern rtx legitimize_pic_address (rtx, machine_mode, rtx, rtx, bool);
extern rtx legitimize_tls_address (rtx, rtx);
extern bool arm_legitimate_address_p (machine_mode, rtx, bool);
extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int);
diff -urpN a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
--- a/gcc/config/arm/predicates.md 2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/predicates.md 2019-05-30 11:52:58.491157149 +0800
@@ -31,6 +31,23 @@
|| REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
})
+; Predicate for stack protector guard's address in
+; stack_protect_combined_set_insn and stack_protect_combined_test_insn patterns
+(define_predicate "guard_addr_operand"
+ (match_test "true")
+{
+ return (CONSTANT_ADDRESS_P (op)
+ || !targetm.cannot_force_const_mem (mode, op));
+})
+
+; Predicate for stack protector guard in stack_protect_combined_set and
+; stack_protect_combined_test patterns
+(define_predicate "guard_operand"
+ (match_code "mem")
+{
+ return guard_addr_operand (XEXP (op, 0), mode);
+})
+
(define_predicate "imm_for_neon_inv_logic_operand"
(match_code "const_vector")
{
diff -urpN a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
--- a/gcc/config/arm/thumb1.md 2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/thumb1.md 2019-05-30 11:52:58.491157149 +0800
@@ -1964,4 +1964,17 @@
}"
[(set_attr "type" "mov_reg")]
)
+
+(define_insn "thumb1_stack_protect_test_insn"
+ [(set (match_operand:SI 0 "register_operand" "=&l")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")
+ (mem:SI (match_operand:SI 2 "register_operand" "+l"))]
+ UNSPEC_SP_TEST))
+ (clobber (match_dup 2))]
+ "TARGET_THUMB1"
+ "ldr\t%0, [%2]\;ldr\t%2, %1\;eors\t%0, %2, %0"
+ [(set_attr "length" "8")
+ (set_attr "conds" "set")
+ (set_attr "type" "multiple")]
+)
diff -urpN a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
--- a/gcc/config/arm/unspecs.md 2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/unspecs.md 2019-05-30 11:52:58.491157149 +0800
@@ -86,6 +86,9 @@
UNSPEC_PROBE_STACK ; Probe stack memory reference
UNSPEC_NONSECURE_MEM ; Represent non-secure memory in ARMv8-M with
; security extension
+ UNSPEC_SP_SET ; Represent the setting of stack protector's canary
+ UNSPEC_SP_TEST ; Represent the testing of stack protector's canary
+ ; against the guard.
])
(define_c_enum "unspec" [
diff -urpN a/gcc/doc/md.texi b/gcc/doc/md.texi
--- a/gcc/doc/md.texi 2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/doc/md.texi 2019-05-30 11:52:58.491157149 +0800
@@ -6955,22 +6955,61 @@ builtins.
The get/set patterns have a single output/input operand respectively,
with @var{mode} intended to be @code{Pmode}.
+@cindex @code{stack_protect_combined_set} instruction pattern
+@item @samp{stack_protect_combined_set}
+This pattern, if defined, moves a @code{ptr_mode} value from an address
+whose declaration RTX is given in operand 1 to the memory in operand 0
+without leaving the value in a register afterward. If several
+instructions are needed by the target to perform the operation (eg. to
+load the address from a GOT entry then load the @code{ptr_mode} value
+and finally store it), it is the backend's responsibility to ensure no
+intermediate result gets spilled. This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+If this pattern is not defined, then the address declaration is
+expanded first in the standard way and a @code{stack_protect_set}
+pattern is then generated to move the value from that address to the
+address in operand 0.
+
@cindex @code{stack_protect_set} instruction pattern
@item @samp{stack_protect_set}
-This pattern, if defined, moves a @code{ptr_mode} value from the memory
-in operand 1 to the memory in operand 0 without leaving the value in
-a register afterward. This is to avoid leaking the value some place
-that an attacker might use to rewrite the stack guard slot after
-having clobbered it.
+This pattern, if defined, moves a @code{ptr_mode} value from the valid
+memory location in operand 1 to the memory in operand 0 without leaving
+the value in a register afterward. This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+Note: on targets where the addressing modes do not allow to load
+directly from stack guard address, the address is expanded in a standard
+way first which could cause some spills.
If this pattern is not defined, then a plain move pattern is generated.
+@cindex @code{stack_protect_combined_test} instruction pattern
+@item @samp{stack_protect_combined_test}
+This pattern, if defined, compares a @code{ptr_mode} value from an
+address whose declaration RTX is given in operand 1 with the memory in
+operand 0 without leaving the value in a register afterward and
+branches to operand 2 if the values were equal. If several
+instructions are needed by the target to perform the operation (eg. to
+load the address from a GOT entry then load the @code{ptr_mode} value
+and finally store it), it is the backend's responsibility to ensure no
+intermediate result gets spilled. This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+If this pattern is not defined, then the address declaration is
+expanded first in the standard way and a @code{stack_protect_test}
+pattern is then generated to compare the value from that address to the
+value at the memory in operand 0.
+
@cindex @code{stack_protect_test} instruction pattern
@item @samp{stack_protect_test}
This pattern, if defined, compares a @code{ptr_mode} value from the
-memory in operand 1 with the memory in operand 0 without leaving the
-value in a register afterward and branches to operand 2 if the values
-were equal.
+valid memory location in operand 1 with the memory in operand 0 without
+leaving the value in a register afterward and branches to operand 2 if
+the values were equal.
If this pattern is not defined, then a plain compare pattern and
conditional branch pattern is used.
diff -urpN a/gcc/function.c b/gcc/function.c
--- a/gcc/function.c 2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/function.c 2019-05-30 11:53:14.071156599 +0800
@@ -5065,18 +5065,34 @@ stack_protect_epilogue (void)
tree guard_decl = targetm.stack_protect_guard ();
rtx_code_label *label = gen_label_rtx ();
rtx x, y;
- rtx_insn *seq;
+ rtx_insn *seq = NULL;
x = expand_normal (crtl->stack_protect_guard);
- if (guard_decl)
- y = expand_normal (guard_decl);
+
+ if (targetm.have_stack_protect_combined_test () && guard_decl)
+ {
+ gcc_assert (DECL_P (guard_decl));
+ y = DECL_RTL (guard_decl);
+ /* Allow the target to compute address of Y and compare it with X without
+ leaking Y into a register. This combined address + compare pattern
+ allows the target to prevent spilling of any intermediate results by
+ splitting it after register allocator. */
+ seq = targetm.gen_stack_protect_combined_test (x, y, label);
+ }
else
- y = const0_rtx;
+ {
+ if (guard_decl)
+ y = expand_normal (guard_decl);
+ else
+ y = const0_rtx;
+
+ /* Allow the target to compare Y with X without leaking either into
+ a register. */
+ if (targetm.have_stack_protect_test ())
+ seq = targetm.gen_stack_protect_test (x, y, label);
+ }
- /* Allow the target to compare Y with X without leaking either into
- a register. */
- if (targetm.have_stack_protect_test ()
- && ((seq = targetm.gen_stack_protect_test (x, y, label)) != NULL_RTX))
+ if (seq)
emit_insn (seq);
else
emit_cmp_and_jump_insns (x, y, EQ, NULL_RTX, ptr_mode, 1, label);
diff -urpN a/gcc/genpreds.c b/gcc/genpreds.c
--- a/gcc/genpreds.c 2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/genpreds.c 2019-05-30 11:53:14.163156595 +0800
@@ -1581,7 +1581,8 @@ write_insn_preds_c (void)
#include \"reload.h\"\n\
#include \"regs.h\"\n\
#include \"emit-rtl.h\"\n\
-#include \"tm-constrs.h\"\n");
+#include \"tm-constrs.h\"\n\
+#include \"target.h\"\n");
FOR_ALL_PREDICATES (p)
write_one_predicate_function (p);
diff -urpN a/gcc/target-insns.def b/gcc/target-insns.def
--- a/gcc/target-insns.def 2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/target-insns.def 2019-05-30 11:52:58.495157149 +0800
@@ -96,7 +96,9 @@ DEF_TARGET_INSN (sibcall_value, (rtx x0,
DEF_TARGET_INSN (simple_return, (void))
DEF_TARGET_INSN (split_stack_prologue, (void))
DEF_TARGET_INSN (split_stack_space_check, (rtx x0, rtx x1))
+DEF_TARGET_INSN (stack_protect_combined_set, (rtx x0, rtx x1))
DEF_TARGET_INSN (stack_protect_set, (rtx x0, rtx x1))
+DEF_TARGET_INSN (stack_protect_combined_test, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (stack_protect_test, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (store_multiple, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (tablejump, (rtx x0, rtx x1))
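
The md.texi hunks above spell out the intent of the new combined patterns: compute the guard's address and load or compare the canary in one sequence that register allocation cannot split, so no intermediate result is spilled where an overflow could reach it (the CVE-2018-12886 scenario). A minimal sketch of the kind of function this protects follows; it is illustrative only, not taken from the patch, and the compile flags are assumed.

#include <string.h>

/* Built with something like
     arm-linux-gnueabi-gcc -O2 -fPIC -fstack-protector-all -c canary.c
   (an assumed invocation).  The prologue stores the canary above `buf`;
   with the combined set/test patterns the guard's address is computed
   and dereferenced in one unsplittable sequence, so an overflow of
   `buf` cannot redirect where the epilogue re-reads the guard from.  */
void
copy_name (char *dst, const char *src)
{
  char buf[16];

  strcpy (buf, src);              /* potential overflow; caught at return */
  memcpy (dst, buf, sizeof buf);
}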

CVE-2019-15847.patch

@@ -0,0 +1,51 @@
diff -urpN a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
--- a/gcc/config/rs6000/altivec.md 2018-01-15 01:47:30.483964000 +0800
+++ b/gcc/config/rs6000/altivec.md 2019-09-09 00:01:25.770835633 +0800
@@ -74,9 +74,6 @@
UNSPEC_VUNPACK_LO_SIGN_DIRECT
UNSPEC_VUPKHPX
UNSPEC_VUPKLPX
- UNSPEC_DARN
- UNSPEC_DARN_32
- UNSPEC_DARN_RAW
UNSPEC_DST
UNSPEC_DSTT
UNSPEC_DSTST
@@ -3770,21 +3767,21 @@
(define_insn "darn_32"
[(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(const_int 0)] UNSPEC_DARN_32))]
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_DARN_32))]
"TARGET_P9_MISC"
"darn %0,0"
[(set_attr "type" "integer")])
(define_insn "darn_raw"
[(set (match_operand:DI 0 "register_operand" "=r")
- (unspec:DI [(const_int 0)] UNSPEC_DARN_RAW))]
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_DARN_RAW))]
"TARGET_P9_MISC && TARGET_64BIT"
"darn %0,2"
[(set_attr "type" "integer")])
(define_insn "darn"
[(set (match_operand:DI 0 "register_operand" "=r")
- (unspec:DI [(const_int 0)] UNSPEC_DARN))]
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_DARN))]
"TARGET_P9_MISC && TARGET_64BIT"
"darn %0,1"
[(set_attr "type" "integer")])
diff -urpN a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
--- a/gcc/config/rs6000/rs6000.md 2018-01-21 21:32:58.843504000 +0800
+++ b/gcc/config/rs6000/rs6000.md 2019-09-08 23:53:13.122859153 +0800
@@ -163,6 +163,9 @@
UNSPECV_EH_RR ; eh_reg_restore
UNSPECV_ISYNC ; isync instruction
UNSPECV_MFTB ; move from time base
+ UNSPECV_DARN ; darn 1 (deliver a random number)
+ UNSPECV_DARN_32 ; darn 2
+ UNSPECV_DARN_RAW ; darn 0
UNSPECV_NLGR ; non-local goto receiver
UNSPECV_MFFS ; Move from FPSCR
UNSPECV_MTFSF ; Move to FPSCR Fields


@@ -0,0 +1,24 @@
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2018-10-09 11:49:19.000000000 +0800
+++ b/gcc/config/aarch64/aarch64.c 2018-10-09 13:42:15.000000000 +0800
@@ -1619,7 +1619,7 @@ aarch64_load_symref_appropriately (rtx d
case SYMBOL_SMALL_TLSDESC:
{
machine_mode mode = GET_MODE (dest);
- rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
+ rtx x0 = gen_rtx_REG (ptr_mode, R0_REGNUM);
rtx tp;
gcc_assert (mode == Pmode || mode == ptr_mode);
@@ -1635,6 +1635,11 @@ aarch64_load_symref_appropriately (rtx d
if (mode != Pmode)
tp = gen_lowpart (mode, tp);
+ if (mode != ptr_mode)
+ {
+ x0 = force_reg (mode, gen_rtx_SIGN_EXTEND (mode, x0));
+ }
+
emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
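
This hunk targets aarch64 ILP32: the TLS descriptor call returns a 32-bit (ptr_mode) offset in x0, which the fixed code extends to the destination mode before adding the thread pointer instead of reading x0 as a full 64-bit register. A small sketch of code that exercises this path follows; it is illustrative and assumes an ILP32 build with -mabi=ilp32 -fpic and the default TLS descriptor scheme.

/* Illustrative only: a PIC TLS access compiled for ILP32 goes through
   SYMBOL_SMALL_TLSDESC.  With the fix, x0 is created in ptr_mode
   (SImode) and sign-extended before the thread-pointer addition.  */
extern __thread int counter;

int
bump_counter (void)
{
  return ++counter;
}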


@@ -0,0 +1,31 @@
diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md 2018-10-09 11:30:50.000000000 +0800
+++ b/gcc/config/aarch64/aarch64.md 2018-10-09 11:52:54.000000000 +0800
@@ -857,6 +857,13 @@
: !REG_P (callee))
XEXP (operands[0], 0) = force_reg (Pmode, callee);
+ if (TARGET_ILP32
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+ && GET_MODE (XEXP (operands[0], 0)) == SImode)
+ XEXP (operands[0], 0) = convert_memory_address (DImode,
+ XEXP (operands[0], 0));
+
+
if (operands[2] == NULL_RTX)
operands[2] = const0_rtx;
@@ -889,6 +896,13 @@
: !REG_P (callee))
XEXP (operands[1], 0) = force_reg (Pmode, callee);
+ if (TARGET_ILP32
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && GET_MODE (XEXP (operands[1], 0)) == SImode)
+ XEXP (operands[1], 0) = convert_memory_address (DImode,
+ XEXP (operands[1], 0));
+
+
if (operands[3] == NULL_RTX)
operands[3] = const0_rtx;
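
The call and call_value expander hunks handle the ILP32 case where the callee address is still an SImode SYMBOL_REF: convert_memory_address widens it to DImode so the call patterns always see a Pmode address. An ordinary external call built with -mabi=ilp32 can take this path; the sketch below is illustrative, not from the patch.

/* Illustrative only: under -mabi=ilp32, pointers and symbol addresses
   are 32-bit (SImode) while Pmode is DImode, so the expander now widens
   the callee address of a call such as this one.  */
extern int helper (int x);

int
call_helper (int x)
{
  return helper (x) + 1;
}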


@@ -0,0 +1,780 @@
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2019-04-15 14:50:25.866378665 +0800
+++ b/gcc/config/aarch64/aarch64.c 2019-04-15 14:49:21.986376983 +0800
@@ -554,6 +554,31 @@ static const struct tune_params generic_
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
};
+static const struct tune_params tsv110_tunings =
+{
+ &cortexa57_extra_costs,
+ &generic_addrcost_table,
+ &generic_regmove_cost,
+ &generic_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ 4, /* memmov_cost */
+ 4, /* issue_rate */
+ AARCH64_FUSE_NOTHING, /* fusible_ops */
+ 16, /* function_align. */
+ 16, /* jump_align. */
+ 8, /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+};
+
static const struct tune_params cortexa35_tunings =
{
&cortexa53_extra_costs,
diff -urpN a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
--- a/gcc/config/aarch64/aarch64-cores.def 2017-02-15 08:09:28.845771000 +0800
+++ b/gcc/config/aarch64/aarch64-cores.def 2019-04-15 14:49:21.986376983 +0800
@@ -78,6 +78,8 @@ AARCH64_CORE("xgene1", xgene1, x
AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, tsv110, 0x48, 0xd01, -1)
+
/* V8 big.LITTLE implementations. */
AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md 2019-04-15 14:50:25.870378665 +0800
+++ b/gcc/config/aarch64/aarch64.md 2019-04-15 14:49:21.986376983 +0800
@@ -226,6 +226,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff -urpN a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
--- a/gcc/config/aarch64/aarch64-tune.md 2017-02-15 08:09:28.845771000 +0800
+++ b/gcc/config/aarch64/aarch64-tune.md 2019-04-15 14:49:21.986376983 +0800
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
+ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,tsv110,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff -urpN a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
--- a/gcc/config/aarch64/tsv110.md 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/config/aarch64/tsv110.md 2019-04-15 14:55:30.420081420 +0800
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "tsv110_fsu1")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "tsv110_fsu1")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "tsv110_fsu1")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_fp_sqrts" 24
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "tsv110_fsu2")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha256_fast"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards: either the branch is
+;; predicted, in which case we pay no penalty, or the branch is
+;; mispredicted, in which case instruction scheduling is unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")


@@ -0,0 +1,60 @@
diff -urp a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
--- a/gcc/config/arm/arm.c 2019-01-18 11:25:20.840179114 +0800
+++ b/gcc/config/arm/arm.c 2019-01-18 11:25:47.548179817 +0800
@@ -14306,18 +14306,36 @@ gen_movmem_ldrd_strd (rtx *operands)
emit_move_insn (reg0, src);
else
{
- emit_insn (gen_unaligned_loadsi (low_reg, src));
- src = next_consecutive_mem (src);
- emit_insn (gen_unaligned_loadsi (hi_reg, src));
+ if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN)
+ {
+ emit_insn (gen_unaligned_loadsi (hi_reg, src));
+ src = next_consecutive_mem (src);
+ emit_insn (gen_unaligned_loadsi (low_reg, src));
+ }
+ else
+ {
+ emit_insn (gen_unaligned_loadsi (low_reg, src));
+ src = next_consecutive_mem (src);
+ emit_insn (gen_unaligned_loadsi (hi_reg, src));
+ }
}
if (dst_aligned)
emit_move_insn (dst, reg0);
else
{
- emit_insn (gen_unaligned_storesi (dst, low_reg));
- dst = next_consecutive_mem (dst);
- emit_insn (gen_unaligned_storesi (dst, hi_reg));
+ if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN)
+ {
+ emit_insn (gen_unaligned_storesi (dst, hi_reg));
+ dst = next_consecutive_mem (dst);
+ emit_insn (gen_unaligned_storesi (dst, low_reg));
+ }
+ else
+ {
+ emit_insn (gen_unaligned_storesi (dst, low_reg));
+ dst = next_consecutive_mem (dst);
+ emit_insn (gen_unaligned_storesi (dst, hi_reg));
+ }
}
src = next_consecutive_mem (src);
diff -urp a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
--- a/gcc/config/arm/arm.opt 2019-01-18 11:25:20.840179114 +0800
+++ b/gcc/config/arm/arm.opt 2019-01-18 11:28:51.744184666 +0800
@@ -274,6 +274,10 @@ masm-syntax-unified
Target Report Var(inline_asm_unified) Init(0) Save
Assume unified syntax for inline assembly code.
+mlsrd-be-adjust
+Target Report Var(flag_lsrd_be_adjust) Init(1)
+Adjust ldrd/strd splitting order when it's big-endian.
+
mpure-code
Target Report Var(target_pure_code) Init(0)
Do not allow constant data to be placed in code sections.
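The new -mlsrd-be-adjust option (on by default) only changes the order in which gen_movmem_ldrd_strd emits the two unaligned 32-bit halves on big-endian targets. A hedged sketch of the kind of copy that can reach this expansion follows; whether this particular copy actually goes through gen_movmem_ldrd_strd depends on alignment, size and other target options, so treat it as an assumption for illustration.

/* Illustrative only: an 8-byte copy between possibly unaligned objects.
   When arm's block-move expansion splits it into two unaligned 32-bit
   loads/stores, a big-endian build with the default -mlsrd-be-adjust
   transfers the half destined for the high register first;
   -mno-lsrd-be-adjust keeps the original low-half-first order.  */
struct __attribute__ ((packed)) pair
{
  unsigned int lo;
  unsigned int hi;
};

void
copy_pair (struct pair *dst, const struct pair *src)
{
  *dst = *src;
}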


@@ -0,0 +1,19 @@
diff -urpN gcc-7.3.0-bak/gcc/config/arm/arm.c gcc-7.3.0/gcc/config/arm/arm.c
--- gcc-7.3.0-bak/gcc/config/arm/arm.c 2018-11-13 14:23:21.362347728 +0800
+++ gcc-7.3.0/gcc/config/arm/arm.c 2018-11-13 14:31:15.722360215 +0800
@@ -26853,7 +26853,14 @@ static bool
arm_array_mode_supported_p (machine_mode mode,
unsigned HOST_WIDE_INT nelems)
{
- if (TARGET_NEON
+
+
+ /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
+ for now, as the lane-swapping logic needs to be extended in the expanders.
+ See PR target/82518. */
+
+
+ if (TARGET_NEON && !BYTES_BIG_ENDIAN
&& (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
&& (nelems >= 2 && nelems <= 4))
return true;
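arm_array_mode_supported_p is what lets the vectorizer use the NEON structure load/store array modes (vld2/vst2 and friends); the hunk above switches that off for big-endian until the lane-swapping in the expanders is extended. A hedged, editor-added example of a loop whose vectorization strategy this gates:

/* Illustrative only: a de-interleaving loop that little-endian NEON
   typically vectorizes with structure loads (vld2).  After the change
   above, a big-endian build no longer advertises the array modes, so
   the vectorizer must pick another strategy or leave the loop scalar.  */
void
deinterleave (const short *in, short *re, short *im, int n)
{
  for (int i = 0; i < n; i++)
    {
      re[i] = in[2 * i];
      im[i] = in[2 * i + 1];
    }
}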


@@ -0,0 +1,25 @@
diff -Nurp a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
--- a/gcc/config/arm/arm.md 2019-08-10 00:21:12.658523444 +0800
+++ b/gcc/config/arm/arm.md 2019-08-10 00:21:53.478521496 +0800
@@ -5337,7 +5337,9 @@
#
ldrh%?\\t%0, %1"
[(set_attr "type" "alu_shift_reg,load_byte")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,256")
+ (set_attr "neg_pool_range" "*,244")]
)
(define_insn "*arm_zero_extendhisi2_v6"
@@ -5348,7 +5350,9 @@
uxth%?\\t%0, %1
ldrh%?\\t%0, %1"
[(set_attr "predicable" "yes")
- (set_attr "type" "extend,load_byte")]
+ (set_attr "type" "extend,load_byte")
+ (set_attr "pool_range" "*,256")
+ (set_attr "neg_pool_range" "*,244")]
)
(define_insn "*arm_zero_extendhisi2addsi"

cloog-0.18.4.tar.gz Normal file

Binary file not shown.


@@ -0,0 +1,21 @@
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2018-11-16 18:02:11.000000000 +0800
+++ b/gcc/config/aarch64/aarch64.c 2018-11-16 18:07:39.000000000 +0800
@@ -6102,7 +6102,7 @@ aarch64_elf_asm_constructor (rtx symbol,
-Wformat-truncation false positive, use a larger size. */
char buf[23];
snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
- s = get_section (buf, SECTION_WRITE, NULL);
+ s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
switch_to_section (s);
assemble_align (POINTER_SIZE);
assemble_aligned_integer (POINTER_BYTES, symbol);
@@ -6122,7 +6122,7 @@ aarch64_elf_asm_destructor (rtx symbol,
-Wformat-truncation false positive, use a larger size. */
char buf[23];
snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
- s = get_section (buf, SECTION_WRITE, NULL);
+ s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
switch_to_section (s);
assemble_align (POINTER_SIZE);
assemble_aligned_integer (POINTER_BYTES, symbol);
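Only constructors and destructors with an explicit, non-default priority go through these hooks and into the numbered .init_array.NNNNN/.fini_array.NNNNN sections, which are now requested with SECTION_NOTYPE, i.e. without an explicit section type in the .section directive. A small editor-added example that exercises that path (the priority value is arbitrary):

/* Illustrative only: priority ctors/dtors are emitted into numbered
   .init_array.NNNNN/.fini_array.NNNNN sections by the hooks patched
   above; default-priority ones typically take a different path.  */
extern int puts (const char *);

__attribute__ ((constructor (200)))
static void
early_init (void)
{
  puts ("early_init");
}

__attribute__ ((destructor (200)))
static void
late_fini (void)
{
  puts ("late_fini");
}

int
main (void)
{
  return 0;
}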


@@ -0,0 +1,155 @@
diff -N -urp a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
--- a/gcc/config/i386/sse.md 2019-10-30 10:02:45.894920908 +0800
+++ b/gcc/config/i386/sse.md 2019-10-30 10:17:39.682887612 +0800
@@ -16012,9 +16012,11 @@
switch (INTVAL (operands[4]))
{
case 3:
- return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+ gas changed what it requires incompatibly. */
+ return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
case 2:
- return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
default:
gcc_unreachable ();
}
@@ -16057,9 +16059,11 @@
switch (INTVAL (operands[4]))
{
case 3:
- return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+ gas changed what it requires incompatibly. */
+ return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
case 2:
- return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
default:
gcc_unreachable ();
}
@@ -16103,10 +16107,12 @@
{
case 3:
case 7:
- return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+ gas changed what it requires incompatibly. */
+ return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
case 2:
case 6:
- return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+ return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
default:
gcc_unreachable ();
}
@@ -16150,10 +16156,12 @@
{
case 3:
case 7:
- return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+ gas changed what it requires incompatibly. */
+ return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
case 2:
case 6:
- return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
default:
gcc_unreachable ();
}
@@ -19153,12 +19161,6 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-;; Memory operand override for -masm=intel of the v*gatherq* patterns.
-(define_mode_attr gatherq_mode
- [(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x")
- (V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t")
- (V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")])
-
(define_expand "<avx512>_gathersi<mode>"
[(parallel [(set (match_operand:VI48F 0 "register_operand")
(unspec:VI48F
@@ -19192,7 +19194,9 @@
UNSPEC_GATHER))
(clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
"TARGET_AVX512F"
- "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}"
+;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -19211,7 +19215,9 @@
UNSPEC_GATHER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
"TARGET_AVX512F"
- "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}"
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -19250,9 +19256,9 @@
UNSPEC_GATHER))
(clobber (match_scratch:QI 2 "=&Yk"))]
"TARGET_AVX512F"
-{
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}";
-}
+;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+ "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -19272,14 +19278,16 @@
(clobber (match_scratch:QI 1 "=&Yk"))]
"TARGET_AVX512F"
{
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+ gas changed what it requires incompatibly. */
if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
{
if (<MODE_SIZE> != 64)
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
else
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
}
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -19316,7 +19324,9 @@
UNSPEC_SCATTER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
"TARGET_AVX512F"
- "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+ "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -19352,11 +19362,9 @@
UNSPEC_SCATTER))
(clobber (match_scratch:QI 1 "=&Yk"))]
"TARGET_AVX512F"
-{
- if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
- return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}";
- return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}";
-}
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+ "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])

floop-interchange.patch Normal file

File diff suppressed because it is too large.

floop-unroll-and-jam.patch Normal file

@@ -0,0 +1,905 @@
diff -N -urp a/gcc/Makefile.in b/gcc/Makefile.in
--- a/gcc/Makefile.in 2018-11-07 11:37:24.615223860 +0800
+++ b/gcc/Makefile.in 2018-11-07 11:38:26.155223860 +0800
@@ -1292,6 +1292,7 @@ OBJS = \
gimple-iterator.o \
gimple-fold.o \
gimple-laddress.o \
+ gimple-loop-jam.o \
gimple-low.o \
gimple-pretty-print.o \
gimple-ssa-backprop.o \
diff -N -urp a/gcc/cfgloop.c b/gcc/cfgloop.c
--- a/gcc/cfgloop.c 2018-11-07 11:37:24.947223860 +0800
+++ b/gcc/cfgloop.c 2018-11-07 11:38:26.155223860 +0800
@@ -296,13 +296,25 @@ establish_preds (struct loop *loop, stru
/* Add LOOP to the loop hierarchy tree where FATHER is father of the
added loop. If LOOP has some children, take care of that their
- pred field will be initialized correctly. */
+ pred field will be initialized correctly. If AFTER is non-null
+ then it's expected it's a pointer into FATHERs inner sibling
+ list and LOOP is added behind AFTER, otherwise it's added in front
+ of FATHERs siblings. */
void
-flow_loop_tree_node_add (struct loop *father, struct loop *loop)
+flow_loop_tree_node_add (struct loop *father, struct loop *loop,
+ struct loop *after)
{
- loop->next = father->inner;
- father->inner = loop;
+ if (after)
+ {
+ loop->next = after->next;
+ after->next = loop;
+ }
+ else
+ {
+ loop->next = father->inner;
+ father->inner = loop;
+ }
establish_preds (loop, father);
}
diff -N -urp a/gcc/cfgloop.h b/gcc/cfgloop.h
--- a/gcc/cfgloop.h 2018-11-07 11:37:24.331223860 +0800
+++ b/gcc/cfgloop.h 2018-11-07 11:38:26.155223860 +0800
@@ -324,7 +324,8 @@ void record_loop_exits (void);
void rescan_loop_exit (edge, bool, bool);
/* Loop data structure manipulation/querying. */
-extern void flow_loop_tree_node_add (struct loop *, struct loop *);
+extern void flow_loop_tree_node_add (struct loop *, struct loop *,
+ struct loop * = NULL);
extern void flow_loop_tree_node_remove (struct loop *);
extern bool flow_loop_nested_p (const struct loop *, const struct loop *);
extern bool flow_bb_inside_loop_p (const struct loop *, const_basic_block);
diff -N -urp a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
--- a/gcc/cfgloopmanip.c 2018-11-07 11:37:24.847223860 +0800
+++ b/gcc/cfgloopmanip.c 2018-11-07 11:38:26.155223860 +0800
@@ -1026,9 +1026,11 @@ copy_loop_info (struct loop *loop, struc
}
/* Copies copy of LOOP as subloop of TARGET loop, placing newly
- created loop into loops structure. */
+ created loop into loops structure. If AFTER is non-null
+ the new loop is added at AFTER->next, otherwise in front of TARGETs
+ sibling list. */
struct loop *
-duplicate_loop (struct loop *loop, struct loop *target)
+duplicate_loop (struct loop *loop, struct loop *target, struct loop *after)
{
struct loop *cloop;
cloop = alloc_loop ();
@@ -1040,36 +1042,46 @@ duplicate_loop (struct loop *loop, struc
set_loop_copy (loop, cloop);
/* Add it to target. */
- flow_loop_tree_node_add (target, cloop);
+ flow_loop_tree_node_add (target, cloop, after);
return cloop;
}
/* Copies structure of subloops of LOOP into TARGET loop, placing
- newly created loops into loop tree. */
+ newly created loops into loop tree at the end of TARGETs sibling
+ list in the original order. */
void
duplicate_subloops (struct loop *loop, struct loop *target)
{
- struct loop *aloop, *cloop;
+ struct loop *aloop, *cloop, *tail;
+ for (tail = target->inner; tail && tail->next; tail = tail->next)
+ ;
for (aloop = loop->inner; aloop; aloop = aloop->next)
{
- cloop = duplicate_loop (aloop, target);
+ cloop = duplicate_loop (aloop, target, tail);
+ tail = cloop;
+ gcc_assert (!tail->next);
duplicate_subloops (aloop, cloop);
}
}
/* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
- into TARGET loop, placing newly created loops into loop tree. */
+ into TARGET loop, placing newly created loops into loop tree adding
+ them to TARGETs sibling list at the end in order. */
static void
copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
{
- struct loop *aloop;
+ struct loop *aloop, *tail;
int i;
+ for (tail = target->inner; tail && tail->next; tail = tail->next)
+ ;
for (i = 0; i < n; i++)
{
- aloop = duplicate_loop (copied_loops[i], target);
+ aloop = duplicate_loop (copied_loops[i], target, tail);
+ tail = aloop;
+ gcc_assert (!tail->next);
duplicate_subloops (copied_loops[i], aloop);
}
}
@@ -1133,14 +1145,15 @@ set_zero_probability (edge e)
}
/* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
- loop structure and dominators. E's destination must be LOOP header for
- this to work, i.e. it must be entry or latch edge of this loop; these are
- unique, as the loops must have preheaders for this function to work
- correctly (in case E is latch, the function unrolls the loop, if E is entry
- edge, it peels the loop). Store edges created by copying ORIG edge from
- copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
- original LOOP body, the other copies are numbered in order given by control
- flow through them) into TO_REMOVE array. Returns false if duplication is
+ loop structure and dominators (order of inner subloops is retained).
+ E's destination must be LOOP header for this to work, i.e. it must be entry
+ or latch edge of this loop; these are unique, as the loops must have
+ preheaders for this function to work correctly (in case E is latch, the
+ function unrolls the loop, if E is entry edge, it peels the loop). Store
+ edges created by copying ORIG edge from copies corresponding to set bits in
+ WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
+ are numbered in order given by control flow through them) into TO_REMOVE
+ array. Returns false if duplication is
impossible. */
bool
diff -N -urp a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h
--- a/gcc/cfgloopmanip.h 2018-11-07 11:37:24.939223860 +0800
+++ b/gcc/cfgloopmanip.h 2018-11-07 11:38:26.155223860 +0800
@@ -47,7 +47,8 @@ extern struct loop *loopify (edge, edge,
unsigned, unsigned);
extern void unloop (struct loop *, bool *, bitmap);
extern void copy_loop_info (struct loop *loop, struct loop *target);
-extern struct loop * duplicate_loop (struct loop *, struct loop *);
+extern struct loop * duplicate_loop (struct loop *, struct loop *,
+ struct loop * = NULL);
extern void duplicate_subloops (struct loop *, struct loop *);
extern bool can_duplicate_loop_p (const struct loop *loop);
extern bool duplicate_loop_to_header_edge (struct loop *, edge,
diff -N -urp a/gcc/common.opt b/gcc/common.opt
--- a/gcc/common.opt 2018-11-07 11:37:24.859223860 +0800
+++ b/gcc/common.opt 2018-11-07 11:38:26.159223860 +0800
@@ -1496,8 +1496,8 @@ Common Alias(floop-nest-optimize)
Enable loop nest transforms. Same as -floop-nest-optimize.
floop-unroll-and-jam
-Common Alias(floop-nest-optimize)
-Enable loop nest transforms. Same as -floop-nest-optimize.
+Common Report Var(flag_unroll_jam) Optimization
+Perform unroll-and-jam on loops.
fgnu-tm
Common Report Var(flag_tm)
diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
--- a/gcc/doc/invoke.texi 2018-11-07 11:37:24.915223860 +0800
+++ b/gcc/doc/invoke.texi 2018-11-07 11:39:49.031223860 +0800
@@ -7120,7 +7120,8 @@ Optimize yet more. @option{-O3} turns o
by @option{-O2} and also turns on the @option{-finline-functions},
@option{-funswitch-loops}, @option{-fpredictive-commoning},
@option{-fgcse-after-reload}, @option{-ftree-loop-vectorize},
-@option{-ftree-loop-distribute-patterns}, @option{-fsplit-paths}
+@option{-ftree-loop-distribute-patterns}, @option{-fsplit-paths},
+@option{-floop-unroll-and-jam},
@option{-ftree-slp-vectorize}, @option{-fvect-cost-model},
@option{-ftree-partial-pre}, @option{-fpeel-loops}
and @option{-fipa-cp-clone} options.
@@ -8226,12 +8227,10 @@ at @option{-O} and higher.
@itemx -floop-interchange
@itemx -floop-strip-mine
@itemx -floop-block
-@itemx -floop-unroll-and-jam
@opindex ftree-loop-linear
@opindex floop-interchange
@opindex floop-strip-mine
@opindex floop-block
-@opindex floop-unroll-and-jam
Perform loop nest optimizations. Same as
@option{-floop-nest-optimize}. To use this code transformation, GCC has
to be configured with @option{--with-isl} to enable the Graphite loop
@@ -8323,6 +8322,12 @@ ENDDO
@end smallexample
and the initialization loop is transformed into a call to memset zero.
+@item -floop-unroll-and-jam
+@opindex floop-unroll-and-jam
+Apply unroll and jam transformations on feasible loops. In a loop
+nest this unrolls the outer loop by some factor and fuses the resulting
+multiple inner loops. This flag is enabled by default at @option{-O3}.
+
@item -ftree-loop-im
@opindex ftree-loop-im
Perform loop invariant motion on trees. This pass moves only invariants that
@@ -10353,13 +10358,13 @@ loop in the loop nest by a given number
length can be changed using the @option{loop-block-tile-size}
parameter. The default value is 51 iterations.
-@item loop-unroll-jam-size
-Specify the unroll factor for the @option{-floop-unroll-and-jam} option. The
-default value is 4.
-
-@item loop-unroll-jam-depth
-Specify the dimension to be unrolled (counting from the most inner loop)
-for the @option{-floop-unroll-and-jam}. The default value is 2.
+@item unroll-jam-min-percent
+The minimum percentage of memory references that must be optimized
+away for the unroll-and-jam transformation to be considered profitable.
+
+@item unroll-jam-max-unroll
+The maximum number of times the outer loop should be unrolled by
+the unroll-and-jam transformation.
@item ipa-cp-value-list-size
IPA-CP attempts to track all possible values and types passed to a function's
diff -N -urp a/gcc/gimple-loop-jam.c b/gcc/gimple-loop-jam.c
--- a/gcc/gimple-loop-jam.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/gimple-loop-jam.c 2018-11-07 11:38:26.167223860 +0800
@@ -0,0 +1,598 @@
+/* Loop unroll-and-jam.
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "params.h"
+#include "tree-pass.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "ssa.h"
+#include "fold-const.h"
+#include "tree-cfg.h"
+#include "tree-ssa.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
+#include "tree-ssa-loop-manip.h"
+#include "cfgloop.h"
+#include "tree-scalar-evolution.h"
+#include "gimple-iterator.h"
+#include "cfghooks.h"
+#include "tree-data-ref.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-vectorizer.h"
+
+/* Unroll and Jam transformation
+
+ This is a combination of two transformations, where the second
+ is not always valid. It's applicable if a loop nest has redundancies
+ over the iterations of an outer loop while not having that with
+ an inner loop.
+
+ Given this nest:
+ for (i) {
+ for (j) {
+ B (i,j)
+ }
+ }
+
+ first unroll:
+ for (i by 2) {
+ for (j) {
+ B (i,j)
+ }
+ for (j) {
+ B (i+1,j)
+ }
+ }
+
+ then fuse the two adjacent inner loops resulting from that:
+ for (i by 2) {
+ for (j) {
+ B (i,j)
+ B (i+1,j)
+ }
+ }
+
+ As the order of evaluations of the body B changes this is valid
+ only in certain situations: all distance vectors need to be forward.
+   Additionally, if there are induction variables other than just
+   a counting control IV (j above), we can also deal with some situations.
+
+ The validity is checked by unroll_jam_possible_p, and the data-dep
+ testing below.
+
+ A trivial example where the fusion is wrong would be when
+ B (i,j) == x[j-1] = x[j];
+ for (i by 2) {
+ for (j) {
+ x[j-1] = x[j];
+ }
+ for (j) {
+ x[j-1] = x[j];
+ }
+ } effect: move content to front by two elements
+ -->
+ for (i by 2) {
+ for (j) {
+ x[j-1] = x[j];
+ x[j-1] = x[j];
+ }
+ } effect: move content to front by one element
+*/
+
+/* Modify the loop tree for the fact that all code once belonging
+ to the OLD loop or the outer loop of OLD now is inside LOOP. */
+
+static void
+merge_loop_tree (struct loop *loop, struct loop *old)
+{
+ basic_block *bbs;
+ int i, n;
+ struct loop *subloop;
+ edge e;
+ edge_iterator ei;
+
+ /* Find its nodes. */
+ bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+ n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
+
+ for (i = 0; i < n; i++)
+ {
+ /* If the block was direct child of OLD loop it's now part
+ of LOOP. If it was outside OLD, then it moved into LOOP
+ as well. This avoids changing the loop father for BBs
+ in inner loops of OLD. */
+ if (bbs[i]->loop_father == old
+ || loop_depth (bbs[i]->loop_father) < loop_depth (old))
+ {
+ remove_bb_from_loops (bbs[i]);
+ add_bb_to_loop (bbs[i], loop);
+ continue;
+ }
+
+ /* If we find a direct subloop of OLD, move it to LOOP. */
+ subloop = bbs[i]->loop_father;
+ if (loop_outer (subloop) == old && subloop->header == bbs[i])
+ {
+ flow_loop_tree_node_remove (subloop);
+ flow_loop_tree_node_add (loop, subloop);
+ }
+ }
+
+ /* Update the information about loop exit edges. */
+ for (i = 0; i < n; i++)
+ {
+ FOR_EACH_EDGE (e, ei, bbs[i]->succs)
+ {
+ rescan_loop_exit (e, false, false);
+ }
+ }
+
+ loop->num_nodes = n;
+
+ free (bbs);
+}
+
+/* BB is part of the outer loop of an unroll-and-jam situation.
+ Check if any statements therein would prevent the transformation. */
+
+static bool
+bb_prevents_fusion_p (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+ /* BB is duplicated by outer unrolling and then all N-1 first copies
+ move into the body of the fused inner loop. If BB exits the outer loop
+ the last copy still does so, and the first N-1 copies are cancelled
+ by loop unrolling, so also after fusion it's the exit block.
+ But there might be other reasons that prevent fusion:
+ * stores or unknown side-effects prevent fusion
+ * loads don't
+ * computations into SSA names: these aren't problematic. Their
+ result will be unused on the exit edges of the first N-1 copies
+ (those aren't taken after unrolling). If they are used on the
+ other edge (the one leading to the outer latch block) they are
+ loop-carried (on the outer loop) and the Nth copy of BB will
+ compute them again (i.e. the first N-1 copies will be dead). */
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *g = gsi_stmt (gsi);
+ if (gimple_vdef (g) || gimple_has_side_effects (g))
+ return true;
+ }
+ return false;
+}
+
+/* Given an inner loop LOOP (of some OUTER loop) determine if
+ we can safely fuse copies of it (generated by outer unrolling).
+ If so return true, otherwise return false. */
+
+static bool
+unroll_jam_possible_p (struct loop *outer, struct loop *loop)
+{
+ basic_block *bbs;
+ int i, n;
+ struct tree_niter_desc niter;
+
+ /* When fusing the loops we skip the latch block
+ of the first one, so it mustn't have any effects to
+ preserve. */
+ if (!empty_block_p (loop->latch))
+ return false;
+
+ if (!single_exit (loop))
+ return false;
+
+ /* We need a perfect nest. Quick check for adjacent inner loops. */
+ if (outer->inner != loop || loop->next)
+ return false;
+
+ /* Prevent head-controlled inner loops, that we usually have.
+ The guard block would need to be accepted
+ (invariant condition either entering or skipping the loop),
+ without also accepting arbitrary control flow. When unswitching
+ ran before us (as with -O3) this won't be a problem because its
+ outer loop unswitching will have moved out the invariant condition.
+
+ If we do that we need to extend fuse_loops () to cope with this
+ by threading through the (still invariant) copied condition
+ between the two loop copies. */
+ if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header))
+ return false;
+
+ /* The number of iterations of the inner loop must be loop invariant
+ with respect to the outer loop. */
+ if (!number_of_iterations_exit (loop, single_exit (loop), &niter,
+ false, true)
+ || niter.cmp == ERROR_MARK
+ || !integer_zerop (niter.may_be_zero)
+ || !expr_invariant_in_loop_p (outer, niter.niter))
+ return false;
+
+ /* If the inner loop produces any values that are used inside the
+ outer loop (except the virtual op) then it can flow
+ back (perhaps indirectly) into the inner loop. This prevents
+ fusion: without fusion the value at the last iteration is used,
+ with fusion the value after the initial iteration is used.
+
+ If all uses are outside the outer loop this doesn't prevent fusion;
+ the value of the last iteration is still used (and the values from
+ all intermediate iterations are dead). */
+ gphi_iterator psi;
+ for (psi = gsi_start_phis (single_exit (loop)->dest);
+ !gsi_end_p (psi); gsi_next (&psi))
+ {
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ tree op = gimple_phi_result (psi.phi ());
+ if (virtual_operand_p (op))
+ continue;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ if (!is_gimple_debug (use_stmt)
+ && flow_bb_inside_loop_p (outer, gimple_bb (use_stmt)))
+ return false;
+ }
+ }
+
+ /* And check blocks belonging to just outer loop. */
+ bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+ n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun));
+
+ for (i = 0; i < n; i++)
+ if (bbs[i]->loop_father == outer && bb_prevents_fusion_p (bbs[i]))
+ break;
+ free (bbs);
+ if (i != n)
+ return false;
+
+ /* For now we can safely fuse copies of LOOP only if all
+ loop carried variables are inductions (or the virtual op).
+
+ We could handle reductions as well (the initial value in the second
+ body would be the after-iter value of the first body) if it's over
+ an associative and commutative operation. We wouldn't
+ be able to handle unknown cycles. */
+ for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
+ {
+ affine_iv iv;
+ tree op = gimple_phi_result (psi.phi ());
+
+ if (virtual_operand_p (op))
+ continue;
+ if (!simple_iv (loop, loop, op, &iv, true))
+ return false;
+ /* The inductions must be regular, loop invariant step and initial
+ value. */
+ if (!expr_invariant_in_loop_p (outer, iv.step)
+ || !expr_invariant_in_loop_p (outer, iv.base))
+ return false;
+ /* XXX With more effort we could also be able to deal with inductions
+ where the initial value is loop variant but a simple IV in the
+ outer loop. The initial value for the second body would be
+ the original initial value plus iv.base.step. The next value
+ for the fused loop would be the original next value of the first
+ copy, _not_ the next value of the second body. */
+ }
+
+ return true;
+}
+
+/* Fuse LOOP with all further neighbors. The loops are expected to
+ be in appropriate form. */
+
+static void
+fuse_loops (struct loop *loop)
+{
+ struct loop *next = loop->next;
+
+ while (next)
+ {
+ edge e;
+
+ remove_branch (single_pred_edge (loop->latch));
+ /* Make delete_basic_block not fiddle with the loop structure. */
+ basic_block oldlatch = loop->latch;
+ loop->latch = NULL;
+ delete_basic_block (oldlatch);
+ e = redirect_edge_and_branch (loop_latch_edge (next),
+ loop->header);
+ loop->latch = e->src;
+ flush_pending_stmts (e);
+
+ gcc_assert (EDGE_COUNT (next->header->preds) == 1);
+
+ /* The PHI nodes of the second body (single-argument now)
+ need adjustments to use the right values: either directly
+ the value of the corresponding PHI in the first copy or
+ the one leaving the first body which unrolling did for us.
+
+ See also unroll_jam_possible_p () for further possibilities. */
+ gphi_iterator psi_first, psi_second;
+ e = single_pred_edge (next->header);
+ for (psi_first = gsi_start_phis (loop->header),
+ psi_second = gsi_start_phis (next->header);
+ !gsi_end_p (psi_first);
+ gsi_next (&psi_first), gsi_next (&psi_second))
+ {
+ gphi *phi_first = psi_first.phi ();
+ gphi *phi_second = psi_second.phi ();
+ tree firstop = gimple_phi_result (phi_first);
+ /* The virtual operand is correct already as it's
+ always live at exit, hence has a LCSSA node and outer
+ loop unrolling updated SSA form. */
+ if (virtual_operand_p (firstop))
+ continue;
+
+ /* Due to unroll_jam_possible_p () we know that this is
+ an induction. The second body goes over the same
+ iteration space. */
+ add_phi_arg (phi_second, firstop, e,
+ gimple_location (phi_first));
+ }
+ gcc_assert (gsi_end_p (psi_second));
+
+ merge_loop_tree (loop, next);
+ gcc_assert (!next->num_nodes);
+ struct loop *ln = next->next;
+ delete_loop (next);
+ next = ln;
+ }
+ rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
+}
+
+/* Returns true if the distance in DDR can be determined and adjusts
+ the unroll factor in *UNROLL to make unrolling valid for that distance.
+ Otherwise return false.
+
+ If this data dep can lead to a removed memory reference, increment
+ *REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
+ for this to happen. */
+
+static bool
+adjust_unroll_factor (struct data_dependence_relation *ddr,
+ unsigned *unroll, unsigned *profit_unroll,
+ unsigned *removed)
+{
+ bool ret = false;
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
+ {
+ if (DDR_NUM_DIST_VECTS (ddr) == 0)
+ return false;
+ unsigned i;
+ lambda_vector dist_v;
+ FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
+ {
+ /* A distance (a,b) is at worst transformed into (a/N,b) by the
+ unrolling (factor N), so the transformation is valid if
+ a >= N, or b > 0, or b is zero and a > 0. Otherwise the unroll
+ factor needs to be limited so that the first condition holds.
+ That may limit the factor down to zero in the worst case. */
+ int dist = dist_v[0];
+ if (dist < 0)
+ gcc_unreachable ();
+ else if ((unsigned)dist >= *unroll)
+ ;
+ else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
+ || (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
+ && dist > 0))
+ ;
+ else
+ *unroll = dist;
+
+ /* With a distance (a,0) it's always profitable to unroll-and-jam
+ (by a+1), because one memory reference will go away. With
+ (a,b) and b != 0 that's less clear. We will increase the
+ number of streams without lowering the number of mem refs.
+ So for now only handle the first situation. */
+ if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
+ {
+ *profit_unroll = MAX (*profit_unroll, (unsigned)dist + 1);
+ (*removed)++;
+ }
+
+ ret = true;
+ }
+ }
+ return ret;
+}
+
+/* Main entry point for the unroll-and-jam transformation
+ described above. */
+
+static unsigned int
+tree_loop_unroll_and_jam (void)
+{
+ struct loop *loop;
+ bool changed = false;
+
+ gcc_assert (scev_initialized_p ());
+
+ /* Go through all innermost loops. */
+ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
+ {
+ struct loop *outer = loop_outer (loop);
+
+ if (loop_depth (loop) < 2
+ || optimize_loop_nest_for_size_p (outer))
+ continue;
+
+ if (!unroll_jam_possible_p (outer, loop))
+ continue;
+
+ vec<data_reference_p> datarefs;
+ vec<ddr_p> dependences;
+ unsigned unroll_factor, profit_unroll, removed;
+ struct tree_niter_desc desc;
+ bool unroll = false;
+
+ auto_vec<loop_p, 3> loop_nest;
+ dependences.create (10);
+ datarefs.create (10);
+ if (!compute_data_dependences_for_loop (outer, true, &loop_nest,
+ &datarefs, &dependences))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Cannot analyze data dependencies\n");
+ free_data_refs (datarefs);
+ free_dependence_relations (dependences);
+ return false;
+ }
+ if (!datarefs.length ())
+ continue;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ dump_data_dependence_relations (dump_file, dependences);
+
+ unroll_factor = (unsigned)-1;
+ profit_unroll = 1;
+ removed = 0;
+
+ /* Check all dependencies. */
+ unsigned i;
+ struct data_dependence_relation *ddr;
+ FOR_EACH_VEC_ELT (dependences, i, ddr)
+ {
+ struct data_reference *dra, *drb;
+
+	  /* If the refs are independent there's nothing to do. */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
+ continue;
+ dra = DDR_A (ddr);
+ drb = DDR_B (ddr);
+ /* Nothing interesting for the self dependencies. */
+ if (dra == drb)
+ continue;
+
+ /* Now check the distance vector, for determining a sensible
+ outer unroll factor, and for validity of merging the inner
+ loop copies. */
+ if (!adjust_unroll_factor (ddr, &unroll_factor, &profit_unroll,
+ &removed))
+ {
+ /* Couldn't get the distance vector. For two reads that's
+ harmless (we assume we should unroll). For at least
+ one write this means we can't check the dependence direction
+ and hence can't determine safety. */
+
+ if (DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
+ {
+ unroll_factor = 0;
+ break;
+ }
+ }
+ }
+
+ /* We regard a user-specified minimum percentage of zero as a request
+ to ignore all profitability concerns and apply the transformation
+ always. */
+ if (!PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
+ profit_unroll = 2;
+ else if (removed * 100 / datarefs.length ()
+ < (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
+ profit_unroll = 1;
+ if (unroll_factor > profit_unroll)
+ unroll_factor = profit_unroll;
+ if (unroll_factor > (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MAX_UNROLL))
+ unroll_factor = PARAM_VALUE (PARAM_UNROLL_JAM_MAX_UNROLL);
+ unroll = (unroll_factor > 1
+ && can_unroll_loop_p (outer, unroll_factor, &desc));
+
+ if (unroll)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
+ find_loop_location (outer),
+ "applying unroll and jam with factor %d\n",
+ unroll_factor);
+ initialize_original_copy_tables ();
+ tree_unroll_loop (outer, unroll_factor, single_dom_exit (outer),
+ &desc);
+ free_original_copy_tables ();
+ fuse_loops (outer->inner);
+ changed = true;
+ }
+
+ loop_nest.release ();
+ free_dependence_relations (dependences);
+ free_data_refs (datarefs);
+ }
+
+ if (changed)
+ {
+ scev_reset ();
+ free_dominance_info (CDI_DOMINATORS);
+ return TODO_cleanup_cfg;
+ }
+ return 0;
+}
+
+/* Pass boilerplate. */
+
+namespace {
+
+const pass_data pass_data_loop_jam =
+{
+ GIMPLE_PASS, /* type. */
+ "unrolljam", /* name. */
+ OPTGROUP_LOOP, /* optinfo_flags. */
+ TV_LOOP_JAM, /* tv_id. */
+ PROP_cfg, /* properties_required. */
+ 0, /* properties_provided. */
+ 0, /* properties_destroyed. */
+ 0, /* todo_flags_start. */
+ 0, /* todo_flags_finish. */
+};
+
+class pass_loop_jam : public gimple_opt_pass
+{
+public:
+ pass_loop_jam (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_loop_jam, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return flag_unroll_jam != 0;
+ }
+ virtual unsigned int execute (function *);
+
+};
+
+unsigned int
+pass_loop_jam::execute (function *fun)
+{
+ if (number_of_loops (fun) <= 1)
+ return 0;
+
+ return tree_loop_unroll_and_jam ();
+}
+
+}
+
+gimple_opt_pass *
+make_pass_loop_jam (gcc::context *ctxt)
+{
+ return new pass_loop_jam (ctxt);
+}
+
diff -N -urp a/gcc/opts.c b/gcc/opts.c
--- a/gcc/opts.c 2018-11-07 11:37:24.891223860 +0800
+++ b/gcc/opts.c 2018-11-07 11:38:26.171223860 +0800
@@ -534,6 +534,7 @@ static const struct default_options defa
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 },
+ { OPT_LEVELS_3_PLUS, OPT_floop_unroll_and_jam, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
diff -N -urp a/gcc/params.def b/gcc/params.def
--- a/gcc/params.def 2018-11-07 11:37:27.543223860 +0800
+++ b/gcc/params.def 2018-11-07 11:38:26.171223860 +0800
@@ -1280,6 +1280,16 @@ DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
"Enable loop epilogue vectorization using smaller vector size.",
0, 0, 1)
+DEFPARAM (PARAM_UNROLL_JAM_MIN_PERCENT,
+ "unroll-jam-min-percent",
+ "Minimum percentage of memrefs that must go away for unroll-and-jam to be considered profitable.",
+ 1, 0, 100)
+
+DEFPARAM (PARAM_UNROLL_JAM_MAX_UNROLL,
+ "unroll-jam-max-unroll",
+ "Maximum unroll factor for the unroll-and-jam transformation.",
+ 4, 0, 0)
+
/*
Local variables:
diff -N -urp a/gcc/passes.def b/gcc/passes.def
--- a/gcc/passes.def 2018-11-07 11:37:24.859223860 +0800
+++ b/gcc/passes.def 2018-11-07 11:38:26.171223860 +0800
@@ -272,6 +272,7 @@ along with GCC; see the file COPYING3.
NEXT_PASS (pass_tree_unswitch);
NEXT_PASS (pass_scev_cprop);
NEXT_PASS (pass_loop_split);
+ NEXT_PASS (pass_loop_jam);
/* All unswitching, final value replacement and splitting can expose
empty loops. Remove them now. */
NEXT_PASS (pass_cd_dce);
diff -N -urp a/gcc/timevar.def b/gcc/timevar.def
--- a/gcc/timevar.def 2018-11-07 11:37:24.935223860 +0800
+++ b/gcc/timevar.def 2018-11-07 11:38:26.175223860 +0800
@@ -186,6 +186,7 @@ DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "
DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop")
DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
DEFTIMEVAR (TV_LOOP_SPLIT , "loop splitting")
+DEFTIMEVAR (TV_LOOP_JAM , "unroll and jam")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
diff -N -urp a/gcc/tree-pass.h b/gcc/tree-pass.h
--- a/gcc/tree-pass.h 2018-11-07 11:37:24.887223860 +0800
+++ b/gcc/tree-pass.h 2018-11-07 11:38:26.175223860 +0800
@@ -369,6 +369,7 @@ extern gimple_opt_pass *make_pass_tree_l
extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_loop_jam (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_predcom (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
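Taken together, the patch adds the pass (gated on -floop-unroll-and-jam, enabled at -O3), its timevar, and the two profitability params. Below is a hedged, editor-added example of the kind of nest the pass targets, with an illustrative invocation; whether the transformation actually fires here depends on the dependence analysis and the parameter values, so it is a sketch rather than a guaranteed result.

/* Illustrative only: x[j] is re-read on every iteration of the outer
   i loop, so unrolling i and fusing the copies of the j loop lets one
   load of x[j] feed several rows; such removed memory references are
   what unroll-jam-min-percent counts.  Possible invocation (the dump
   name "unrolljam" comes from pass_data_loop_jam above):
     gcc -O3 --param unroll-jam-max-unroll=4 \
         -fdump-tree-unrolljam-details matvec.c  */
#define N 1024

void
matvec (double y[N], const double a[N][N], const double x[N])
{
  for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
      y[i] += a[i][j] * x[j];
}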


@@ -0,0 +1,768 @@
diff -N -urp a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c
--- a/gcc/combine-stack-adj.c 2017-01-20 08:05:30.925466000 +0800
+++ b/gcc/combine-stack-adj.c 2019-01-10 17:10:16.606528459 +0800
@@ -508,6 +508,8 @@ combine_stack_adjustments_for_block (bas
continue;
set = single_set_for_csa (insn);
+ if (set && find_reg_note (insn, REG_STACK_CHECK, NULL_RTX))
+ set = NULL_RTX;
if (set)
{
rtx dest = SET_DEST (set);
diff -N -urp a/gcc/common.opt b/gcc/common.opt
--- a/gcc/common.opt 2019-01-10 13:33:20.926185828 +0800
+++ b/gcc/common.opt 2019-01-10 16:37:35.238476827 +0800
@@ -2336,13 +2336,18 @@ Common Report Var(flag_variable_expansio
Apply variable expansion when loops are unrolled.
fstack-check=
-Common Report RejectNegative Joined
+Common Report RejectNegative Joined Optimization
-fstack-check=[no|generic|specific] Insert stack checking code into the program.
fstack-check
Common Alias(fstack-check=, specific, no)
Insert stack checking code into the program. Same as -fstack-check=specific.
+fstack-clash-protection
+Common Report Var(flag_stack_clash_protection) Optimization
+Insert code to probe each page of stack space as it is allocated to protect
+from stack-clash style attacks.
+
fstack-limit
Common Var(common_deferred_options) Defer
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2019-01-10 13:33:20.914185828 +0800
+++ b/gcc/config/aarch64/aarch64.c 2019-01-11 14:12:22.248521895 +0800
@@ -3881,12 +3881,14 @@ aarch64_expand_prologue (void)
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
- frame_size - STACK_CHECK_PROTECT);
+ if (frame_size > PROBE_INTERVAL
+ && frame_size > get_stack_check_protect ())
+ aarch64_emit_probe_stack_range (get_stack_check_protect (),
+ (frame_size
+ - get_stack_check_protect ()));
}
else if (frame_size > 0)
- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
+ aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
}
aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
diff -N -urp a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
--- a/gcc/config/i386/i386.c 2019-01-10 13:33:20.674185822 +0800
+++ b/gcc/config/i386/i386.c 2019-01-28 10:55:37.006876481 +0800
@@ -14396,7 +14396,7 @@ ix86_expand_prologue (void)
HOST_WIDE_INT size = allocate;
if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
- size = 0x80000000 - STACK_CHECK_PROTECT - 1;
+ size = 0x80000000 - get_stack_check_protect () - 1;
if (TARGET_STACK_PROBE)
{
@@ -14406,18 +14406,21 @@ ix86_expand_prologue (void)
ix86_emit_probe_stack_range (0, size);
}
else
- ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
+ ix86_emit_probe_stack_range (0,
+ size + get_stack_check_protect ());
}
else
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT);
+ if (size > PROBE_INTERVAL
+ && size > get_stack_check_protect ())
+ ix86_emit_probe_stack_range (get_stack_check_protect (),
+ (size
+ - get_stack_check_protect ()));
}
else
- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ ix86_emit_probe_stack_range (get_stack_check_protect (), size);
}
}
}
diff -N -urp a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
--- a/gcc/config/ia64/ia64.c 2017-01-01 20:07:43.905435000 +0800
+++ b/gcc/config/ia64/ia64.c 2019-01-28 10:58:37.582881234 +0800
@@ -3481,15 +3481,16 @@ ia64_expand_prologue (void)
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT,
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+ ia64_emit_probe_stack_range (get_stack_check_protect (),
+ size - get_stack_check_protect (),
bs_size);
- else if (size + bs_size > STACK_CHECK_PROTECT)
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
+ else if (size + bs_size > get_stack_check_protect ())
+ ia64_emit_probe_stack_range (get_stack_check_protect (),
+ 0, bs_size);
}
else if (size + bs_size > 0)
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
+ ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
}
if (dump_file)
diff -N -urp a/gcc/coretypes.h b/gcc/coretypes.h
--- a/gcc/coretypes.h 2017-01-01 20:07:43.905435000 +0800
+++ b/gcc/coretypes.h 2019-01-11 14:09:58.612518114 +0800
@@ -371,6 +371,7 @@ typedef unsigned char uchar;
#include "input.h"
#include "is-a.h"
#include "memory-block.h"
+#include "dumpfile.h"
#endif /* GENERATOR_FILE && !USED_FOR_TARGET */
#endif /* coretypes.h */
diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
--- a/gcc/doc/invoke.texi 2019-01-10 13:33:20.882185827 +0800
+++ b/gcc/doc/invoke.texi 2019-01-10 16:40:40.066481692 +0800
@@ -10050,6 +10050,21 @@ compilation without. The value for comp
needs to be more conservative (higher) in order to make tracer
effective.
+@item stack-clash-protection-guard-size
+Specify the size of the operating system provided stack guard as
+2 raised to @var{num} bytes. The default value is 12 (4096 bytes).
+Acceptable values are between 12 and 30. Higher values may reduce the
+number of explicit probes, but a value larger than the operating system
+provided guard will leave code vulnerable to stack clash style attacks.
+
+@item stack-clash-protection-probe-interval
+Stack clash protection involves probing stack space as it is allocated. This
+param controls the maximum distance between probes into the stack as 2 raised
+to @var{num} bytes. Acceptable values are between 10 and 16 and defaults to
+12. Higher values may reduce the number of explicit probes, but a value
+larger than the operating system provided guard will leave code vulnerable to
+stack clash style attacks.
+
@item max-cse-path-length
The maximum number of basic blocks on path that CSE considers.
@@ -11248,7 +11263,8 @@ target support in the compiler but comes
@enumerate
@item
Modified allocation strategy for large objects: they are always
-allocated dynamically if their size exceeds a fixed threshold.
+allocated dynamically if their size exceeds a fixed threshold. Note this
+may change the semantics of some code.
@item
Fixed limit on the size of the static frame of functions: when it is
@@ -11263,6 +11279,25 @@ generic implementation, code performance
Note that old-style stack checking is also the fallback method for
@samp{specific} if no target support has been added in the compiler.
+@samp{-fstack-check=} is designed for Ada's needs to detect infinite recursion
+and stack overflows. @samp{specific} is an excellent choice when compiling
+Ada code. It is not generally sufficient to protect against stack-clash
+attacks. To protect against those you want @samp{-fstack-clash-protection}.
+
+@item -fstack-clash-protection
+@opindex fstack-clash-protection
+Generate code to prevent stack clash style attacks. When this option is
+enabled, the compiler will only allocate one page of stack space at a time
+and each page is accessed immediately after allocation. Thus, it prevents
+allocations from jumping over any stack guard page provided by the
+operating system.
+
+Most targets do not fully support stack clash protection. However, on
+those targets @option{-fstack-clash-protection} will protect dynamic stack
+allocations. @option{-fstack-clash-protection} may also provide limited
+protection for static stack allocations if the target supports
+@option{-fstack-check=specific}.
+
@item -fstack-limit-register=@var{reg}
@itemx -fstack-limit-symbol=@var{sym}
@itemx -fno-stack-limit
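The documentation above is the user-visible half; the mechanical half is the new probing expansion added to explow.c later in this patch, which touches the stack at every probe interval as dynamic space is allocated instead of skipping a guard-sized region first. A hedged, editor-added example of code whose expansion changes under -fstack-clash-protection:

/* Illustrative only: a variable-length stack allocation.  With
   -fstack-clash-protection its expansion probes the new space every
   2^stack-clash-protection-probe-interval bytes as it is allocated,
   rather than relying on the skipped STACK_CHECK_PROTECT region that
   -fstack-check uses.  */
extern void consume (char *, unsigned long);

void
make_buffer (unsigned long n)
{
  char buf[n];
  for (unsigned long i = 0; i < n; i++)
    buf[i] = 0;
  consume (buf, n);
}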
diff -N -urp a/gcc/doc/tm.texi b/gcc/doc/tm.texi
--- a/gcc/doc/tm.texi 2017-04-05 01:52:27.193766000 +0800
+++ b/gcc/doc/tm.texi 2019-01-10 16:50:44.006497591 +0800
@@ -3419,6 +3419,10 @@ GCC computed the default from the values
normally not need to override that default.
@end defmac
+@deftypefn {Target Hook} bool TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE (rtx @var{residual})
+Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero.
+@end deftypefn
+
@need 2000
@node Frame Registers
@subsection Registers That Address the Stack Frame
diff -N -urp a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
--- a/gcc/doc/tm.texi.in 2017-04-05 01:52:27.193766000 +0800
+++ b/gcc/doc/tm.texi.in 2019-01-10 16:51:41.530499105 +0800
@@ -2999,6 +2999,8 @@ GCC computed the default from the values
normally not need to override that default.
@end defmac
+@hook TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
+
@need 2000
@node Frame Registers
@subsection Registers That Address the Stack Frame
diff -N -urp a/gcc/explow.c b/gcc/explow.c
--- a/gcc/explow.c 2017-02-02 20:39:09.589196000 +0800
+++ b/gcc/explow.c 2019-01-10 16:56:07.454506105 +0800
@@ -39,8 +39,10 @@ along with GCC; see the file COPYING3.
#include "expr.h"
#include "common/common-target.h"
#include "output.h"
+#include "params.h"
static rtx break_out_memory_refs (rtx);
+static void anti_adjust_stack_and_probe_stack_clash (rtx);
/* Truncate and perhaps sign-extend C as appropriate for MODE. */
@@ -1271,6 +1273,29 @@ get_dynamic_stack_size (rtx *psize, unsi
*psize = size;
}
+/* Return the number of bytes to "protect" on the stack for -fstack-check.
+
+ "protect" in the context of -fstack-check means how many bytes we
+ should always ensure are available on the stack. More importantly
+ this is how many bytes are skipped when probing the stack.
+
+ On some targets we want to reuse the -fstack-check prologue support
+ to give a degree of protection against stack clashing style attacks.
+
+ In that scenario we do not want to skip bytes before probing as that
+ would render the stack clash protections useless.
+
+   So we never use STACK_CHECK_PROTECT directly. Instead we indirect through
+ this helper which allows us to provide different values for
+ -fstack-check and -fstack-clash-protection. */
+HOST_WIDE_INT
+get_stack_check_protect (void)
+{
+ if (flag_stack_clash_protection)
+ return 0;
+ return STACK_CHECK_PROTECT;
+}
+
/* Return an rtx representing the address of an area of memory dynamically
pushed on the stack.
@@ -1429,7 +1454,7 @@ allocate_dynamic_stack_space (rtx size,
probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
size);
else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
- probe_stack_range (STACK_CHECK_PROTECT, size);
+ probe_stack_range (get_stack_check_protect (), size);
/* Don't let anti_adjust_stack emit notes. */
suppress_reg_args_size = true;
@@ -1482,6 +1507,8 @@ allocate_dynamic_stack_space (rtx size,
if (flag_stack_check && STACK_CHECK_MOVING_SP)
anti_adjust_stack_and_probe (size, false);
+ else if (flag_stack_clash_protection)
+ anti_adjust_stack_and_probe_stack_clash (size);
else
anti_adjust_stack (size);
@@ -1757,6 +1784,237 @@ probe_stack_range (HOST_WIDE_INT first,
emit_insn (gen_blockage ());
}
+/* Compute parameters for stack clash probing a dynamic stack
+ allocation of SIZE bytes.
+
+ We compute ROUNDED_SIZE, LAST_ADDR, RESIDUAL and PROBE_INTERVAL.
+
+ Additionally we conditionally dump the type of probing that will
+ be needed given the values computed. */
+
+void
+compute_stack_clash_protection_loop_data (rtx *rounded_size, rtx *last_addr,
+ rtx *residual,
+ HOST_WIDE_INT *probe_interval,
+ rtx size)
+{
+ /* Round SIZE down to STACK_CLASH_PROTECTION_PROBE_INTERVAL. */
+ *probe_interval
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
+ *rounded_size = simplify_gen_binary (AND, Pmode, size,
+ GEN_INT (-*probe_interval));
+
+ /* Compute the value of the stack pointer for the last iteration.
+ It's just SP + ROUNDED_SIZE. */
+ rtx rounded_size_op = force_operand (*rounded_size, NULL_RTX);
+ *last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
+ stack_pointer_rtx,
+ rounded_size_op),
+ NULL_RTX);
+
+ /* Compute any residuals not allocated by the loop above. Residuals
+ are just the ROUNDED_SIZE - SIZE. */
+ *residual = simplify_gen_binary (MINUS, Pmode, size, *rounded_size);
+
+ /* Dump key information to make writing tests easy. */
+ if (dump_file)
+ {
+ if (*rounded_size == CONST0_RTX (Pmode))
+ fprintf (dump_file,
+ "Stack clash skipped dynamic allocation and probing loop.\n");
+ else if (CONST_INT_P (*rounded_size)
+ && INTVAL (*rounded_size) <= 4 * *probe_interval)
+ fprintf (dump_file,
+ "Stack clash dynamic allocation and probing inline.\n");
+ else if (CONST_INT_P (*rounded_size))
+ fprintf (dump_file,
+ "Stack clash dynamic allocation and probing in "
+ "rotated loop.\n");
+ else
+ fprintf (dump_file,
+ "Stack clash dynamic allocation and probing in loop.\n");
+
+ if (*residual != CONST0_RTX (Pmode))
+ fprintf (dump_file,
+ "Stack clash dynamic allocation and probing residuals.\n");
+ else
+ fprintf (dump_file,
+ "Stack clash skipped dynamic allocation and "
+ "probing residuals.\n");
+ }
+}
+
+/* Emit the start of an allocate/probe loop for stack
+ clash protection.
+
+ LOOP_LAB and END_LAB are returned for use when we emit the
+ end of the loop.
+
+ LAST addr is the value for SP which stops the loop. */
+void
+emit_stack_clash_protection_probe_loop_start (rtx *loop_lab,
+ rtx *end_lab,
+ rtx last_addr,
+ bool rotated)
+{
+ /* Essentially we want to emit any setup code, the top of loop
+ label and the comparison at the top of the loop. */
+ *loop_lab = gen_label_rtx ();
+ *end_lab = gen_label_rtx ();
+
+ emit_label (*loop_lab);
+ if (!rotated)
+ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, EQ, NULL_RTX,
+ Pmode, 1, *end_lab);
+}
+
+/* Emit the end of a stack clash probing loop.
+
+ This consists of just the jump back to LOOP_LAB and
+ emitting END_LOOP after the loop. */
+
+void
+emit_stack_clash_protection_probe_loop_end (rtx loop_lab, rtx end_loop,
+ rtx last_addr, bool rotated)
+{
+ if (rotated)
+ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, NE, NULL_RTX,
+ Pmode, 1, loop_lab);
+ else
+ emit_jump (loop_lab);
+
+ emit_label (end_loop);
+
+}
+
+/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
+ while probing it. This pushes when SIZE is positive. SIZE need not
+ be constant.
+
+ This is subtly different from anti_adjust_stack_and_probe, in order to
+ prevent stack-clash attacks:
+
+ 1. It must assume no knowledge of the probing state; any allocation
+ must probe.
+
+ Consider the case of a 1 byte alloca in a loop. If the sum of the
+ allocations is large, then this could be used to jump the guard if
+ probes were not emitted.
+
+ 2. It never skips probes, whereas anti_adjust_stack_and_probe will
+ skip probes on the first couple of PROBE_INTERVALs on the assumption
+ they're done elsewhere.
+
+ 3. It only allocates and probes SIZE bytes; it does not need to
+ allocate/probe beyond that because this probing style does not
+ guarantee signal handling capability if the guard is hit. */
+
+static void
+anti_adjust_stack_and_probe_stack_clash (rtx size)
+{
+ /* First ensure SIZE is Pmode. */
+ if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
+ size = convert_to_mode (Pmode, size, 1);
+
+ /* We can get here with a constant size on some targets. */
+ rtx rounded_size, last_addr, residual;
+ HOST_WIDE_INT probe_interval;
+ compute_stack_clash_protection_loop_data (&rounded_size, &last_addr,
+ &residual, &probe_interval, size);
+
+ if (rounded_size != CONST0_RTX (Pmode))
+ {
+ if (CONST_INT_P (rounded_size)
+ && INTVAL (rounded_size) <= 4 * probe_interval)
+ {
+ for (HOST_WIDE_INT i = 0;
+ i < INTVAL (rounded_size);
+ i += probe_interval)
+ {
+ anti_adjust_stack (GEN_INT (probe_interval));
+
+ /* The prologue does not probe residuals. Thus the offset
+ here is to probe just beyond what the prologue had already
+ allocated. */
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+ (probe_interval
+ - GET_MODE_SIZE (word_mode))));
+ emit_insn (gen_blockage ());
+ }
+ }
+ else
+ {
+ rtx loop_lab, end_loop;
+ bool rotate_loop = CONST_INT_P (rounded_size);
+ emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop,
+ last_addr, rotate_loop);
+
+ anti_adjust_stack (GEN_INT (probe_interval));
+
+ /* The prologue does not probe residuals. Thus the offset here is
+ to probe just beyond what the prologue had already allocated. */
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+ (probe_interval
+ - GET_MODE_SIZE (word_mode))));
+
+ emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop,
+ last_addr, rotate_loop);
+ emit_insn (gen_blockage ());
+ }
+ }
+
+ if (residual != CONST0_RTX (Pmode))
+ {
+ rtx label = NULL_RTX;
+ /* RESIDUAL could be zero at runtime and in that case *sp could
+ hold live data. Furthermore, we do not want to probe into the
+ red zone.
+
+ Go ahead and just guard the probe at *sp on RESIDUAL != 0 at
+ runtime if RESIDUAL is not a compile time constant. */
+ if (!CONST_INT_P (residual))
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (residual, CONST0_RTX (GET_MODE (residual)),
+ EQ, NULL_RTX, Pmode, 1, label);
+ }
+
+ rtx x = force_reg (Pmode, plus_constant (Pmode, residual,
+ -GET_MODE_SIZE (word_mode)));
+ anti_adjust_stack (residual);
+ emit_stack_probe (gen_rtx_PLUS (Pmode, stack_pointer_rtx, x));
+ emit_insn (gen_blockage ());
+ if (!CONST_INT_P (residual))
+ emit_label (label);
+ }
+
+ /* Some targets make optimistic assumptions in their prologues about
+ how the caller may have probed the stack. Make sure we honor
+ those assumptions when needed. */
+ if (size != CONST0_RTX (Pmode)
+ && targetm.stack_clash_protection_final_dynamic_probe (residual))
+ {
+ /* SIZE could be zero at runtime and in that case *sp could hold
+ live data. Furthermore, we don't want to probe into the red
+ zone.
+
+ Go ahead and just guard the probe at *sp on SIZE != 0 at runtime
+ if SIZE is not a compile time constant. */
+ rtx label = NULL_RTX;
+ if (!CONST_INT_P (size))
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (size, CONST0_RTX (GET_MODE (size)),
+ EQ, NULL_RTX, Pmode, 1, label);
+ }
+
+ emit_stack_probe (stack_pointer_rtx);
+ emit_insn (gen_blockage ());
+ if (!CONST_INT_P (size))
+ emit_label (label);
+ }
+}
+
/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
while probing it. This pushes when SIZE is positive. SIZE need not
be constant. If ADJUST_BACK is true, adjust back the stack pointer
diff -N -urp a/gcc/explow.h b/gcc/explow.h
--- a/gcc/explow.h 2017-01-01 20:07:43.905435000 +0800
+++ b/gcc/explow.h 2019-01-10 16:57:37.934508487 +0800
@@ -69,6 +69,15 @@ extern void anti_adjust_stack (rtx);
/* Add some bytes to the stack while probing it. An rtx says how many. */
extern void anti_adjust_stack_and_probe (rtx, bool);
+/* Support for building allocation/probing loops for stack-clash
+ protection of dynamically allocated stack space. */
+extern void compute_stack_clash_protection_loop_data (rtx *, rtx *, rtx *,
+ HOST_WIDE_INT *, rtx);
+extern void emit_stack_clash_protection_probe_loop_start (rtx *, rtx *,
+ rtx, bool);
+extern void emit_stack_clash_protection_probe_loop_end (rtx, rtx,
+ rtx, bool);
+
/* This enum is used for the following two functions. */
enum save_level {SAVE_BLOCK, SAVE_FUNCTION, SAVE_NONLOCAL};
diff -N -urp a/gcc/flag-types.h b/gcc/flag-types.h
--- a/gcc/flag-types.h 2017-01-01 20:07:43.905435000 +0800
+++ b/gcc/flag-types.h 2019-01-10 16:42:11.490484099 +0800
@@ -166,7 +166,14 @@ enum permitted_flt_eval_methods
PERMITTED_FLT_EVAL_METHODS_C11
};
-/* Type of stack check. */
+/* Type of stack check.
+
+ Stack checking is designed to detect infinite recursion and stack
+ overflows for Ada programs. Furthermore, in that scenario, stack checking
+ tries to ensure that enough stack space is left to run a signal handler.
+
+ -fstack-check= does not prevent stack-clash style attacks. For that
+ you want -fstack-clash-protection. */
enum stack_check_type
{
/* Do not check the stack. */
diff -N -urp a/gcc/function.c b/gcc/function.c
--- a/gcc/function.c 2017-08-08 21:21:12.755378000 +0800
+++ b/gcc/function.c 2019-01-10 17:07:17.414523742 +0800
@@ -5695,6 +5695,58 @@ get_arg_pointer_save_area (void)
return ret;
}
+
+/* If debugging dumps are requested, dump information about how the
+ target handled -fstack-clash-protection for the prologue.
+
+ PROBES describes what if any probes were emitted.
+
+ RESIDUALS indicates if the prologue had any residual allocation
+ (i.e. total allocation was not a multiple of PROBE_INTERVAL). */
+
+void
+dump_stack_clash_frame_info (enum stack_clash_probes probes, bool residuals)
+{
+ if (!dump_file)
+ return;
+
+ switch (probes)
+ {
+ case NO_PROBE_NO_FRAME:
+ fprintf (dump_file,
+ "Stack clash no probe no stack adjustment in prologue.\n");
+ break;
+ case NO_PROBE_SMALL_FRAME:
+ fprintf (dump_file,
+ "Stack clash no probe small stack adjustment in prologue.\n");
+ break;
+ case PROBE_INLINE:
+ fprintf (dump_file, "Stack clash inline probes in prologue.\n");
+ break;
+ case PROBE_LOOP:
+ fprintf (dump_file, "Stack clash probe loop in prologue.\n");
+ break;
+ }
+
+ if (residuals)
+ fprintf (dump_file, "Stack clash residual allocation in prologue.\n");
+ else
+ fprintf (dump_file, "Stack clash no residual allocation in prologue.\n");
+
+ if (frame_pointer_needed)
+ fprintf (dump_file, "Stack clash frame pointer needed.\n");
+ else
+ fprintf (dump_file, "Stack clash no frame pointer needed.\n");
+
+ if (TREE_THIS_VOLATILE (cfun->decl))
+ fprintf (dump_file,
+ "Stack clash noreturn prologue, assuming no implicit"
+ " probes in caller.\n");
+ else
+ fprintf (dump_file,
+ "Stack clash not noreturn prologue.\n");
+}
+
/* Add a list of INSNS to the hash HASHP, possibly allocating HASHP
for the first time. */
diff -N -urp a/gcc/function.h b/gcc/function.h
--- a/gcc/function.h 2017-01-25 01:07:36.015431000 +0800
+++ b/gcc/function.h 2019-01-10 17:08:12.806525200 +0800
@@ -553,6 +553,14 @@ do { \
((TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn) \
? MAX (FUNCTION_BOUNDARY, 2 * BITS_PER_UNIT) : FUNCTION_BOUNDARY)
+enum stack_clash_probes {
+ NO_PROBE_NO_FRAME,
+ NO_PROBE_SMALL_FRAME,
+ PROBE_INLINE,
+ PROBE_LOOP
+};
+
+extern void dump_stack_clash_frame_info (enum stack_clash_probes, bool);
extern void push_function_context (void);
diff -N -urp a/gcc/params.def b/gcc/params.def
--- a/gcc/params.def 2019-01-10 13:33:20.894185827 +0800
+++ b/gcc/params.def 2019-01-10 16:43:15.414485782 +0800
@@ -213,6 +213,16 @@ DEFPARAM(PARAM_STACK_FRAME_GROWTH,
"Maximal stack frame growth due to inlining (in percent).",
1000, 0, 0)
+DEFPARAM(PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
+ "stack-clash-protection-guard-size",
+ "Size of the stack guard expressed as a power of two.",
+ 12, 12, 30)
+
+DEFPARAM(PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL,
+ "stack-clash-protection-probe-interval",
+ "Interval in which to probe the stack expressed as a power of two.",
+ 12, 10, 16)
+
/* The GCSE optimization will be disabled if it would require
significantly more memory than this value. */
DEFPARAM(PARAM_MAX_GCSE_MEMORY,
diff -N -urp a/gcc/reg-notes.def b/gcc/reg-notes.def
--- a/gcc/reg-notes.def 2017-03-28 05:00:35.674561000 +0800
+++ b/gcc/reg-notes.def 2019-01-10 17:12:11.678531488 +0800
@@ -223,6 +223,10 @@ REG_NOTE (ARGS_SIZE)
pseudo reg. */
REG_NOTE (RETURNED)
+/* Indicates the instruction is a stack check probe that should not
+ be combined with other stack adjustments. */
+REG_NOTE (STACK_CHECK)
+
/* Used to mark a call with the function decl called by the call.
The decl might not be available in the call due to splitting of the call
insn. This note is a SYMBOL_REF. */
diff -N -urp a/gcc/rtl.h b/gcc/rtl.h
--- a/gcc/rtl.h 2017-03-14 20:47:42.745690000 +0800
+++ b/gcc/rtl.h 2019-01-10 16:59:15.574511058 +0800
@@ -2707,6 +2707,7 @@ get_full_set_src_cost (rtx x, machine_mo
/* In explow.c */
extern HOST_WIDE_INT trunc_int_for_mode (HOST_WIDE_INT, machine_mode);
extern rtx plus_constant (machine_mode, rtx, HOST_WIDE_INT, bool = false);
+extern HOST_WIDE_INT get_stack_check_protect (void);
/* In rtl.c */
extern rtx rtx_alloc_stat (RTX_CODE MEM_STAT_DECL);
diff -N -urp a/gcc/sched-deps.c b/gcc/sched-deps.c
--- a/gcc/sched-deps.c 2017-01-01 20:07:43.905435000 +0800
+++ b/gcc/sched-deps.c 2019-01-10 17:13:37.470533746 +0800
@@ -4717,6 +4717,11 @@ parse_add_or_inc (struct mem_inc_info *m
if (RTX_FRAME_RELATED_P (insn) || !pat)
return false;
+ /* Do not allow breaking data dependencies for insns that are marked
+ with REG_STACK_CHECK. */
+ if (find_reg_note (insn, REG_STACK_CHECK, NULL))
+ return false;
+
/* Result must be single reg. */
if (!REG_P (SET_DEST (pat)))
return false;
diff -N -urp a/gcc/target.def b/gcc/target.def
--- a/gcc/target.def 2019-01-10 13:33:20.762185824 +0800
+++ b/gcc/target.def 2019-01-10 17:01:49.146515100 +0800
@@ -5490,6 +5490,12 @@ these registers when the target switches
void, (void),
hook_void_void)
+DEFHOOK
+(stack_clash_protection_final_dynamic_probe,
+ "Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero.",
+ bool, (rtx residual),
+ default_stack_clash_protection_final_dynamic_probe)
+
/* Functions specific to the C family of frontends. */
#undef HOOK_PREFIX
#define HOOK_PREFIX "TARGET_C_"
diff -N -urp a/gcc/targhooks.c b/gcc/targhooks.c
--- a/gcc/targhooks.c 2017-02-07 19:29:06.644837000 +0800
+++ b/gcc/targhooks.c 2019-01-10 17:03:23.818517592 +0800
@@ -2107,4 +2107,10 @@ default_excess_precision (enum excess_pr
return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
}
+bool
+default_stack_clash_protection_final_dynamic_probe (rtx residual ATTRIBUTE_UNUSED)
+{
+ return 0;
+}
+
#include "gt-targhooks.h"
diff -N -urp a/gcc/targhooks.h b/gcc/targhooks.h
--- a/gcc/targhooks.h 2017-04-05 01:52:27.193766000 +0800
+++ b/gcc/targhooks.h 2019-01-10 17:04:11.438518846 +0800
@@ -263,5 +263,6 @@ extern unsigned int default_min_arithmet
extern enum flt_eval_method
default_excess_precision (enum excess_precision_type ATTRIBUTE_UNUSED);
+extern bool default_stack_clash_protection_final_dynamic_probe (rtx);
#endif /* GCC_TARGHOOKS_H */
diff -N -urp a/gcc/toplev.c b/gcc/toplev.c
--- a/gcc/toplev.c 2017-09-15 16:18:34.015147000 +0800
+++ b/gcc/toplev.c 2019-01-10 16:45:33.626489420 +0800
@@ -1573,6 +1573,26 @@ process_options (void)
flag_associative_math = 0;
}
+ /* -fstack-clash-protection is not currently supported on targets
+ where the stack grows up. */
+ if (flag_stack_clash_protection && !STACK_GROWS_DOWNWARD)
+ {
+ warning_at (UNKNOWN_LOCATION, 0,
+ "%<-fstack-clash-protection%> is not supported on targets "
+ "where the stack grows from lower to higher addresses");
+ flag_stack_clash_protection = 0;
+ }
+
+ /* We cannot support -fstack-check= and -fstack-clash-protection at
+ the same time. */
+ if (flag_stack_check != NO_STACK_CHECK && flag_stack_clash_protection)
+ {
+ warning_at (UNKNOWN_LOCATION, 0,
+ "%<-fstack-check=%> and %<-fstack-clash_protection%> are "
+ "mutually exclusive. Disabling %<-fstack-check=%>");
+ flag_stack_check = NO_STACK_CHECK;
+ }
+
/* With -fcx-limited-range, we do cheap and quick complex arithmetic. */
if (flag_cx_limited_range)
flag_complex_method = 0;
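
As a reader's aid (not part of the patch), here is a minimal C sketch of the arithmetic performed by compute_stack_clash_protection_loop_data and anti_adjust_stack_and_probe_stack_clash for a compile-time-constant allocation, assuming the default probe interval of 2**12 bytes from params.def; the allocation size is hypothetical.

#include <stdio.h>

int
main (void)
{
  long probe_interval = 1L << 12;              /* Default: 2**12 = 4096 bytes.  */
  long size = 10000;                           /* Hypothetical dynamic allocation.  */
  long rounded_size = size & -probe_interval;  /* Whole intervals: 8192.  */
  long residual = size - rounded_size;         /* Remainder: 1808.  */

  /* The allocate/probe loop touches the stack once per interval...  */
  for (long off = 0; off < rounded_size; off += probe_interval)
    printf ("allocate %ld bytes, probe near the new stack pointer\n",
            probe_interval);

  /* ...and any residual is allocated and probed separately, guarded at
     runtime when its value is not a compile-time constant.  */
  if (residual != 0)
    printf ("allocate residual %ld bytes, probe near the new stack pointer\n",
            residual);
  return 0;
}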

BIN
gcc-7.3.0.tar.gz Normal file

Binary file not shown.

12
gcc-adapt-to-isl.patch Normal file
View File

@@ -0,0 +1,12 @@
diff --git a/gcc/graphite.h b/gcc/graphite.h
index 4e0e58c..be0a22b 100644 (file)
--- a/gcc/graphite.h
+++ b/gcc/graphite.h
@@ -37,6 +37,8 @@ along with GCC; see the file COPYING3. If not see
#include <isl/schedule.h>
#include <isl/ast_build.h>
#include <isl/schedule_node.h>
+#include <isl/id.h>
+#include <isl/space.h>
typedef struct poly_dr *poly_dr_p;

3352
gcc.spec Normal file

File diff suppressed because it is too large

BIN
isl-0.14.tar.xz Normal file

Binary file not shown.

View File

@@ -0,0 +1,13 @@
diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md 2019-05-30 16:12:52.950606040 +0800
+++ b/gcc/config/aarch64/aarch64.md 2019-05-30 16:15:56.606599549 +0800
@@ -3110,7 +3110,8 @@
(define_insn_and_split "*compare_cstore<mode>_insn"
[(set (match_operand:GPI 0 "register_operand" "=r")
(EQL:GPI (match_operand:GPI 1 "register_operand" "r")
- (match_operand:GPI 2 "aarch64_imm24" "n")))]
+ (match_operand:GPI 2 "aarch64_imm24" "n")))
+ (clobber (reg:CC CC_REGNUM))]
"!aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)
&& !aarch64_plus_operand (operands[2], <MODE>mode)
&& !reload_completed"
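
A hedged illustration (not part of the patch) of source that can reach this pattern: the constant below is a hypothetical 24-bit immediate that is neither a move immediate nor an add/sub immediate, so the post-reload split materializes it with subtract/compare instructions that write the condition codes, which is why the clobber is added.

/* Hypothetical example: an equality cstore against a 24-bit immediate
   that cannot be encoded directly.  */
int
is_magic (long x)
{
  return x == 0x123456;
}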

View File

@@ -0,0 +1,108 @@
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2018-09-19 17:11:42.583520820 +0800
+++ b/gcc/config/aarch64/aarch64.c 2018-09-19 17:10:22.715520820 +0800
@@ -1260,29 +1260,32 @@ aarch64_is_long_call_p (rtx sym)
void
aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
- if (!TARGET_LONG_CALLS)
+ if (flag_fentry)
{
- fprintf (file, "\tmov\tx9, x30\n");
- fprintf (file, "\tbl\t__fentry__\n");
- fprintf (file, "\tmov\tx30, x9\n");
- }
- else
- {
- if (flag_pic)
+ if (!TARGET_LONG_CALLS)
{
fprintf (file, "\tmov\tx9, x30\n");
- fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
- fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
- fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tbl\t__fentry__\n");
fprintf (file, "\tmov\tx30, x9\n");
}
else
{
- fprintf (file, "\tmov\tx9, x30\n");
- fprintf (file, "\tadrp\tx10, __fentry__\n");
- fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
- fprintf (file, "\tblr\tx10\n");
- fprintf (file, "\tmov\tx30, x9\n");
+ if (flag_pic)
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ else
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, __fentry__\n");
+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
}
}
}
@@ -12020,6 +12023,15 @@ aarch64_emit_unlikely_jump (rtx insn)
add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
}
+/* Return true if profiling code should be emitted before the
+ prologue, and false otherwise.
+ Note: x86 behaves the same way when -mfentry is used for hot patching. */
+static bool
+aarch64_profile_before_prologue (void)
+{
+ return flag_fentry != 0;
+}
+
/* Expand a compare and swap pattern. */
void
@@ -14952,6 +14964,9 @@ aarch64_run_selftests (void)
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+#undef TARGET_PROFILE_BEFORE_PROLOGUE
+#define TARGET_PROFILE_BEFORE_PROLOGUE aarch64_profile_before_prologue
+
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
hook_bool_const_tree_hwi_hwi_const_tree_true
diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
--- a/gcc/config/aarch64/aarch64.h 2018-09-19 17:11:42.587520820 +0800
+++ b/gcc/config/aarch64/aarch64.h 2018-09-19 17:10:22.715520820 +0800
@@ -850,9 +850,12 @@ typedef struct
{ \
rtx fun, lr; \
const rtx_insn* tmp = get_insns (); \
- lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
- fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
- emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+ if (!flag_fentry) \
+ { \
+ lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
+ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
+ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+ } \
if (TARGET_LONG_CALLS) \
{ \
emit_insn (gen_blockage ()); \
diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
--- a/gcc/config/aarch64/aarch64.opt 2018-09-19 17:11:42.587520820 +0800
+++ b/gcc/config/aarch64/aarch64.opt 2018-09-19 17:10:22.715520820 +0800
@@ -192,3 +192,7 @@ single precision and to 32 bits for doub
mverbose-cost-dump
Common Undocumented Var(flag_aarch64_verbose_cost)
Enables verbose cost model dumping in the debug dump files.
+
+mfentry
+Target Report Var(flag_fentry) Init(0)
+Emit profiling counter call at function entry before prologue.
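
A hypothetical usage sketch (not part of the patch): with a compiler carrying this change, profiling instrumentation would typically be requested as below, and the entry sequence shown in the comment is the one aarch64_function_profiler emits above when TARGET_LONG_CALLS is off.

/* hot.c -- hypothetical example.  Compile with a patched compiler:

     gcc -O2 -pg -mfentry -S hot.c

   With -mfentry, each instrumented function is expected to start with

     mov x9, x30
     bl  __fentry__
     mov x30, x9

   emitted before the prologue (aarch64_profile_before_prologue returns
   true when flag_fentry is set), instead of the mcount-style call from
   PROFILE_HOOK.  */

int
counter (int x)
{
  return x + 1;
}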

362
option-mlong-calls.patch Normal file
View File

@@ -0,0 +1,362 @@
diff -N -urp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
--- a/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:43:27.862079389 +0800
+++ b/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:44:34.930081154 +0800
@@ -353,6 +353,10 @@ bool aarch64_use_return_insn_p (void);
const char *aarch64_mangle_builtin_type (const_tree);
const char *aarch64_output_casesi (rtx *);
+extern void aarch64_pr_long_calls (struct cpp_reader *);
+extern void aarch64_pr_no_long_calls (struct cpp_reader *);
+extern void aarch64_pr_long_calls_off (struct cpp_reader *);
+
enum aarch64_symbol_type aarch64_classify_symbol (rtx, rtx);
enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
enum reg_class aarch64_regno_regclass (unsigned);
@@ -384,6 +388,7 @@ void aarch64_expand_epilogue (bool);
void aarch64_expand_mov_immediate (rtx, rtx);
void aarch64_expand_prologue (void);
void aarch64_expand_vector_init (rtx, rtx);
+void aarch64_function_profiler (FILE *, int);
void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
const_tree, unsigned);
void aarch64_init_expanders (void);
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2018-11-06 10:43:27.870079389 +0800
+++ b/gcc/config/aarch64/aarch64.c 2018-11-06 10:44:34.934081154 +0800
@@ -70,6 +70,9 @@
/* This file should be included last. */
#include "target-def.h"
+static void aarch64_set_default_type_attributes (tree);
+static int aarch64_comp_type_attributes (const_tree, const_tree);
+
/* Defined for convenience. */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
@@ -1092,12 +1095,163 @@ aarch64_hard_regno_caller_save_mode (uns
return choose_hard_reg_mode (regno, nregs, false);
}
+/* Table of machine attributes. */
+static const struct attribute_spec aarch64_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
+ affects_type_identity }. */
+ /* Function calls made to this symbol must be done indirectly, because
+ it may lie outside of the 26 bit addressing range of a normal function
+ call. */
+ { "long_call", 0, 0, false, true, true, NULL, false },
+ /* Whereas these functions are always known to reside within the 26 bit
+ addressing range. */
+ { "short_call", 0, 0, false, true, true, NULL, false },
+ { NULL, 0, 0, false, false, false, NULL, false }
+};
+
+/* Encode the current state of the #pragma[no_]long_calls. */
+typedef enum
+{
+ OFF, /* No #pragma[no_]long_calls is in effect. */
+ LONG, /* #pragma long_calls is in effect. */
+ SHORT /* #pragma no_long_calls is in effect. */
+} aarch64_pragma_enum;
+
+static aarch64_pragma_enum aarch64_pragma_long_calls = OFF;
+
+void
+aarch64_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ aarch64_pragma_long_calls = LONG;
+}
+
+void
+aarch64_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ aarch64_pragma_long_calls = SHORT;
+}
+
+void
+aarch64_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ aarch64_pragma_long_calls = OFF;
+}
+
+/* Return 0 if the attributes for two types are incompatible, 1 if they
+ are compatible. */
+static int
+aarch64_comp_type_attributes (const_tree type1, const_tree type2)
+{
+ int l1, l2, s1, s2;
+
+ /* Check for mismatch of non-default calling convention. */
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
+ return 1;
+
+ /* Check for mismatched call attributes. */
+ l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
+ s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
+
+ /* Only bother to check if an attribute is defined. */
+ if (l1 | l2 | s1 | s2)
+ {
+ /* If one type has an attribute, the other
+ must have the same attribute. */
+ if ((l1 != l2) || (s1 != s2))
+ {
+ return 0;
+ }
+
+ /* Disallow mixed attributes. */
+ if ((l1 && s2) || (l2 && s1))
+ {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/* Assign default attributes to a newly defined type. This is used to
+ set short_call/long_call attributes for function types of
+ functions defined inside corresponding #pragma scopes. */
+static void
+aarch64_set_default_type_attributes (tree type)
+{
+ /* Add __attribute__ ((long_call)) to all functions when inside
+ #pragma long_calls, or __attribute__ ((short_call)) when inside
+ #pragma no_long_calls. */
+ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
+ {
+ tree type_attr_list = NULL;
+ tree attr_name = NULL;
+ type_attr_list = TYPE_ATTRIBUTES (type);
+
+ if (aarch64_pragma_long_calls == LONG)
+ {
+ attr_name = get_identifier ("long_call");
+ }
+ else if (aarch64_pragma_long_calls == SHORT)
+ {
+ attr_name = get_identifier ("short_call");
+ }
+ else
+ {
+ return;
+ }
+
+ type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
+ TYPE_ATTRIBUTES (type) = type_attr_list;
+ }
+}
+
+/* Return true if DECL is known to be linked into section SECTION. */
+static bool
+aarch64_function_in_section_p (tree decl, section *section)
+{
+ /* We can only be certain about the prevailing symbol definition. */
+ if (!decl_binds_to_current_def_p (decl))
+ return false;
+
+ /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
+ if (!DECL_SECTION_NAME (decl))
+ {
+ /* Make sure that we will not create a unique section for DECL. */
+ if (flag_function_sections || DECL_COMDAT_GROUP (decl))
+ return false;
+ }
+
+ return function_section (decl) == section;
+}
+
/* Return true if calls to DECL should be treated as
long-calls (ie called via a register). */
static bool
-aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
+aarch64_decl_is_long_call_p (tree decl)
{
- return false;
+ tree attrs = NULL;
+
+ if (!decl)
+ return TARGET_LONG_CALLS;
+
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("short_call", attrs))
+ return false;
+
+ /* For "f", be conservative, and only cater for cases in which the
+ whole of the current function is placed in the same section. */
+ if (!flag_reorder_blocks_and_partition
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && aarch64_function_in_section_p (decl, current_function_section ()))
+ return false;
+
+ if (lookup_attribute ("long_call", attrs))
+ return true;
+
+ return TARGET_LONG_CALLS;
}
/* Return true if calls to symbol-ref SYM should be treated as
@@ -1108,6 +1257,36 @@ aarch64_is_long_call_p (rtx sym)
return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
+void
+aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
+{
+ if (!TARGET_LONG_CALLS)
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tbl\t__fentry__\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ else
+ {
+ if (flag_pic)
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ else
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, __fentry__\n");
+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ }
+}
+
/* Return true if calls to symbol-ref SYM should not go through
plt stubs. */
@@ -15099,6 +15278,15 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES aarch64_set_default_type_attributes
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
+
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
--- a/gcc/config/aarch64/aarch64.h 2018-11-06 10:43:27.870079389 +0800
+++ b/gcc/config/aarch64/aarch64.h 2018-11-06 10:49:29.574088911 +0800
@@ -28,7 +28,6 @@
-#define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas ()
/* Target machine storage layout. */
@@ -659,6 +658,14 @@ typedef struct
} CUMULATIVE_ARGS;
#endif
+/* Register the #pragma [no_]long_calls handlers along with the target's other pragmas. */
+#define REGISTER_TARGET_PRAGMAS() do { \
+ c_register_pragma (0, "long_calls", aarch64_pr_long_calls); \
+ c_register_pragma (0, "no_long_calls", aarch64_pr_no_long_calls); \
+ c_register_pragma (0, "long_calls_off", aarch64_pr_long_calls_off); \
+ aarch64_register_pragmas (); \
+} while (0)
+
#define FUNCTION_ARG_PADDING(MODE, TYPE) \
(aarch64_pad_arg_upward (MODE, TYPE) ? upward : downward)
@@ -842,13 +849,20 @@ typedef struct
#define PROFILE_HOOK(LABEL) \
{ \
rtx fun, lr; \
+ const rtx_insn* tmp = get_insns (); \
lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+ if (TARGET_LONG_CALLS) \
+ { \
+ emit_insn (gen_blockage ()); \
+ emit_insn_after (gen_blockage (), NEXT_INSN (tmp)); \
+ } \
}
/* All the work done in PROFILE_HOOK, but still required. */
-#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0)
+#define FUNCTION_PROFILER(STREAM, LABELNO) \
+ aarch64_function_profiler (STREAM, LABELNO)
/* For some reason, the Linux headers think they know how to define
these macros. They don't!!! */
diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md 2018-11-06 10:43:27.874079389 +0800
+++ b/gcc/config/aarch64/aarch64.md 2018-11-06 10:44:34.934081154 +0800
@@ -850,9 +850,10 @@
{
rtx pat;
rtx callee = XEXP (operands[0], 0);
- if (!REG_P (callee)
- && ((GET_CODE (callee) != SYMBOL_REF)
- || aarch64_is_noplt_call_p (callee)))
+
+ if (GET_CODE (callee) == SYMBOL_REF
+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee))
+ : !REG_P (callee))
XEXP (operands[0], 0) = force_reg (Pmode, callee);
if (operands[2] == NULL_RTX)
@@ -881,9 +882,10 @@
{
rtx pat;
rtx callee = XEXP (operands[1], 0);
- if (!REG_P (callee)
- && ((GET_CODE (callee) != SYMBOL_REF)
- || aarch64_is_noplt_call_p (callee)))
+
+ if (GET_CODE (callee) == SYMBOL_REF
+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee))
+ : !REG_P (callee))
XEXP (operands[1], 0) = force_reg (Pmode, callee);
if (operands[3] == NULL_RTX)
diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
--- a/gcc/config/aarch64/aarch64.opt 2018-11-06 10:43:27.874079389 +0800
+++ b/gcc/config/aarch64/aarch64.opt 2018-11-06 10:44:34.934081154 +0800
@@ -80,6 +80,10 @@ mlittle-endian
Target Report RejectNegative InverseMask(BIG_END)
Assume target CPU is configured as little endian.
+mlong-calls
+Target Report Mask(LONG_CALLS)
+Generate call insns as indirect calls, if necessary.
+
mcmodel=
Target RejectNegative Joined Enum(cmodel) Var(aarch64_cmodel_var) Init(AARCH64_CMODEL_SMALL) Save
Specify the code model.
diff -N -urp a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
--- a/gcc/config/aarch64/predicates.md 2018-11-06 10:43:27.878079389 +0800
+++ b/gcc/config/aarch64/predicates.md 2018-11-06 10:44:34.938081154 +0800
@@ -27,8 +27,9 @@
)
(define_predicate "aarch64_call_insn_operand"
- (ior (match_code "symbol_ref")
- (match_operand 0 "register_operand")))
+ (ior (and (match_code "symbol_ref")
+ (match_test "!aarch64_is_long_call_p (op)"))
+ (match_operand 0 "register_operand")))
;; Return true if OP a (const_int 0) operand.
(define_predicate "const0_operand"

33
sanitizer-pr-85835.patch Normal file
View File

@@ -0,0 +1,33 @@
diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
index 858bb21..de18e56 100644 (file)
--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
+++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
@@ -157,7 +157,6 @@ typedef struct user_fpregs elf_fpregset_t;
# include <sys/procfs.h>
#endif
#include <sys/user.h>
-#include <sys/ustat.h>
#include <linux/cyclades.h>
#include <linux/if_eql.h>
#include <linux/if_plip.h>
@@ -250,7 +249,19 @@ namespace __sanitizer {
#endif // SANITIZER_LINUX || SANITIZER_FREEBSD
#if SANITIZER_LINUX && !SANITIZER_ANDROID
- unsigned struct_ustat_sz = sizeof(struct ustat);
+ // Use pre-computed size of struct ustat to avoid <sys/ustat.h> which
+ // has been removed from glibc 2.28.
+#if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \
+ || defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \
+ || defined(__x86_64__)
+#define SIZEOF_STRUCT_USTAT 32
+#elif defined(__arm__) || defined(__i386__) || defined(__mips__) \
+ || defined(__powerpc__) || defined(__s390__)
+#define SIZEOF_STRUCT_USTAT 20
+#else
+#error Unknown size of struct ustat
+#endif
+ unsigned struct_ustat_sz = SIZEOF_STRUCT_USTAT;
unsigned struct_rlimit64_sz = sizeof(struct rlimit64);
unsigned struct_statvfs64_sz = sizeof(struct statvfs64);
#endif // SANITIZER_LINUX && !SANITIZER_ANDROID
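
For reference, a hypothetical one-off check (not part of the patch) that can be run on a system whose glibc still ships <sys/ustat.h>, to confirm the hard-coded sizes used above (32 on the listed 64-bit targets, 20 on the 32-bit ones).

#include <stdio.h>
#include <sys/ustat.h>   /* Only present on glibc older than 2.28.  */

int
main (void)
{
  printf ("sizeof (struct ustat) = %zu\n", sizeof (struct ustat));
  return 0;
}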

11
try-unroll.patch Normal file
View File

@@ -0,0 +1,11 @@
--- a/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:05:43.841181211 +0800
+++ b/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:03:17.545185153 +0800
@@ -726,7 +726,7 @@ try_unroll_loop_completely (struct loop
edge_to_cancel = NULL;
}
- if (!n_unroll_found)
+ if (!n_unroll_found || SCEV_NOT_KNOWN == TREE_CODE (niter))
return false;
if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))

View File

@@ -0,0 +1,25 @@
diff -N -urp a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c
--- a/gcc/common/config/aarch64/aarch64-common.c 2019-07-02 09:28:49.798701181 +0800
+++ b/gcc/common/config/aarch64/aarch64-common.c 2019-07-02 09:30:15.436282799 +0800
@@ -51,6 +51,10 @@ static const struct default_options aarc
{ OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
/* Enable redundant extension instructions removal at -O2 and higher. */
{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
+#if (TARGET_DEFAULT_ASYNC_UNWIND_TABLES == 1)
+ { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
+ { OPT_LEVELS_ALL, OPT_funwind_tables, NULL, 1},
+#endif
{ OPT_LEVELS_NONE, 0, NULL, 0 }
};
diff -N -urp a/gcc/config.gcc b/gcc/config.gcc
--- a/gcc/config.gcc 2019-07-02 09:28:50.114701170 +0800
+++ b/gcc/config.gcc 2019-07-02 09:31:50.636196118 +0800
@@ -966,6 +966,7 @@ aarch64*-*-linux*)
tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h"
tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-linux.h"
tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-linux"
+ tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1"
case $target in
aarch64_be-*)
tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"