This backport contains 1 patch from the gcc mainstream tree. The commit ids of these patches are listed as follows, in order of time.
46a58c779af3055a4b10b285a1f4be28abe4351c 0001-tree-optimization-96920-another-ICE-when-vectorizing.patch

diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96920.c b/gcc/testsuite/gcc.dg/vect/pr96920.c
--- a/gcc/testsuite/gcc.dg/vect/pr96920.c	1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/vect/pr96920.c	2020-10-26 21:46:25.316000000 +0800
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+
+int a[1024];
+int b[2048];
+
+void foo (int x, int y)
+{
+  for (int i = 0; i < 1024; ++i)
+    {
+      int tem0 = b[2*i];
+      int tem1 = b[2*i+1];
+      for (int j = 0; j < 32; ++j)
+	{
+	  int tem = tem0;
+	  tem0 = tem1;
+	  tem1 = tem;
+	  a[i] += tem0;
+	}
+    }
+}
diff -uprN a/gcc/testsuite/gfortran.dg/vect/pr96920.f90 b/gcc/testsuite/gfortran.dg/vect/pr96920.f90
--- a/gcc/testsuite/gfortran.dg/vect/pr96920.f90	1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gfortran.dg/vect/pr96920.f90	2020-10-26 21:46:25.316000000 +0800
@@ -0,0 +1,37 @@
+! { dg-do compile }
+      subroutine ice(npoint, nterm, x, g)
+      implicit none
+      integer norder
+      parameter (norder=10)
+      integer j
+      integer k
+      integer ii
+      integer nterm
+      integer npoint
+      real b(norder)
+      real c(norder)
+      real d(norder)
+      real x(npoint)
+      real g(npoint)
+      real gg
+      real prev
+      real prev2
+
+      j = 1
+ 100  continue
+      j = j+1
+      if (nterm == j) then
+         do ii=1,npoint
+            k = nterm
+            gg= d(k)
+            prev= 0.0
+            do k=k-1,1,-1
+               prev2= prev
+               prev= gg
+               gg = d(k)+(x(ii)-b(k))*prev-c(k+1)*prev2
+            enddo
+            g(ii) = gg
+         enddo
+      endif
+      go to 100
+      end
diff -uprN a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
--- a/gcc/tree-vect-loop.c	2020-10-26 21:45:23.056000000 +0800
+++ b/gcc/tree-vect-loop.c	2020-10-26 21:49:02.884000000 +0800
@@ -8166,6 +8166,47 @@ scale_profile_for_vect_loop (struct loop
   scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob);
 }
 
+/* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI
+   latch edge values originally defined by it.  */
+
+static void
+maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
+				     stmt_vec_info def_stmt_info)
+{
+  tree def = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt);
+  if (!def || TREE_CODE (def) != SSA_NAME)
+    return;
+  stmt_vec_info phi_info;
+  imm_use_iterator iter;
+  use_operand_p use_p;
+  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
+    if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
+      if (gimple_bb (phi)->loop_father->header == gimple_bb (phi)
+	  && (phi_info = loop_vinfo->lookup_stmt (phi))
+	  && VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
+	  && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
+	  && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
+	{
+	  loop_p loop = gimple_bb (phi)->loop_father;
+	  edge e = loop_latch_edge (loop);
+	  if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def)
+	    {
+	      stmt_vec_info phi_defs = STMT_VINFO_VEC_STMT (phi_info);
+	      stmt_vec_info latch_defs = STMT_VINFO_VEC_STMT (def_stmt_info);
+	      while (phi_defs && latch_defs)
+		{
+		  add_phi_arg (as_a <gphi *> (phi_defs->stmt),
+			       gimple_get_lhs (latch_defs->stmt), e,
+			       gimple_phi_arg_location (phi, e->dest_idx));
+		  phi_defs = STMT_VINFO_RELATED_STMT (phi_defs);
+		  latch_defs = STMT_VINFO_RELATED_STMT (latch_defs);
+		}
+	      gcc_assert (!latch_defs);
+	      gcc_assert (!phi_defs);
+	    }
+	}
+}
+
 /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
    When vectorizing STMT_INFO as a store, set *SEEN_STORE to its
    stmt_vec_info.  */
@@ -8533,7 +8574,7 @@ vect_transform_loop (loop_vec_info loop_
 
       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
 	   gsi_next (&si))
-        {
+	{
 	  gphi *phi = si.phi ();
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_NOTE, vect_location,
@@ -8568,6 +8609,27 @@ vect_transform_loop (loop_vec_info loop_
 	    }
 	}
 
+      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
+	   gsi_next (&si))
+	{
+	  gphi *phi = si.phi ();
+	  stmt_info = loop_vinfo->lookup_stmt (phi);
+	  if (!stmt_info)
+	    continue;
+
+	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
+	      && !STMT_VINFO_LIVE_P (stmt_info))
+	    continue;
+
+	  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
+	      && ! PURE_SLP_STMT (stmt_info))
+	    maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info);
+	}
+
       for (gimple_stmt_iterator si = gsi_start_bb (bb);
 	   !gsi_end_p (si);)
 	{
@@ -8604,9 +8666,16 @@ vect_transform_loop (loop_vec_info loop_
 		    = STMT_VINFO_RELATED_STMT (stmt_info);
 		  vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,
 					    &seen_store);
+		  maybe_set_vectorized_backedge_value (loop_vinfo,
+						       pat_stmt_info);
+		}
+	      else
+		{
+		  vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
+					    &seen_store);
+		  maybe_set_vectorized_backedge_value (loop_vinfo,
+						       stmt_info);
 		}
-	      vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
-					&seen_store);
 	    }
 	  gsi_next (&si);
 	  if (seen_store)
@@ -8623,43 +8692,6 @@ vect_transform_loop (loop_vec_info loop_
 	}
     }
 
-  /* Fill in backedge defs of reductions.  */
-  for (unsigned i = 0; i < loop_vinfo->reduc_latch_defs.length (); ++i)
-    {
-      stmt_vec_info stmt_info = loop_vinfo->reduc_latch_defs[i];
-      stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
-      stmt_vec_info phi_info
-	= STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
-      stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
-      gphi *phi
-	= dyn_cast <gphi *> (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt);
-      edge e = loop_latch_edge (gimple_bb (phi_info->stmt)->loop_father);
-      do
-	{
-	  add_phi_arg (as_a <gphi *> (phi_info->stmt),
-		       gimple_get_lhs (vec_stmt->stmt), e,
-		       gimple_phi_arg_location (phi, e->dest_idx));
-	  phi_info = STMT_VINFO_RELATED_STMT (phi_info);
-	  vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
-	}
-      while (phi_info);
-      gcc_assert (!vec_stmt);
-    }
-  for (unsigned i = 0; i < loop_vinfo->reduc_latch_slp_defs.length (); ++i)
-    {
-      slp_tree slp_node = loop_vinfo->reduc_latch_slp_defs[i].first;
-      slp_tree phi_node = loop_vinfo->reduc_latch_slp_defs[i].second;
-      gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
-      e = loop_latch_edge (gimple_bb (phi)->loop_father);
-      gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
-		  == SLP_TREE_VEC_STMTS (slp_node).length ());
-      for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
-	add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
-		     gimple_get_lhs
-		       (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
-		     e, gimple_phi_arg_location (phi, e->dest_idx));
-    }
-
   /* Stub out scalar statements that must not survive vectorization.
      Doing this here helps with grouped statements, or statements that
      are involved in patterns.  */
diff -uprN a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
--- a/gcc/tree-vectorizer.h	2020-10-26 21:45:23.052000000 +0800
+++ b/gcc/tree-vectorizer.h	2020-10-26 21:46:25.316000000 +0800
@@ -575,11 +575,6 @@ typedef struct _loop_vec_info : public v
      stmt in the chain.  */
   auto_vec<vec<stmt_vec_info> > reduction_chains;
 
-  /* The vectorized stmts defining the latch values of the reduction
-     they are involved with.  */
-  auto_vec<stmt_vec_info> reduc_latch_defs;
-  auto_vec<std::pair<slp_tree, slp_tree> > reduc_latch_slp_defs;
-
   /* Cost vector for a single scalar iteration.  */
   auto_vec<stmt_info_for_cost> scalar_cost_vec;
 
diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
--- a/gcc/tree-vect-slp.c	2020-10-26 21:45:23.052000000 +0800
+++ b/gcc/tree-vect-slp.c	2020-10-26 21:46:25.320000000 +0800
@@ -2189,6 +2189,7 @@ vect_analyze_slp_instance (vec_info *vin
   SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
   SLP_INSTANCE_LOADS (new_instance) = vNULL;
   SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
+  new_instance->reduc_phis = NULL;
 
   vect_gather_slp_loads (new_instance, node);
   if (dump_enabled_p ())
@@ -4282,6 +4283,26 @@ vect_schedule_slp (vec_info *vinfo)
       stmt_vec_info store_info;
       unsigned int j;
 
+      /* For reductions set the latch values of the vectorized PHIs.  */
+      if (instance->reduc_phis
+	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
+		(instance->reduc_phis)[0]) != FOLD_LEFT_REDUCTION
+	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
+		(instance->reduc_phis)[0]) != EXTRACT_LAST_REDUCTION)
+	{
+	  slp_tree slp_node = root;
+	  slp_tree phi_node = instance->reduc_phis;
+	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
+	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
+	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
+		      == SLP_TREE_VEC_STMTS (slp_node).length ());
+	  for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
+	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
+			 gimple_get_lhs
+			   (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
+			 e, gimple_phi_arg_location (phi, e->dest_idx));
+	}
+
       /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ???  Why should this be necessary?  DCE should be able to
diff -uprN a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
--- a/gcc/tree-vect-stmts.c	2020-10-26 21:45:23.012000000 +0800
+++ b/gcc/tree-vect-stmts.c	2020-10-26 21:46:25.320000000 +0800
@@ -10229,37 +10229,6 @@ vect_transform_stmt (stmt_vec_info stmt_
   if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
     return is_store;
 
-  /* If this stmt defines a value used on a backedge, record it so
-     we can update the vectorized PHIs later.  */
-  stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
-  stmt_vec_info reduc_info;
-  if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
-      && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
-      && (reduc_info = info_for_reduction (orig_stmt_info))
-      && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
-      && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
-    {
-      gphi *phi;
-      edge e;
-      if (!slp_node
-	  && (phi = dyn_cast <gphi *>
-		      (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
-	  && dominated_by_p (CDI_DOMINATORS,
-			     gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
-	  && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
-	  && (PHI_ARG_DEF_FROM_EDGE (phi, e)
-	      == gimple_get_lhs (orig_stmt_info->stmt)))
-	{
-	  as_a <loop_vec_info> (vinfo)->reduc_latch_defs.safe_push (stmt_info);
-	}
-      else if (slp_node
-	       && slp_node != slp_node_instance->reduc_phis)
-	{
-	  as_a <loop_vec_info> (vinfo)->reduc_latch_slp_defs.safe_push
-	    (std::make_pair (slp_node, slp_node_instance->reduc_phis));
-	}
-    }
-
   /* Handle stmts whose DEF is used outside the loop-nest that is being
      vectorized.  */
   done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,