This backport contains 2 patches from the GCC mainstream tree. The commit IDs of these patches are listed below in chronological order. 4bf29d15f2e01348a45a1f4e1a135962f123fdd6 0001-AArch64-PR79262-Adjust-vector-cost.patch 27071013521b015d17a2666448f27a6ff0c55aca 0001-Move-EXTRACT_LAST_REDUCTION-costing-to-vectorizable_.patch diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c --- a/gcc/config/aarch64/aarch64.c 2020-11-20 04:36:33.988000000 +0800 +++ b/gcc/config/aarch64/aarch64.c 2020-11-20 04:32:20.984000000 +0800 @@ -448,7 +448,7 @@ static const struct cpu_vector_cost gene 1, /* vec_int_stmt_cost */ 1, /* vec_fp_stmt_cost */ 2, /* vec_permute_cost */ - 1, /* vec_to_scalar_cost */ + 2, /* vec_to_scalar_cost */ 1, /* scalar_to_vec_cost */ 1, /* vec_align_load_cost */ 1, /* vec_unalign_load_cost */ diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c --- a/gcc/tree-vect-loop.c 2020-11-20 04:36:34.016000000 +0800 +++ b/gcc/tree-vect-loop.c 2020-11-20 04:32:20.984000000 +0800 @@ -3926,8 +3926,11 @@ vect_model_reduction_cost (stmt_vec_info code = gimple_assign_rhs_code (orig_stmt_info->stmt); - if (reduction_type == EXTRACT_LAST_REDUCTION - || reduction_type == FOLD_LEFT_REDUCTION) + if (reduction_type == EXTRACT_LAST_REDUCTION) + /* No extra instructions are needed in the prologue. The loop body + operations are costed in vectorizable_condition. */ + inside_cost = 0; + else if (reduction_type == FOLD_LEFT_REDUCTION) { /* No extra instructions needed in the prologue. 
*/ prologue_cost = 0; diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c --- a/gcc/tree-vect-stmts.c 2020-11-20 04:36:33.996000000 +0800 +++ b/gcc/tree-vect-stmts.c 2020-11-20 04:32:20.984000000 +0800 @@ -859,7 +859,8 @@ vect_model_simple_cost (stmt_vec_info st enum vect_def_type *dt, int ndts, slp_tree node, - stmt_vector_for_cost *cost_vec) + stmt_vector_for_cost *cost_vec, + vect_cost_for_stmt kind = vector_stmt) { int inside_cost = 0, prologue_cost = 0; @@ -906,7 +907,7 @@ vect_model_simple_cost (stmt_vec_info st } /* Pass the inside-of-loop statements to the target-specific cost model. */ - inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt, + inside_cost += record_stmt_cost (cost_vec, ncopies, kind, stmt_info, 0, vect_body); if (dump_enabled_p ()) @@ -9194,15 +9195,18 @@ vectorizable_condition (stmt_vec_info st " EXTRACT_LAST_REDUCTION.\n"); LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; } - if (expand_vec_cond_expr_p (vectype, comp_vectype, - cond_code)) - { - STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; - vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node, - cost_vec); - return true; - } - return false; + + vect_cost_for_stmt kind = vector_stmt; + if (reduction_type == EXTRACT_LAST_REDUCTION) + /* Count one reduction-like operation per vector. */ + kind = vec_to_scalar; + else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)) + return false; + + STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; + vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node, + cost_vec, kind); + return true; } /* Transform. */