This backport contains 1 patch from the GCC mainline tree. The commit IDs of the patches are listed below in chronological order. 0001-SLP-SLP-vectorization-vectorize-vector-constructors.patch 818b3293f4545d899148810f4f7d676b81e989dd diff -N -urp a/gcc/expr.c b/gcc/expr.c --- a/gcc/expr.c 2020-07-24 11:19:53.840000000 +0800 +++ b/gcc/expr.c 2020-07-24 11:56:50.128000000 +0800 @@ -6788,6 +6788,7 @@ store_constructor (tree exp, rtx target, && n_elts.is_constant (&const_n_elts)) { machine_mode emode = eltmode; + bool vector_typed_elts_p = false; if (CONSTRUCTOR_NELTS (exp) && (TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value)) @@ -6798,13 +6799,14 @@ store_constructor (tree exp, rtx target, * TYPE_VECTOR_SUBPARTS (etype), n_elts)); emode = TYPE_MODE (etype); + vector_typed_elts_p = true; } icode = convert_optab_handler (vec_init_optab, mode, emode); if (icode != CODE_FOR_nothing) { unsigned int i, n = const_n_elts; - if (emode != eltmode) + if (vector_typed_elts_p) { n = CONSTRUCTOR_NELTS (exp); vec_vec_init_p = true; diff -N -urp a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c --- a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2020-07-24 11:56:50.128000000 +0800 @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-slp-all" } */ +/* { dg-require-effective-target vect_int } */ + +char g_d[1024], g_s1[1024], g_s2[1024]; +void foo(void) +{ + char *d = g_d, *s1 = g_s1, *s2 = g_s2; + + for ( int y = 0; y < 128; y++ ) + { + d[0 ] = s1[0 ] + s2[0 ]; + d[1 ] = s1[1 ] + s2[1 ]; + d[2 ] = s1[2 ] + s2[2 ]; + d[3 ] = s1[3 ] + s2[3 ]; + d[4 ] = s1[4 ] + s2[4 ]; + d[5 ] = s1[5 ] + s2[5 ]; + d[6 ] = s1[6 ] + s2[6 ]; + d[7 ] = s1[7 ] + s2[7 ]; + d[8 ] = s1[8 ] + s2[8 ]; + d[9 ] = s1[9 ] + s2[9 ]; + d[10] = s1[10] + s2[10]; + d[11] = s1[11] + s2[11]; + d[12] = s1[12] + s2[12]; + d[13] = s1[13] + s2[13]; + d[14] = s1[14] + s2[14]; + d[15] = s1[15] + s2[15]; 
+ d += 16; + } +} + +/* See that we vectorize an SLP instance. */ +/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 1 "slp1" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp1" } } */ diff -N -urp a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c --- a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c 2020-07-24 11:56:50.128000000 +0800 @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */ +/* { dg-require-effective-target vect_int } */ + +#define ARR_SIZE 1000 + +void foo (int *a, int *b) +{ + int i; + for (i = 0; i < (ARR_SIZE - 2); ++i) + a[i] = b[0] + b[1] + b[i+1] + b[i+2]; +} + +void bar (int *a, int *b) +{ + int i; + for (i = 0; i < (ARR_SIZE - 2); ++i) + { + a[i] = b[0]; + } + for (i = 0; i < (ARR_SIZE - 2); ++i) + { + a[i] = a[i] + b[1]; + } + for (i = 0; i < (ARR_SIZE - 2); ++i) + { + a[i] = a[i] + b[i+1]; + } + for (i = 0; i < (ARR_SIZE - 2); ++i) + { + a[i] = a[i] + b[i+2]; + } +} + +int main () +{ + int a1[ARR_SIZE]; + int a2[ARR_SIZE]; + int b[ARR_SIZE]; + int i; + + for (i = 0; i < ARR_SIZE; i++) + { + a1[i] = 0; + a2[i] = 0; + b[i] = i; + } + + foo (a1, b); + bar (a2, b); + + for (i = 0; i < ARR_SIZE; i++) + if (a1[i] != a2[i]) + return 1; + + return 0; + +} +/* See that we vectorize an SLP instance. */ +/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 12 "slp1" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" } } */ diff -N -urp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h --- a/gcc/tree-vectorizer.h 2020-07-24 11:19:51.976000000 +0800 +++ b/gcc/tree-vectorizer.h 2020-07-24 11:56:50.132000000 +0800 @@ -151,6 +151,10 @@ typedef struct _slp_instance { /* The root of SLP tree. 
*/ slp_tree root; + /* For vector constructors, the constructor stmt that the SLP tree is built + from, NULL otherwise. */ + stmt_vec_info root_stmt; + /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */ unsigned int group_size; @@ -170,6 +174,7 @@ typedef struct _slp_instance { #define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size #define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor #define SLP_INSTANCE_LOADS(S) (S)->loads +#define SLP_INSTANCE_ROOT_STMT(S) (S)->root_stmt #define SLP_TREE_CHILDREN(S) (S)->children #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts diff -N -urp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c --- a/gcc/tree-vect-slp.c 2020-07-24 11:19:51.980000000 +0800 +++ b/gcc/tree-vect-slp.c 2020-07-24 11:56:50.132000000 +0800 @@ -2019,6 +2019,7 @@ vect_analyze_slp_instance (vec_info *vin unsigned int i; struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); vec scalar_stmts; + bool constructor = false; if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) { @@ -2032,6 +2033,13 @@ vect_analyze_slp_instance (vec_info *vin vectype = STMT_VINFO_VECTYPE (stmt_info); group_size = REDUC_GROUP_SIZE (stmt_info); } + else if (is_gimple_assign (stmt_info->stmt) + && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR) + { + vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)); + group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt)); + constructor = true; + } else { gcc_assert (is_a (vinfo)); @@ -2079,6 +2087,25 @@ vect_analyze_slp_instance (vec_info *vin STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ())); } + else if (constructor) + { + tree rhs = gimple_assign_rhs1 (stmt_info->stmt); + tree val; + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val) + { + if (TREE_CODE (val) == SSA_NAME) + { + gimple* def = SSA_NAME_DEF_STMT (val); + stmt_vec_info def_info = vinfo->lookup_stmt (def); + /* Value is defined in another basic block. 
*/ + if (!def_info) + return false; + scalar_stmts.safe_push (def_info); + } + else + return false; + } + } else { /* Collect reduction statements. */ @@ -2164,6 +2191,8 @@ vect_analyze_slp_instance (vec_info *vin SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; SLP_INSTANCE_LOADS (new_instance) = vNULL; + SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL; + vect_gather_slp_loads (new_instance, node); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -3032,6 +3061,43 @@ vect_bb_vectorization_profitable_p (bb_v return true; } +/* Find any vectorizable constructors and add them to the grouped_store + array. */ + +static void +vect_slp_check_for_constructors (bb_vec_info bb_vinfo) +{ + gimple_stmt_iterator gsi; + + for (gsi = bb_vinfo->region_begin; + gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (is_gimple_assign (stmt) + && gimple_assign_rhs_code (stmt) == CONSTRUCTOR + && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME + && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE) + { + tree rhs = gimple_assign_rhs1 (stmt); + + if (CONSTRUCTOR_NELTS (rhs) == 0) + continue; + + poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)); + + if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs))) + continue; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Found vectorizable constructor: %G\n", stmt); + stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt); + BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info); + } + } +} + /* Check if the region described by BB_VINFO can be vectorized, returning true if so. 
When returning false, set FATAL to true if the same failure would prevent vectorization at other vector sizes, false if it is still @@ -3079,6 +3145,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi return false; } + vect_slp_check_for_constructors (bb_vinfo); + /* If there are no grouped stores in the region there is no need to continue with pattern recog as vect_analyze_slp will fail anyway. */ @@ -3135,6 +3203,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi relevant. */ vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance)); vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance)); + if (SLP_INSTANCE_ROOT_STMT (instance)) + STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp; i++; } @@ -4175,6 +4245,49 @@ vect_remove_slp_scalar_calls (slp_tree n vect_remove_slp_scalar_calls (node, visited); } +/* Vectorize the instance root. */ + +void +vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance) +{ + gassign *rstmt; + + if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1) + { + stmt_vec_info child_stmt_info; + int j; + + FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info) + { + tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt); + tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt); + rstmt = gimple_build_assign (root_lhs, vect_lhs); + break; + } + } + else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1) + { + int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node); + stmt_vec_info child_stmt_info; + int j; + vec *v; + vec_alloc (v, nelts); + + FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info) + { + CONSTRUCTOR_APPEND_ELT (v, + NULL_TREE, + gimple_get_lhs (child_stmt_info->stmt)); + } + tree lhs = gimple_get_lhs (instance->root_stmt->stmt); + tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt)); + tree r_constructor = build_constructor (rtype, v); + rstmt = gimple_build_assign (lhs, r_constructor); + } + gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt); + gsi_replace (&rgsi, rstmt, true); 
+} + /* Generate vector code for all SLP instances in the loop/basic block. */ void @@ -4189,9 +4302,13 @@ vect_schedule_slp (vec_info *vinfo) slp_instances = vinfo->slp_instances; FOR_EACH_VEC_ELT (slp_instances, i, instance) { + slp_tree node = SLP_INSTANCE_TREE (instance); /* Schedule the tree of INSTANCE. */ - vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), - instance, bst_map); + vect_schedule_slp_instance (node, instance, bst_map); + + if (SLP_INSTANCE_ROOT_STMT (instance)) + vectorize_slp_instance_root_stmt (node, instance); + if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectorizing stmts using SLP.\n"); @@ -4220,6 +4337,9 @@ vect_schedule_slp (vec_info *vinfo) if (!STMT_VINFO_DATA_REF (store_info)) break; + if (SLP_INSTANCE_ROOT_STMT (instance)) + continue; + store_info = vect_orig_stmt (store_info); /* Free the attached stmt_vec_info and remove the stmt. */ vinfo->remove_stmt (store_info);