357 lines
11 KiB
Diff
357 lines
11 KiB
Diff
|
|
This backport contains 1 patch from the GCC mainstream tree.
|
||
|
|
The commit ID of each patch is listed below, in chronological order.
|
||
|
|
|
||
|
|
0001-SLP-SLP-vectorization-vectorize-vector-constructors.patch
|
||
|
|
818b3293f4545d899148810f4f7d676b81e989dd
|
||
|
|
|
||
|
|
diff -N -urp a/gcc/expr.c b/gcc/expr.c
|
||
|
|
--- a/gcc/expr.c 2020-07-24 11:19:53.840000000 +0800
|
||
|
|
+++ b/gcc/expr.c 2020-07-24 11:56:50.128000000 +0800
|
||
|
|
@@ -6788,6 +6788,7 @@ store_constructor (tree exp, rtx target,
|
||
|
|
&& n_elts.is_constant (&const_n_elts))
|
||
|
|
{
|
||
|
|
machine_mode emode = eltmode;
|
||
|
|
+ bool vector_typed_elts_p = false;
|
||
|
|
|
||
|
|
if (CONSTRUCTOR_NELTS (exp)
|
||
|
|
&& (TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value))
|
||
|
|
@@ -6798,13 +6799,14 @@ store_constructor (tree exp, rtx target,
|
||
|
|
* TYPE_VECTOR_SUBPARTS (etype),
|
||
|
|
n_elts));
|
||
|
|
emode = TYPE_MODE (etype);
|
||
|
|
+ vector_typed_elts_p = true;
|
||
|
|
}
|
||
|
|
icode = convert_optab_handler (vec_init_optab, mode, emode);
|
||
|
|
if (icode != CODE_FOR_nothing)
|
||
|
|
{
|
||
|
|
unsigned int i, n = const_n_elts;
|
||
|
|
|
||
|
|
- if (emode != eltmode)
|
||
|
|
+ if (vector_typed_elts_p)
|
||
|
|
{
|
||
|
|
n = CONSTRUCTOR_NELTS (exp);
|
||
|
|
vec_vec_init_p = true;
|
||
|
|
diff -N -urp a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c
|
||
|
|
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 1970-01-01 08:00:00.000000000 +0800
|
||
|
|
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2020-07-24 11:56:50.128000000 +0800
|
||
|
|
@@ -0,0 +1,34 @@
|
||
|
|
+/* { dg-do compile } */
|
||
|
|
+/* { dg-options "-O3 -fdump-tree-slp-all" } */
|
||
|
|
+/* { dg-require-effective-target vect_int } */
|
||
|
|
+
|
||
|
|
+char g_d[1024], g_s1[1024], g_s2[1024];
|
||
|
|
+void foo(void)
|
||
|
|
+{
|
||
|
|
+ char *d = g_d, *s1 = g_s1, *s2 = g_s2;
|
||
|
|
+
|
||
|
|
+ for ( int y = 0; y < 128; y++ )
|
||
|
|
+ {
|
||
|
|
+ d[0 ] = s1[0 ] + s2[0 ];
|
||
|
|
+ d[1 ] = s1[1 ] + s2[1 ];
|
||
|
|
+ d[2 ] = s1[2 ] + s2[2 ];
|
||
|
|
+ d[3 ] = s1[3 ] + s2[3 ];
|
||
|
|
+ d[4 ] = s1[4 ] + s2[4 ];
|
||
|
|
+ d[5 ] = s1[5 ] + s2[5 ];
|
||
|
|
+ d[6 ] = s1[6 ] + s2[6 ];
|
||
|
|
+ d[7 ] = s1[7 ] + s2[7 ];
|
||
|
|
+ d[8 ] = s1[8 ] + s2[8 ];
|
||
|
|
+ d[9 ] = s1[9 ] + s2[9 ];
|
||
|
|
+ d[10] = s1[10] + s2[10];
|
||
|
|
+ d[11] = s1[11] + s2[11];
|
||
|
|
+ d[12] = s1[12] + s2[12];
|
||
|
|
+ d[13] = s1[13] + s2[13];
|
||
|
|
+ d[14] = s1[14] + s2[14];
|
||
|
|
+ d[15] = s1[15] + s2[15];
|
||
|
|
+ d += 16;
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* See that we vectorize an SLP instance. */
|
||
|
|
+/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 1 "slp1" } } */
|
||
|
|
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp1" } } */
|
||
|
|
diff -N -urp a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c
|
||
|
|
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c 1970-01-01 08:00:00.000000000 +0800
|
||
|
|
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c 2020-07-24 11:56:50.128000000 +0800
|
||
|
|
@@ -0,0 +1,61 @@
|
||
|
|
+/* { dg-do run } */
|
||
|
|
+/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */
|
||
|
|
+/* { dg-require-effective-target vect_int } */
|
||
|
|
+
|
||
|
|
+#define ARR_SIZE 1000
|
||
|
|
+
|
||
|
|
+void foo (int *a, int *b)
|
||
|
|
+{
|
||
|
|
+ int i;
|
||
|
|
+ for (i = 0; i < (ARR_SIZE - 2); ++i)
|
||
|
|
+ a[i] = b[0] + b[1] + b[i+1] + b[i+2];
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+void bar (int *a, int *b)
|
||
|
|
+{
|
||
|
|
+ int i;
|
||
|
|
+ for (i = 0; i < (ARR_SIZE - 2); ++i)
|
||
|
|
+ {
|
||
|
|
+ a[i] = b[0];
|
||
|
|
+ }
|
||
|
|
+ for (i = 0; i < (ARR_SIZE - 2); ++i)
|
||
|
|
+ {
|
||
|
|
+ a[i] = a[i] + b[1];
|
||
|
|
+ }
|
||
|
|
+ for (i = 0; i < (ARR_SIZE - 2); ++i)
|
||
|
|
+ {
|
||
|
|
+ a[i] = a[i] + b[i+1];
|
||
|
|
+ }
|
||
|
|
+ for (i = 0; i < (ARR_SIZE - 2); ++i)
|
||
|
|
+ {
|
||
|
|
+ a[i] = a[i] + b[i+2];
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int main ()
|
||
|
|
+{
|
||
|
|
+ int a1[ARR_SIZE];
|
||
|
|
+ int a2[ARR_SIZE];
|
||
|
|
+ int b[ARR_SIZE];
|
||
|
|
+ int i;
|
||
|
|
+
|
||
|
|
+ for (i = 0; i < ARR_SIZE; i++)
|
||
|
|
+ {
|
||
|
|
+ a1[i] = 0;
|
||
|
|
+ a2[i] = 0;
|
||
|
|
+ b[i] = i;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ foo (a1, b);
|
||
|
|
+ bar (a2, b);
|
||
|
|
+
|
||
|
|
+ for (i = 0; i < ARR_SIZE; i++)
|
||
|
|
+ if (a1[i] != a2[i])
|
||
|
|
+ return 1;
|
||
|
|
+
|
||
|
|
+ return 0;
|
||
|
|
+
|
||
|
|
+}
|
||
|
|
+/* See that we vectorize an SLP instance. */
|
||
|
|
+/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 12 "slp1" } } */
|
||
|
|
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" } } */
|
||
|
|
diff -N -urp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
|
||
|
|
--- a/gcc/tree-vectorizer.h 2020-07-24 11:19:51.976000000 +0800
|
||
|
|
+++ b/gcc/tree-vectorizer.h 2020-07-24 11:56:50.132000000 +0800
|
||
|
|
@@ -151,6 +151,10 @@ typedef struct _slp_instance {
|
||
|
|
/* The root of SLP tree. */
|
||
|
|
slp_tree root;
|
||
|
|
|
||
|
|
+ /* For vector constructors, the constructor stmt that the SLP tree is built
|
||
|
|
+ from, NULL otherwise. */
|
||
|
|
+ stmt_vec_info root_stmt;
|
||
|
|
+
|
||
|
|
/* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */
|
||
|
|
unsigned int group_size;
|
||
|
|
|
||
|
|
@@ -170,6 +174,7 @@ typedef struct _slp_instance {
|
||
|
|
#define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size
|
||
|
|
#define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor
|
||
|
|
#define SLP_INSTANCE_LOADS(S) (S)->loads
|
||
|
|
+#define SLP_INSTANCE_ROOT_STMT(S) (S)->root_stmt
|
||
|
|
|
||
|
|
#define SLP_TREE_CHILDREN(S) (S)->children
|
||
|
|
#define SLP_TREE_SCALAR_STMTS(S) (S)->stmts
|
||
|
|
diff -N -urp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
|
||
|
|
--- a/gcc/tree-vect-slp.c 2020-07-24 11:19:51.980000000 +0800
|
||
|
|
+++ b/gcc/tree-vect-slp.c 2020-07-24 11:56:50.132000000 +0800
|
||
|
|
@@ -2019,6 +2019,7 @@ vect_analyze_slp_instance (vec_info *vin
|
||
|
|
unsigned int i;
|
||
|
|
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
|
||
|
|
vec<stmt_vec_info> scalar_stmts;
|
||
|
|
+ bool constructor = false;
|
||
|
|
|
||
|
|
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||
|
|
{
|
||
|
|
@@ -2032,6 +2033,13 @@ vect_analyze_slp_instance (vec_info *vin
|
||
|
|
vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||
|
|
group_size = REDUC_GROUP_SIZE (stmt_info);
|
||
|
|
}
|
||
|
|
+ else if (is_gimple_assign (stmt_info->stmt)
|
||
|
|
+ && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
|
||
|
|
+ {
|
||
|
|
+ vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
|
||
|
|
+ group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
|
||
|
|
+ constructor = true;
|
||
|
|
+ }
|
||
|
|
else
|
||
|
|
{
|
||
|
|
gcc_assert (is_a <loop_vec_info> (vinfo));
|
||
|
|
@@ -2079,6 +2087,25 @@ vect_analyze_slp_instance (vec_info *vin
|
||
|
|
STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
|
||
|
|
= STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
|
||
|
|
}
|
||
|
|
+ else if (constructor)
|
||
|
|
+ {
|
||
|
|
+ tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
|
||
|
|
+ tree val;
|
||
|
|
+ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
|
||
|
|
+ {
|
||
|
|
+ if (TREE_CODE (val) == SSA_NAME)
|
||
|
|
+ {
|
||
|
|
+ gimple* def = SSA_NAME_DEF_STMT (val);
|
||
|
|
+ stmt_vec_info def_info = vinfo->lookup_stmt (def);
|
||
|
|
+ /* Value is defined in another basic block. */
|
||
|
|
+ if (!def_info)
|
||
|
|
+ return false;
|
||
|
|
+ scalar_stmts.safe_push (def_info);
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
else
|
||
|
|
{
|
||
|
|
/* Collect reduction statements. */
|
||
|
|
@@ -2164,6 +2191,8 @@ vect_analyze_slp_instance (vec_info *vin
|
||
|
|
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
|
||
|
|
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
|
||
|
|
SLP_INSTANCE_LOADS (new_instance) = vNULL;
|
||
|
|
+ SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
|
||
|
|
+
|
||
|
|
vect_gather_slp_loads (new_instance, node);
|
||
|
|
if (dump_enabled_p ())
|
||
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
||
|
|
@@ -3032,6 +3061,43 @@ vect_bb_vectorization_profitable_p (bb_v
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Find any vectorizable constructors and add them to the grouped_store
|
||
|
|
+ array. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
|
||
|
|
+{
|
||
|
|
+ gimple_stmt_iterator gsi;
|
||
|
|
+
|
||
|
|
+ for (gsi = bb_vinfo->region_begin;
|
||
|
|
+ gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
|
||
|
|
+ {
|
||
|
|
+ gimple *stmt = gsi_stmt (gsi);
|
||
|
|
+
|
||
|
|
+ if (is_gimple_assign (stmt)
|
||
|
|
+ && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
|
||
|
|
+ && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
|
||
|
|
+ && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE)
|
||
|
|
+ {
|
||
|
|
+ tree rhs = gimple_assign_rhs1 (stmt);
|
||
|
|
+
|
||
|
|
+ if (CONSTRUCTOR_NELTS (rhs) == 0)
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs));
|
||
|
|
+
|
||
|
|
+ if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs)))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ if (dump_enabled_p ())
|
||
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
||
|
|
+ "Found vectorizable constructor: %G\n", stmt);
|
||
|
|
+ stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
|
||
|
|
+ BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* Check if the region described by BB_VINFO can be vectorized, returning
|
||
|
|
true if so. When returning false, set FATAL to true if the same failure
|
||
|
|
would prevent vectorization at other vector sizes, false if it is still
|
||
|
|
@@ -3079,6 +3145,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
+ vect_slp_check_for_constructors (bb_vinfo);
|
||
|
|
+
|
||
|
|
/* If there are no grouped stores in the region there is no need
|
||
|
|
to continue with pattern recog as vect_analyze_slp will fail
|
||
|
|
anyway. */
|
||
|
|
@@ -3135,6 +3203,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi
|
||
|
|
relevant. */
|
||
|
|
vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
|
||
|
|
vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
|
||
|
|
+ if (SLP_INSTANCE_ROOT_STMT (instance))
|
||
|
|
+ STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
|
||
|
|
|
||
|
|
i++;
|
||
|
|
}
|
||
|
|
@@ -4175,6 +4245,49 @@ vect_remove_slp_scalar_calls (slp_tree n
|
||
|
|
vect_remove_slp_scalar_calls (node, visited);
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Vectorize the instance root. */
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
|
||
|
|
+{
|
||
|
|
+ gassign *rstmt;
|
||
|
|
+
|
||
|
|
+ if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
|
||
|
|
+ {
|
||
|
|
+ stmt_vec_info child_stmt_info;
|
||
|
|
+ int j;
|
||
|
|
+
|
||
|
|
+ FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
|
||
|
|
+ {
|
||
|
|
+ tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
|
||
|
|
+ tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
|
||
|
|
+ rstmt = gimple_build_assign (root_lhs, vect_lhs);
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
|
||
|
|
+ {
|
||
|
|
+ int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
|
||
|
|
+ stmt_vec_info child_stmt_info;
|
||
|
|
+ int j;
|
||
|
|
+ vec<constructor_elt, va_gc> *v;
|
||
|
|
+ vec_alloc (v, nelts);
|
||
|
|
+
|
||
|
|
+ FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
|
||
|
|
+ {
|
||
|
|
+ CONSTRUCTOR_APPEND_ELT (v,
|
||
|
|
+ NULL_TREE,
|
||
|
|
+ gimple_get_lhs (child_stmt_info->stmt));
|
||
|
|
+ }
|
||
|
|
+ tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
|
||
|
|
+ tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
|
||
|
|
+ tree r_constructor = build_constructor (rtype, v);
|
||
|
|
+ rstmt = gimple_build_assign (lhs, r_constructor);
|
||
|
|
+ }
|
||
|
|
+ gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
|
||
|
|
+ gsi_replace (&rgsi, rstmt, true);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* Generate vector code for all SLP instances in the loop/basic block. */
|
||
|
|
|
||
|
|
void
|
||
|
|
@@ -4189,9 +4302,13 @@ vect_schedule_slp (vec_info *vinfo)
|
||
|
|
slp_instances = vinfo->slp_instances;
|
||
|
|
FOR_EACH_VEC_ELT (slp_instances, i, instance)
|
||
|
|
{
|
||
|
|
+ slp_tree node = SLP_INSTANCE_TREE (instance);
|
||
|
|
/* Schedule the tree of INSTANCE. */
|
||
|
|
- vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
|
||
|
|
- instance, bst_map);
|
||
|
|
+ vect_schedule_slp_instance (node, instance, bst_map);
|
||
|
|
+
|
||
|
|
+ if (SLP_INSTANCE_ROOT_STMT (instance))
|
||
|
|
+ vectorize_slp_instance_root_stmt (node, instance);
|
||
|
|
+
|
||
|
|
if (dump_enabled_p ())
|
||
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
||
|
|
"vectorizing stmts using SLP.\n");
|
||
|
|
@@ -4220,6 +4337,9 @@ vect_schedule_slp (vec_info *vinfo)
|
||
|
|
if (!STMT_VINFO_DATA_REF (store_info))
|
||
|
|
break;
|
||
|
|
|
||
|
|
+ if (SLP_INSTANCE_ROOT_STMT (instance))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
store_info = vect_orig_stmt (store_info);
|
||
|
|
/* Free the attached stmt_vec_info and remove the stmt. */
|
||
|
|
vinfo->remove_stmt (store_info);
|