Sync patch from openeuler/gcc - 20221201 (cherry picked from commit 5487e8942c694fd317f2cbf1662e9eaf33f2f612)
268 lines
7.6 KiB
Diff
268 lines
7.6 KiB
Diff
From 013544d0b477647c8835a8806c75e7b09155b8ed Mon Sep 17 00:00:00 2001
|
|
From: benniaobufeijiushiji <linda7@huawei.com>
|
|
Date: Mon, 8 Aug 2022 09:13:53 +0800
|
|
Subject: [PATCH 31/35] [loop-vect] Transfer arrays using registers between
|
|
loops. For vectorized stores in a loop, if all succeeding loops immediately use the
|
|
data, transfer the data in registers instead of through loads and stores, to avoid the
|
|
overhead of memory access.
|
|
|
|
---
|
|
gcc/testsuite/gcc.dg/vect/vect-perm-1.c | 45 ++++++
|
|
gcc/tree-vect-stmts.c | 181 ++++++++++++++++++++++++
|
|
2 files changed, 226 insertions(+)
|
|
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-perm-1.c
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-perm-1.c b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
|
|
new file mode 100644
|
|
index 000000000..d8b29fbd5
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
|
|
@@ -0,0 +1,45 @@
|
|
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
+/* { dg-options "-O3 -fdump-tree-vect-all-details -save-temps" } */
|
|
+
|
|
+#include <stdio.h>
|
|
+#include <stdlib.h>
|
|
+
|
|
+static unsigned inline abs2 (unsigned a)
|
|
+{
|
|
+ unsigned s = ((a>>15)&0x10001)*0xffff;
|
|
+ return (a+s)^s;
|
|
+}
|
|
+
|
|
+int foo (unsigned *a00, unsigned *a11, unsigned *a22, unsigned *a33)
|
|
+{
|
|
+ unsigned tmp[4][4];
|
|
+ unsigned a0, a1, a2, a3;
|
|
+ int sum = 0;
|
|
+ for (int i = 0; i < 4; i++)
|
|
+ {
|
|
+ int t0 = a00[i] + a11[i];
|
|
+ int t1 = a00[i] - a11[i];
|
|
+ int t2 = a22[i] + a33[i];
|
|
+ int t3 = a22[i] - a33[i];
|
|
+ tmp[i][0] = t0 + t2;
|
|
+ tmp[i][2] = t0 - t2;
|
|
+ tmp[i][1] = t1 + t3;
|
|
+ tmp[i][3] = t1 - t3;
|
|
+ }
|
|
+ for (int i = 0; i < 4; i++)
|
|
+ {
|
|
+ int t0 = tmp[0][i] + tmp[1][i];
|
|
+ int t1 = tmp[0][i] - tmp[1][i];
|
|
+ int t2 = tmp[2][i] + tmp[3][i];
|
|
+ int t3 = tmp[2][i] - tmp[3][i];
|
|
+ a0 = t0 + t2;
|
|
+ a2 = t0 - t2;
|
|
+ a1 = t1 + t3;
|
|
+ a3 = t1 - t3;
|
|
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
|
|
+ }
|
|
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 16 "vect" } } */
|
|
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
|
index 2c2197022..98b233718 100644
|
|
--- a/gcc/tree-vect-stmts.c
|
|
+++ b/gcc/tree-vect-stmts.c
|
|
@@ -2276,6 +2276,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
|
|
return NULL_TREE;
|
|
}
|
|
|
|
+/* Count MEM_REF/ARRAY_REF loads and stores in BB; push the loads to STMTS.
|
|
+ A BB with no such reference is regarded as empty and skipped in the DFS. */
|
|
+
|
|
+static unsigned
|
|
+mem_refs_in_bb (basic_block bb, vec<gimple *> &stmts)
|
|
+{
|
|
+ unsigned num = 0;
|
|
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
|
|
+ !gsi_end_p (gsi); gsi_next (&gsi))
|
|
+ {
|
|
+ gimple *stmt = gsi_stmt (gsi);
|
|
+ if (is_gimple_debug (stmt))
|
|
+ continue;
|
|
+ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt)
|
|
+ && !gimple_has_volatile_ops (stmt))
|
|
+ {
|
|
+ if (gimple_assign_rhs_code (stmt) == MEM_REF
|
|
+ || gimple_assign_rhs_code (stmt) == ARRAY_REF)
|
|
+ {
|
|
+ stmts.safe_push (stmt);
|
|
+ num++;
|
|
+ }
|
|
+ else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF
|
|
+ || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF)
|
|
+ num++;
|
|
+ }
|
|
+ }
|
|
+ return num;
|
|
+}
|
|
+
|
|
+static bool
|
|
+check_same_base (vec<data_reference_p> *datarefs, data_reference_p dr)
|
|
+{
|
|
+ for (unsigned ui = 0; ui < datarefs->length (); ui++)
|
|
+ {
|
|
+ tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0);
|
|
+ tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0);
|
|
+ if (TREE_CODE (op1) != TREE_CODE (op2))
|
|
+ continue;
|
|
+ if (TREE_CODE (op1) == ADDR_EXPR)
|
|
+ {
|
|
+ op1 = TREE_OPERAND (op1, 0);
|
|
+ op2 = TREE_OPERAND (op2, 0);
|
|
+ }
|
|
+ enum tree_code code = TREE_CODE (op1);
|
|
+ switch (code)
|
|
+ {
|
|
+ case VAR_DECL:
|
|
+ if (DECL_NAME (op1) == DECL_NAME (op2)
|
|
+ && DR_IS_READ ((*datarefs)[ui]))
|
|
+ return true;
|
|
+ break;
|
|
+ case SSA_NAME:
|
|
+ if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2)
|
|
+ && DR_IS_READ ((*datarefs)[ui]))
|
|
+ return true;
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+/* Iterate over the load STMTS. If the block of a vector-typed load reads from
|
|
+ the same base as STMT_INFO's data reference, return; else clear SUCCESS. */
|
|
+
|
|
+static void
|
|
+check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts,
|
|
+ stmt_vec_info stmt_info, bool &success)
|
|
+{
|
|
+ if (stmt_info == NULL)
|
|
+ {
|
|
+ success = false;
|
|
+ return;
|
|
+ }
|
|
+ if (DR_IS_READ (stmt_info->dr_aux.dr))
|
|
+ {
|
|
+ success = false;
|
|
+ return;
|
|
+ }
|
|
+ unsigned ui = 0;
|
|
+ gimple *candidate = NULL;
|
|
+ FOR_EACH_VEC_ELT (stmts, ui, candidate)
|
|
+ {
|
|
+ if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE)
|
|
+ continue;
|
|
+
|
|
+ if (candidate->bb != candidate->bb->loop_father->header)
|
|
+ {
|
|
+ success = false;
|
|
+ return;
|
|
+ }
|
|
+ auto_vec<data_reference_p> datarefs;
|
|
+ tree res = find_data_references_in_bb (candidate->bb->loop_father,
|
|
+ candidate->bb, &datarefs);
|
|
+ if (res == chrec_dont_know)
|
|
+ {
|
|
+ success = false;
|
|
+ return;
|
|
+ }
|
|
+ if (check_same_base (&datarefs, stmt_info->dr_aux.dr))
|
|
+ return;
|
|
+ }
|
|
+ success = false;
|
|
+}
|
|
+
|
|
+/* Depth-first search from BB. If a successor contains memory references,
|
|
+ check them and stop searching further along that path. */
|
|
+
|
|
+static void
|
|
+dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info,
|
|
+ bool &success, vec<basic_block> &visited_bbs)
|
|
+{
|
|
+ if (bb == cfun->cfg->x_exit_block_ptr)
|
|
+ {
|
|
+ success = false;
|
|
+ return;
|
|
+ }
|
|
+ if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch)
|
|
+ return;
|
|
+
|
|
+ visited_bbs.safe_push (bb);
|
|
+ auto_vec<gimple *> stmts;
|
|
+ unsigned num = mem_refs_in_bb (bb, stmts);
|
|
+ /* Empty BB. */
|
|
+ if (num == 0)
|
|
+ {
|
|
+ edge e;
|
|
+ edge_iterator ei;
|
|
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
|
+ {
|
|
+ dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs);
|
|
+ if (!success)
|
|
+ return;
|
|
+ }
|
|
+ return;
|
|
+ }
|
|
+ /* Non-empty BB. */
|
|
+ check_vec_use (loop_vinfo, stmts, stmt_info, success);
|
|
+}
|
|
+
|
|
+/* For a grouped store, check whether all successors of the present BB have a
|
|
+ vectorized load from the same base as the store. If so, the caller uses
|
|
+ VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES. */
|
|
+
|
|
+static bool
|
|
+conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo)
|
|
+{
|
|
+ gimple *stmt = stmt_vinfo->stmt;
|
|
+ if (gimple_code (stmt) != GIMPLE_ASSIGN)
|
|
+ return false;
|
|
+
|
|
+ if (DR_IS_READ (stmt_vinfo->dr_aux.dr))
|
|
+ return false;
|
|
+
|
|
+ basic_block bb = stmt->bb;
|
|
+ bool success = true;
|
|
+ auto_vec<basic_block> visited_bbs;
|
|
+ visited_bbs.safe_push (bb);
|
|
+ edge e;
|
|
+ edge_iterator ei;
|
|
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
|
+ dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs);
|
|
+ return success;
|
|
+}
|
|
+
|
|
/* A subroutine of get_load_store_type, with a subset of the same
|
|
arguments. Handle the case where STMT_INFO is part of a grouped load
|
|
or store.
|
|
@@ -2434,6 +2601,20 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
|
|
*memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
|
|
overrun_p = would_overrun_p;
|
|
}
|
|
+
|
|
+ if (*memory_access_type == VMAT_LOAD_STORE_LANES
|
|
+ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST
|
|
+ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters),
|
|
+ loop_vinfo->vectorization_factor)
|
|
+ && conti_perm (stmt_info, loop_vinfo)
|
|
+ && (vls_type == VLS_LOAD
|
|
+ ? vect_grouped_load_supported (vectype, single_element_p,
|
|
+ group_size)
|
|
+ : vect_grouped_store_supported (vectype, group_size)))
|
|
+ {
|
|
+ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
|
|
+ overrun_p = would_overrun_p;
|
|
+ }
|
|
}
|
|
|
|
/* As a last resort, trying using a gather load or scatter store.
|
|
--
|
|
2.27.0.windows.1
|
|
|