188 lines
7.1 KiB
Diff
188 lines
7.1 KiB
Diff
|
|
From c269629130cb23252da2db026ce9ed13f57f69f4 Mon Sep 17 00:00:00 2001
|
||
|
|
From: liuhongt <hongtao.liu@intel.com>
|
||
|
|
Date: Thu, 10 Aug 2023 16:26:13 +0800
|
||
|
|
Subject: [PATCH 12/32] Support -m[no-]gather -m[no-]scatter to enable/disable
|
||
|
|
vectorization for all gather/scatter instructions
|
||
|
|
|
||
|
|
Rename original use_gather to use_gather_8parts, Support
|
||
|
|
-mtune-ctrl={,^}use_gather to set/clear tune features
|
||
|
|
use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather
|
||
|
|
as alias of -mtune-ctrl=, use_gather, ^use_gather.
|
||
|
|
|
||
|
|
Similar for use_scatter.
|
||
|
|
|
||
|
|
gcc/ChangeLog:
|
||
|
|
|
||
|
|
* config/i386/i386-builtins.cc
|
||
|
|
(ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
|
||
|
|
* config/i386/i386-options.cc (parse_mtune_ctrl_str):
|
||
|
|
Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
|
||
|
|
8parts} for -mtune-ctrl={,^}{use_gather,use_scatter}.
|
||
|
|
* config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
|
||
|
|
for use_scatter_8parts.
|
||
|
|
* config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
|
||
|
|
(TARGET_USE_GATHER_8PARTS): .. this.
|
||
|
|
(TARGET_USE_SCATTER): Rename to ..
|
||
|
|
(TARGET_USE_SCATTER_8PARTS): .. this.
|
||
|
|
* config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to ..
|
||
|
|
(X86_TUNE_USE_GATHER_8PARTS): .. this.
|
||
|
|
(X86_TUNE_USE_SCATTER): Rename to ..
|
||
|
|
(X86_TUNE_USE_SCATTER_8PARTS): .. this.
|
||
|
|
* config/i386/i386.opt: Add new options mgather, mscatter.
|
||
|
|
|
||
|
|
(cherry picked from commit b2a927fb5343db363ea4361da0d6bcee227b6737)
|
||
|
|
---
|
||
|
|
gcc/config/i386/i386-builtins.cc | 2 +-
|
||
|
|
gcc/config/i386/i386-options.cc | 54 +++++++++++++++++++++++---------
|
||
|
|
gcc/config/i386/i386.cc | 2 +-
|
||
|
|
gcc/config/i386/i386.h | 8 ++---
|
||
|
|
gcc/config/i386/i386.opt | 4 +++
|
||
|
|
gcc/config/i386/x86-tune.def | 4 +--
|
||
|
|
6 files changed, 52 insertions(+), 22 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
|
||
|
|
index 050c6228a..8ed32e14f 100644
|
||
|
|
--- a/gcc/config/i386/i386-builtins.cc
|
||
|
|
+++ b/gcc/config/i386/i386-builtins.cc
|
||
|
|
@@ -1790,7 +1790,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
|
||
|
|
? !TARGET_USE_GATHER_2PARTS
|
||
|
|
: (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
|
||
|
|
? !TARGET_USE_GATHER_4PARTS
|
||
|
|
- : !TARGET_USE_GATHER)))
|
||
|
|
+ : !TARGET_USE_GATHER_8PARTS)))
|
||
|
|
return NULL_TREE;
|
||
|
|
|
||
|
|
if ((TREE_CODE (index_type) != INTEGER_TYPE
|
||
|
|
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||
|
|
index 9617fc162..3df1f0c41 100644
|
||
|
|
--- a/gcc/config/i386/i386-options.cc
|
||
|
|
+++ b/gcc/config/i386/i386-options.cc
|
||
|
|
@@ -1705,20 +1705,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump)
|
||
|
|
curr_feature_string++;
|
||
|
|
clear = true;
|
||
|
|
}
|
||
|
|
- for (i = 0; i < X86_TUNE_LAST; i++)
|
||
|
|
- {
|
||
|
|
- if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
|
||
|
|
- {
|
||
|
|
- ix86_tune_features[i] = !clear;
|
||
|
|
- if (dump)
|
||
|
|
- fprintf (stderr, "Explicitly %s feature %s\n",
|
||
|
|
- clear ? "clear" : "set", ix86_tune_feature_names[i]);
|
||
|
|
- break;
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
- if (i == X86_TUNE_LAST)
|
||
|
|
- error ("unknown parameter to option %<-mtune-ctrl%>: %s",
|
||
|
|
- clear ? curr_feature_string - 1 : curr_feature_string);
|
||
|
|
+
|
||
|
|
+ if (!strcmp (curr_feature_string, "use_gather"))
|
||
|
|
+ {
|
||
|
|
+ ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
|
||
|
|
+ ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
|
||
|
|
+ ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
|
||
|
|
+ if (dump)
|
||
|
|
+ fprintf (stderr, "Explicitly %s features use_gather_2parts,"
|
||
|
|
+ " use_gather_4parts, use_gather_8parts\n",
|
||
|
|
+ clear ? "clear" : "set");
|
||
|
|
+
|
||
|
|
+ }
|
||
|
|
+ else if (!strcmp (curr_feature_string, "use_scatter"))
|
||
|
|
+ {
|
||
|
|
+ ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
|
||
|
|
+ ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
|
||
|
|
+ ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
|
||
|
|
+ if (dump)
|
||
|
|
+ fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
|
||
|
|
+ " use_scatter_4parts, use_scatter_8parts\n",
|
||
|
|
+ clear ? "clear" : "set");
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ for (i = 0; i < X86_TUNE_LAST; i++)
|
||
|
|
+ {
|
||
|
|
+ if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
|
||
|
|
+ {
|
||
|
|
+ ix86_tune_features[i] = !clear;
|
||
|
|
+ if (dump)
|
||
|
|
+ fprintf (stderr, "Explicitly %s feature %s\n",
|
||
|
|
+ clear ? "clear" : "set", ix86_tune_feature_names[i]);
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (i == X86_TUNE_LAST)
|
||
|
|
+ error ("unknown parameter to option %<-mtune-ctrl%>: %s",
|
||
|
|
+ clear ? curr_feature_string - 1 : curr_feature_string);
|
||
|
|
+ }
|
||
|
|
curr_feature_string = next_feature_string;
|
||
|
|
}
|
||
|
|
while (curr_feature_string);
|
||
|
|
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
|
||
|
|
index 479fc6010..e75d37023 100644
|
||
|
|
--- a/gcc/config/i386/i386.cc
|
||
|
|
+++ b/gcc/config/i386/i386.cc
|
||
|
|
@@ -18937,7 +18937,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
|
||
|
|
? !TARGET_USE_SCATTER_2PARTS
|
||
|
|
: (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
|
||
|
|
? !TARGET_USE_SCATTER_4PARTS
|
||
|
|
- : !TARGET_USE_SCATTER))
|
||
|
|
+ : !TARGET_USE_SCATTER_8PARTS))
|
||
|
|
return NULL_TREE;
|
||
|
|
|
||
|
|
if ((TREE_CODE (index_type) != INTEGER_TYPE
|
||
|
|
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
|
||
|
|
index 688aaabd3..aaa136ba0 100644
|
||
|
|
--- a/gcc/config/i386/i386.h
|
||
|
|
+++ b/gcc/config/i386/i386.h
|
||
|
|
@@ -403,10 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
|
||
|
|
ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
|
||
|
|
#define TARGET_USE_SCATTER_4PARTS \
|
||
|
|
ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS]
|
||
|
|
-#define TARGET_USE_GATHER \
|
||
|
|
- ix86_tune_features[X86_TUNE_USE_GATHER]
|
||
|
|
-#define TARGET_USE_SCATTER \
|
||
|
|
- ix86_tune_features[X86_TUNE_USE_SCATTER]
|
||
|
|
+#define TARGET_USE_GATHER_8PARTS \
|
||
|
|
+ ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS]
|
||
|
|
+#define TARGET_USE_SCATTER_8PARTS \
|
||
|
|
+ ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS]
|
||
|
|
#define TARGET_FUSE_CMP_AND_BRANCH_32 \
|
||
|
|
ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32]
|
||
|
|
#define TARGET_FUSE_CMP_AND_BRANCH_64 \
|
||
|
|
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
|
||
|
|
index 498fb454d..b154110d8 100644
|
||
|
|
--- a/gcc/config/i386/i386.opt
|
||
|
|
+++ b/gcc/config/i386/i386.opt
|
||
|
|
@@ -1222,3 +1222,7 @@ Instructions number above which STFL stall penalty can be compensated.
|
||
|
|
munroll-only-small-loops
|
||
|
|
Target Var(ix86_unroll_only_small_loops) Init(0) Save
|
||
|
|
Enable conservative small loop unrolling.
|
||
|
|
+
|
||
|
|
+mscatter
|
||
|
|
+Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
|
||
|
|
+Enable vectorization for scatter instruction.
|
||
|
|
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
|
||
|
|
index 4392709fc..bdb455d20 100644
|
||
|
|
--- a/gcc/config/i386/x86-tune.def
|
||
|
|
+++ b/gcc/config/i386/x86-tune.def
|
||
|
|
@@ -488,13 +488,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
|
||
|
|
|
||
|
|
/* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
|
||
|
|
elements. */
|
||
|
|
-DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
|
||
|
|
+DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
|
||
|
|
~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
|
||
|
|
| m_GENERIC | m_GDS))
|
||
|
|
|
||
|
|
/* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
|
||
|
|
elements. */
|
||
|
|
-DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter",
|
||
|
|
+DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
|
||
|
|
~(m_ZNVER4))
|
||
|
|
|
||
|
|
/* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
|
||
|
|
--
|
||
|
|
2.28.0.windows.1
|
||
|
|
|