221 lines
11 KiB
Diff
221 lines
11 KiB
Diff
From cfffbec938afdc45c31db5ec282ce21ad1ba2dc7 Mon Sep 17 00:00:00 2001
|
|
From: liuhongt <hongtao.liu@intel.com>
|
|
Date: Thu, 10 Aug 2023 11:41:39 +0800
|
|
Subject: [PATCH 11/32] Software mitigation: Disable gather generation in
|
|
vectorization for GDS-affected Intel Processors.
|
|
|
|
For more details of GDS (Gather Data Sampling), refer to
|
|
https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/gather-data-sampling.html
|
|
|
|
After the microcode update, there's a performance regression. To avoid that,
|
|
the patch disables gather generation in autovectorization but uses
|
|
gather scalar emulation instead.
|
|
|
|
gcc/ChangeLog:
|
|
|
|
* config/i386/i386-options.cc (m_GDS): New macro.
|
|
* config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Don't
|
|
enable for m_GDS.
|
|
(X86_TUNE_USE_GATHER_4PARTS): Ditto.
|
|
(X86_TUNE_USE_GATHER): Ditto.
|
|
|
|
gcc/testsuite/ChangeLog:
|
|
|
|
* gcc.target/i386/avx2-gather-2.c: Adjust options to keep
|
|
gather vectorization.
|
|
* gcc.target/i386/avx2-gather-6.c: Ditto.
|
|
* gcc.target/i386/avx512f-pr88464-1.c: Ditto.
|
|
* gcc.target/i386/avx512f-pr88464-5.c: Ditto.
|
|
* gcc.target/i386/avx512vl-pr88464-1.c: Ditto.
|
|
* gcc.target/i386/avx512vl-pr88464-11.c: Ditto.
|
|
* gcc.target/i386/avx512vl-pr88464-3.c: Ditto.
|
|
* gcc.target/i386/avx512vl-pr88464-9.c: Ditto.
|
|
* gcc.target/i386/pr88531-1b.c: Ditto.
|
|
* gcc.target/i386/pr88531-1c.c: Ditto.
|
|
|
|
(cherry picked from commit 3064d1f5c48cb6ce1b4133570dd08ecca8abb52d)
|
|
---
|
|
gcc/config/i386/i386-options.cc | 5 +++++
|
|
gcc/config/i386/x86-tune.def | 9 ++++++---
|
|
gcc/testsuite/gcc.target/i386/avx2-gather-2.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/avx2-gather-6.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/pr88531-1b.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/pr88531-1c.c | 2 +-
|
|
12 files changed, 21 insertions(+), 13 deletions(-)
|
|
|
|
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
|
index fb2ed942f..9617fc162 100644
|
|
--- a/gcc/config/i386/i386-options.cc
|
|
+++ b/gcc/config/i386/i386-options.cc
|
|
@@ -137,6 +137,11 @@ along with GCC; see the file COPYING3. If not see
|
|
#define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)
|
|
#define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT)
|
|
#define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL)
|
|
+/* Gather Data Sampling / CVE-2022-40982 / INTEL-SA-00828.
|
|
+ Software mitigation. */
|
|
+#define m_GDS (m_SKYLAKE | m_SKYLAKE_AVX512 | m_CANNONLAKE \
|
|
+ | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
|
|
+ | m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE)
|
|
|
|
#define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
|
|
#define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
|
|
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
|
|
index e6b9e2125..4392709fc 100644
|
|
--- a/gcc/config/i386/x86-tune.def
|
|
+++ b/gcc/config/i386/x86-tune.def
|
|
@@ -467,7 +467,8 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
|
|
/* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2
|
|
elements. */
|
|
DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
|
|
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC))
|
|
+ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE
|
|
+ | m_GENERIC | m_GDS))
|
|
|
|
/* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2
|
|
elements. */
|
|
@@ -477,7 +478,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts",
|
|
/* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4
|
|
elements. */
|
|
DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
|
|
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC))
|
|
+ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE
|
|
+ | m_GENERIC | m_GDS))
|
|
|
|
/* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4
|
|
elements. */
|
|
@@ -487,7 +489,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
|
|
/* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
|
|
elements. */
|
|
DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
|
|
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC))
|
|
+ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
|
|
+ | m_GENERIC | m_GDS))
|
|
|
|
/* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
|
|
elements. */
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
|
|
index ad5ef7310..978924b0f 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */
|
|
+/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake -mtune=haswell" } */
|
|
|
|
#include "avx2-gather-1.c"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
|
|
index b9119581a..067b251e3 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=skylake" } */
|
|
+/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=haswell" } */
|
|
|
|
#include "avx2-gather-5.c"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
|
|
index 06d21bb01..d1a229861 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
|
|
@@ -1,6 +1,6 @@
|
|
/* PR tree-optimization/88464 */
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
|
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */
|
|
/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
|
|
index 462e951fd..d7b0b2b28 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
|
|
@@ -1,6 +1,6 @@
|
|
/* PR tree-optimization/88464 */
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
|
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */
|
|
/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
|
|
index 55a28dddb..07439185e 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
|
|
@@ -1,6 +1,6 @@
|
|
/* PR tree-optimization/88464 */
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
|
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */
|
|
/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
|
|
index 969600885..3a9810827 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
|
|
@@ -1,6 +1,6 @@
|
|
/* PR tree-optimization/88464 */
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
|
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */
|
|
/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
|
|
index 6b0c8a859..ac669e048 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
|
|
@@ -1,6 +1,6 @@
|
|
/* PR tree-optimization/88464 */
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
|
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */
|
|
/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
|
|
index 3af568ab3..14a1083b6 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
|
|
@@ -1,6 +1,6 @@
|
|
/* PR tree-optimization/88464 */
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
|
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */
|
|
/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1b.c b/gcc/testsuite/gcc.target/i386/pr88531-1b.c
|
|
index 812c8a10f..e6df789de 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/pr88531-1b.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/pr88531-1b.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O3 -march=skylake -mfpmath=sse" } */
|
|
+/* { dg-options "-O3 -march=skylake -mfpmath=sse -mtune=haswell" } */
|
|
|
|
#include "pr88531-1a.c"
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1c.c b/gcc/testsuite/gcc.target/i386/pr88531-1c.c
|
|
index 43fc5913e..a093c87c0 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/pr88531-1c.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/pr88531-1c.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O3 -march=skylake-avx512 -mfpmath=sse" } */
|
|
+/* { dg-options "-O3 -march=skylake-avx512 -mfpmath=sse -mtune=haswell" } */
|
|
|
|
#include "pr88531-1a.c"
|
|
|
|
--
|
|
2.28.0.windows.1
|
|
|