139 lines
5.2 KiB
Diff
139 lines
5.2 KiB
Diff
From 48715f03ad08f185153bfb0ff4c0802ab2d9579c Mon Sep 17 00:00:00 2001
|
|
From: liuhongt <hongtao.liu@intel.com>
|
|
Date: Mon, 26 Jun 2023 09:50:25 +0800
|
|
Subject: [PATCH 05/32] Make option mvzeroupper independent of optimization
|
|
level.
|
|
|
|
pass_insert_vzeroupper is under condition
|
|
|
|
TARGET_AVX && TARGET_VZEROUPPER
|
|
&& flag_expensive_optimizations && !optimize_size
|
|
|
|
But the documentation of -mvzeroupper doesn't mention that the insertion
|
|
requires -O2 or above, which may confuse users when they explicitly
|
|
use -Os -mvzeroupper.
|
|
|
|
------------
|
|
mvzeroupper
|
|
Target Mask(VZEROUPPER) Save
|
|
Generate vzeroupper instruction before a transfer of control flow out of
|
|
the function.
|
|
------------
|
|
|
|
The patch moves flag_expensive_optimizations && !optimize_size to
|
|
ix86_option_override_internal. It makes -mvzeroupper independent of
|
|
optimization level, but still keeps the behavior of architecture
|
|
tuning(emit_vzeroupper) unchanged.
|
|
|
|
gcc/ChangeLog:
|
|
|
|
* config/i386/i386-features.cc (pass_insert_vzeroupper::gate):
|
|
Move flag_expensive_optimizations && !optimize_size to ..
|
|
* config/i386/i386-options.cc (ix86_option_override_internal):
|
|
.. this, it makes -mvzeroupper independent of optimization
|
|
level, but still keeps the behavior of architecture
|
|
tuning(emit_vzeroupper) unchanged.
|
|
|
|
gcc/testsuite/ChangeLog:
|
|
|
|
* gcc.target/i386/avx-vzeroupper-29.c: New testcase.
|
|
* gcc.target/i386/avx-vzeroupper-12.c: Adjust testcase.
|
|
* gcc.target/i386/avx-vzeroupper-7.c: Ditto.
|
|
* gcc.target/i386/avx-vzeroupper-9.c: Ditto.
|
|
---
|
|
gcc/config/i386/i386-features.cc | 3 +--
|
|
gcc/config/i386/i386-options.cc | 4 +++-
|
|
gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c | 3 ++-
|
|
gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c | 14 ++++++++++++++
|
|
gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c | 3 ++-
|
|
gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c | 3 ++-
|
|
6 files changed, 24 insertions(+), 6 deletions(-)
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
|
|
|
|
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
|
|
index 6fe41c3c2..6a2444eb6 100644
|
|
--- a/gcc/config/i386/i386-features.cc
|
|
+++ b/gcc/config/i386/i386-features.cc
|
|
@@ -1875,8 +1875,7 @@ public:
|
|
/* opt_pass methods: */
|
|
virtual bool gate (function *)
|
|
{
|
|
- return TARGET_AVX && TARGET_VZEROUPPER
|
|
- && flag_expensive_optimizations && !optimize_size;
|
|
+ return TARGET_AVX && TARGET_VZEROUPPER;
|
|
}
|
|
|
|
virtual unsigned int execute (function *)
|
|
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
|
index ff44ad4e0..74e969b68 100644
|
|
--- a/gcc/config/i386/i386-options.cc
|
|
+++ b/gcc/config/i386/i386-options.cc
|
|
@@ -2702,7 +2702,9 @@ ix86_option_override_internal (bool main_args_p,
|
|
sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH");
|
|
|
|
if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
|
|
- && TARGET_EMIT_VZEROUPPER)
|
|
+ && TARGET_EMIT_VZEROUPPER
|
|
+ && flag_expensive_optimizations
|
|
+ && !optimize_size)
|
|
opts->x_target_flags |= MASK_VZEROUPPER;
|
|
if (!(opts_set->x_target_flags & MASK_STV))
|
|
opts->x_target_flags |= MASK_STV;
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
|
|
index e694d4048..5a40e8783 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
|
|
@@ -16,5 +16,6 @@ foo ()
|
|
_mm256_zeroupper ();
|
|
}
|
|
|
|
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
|
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
|
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
|
|
/* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
|
|
new file mode 100644
|
|
index 000000000..4af637757
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
|
|
@@ -0,0 +1,14 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O0 -mavx -mtune=generic -mvzeroupper -dp" } */
|
|
+
|
|
+#include <immintrin.h>
|
|
+
|
|
+extern __m256 x, y;
|
|
+
|
|
+void
|
|
+foo ()
|
|
+{
|
|
+ x = y;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
|
|
index ab6d68779..75fe58897 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
|
|
@@ -12,4 +12,5 @@ foo ()
|
|
_mm256_zeroupper ();
|
|
}
|
|
|
|
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
|
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 { target ia32 } } } */
|
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 { target { ! ia32 } } } } */
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
|
|
index 974e1626a..fa0a6dfca 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
|
|
@@ -15,4 +15,5 @@ foo ()
|
|
_mm256_zeroupper ();
|
|
}
|
|
|
|
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
|
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
|
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
|
|
--
|
|
2.28.0.windows.1
|
|
|