152 lines
4.5 KiB
Diff
152 lines
4.5 KiB
Diff
From 204ffa7f503411ccac0161c951726274648b6374 Mon Sep 17 00:00:00 2001
|
|
From: liuhongt <hongtao.liu@intel.com>
|
|
Date: Thu, 7 Dec 2023 09:17:27 +0800
|
|
Subject: [PATCH 16/32] Don't assume it's AVX_U128_CLEAN after call_insn whose
|
|
abi.mode_clobbers(V4DImode) doesn't contain all SSE_REGS.
|
|
|
|
If the function doesn't clobber any SSE registers or only clobbers the
|
|
128-bit part, then vzeroupper isn't issued before the function exit.
|
|
The status is then not CLEAN but ANY after the function.
|
|
|
|
Also for sibling_call, it's safe to issue a vzeroupper. Also there
|
|
could be missing vzeroupper since there's no mode_exit for
|
|
sibling_call_p.
|
|
|
|
gcc/ChangeLog:
|
|
|
|
PR target/112891
|
|
* config/i386/i386.cc (ix86_avx_u128_mode_after): Return
|
|
AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
|
|
align with ix86_avx_u128_mode_needed.
|
|
(ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for
|
|
sibling_call.
|
|
|
|
gcc/testsuite/ChangeLog:
|
|
|
|
* gcc.target/i386/pr112891.c: New test.
|
|
* gcc.target/i386/pr112891-2.c: New test.
|
|
|
|
(cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642)
|
|
---
|
|
gcc/config/i386/i386.cc | 22 +++++++++++++---
|
|
gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++
|
|
gcc/testsuite/gcc.target/i386/pr112891.c | 29 +++++++++++++++++++++
|
|
3 files changed, 78 insertions(+), 3 deletions(-)
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c
|
|
|
|
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
|
|
index e75d37023..60f3296b0 100644
|
|
--- a/gcc/config/i386/i386.cc
|
|
+++ b/gcc/config/i386/i386.cc
|
|
@@ -14416,8 +14416,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
|
|
modes wider than 256 bits. It's only safe to issue a
|
|
vzeroupper if all SSE registers are clobbered. */
|
|
const function_abi &abi = insn_callee_abi (insn);
|
|
- if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
|
|
- abi.mode_clobbers (V4DImode)))
|
|
+ /* Should be safe to issue an vzeroupper before sibling_call_p.
|
|
+ Also there not mode_exit for sibling_call, so there could be
|
|
+ missing vzeroupper for that. */
|
|
+ if (!(SIBLING_CALL_P (insn)
|
|
+ || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
|
|
+ abi.mode_clobbers (V4DImode))))
|
|
return AVX_U128_ANY;
|
|
|
|
return AVX_U128_CLEAN;
|
|
@@ -14555,7 +14559,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
|
|
bool avx_upper_reg_found = false;
|
|
note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
|
|
|
|
- return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
|
|
+ if (avx_upper_reg_found)
|
|
+ return AVX_U128_DIRTY;
|
|
+
|
|
+ /* If the function desn't clobber any sse registers or only clobber
|
|
+ 128-bit part, Then vzeroupper isn't issued before the function exit.
|
|
+ the status not CLEAN but ANY after the function. */
|
|
+ const function_abi &abi = insn_callee_abi (insn);
|
|
+ if (!(SIBLING_CALL_P (insn)
|
|
+ || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
|
|
+ abi.mode_clobbers (V4DImode))))
|
|
+ return AVX_U128_ANY;
|
|
+
|
|
+ return AVX_U128_CLEAN;
|
|
}
|
|
|
|
/* Otherwise, return current mode. Remember that if insn
|
|
diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c
|
|
new file mode 100644
|
|
index 000000000..164c3985d
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
|
|
@@ -0,0 +1,30 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-mavx2 -O3" } */
|
|
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
|
|
+
|
|
+void
|
|
+__attribute__((noinline))
|
|
+bar (double* a)
|
|
+{
|
|
+ a[0] = 1.0;
|
|
+ a[1] = 2.0;
|
|
+}
|
|
+
|
|
+double
|
|
+__attribute__((noinline))
|
|
+foo (double* __restrict a, double* b)
|
|
+{
|
|
+ a[0] += b[0];
|
|
+ a[1] += b[1];
|
|
+ a[2] += b[2];
|
|
+ a[3] += b[3];
|
|
+ bar (b);
|
|
+ return a[5] + b[5];
|
|
+}
|
|
+
|
|
+double
|
|
+foo1 (double* __restrict a, double* b)
|
|
+{
|
|
+ double c = foo (a, b);
|
|
+ return __builtin_exp (c);
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c
|
|
new file mode 100644
|
|
index 000000000..dbf6c6794
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/i386/pr112891.c
|
|
@@ -0,0 +1,29 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-mavx2 -O3" } */
|
|
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
|
|
+
|
|
+void
|
|
+__attribute__((noinline))
|
|
+bar (double* a)
|
|
+{
|
|
+ a[0] = 1.0;
|
|
+ a[1] = 2.0;
|
|
+}
|
|
+
|
|
+void
|
|
+__attribute__((noinline))
|
|
+foo (double* __restrict a, double* b)
|
|
+{
|
|
+ a[0] += b[0];
|
|
+ a[1] += b[1];
|
|
+ a[2] += b[2];
|
|
+ a[3] += b[3];
|
|
+ bar (b);
|
|
+}
|
|
+
|
|
+double
|
|
+foo1 (double* __restrict a, double* b)
|
|
+{
|
|
+ foo (a, b);
|
|
+ return __builtin_exp (b[1]);
|
|
+}
|
|
--
|
|
2.28.0.windows.1
|
|
|