[Sync] Sync patch from openeuler/gcc
This commit is contained in:
parent
e487f0b8ff
commit
25bccb60b0
135
0058-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch
Normal file
135
0058-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
From 1649f9fbbc5267de2a675336d3ac665528a03db8 Mon Sep 17 00:00:00 2001
|
||||||
|
From: liuhongt <hongtao.liu@intel.com>
|
||||||
|
Date: Wed, 10 May 2023 15:16:58 +0800
|
||||||
|
Subject: [PATCH 03/32] x86: Add a new option -mdaz-ftz to enable FTZ and DAZ
|
||||||
|
flags in MXCSR.
|
||||||
|
|
||||||
|
if (mdaz-ftz)
|
||||||
|
link crtfastmath.o
|
||||||
|
else if ((Ofast || ffast-math || funsafe-math-optimizations)
|
||||||
|
&& !mno-daz-ftz)
|
||||||
|
link crtfastmath.o
|
||||||
|
else
|
||||||
|
Don't link crtfastmath.o
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/i386/cygwin.h (ENDFILE_SPEC): Link crtfastmath.o
|
||||||
|
whenever -mdaz-ftz is specified. Don't link crtfastmath.o
|
||||||
|
when -mno-daz-ftz is specified.
|
||||||
|
* config/i386/darwin.h (ENDFILE_SPEC): Ditto.
|
||||||
|
* config/i386/gnu-user-common.h
|
||||||
|
(GNU_USER_TARGET_MATHFILE_SPEC): Ditto.
|
||||||
|
* config/i386/mingw32.h (ENDFILE_SPEC): Ditto.
|
||||||
|
* config/i386/i386.opt (mdaz-ftz): New option.
|
||||||
|
* doc/invoke.texi (x86 options): Document -mdaz-ftz.
|
||||||
|
---
|
||||||
|
gcc/config/i386/cygwin.h | 2 +-
|
||||||
|
gcc/config/i386/darwin.h | 4 ++--
|
||||||
|
gcc/config/i386/gnu-user-common.h | 2 +-
|
||||||
|
gcc/config/i386/i386.opt | 4 ++++
|
||||||
|
gcc/config/i386/mingw32.h | 2 +-
|
||||||
|
gcc/doc/invoke.texi | 11 ++++++++++-
|
||||||
|
6 files changed, 19 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h
|
||||||
|
index d06eda369..5412c5d44 100644
|
||||||
|
--- a/gcc/config/i386/cygwin.h
|
||||||
|
+++ b/gcc/config/i386/cygwin.h
|
||||||
|
@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
|
||||||
|
#undef ENDFILE_SPEC
|
||||||
|
#define ENDFILE_SPEC \
|
||||||
|
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}\
|
||||||
|
+ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
|
||||||
|
%{!shared:%:if-exists(default-manifest.o%s)}\
|
||||||
|
%{fvtable-verify=none:%s; \
|
||||||
|
fvtable-verify=preinit:vtv_end.o%s; \
|
||||||
|
diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h
|
||||||
|
index a55f6b2b8..2f773924d 100644
|
||||||
|
--- a/gcc/config/i386/darwin.h
|
||||||
|
+++ b/gcc/config/i386/darwin.h
|
||||||
|
@@ -109,8 +109,8 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
"%{!force_cpusubtype_ALL:-force_cpusubtype_ALL} "
|
||||||
|
|
||||||
|
#undef ENDFILE_SPEC
|
||||||
|
-#define ENDFILE_SPEC \
|
||||||
|
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
|
||||||
|
+#define ENDFILE_SPEC \
|
||||||
|
+ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
|
||||||
|
%{mpc32:crtprec32.o%s} \
|
||||||
|
%{mpc64:crtprec64.o%s} \
|
||||||
|
%{mpc80:crtprec80.o%s}" TM_DESTRUCTOR
|
||||||
|
diff --git a/gcc/config/i386/gnu-user-common.h b/gcc/config/i386/gnu-user-common.h
|
||||||
|
index 23b54c5be..3d2a33f17 100644
|
||||||
|
--- a/gcc/config/i386/gnu-user-common.h
|
||||||
|
+++ b/gcc/config/i386/gnu-user-common.h
|
||||||
|
@@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
|
||||||
|
/* Similar to standard GNU userspace, but adding -ffast-math support. */
|
||||||
|
#define GNU_USER_TARGET_MATHFILE_SPEC \
|
||||||
|
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
|
||||||
|
+ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
|
||||||
|
%{mpc32:crtprec32.o%s} \
|
||||||
|
%{mpc64:crtprec64.o%s} \
|
||||||
|
%{mpc80:crtprec80.o%s}"
|
||||||
|
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
|
||||||
|
index fc1b944ac..498fb454d 100644
|
||||||
|
--- a/gcc/config/i386/i386.opt
|
||||||
|
+++ b/gcc/config/i386/i386.opt
|
||||||
|
@@ -420,6 +420,10 @@ mpc80
|
||||||
|
Target RejectNegative
|
||||||
|
Set 80387 floating-point precision to 80-bit.
|
||||||
|
|
||||||
|
+mdaz-ftz
|
||||||
|
+Target
|
||||||
|
+Set the FTZ and DAZ Flags.
|
||||||
|
+
|
||||||
|
mpreferred-stack-boundary=
|
||||||
|
Target RejectNegative Joined UInteger Var(ix86_preferred_stack_boundary_arg)
|
||||||
|
Attempt to keep stack aligned to this power of 2.
|
||||||
|
diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h
|
||||||
|
index d3ca0cd02..ddbe6a405 100644
|
||||||
|
--- a/gcc/config/i386/mingw32.h
|
||||||
|
+++ b/gcc/config/i386/mingw32.h
|
||||||
|
@@ -197,7 +197,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
|
||||||
|
#undef ENDFILE_SPEC
|
||||||
|
#define ENDFILE_SPEC \
|
||||||
|
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
|
||||||
|
+ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
|
||||||
|
%{!shared:%:if-exists(default-manifest.o%s)}\
|
||||||
|
%{fvtable-verify=none:%s; \
|
||||||
|
fvtable-verify=preinit:vtv_end.o%s; \
|
||||||
|
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||||
|
index 2b376e0e9..3a48655e5 100644
|
||||||
|
--- a/gcc/doc/invoke.texi
|
||||||
|
+++ b/gcc/doc/invoke.texi
|
||||||
|
@@ -1437,7 +1437,7 @@ See RS/6000 and PowerPC Options.
|
||||||
|
-m96bit-long-double -mlong-double-64 -mlong-double-80 -mlong-double-128 @gol
|
||||||
|
-mregparm=@var{num} -msseregparm @gol
|
||||||
|
-mveclibabi=@var{type} -mvect8-ret-in-mem @gol
|
||||||
|
--mpc32 -mpc64 -mpc80 -mstackrealign @gol
|
||||||
|
+-mpc32 -mpc64 -mpc80 -mdaz-ftz -mstackrealign @gol
|
||||||
|
-momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol
|
||||||
|
-mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol
|
||||||
|
-m32 -m64 -mx32 -m16 -miamcu -mlarge-data-threshold=@var{num} @gol
|
||||||
|
@@ -32122,6 +32122,15 @@ are enabled by default; routines in such libraries could suffer significant
|
||||||
|
loss of accuracy, typically through so-called ``catastrophic cancellation'',
|
||||||
|
when this option is used to set the precision to less than extended precision.
|
||||||
|
|
||||||
|
+@item -mdaz-ftz
|
||||||
|
+@opindex mdaz-ftz
|
||||||
|
+
|
||||||
|
+The flush-to-zero (FTZ) and denormals-are-zero (DAZ) flags in the MXCSR register
|
||||||
|
+are used to control floating-point calculations. SSE and AVX instructions
|
||||||
|
+including scalar and vector instructions could benefit from enabling the FTZ
|
||||||
|
+and DAZ flags when @option{-mdaz-ftz} is specified. Don't set FTZ/DAZ flags
|
||||||
|
+when @option{-mno-daz-ftz} is specified.
|
||||||
|
+
|
||||||
|
@item -mstackrealign
|
||||||
|
@opindex mstackrealign
|
||||||
|
Realign the stack at entry. On the x86, the @option{-mstackrealign}
|
||||||
|
--
|
||||||
|
2.28.0.windows.1
|
||||||
|
|
||||||
@ -0,0 +1,65 @@
|
|||||||
|
From e70fa730dcfcb3a7b1d56a2e166752d4299f0504 Mon Sep 17 00:00:00 2001
|
||||||
|
From: liuhongt <hongtao.liu@intel.com>
|
||||||
|
Date: Mon, 5 Jun 2023 12:38:41 +0800
|
||||||
|
Subject: [PATCH 04/32] Explicitly view_convert_expr mask to signed type when
|
||||||
|
folding pblendvb builtins.
|
||||||
|
|
||||||
|
Since mask < 0 will be always false for vector char when
|
||||||
|
-funsigned-char, but vpblendvb needs to check the most significant
|
||||||
|
bit. The patch explicitly VCE to vector signed char.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
PR target/110108
|
||||||
|
* config/i386/i386.cc (ix86_gimple_fold_builtin): Explicitly
|
||||||
|
view_convert_expr mask to signed type when folding pblendvb
|
||||||
|
builtins.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/i386/pr110108-2.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/i386/i386.cc | 4 +++-
|
||||||
|
gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++
|
||||||
|
2 files changed, 17 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
|
||||||
|
index 462dce10e..479fc6010 100644
|
||||||
|
--- a/gcc/config/i386/i386.cc
|
||||||
|
+++ b/gcc/config/i386/i386.cc
|
||||||
|
@@ -18396,8 +18396,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
|
||||||
|
tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
|
||||||
|
? intSI_type_node : intDI_type_node;
|
||||||
|
type = get_same_sized_vectype (itype, type);
|
||||||
|
- arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
|
||||||
|
}
|
||||||
|
+ else
|
||||||
|
+ type = signed_type_for (type);
|
||||||
|
+ arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
|
||||||
|
tree zero_vec = build_zero_cst (type);
|
||||||
|
tree cmp_type = truth_type_for (type);
|
||||||
|
tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..2d1d2fd49
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c
|
||||||
|
@@ -0,0 +1,14 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mavx2 -O2 -funsigned-char" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
|
||||||
|
+
|
||||||
|
+#include <immintrin.h>
|
||||||
|
+__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) {
|
||||||
|
+ __m128i Result = _mm_blendv_epi8(X0, X1, X2);
|
||||||
|
+ return Result;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) {
|
||||||
|
+ __m256i Result = _mm256_blendv_epi8(X0, X1, X2);
|
||||||
|
+ return Result;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.28.0.windows.1
|
||||||
|
|
||||||
138
0060-Make-option-mvzeroupper-independent-of-optimization-.patch
Normal file
138
0060-Make-option-mvzeroupper-independent-of-optimization-.patch
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
From 48715f03ad08f185153bfb0ff4c0802ab2d9579c Mon Sep 17 00:00:00 2001
|
||||||
|
From: liuhongt <hongtao.liu@intel.com>
|
||||||
|
Date: Mon, 26 Jun 2023 09:50:25 +0800
|
||||||
|
Subject: [PATCH 05/32] Make option mvzeroupper independent of optimization
|
||||||
|
level.
|
||||||
|
|
||||||
|
pass_insert_vzeroupper is under condition
|
||||||
|
|
||||||
|
TARGET_AVX && TARGET_VZEROUPPER
|
||||||
|
&& flag_expensive_optimizations && !optimize_size
|
||||||
|
|
||||||
|
But the documentation of -mvzeroupper doesn't mention that the insertion
|
||||||
|
requires -O2 and above, which may confuse users when they explicitly
|
||||||
|
use -Os -mvzeroupper.
|
||||||
|
|
||||||
|
------------
|
||||||
|
mvzeroupper
|
||||||
|
Target Mask(VZEROUPPER) Save
|
||||||
|
Generate vzeroupper instruction before a transfer of control flow out of
|
||||||
|
the function.
|
||||||
|
------------
|
||||||
|
|
||||||
|
The patch moves flag_expensive_optimizations && !optimize_size to
|
||||||
|
ix86_option_override_internal. It makes -mvzeroupper independent of
|
||||||
|
optimization level, but still keeps the behavior of architecture
|
||||||
|
tuning(emit_vzeroupper) unchanged.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/i386/i386-features.cc (pass_insert_vzeroupper:gate):
|
||||||
|
Move flag_expensive_optimizations && !optimize_size to ..
|
||||||
|
* config/i386/i386-options.cc (ix86_option_override_internal):
|
||||||
|
.. this, it makes -mvzeroupper independent of optimization
|
||||||
|
level, but still keeps the behavior of architecture
|
||||||
|
tuning(emit_vzeroupper) unchanged.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/i386/avx-vzeroupper-29.c: New testcase.
|
||||||
|
* gcc.target/i386/avx-vzeroupper-12.c: Adjust testcase.
|
||||||
|
* gcc.target/i386/avx-vzeroupper-7.c: Ditto.
|
||||||
|
* gcc.target/i386/avx-vzeroupper-9.c: Ditto.
|
||||||
|
---
|
||||||
|
gcc/config/i386/i386-features.cc | 3 +--
|
||||||
|
gcc/config/i386/i386-options.cc | 4 +++-
|
||||||
|
gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c | 3 ++-
|
||||||
|
gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c | 14 ++++++++++++++
|
||||||
|
gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c | 3 ++-
|
||||||
|
gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c | 3 ++-
|
||||||
|
6 files changed, 24 insertions(+), 6 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
|
||||||
|
index 6fe41c3c2..6a2444eb6 100644
|
||||||
|
--- a/gcc/config/i386/i386-features.cc
|
||||||
|
+++ b/gcc/config/i386/i386-features.cc
|
||||||
|
@@ -1875,8 +1875,7 @@ public:
|
||||||
|
/* opt_pass methods: */
|
||||||
|
virtual bool gate (function *)
|
||||||
|
{
|
||||||
|
- return TARGET_AVX && TARGET_VZEROUPPER
|
||||||
|
- && flag_expensive_optimizations && !optimize_size;
|
||||||
|
+ return TARGET_AVX && TARGET_VZEROUPPER;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual unsigned int execute (function *)
|
||||||
|
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||||
|
index ff44ad4e0..74e969b68 100644
|
||||||
|
--- a/gcc/config/i386/i386-options.cc
|
||||||
|
+++ b/gcc/config/i386/i386-options.cc
|
||||||
|
@@ -2702,7 +2702,9 @@ ix86_option_override_internal (bool main_args_p,
|
||||||
|
sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH");
|
||||||
|
|
||||||
|
if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
|
||||||
|
- && TARGET_EMIT_VZEROUPPER)
|
||||||
|
+ && TARGET_EMIT_VZEROUPPER
|
||||||
|
+ && flag_expensive_optimizations
|
||||||
|
+ && !optimize_size)
|
||||||
|
opts->x_target_flags |= MASK_VZEROUPPER;
|
||||||
|
if (!(opts_set->x_target_flags & MASK_STV))
|
||||||
|
opts->x_target_flags |= MASK_STV;
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
|
||||||
|
index e694d4048..5a40e8783 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
|
||||||
|
@@ -16,5 +16,6 @@ foo ()
|
||||||
|
_mm256_zeroupper ();
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
|
||||||
|
/* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..4af637757
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
|
||||||
|
@@ -0,0 +1,14 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O0 -mavx -mtune=generic -mvzeroupper -dp" } */
|
||||||
|
+
|
||||||
|
+#include <immintrin.h>
|
||||||
|
+
|
||||||
|
+extern __m256 x, y;
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+foo ()
|
||||||
|
+{
|
||||||
|
+ x = y;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
|
||||||
|
index ab6d68779..75fe58897 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
|
||||||
|
@@ -12,4 +12,5 @@ foo ()
|
||||||
|
_mm256_zeroupper ();
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 { target ia32 } } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 { target { ! ia32 } } } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
|
||||||
|
index 974e1626a..fa0a6dfca 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
|
||||||
|
@@ -15,4 +15,5 @@ foo ()
|
||||||
|
_mm256_zeroupper ();
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
|
||||||
|
--
|
||||||
|
2.28.0.windows.1
|
||||||
|
|
||||||
@ -0,0 +1,68 @@
|
|||||||
|
From 8039d773354360ed8ff2f25c63843fc637eacc67 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Hongyu Wang <hongyu.wang@intel.com>
|
||||||
|
Date: Sun, 25 Jun 2023 09:50:21 +0800
|
||||||
|
Subject: [PATCH 06/32] i386: Sync tune_string with arch_string for target
|
||||||
|
attribute
|
||||||
|
|
||||||
|
arch=*
|
||||||
|
|
||||||
|
For function with target attribute arch=*, current logic will set its
|
||||||
|
tune to -mtune from command line so all target_clones will get same
|
||||||
|
tuning flags which would affect the performance for each clone. Override
|
||||||
|
tune with arch if tune was not explicitly specified to get proper tuning
|
||||||
|
flags for target_clones.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/i386/i386-options.cc (ix86_valid_target_attribute_tree):
|
||||||
|
Override tune_string with arch_string if tune_string is not
|
||||||
|
explicitly specified.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/i386/mvc17.c: New test.
|
||||||
|
|
||||||
|
(cherry picked from commit 2916278d14e9ac28c361c396a67256acbebda6e8)
|
||||||
|
---
|
||||||
|
gcc/config/i386/i386-options.cc | 6 +++++-
|
||||||
|
gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++
|
||||||
|
2 files changed, 16 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||||
|
index 74e969b68..fb2ed942f 100644
|
||||||
|
--- a/gcc/config/i386/i386-options.cc
|
||||||
|
+++ b/gcc/config/i386/i386-options.cc
|
||||||
|
@@ -1378,7 +1378,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
|
||||||
|
if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
|
||||||
|
opts->x_ix86_tune_string
|
||||||
|
= ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
|
||||||
|
- else if (orig_tune_defaulted)
|
||||||
|
+ /* If we have explicit arch string and no tune string specified, set
|
||||||
|
+ tune_string to NULL and later it will be overridden by arch_string
|
||||||
|
+ so target clones can get proper optimization. */
|
||||||
|
+ else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
|
||||||
|
+ || orig_tune_defaulted)
|
||||||
|
opts->x_ix86_tune_string = NULL;
|
||||||
|
|
||||||
|
/* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..8b83c1aec
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/mvc17.c
|
||||||
|
@@ -0,0 +1,11 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-require-ifunc "" } */
|
||||||
|
+/* { dg-options "-O2 -march=x86-64" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "rep mov" 1 } } */
|
||||||
|
+
|
||||||
|
+__attribute__((target_clones("default","arch=icelake-server")))
|
||||||
|
+void
|
||||||
|
+foo (char *a, char *b, int size)
|
||||||
|
+{
|
||||||
|
+ __builtin_memcpy (a, b, size & 0x7F);
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.28.0.windows.1
|
||||||
|
|
||||||
111
0062-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
Normal file
111
0062-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
From fbcb1a5899b1bd3964aed78ed74041121e618d36 Mon Sep 17 00:00:00 2001
|
||||||
|
From: liuhongt <hongtao.liu@intel.com>
|
||||||
|
Date: Tue, 20 Jun 2023 15:41:00 +0800
|
||||||
|
Subject: [PATCH 07/32] Refine maskloadmn pattern with UNSPEC_MASKLOAD.
|
||||||
|
|
||||||
|
If mem_addr points to a memory region with less than whole vector size
|
||||||
|
bytes of accessible memory and k is a mask that would prevent reading
|
||||||
|
the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent
|
||||||
|
it from being transformed to vpblendd.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
PR target/110309
|
||||||
|
* config/i386/sse.md (maskload<mode><avx512fmaskmodelower>):
|
||||||
|
Refine pattern with UNSPEC_MASKLOAD.
|
||||||
|
(maskload<mode><avx512fmaskmodelower>): Ditto.
|
||||||
|
(*<avx512>_load<mode>_mask): Extend mode iterator to
|
||||||
|
VI12HF_AVX512VL.
|
||||||
|
(*<avx512>_load<mode>): Ditto.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/i386/pr110309.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/i386/sse.md | 32 +++++++++++++-----------
|
||||||
|
gcc/testsuite/gcc.target/i386/pr110309.c | 10 ++++++++
|
||||||
|
2 files changed, 28 insertions(+), 14 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/i386/pr110309.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||||||
|
index eb767e56c..b30e96cb1 100644
|
||||||
|
--- a/gcc/config/i386/sse.md
|
||||||
|
+++ b/gcc/config/i386/sse.md
|
||||||
|
@@ -1411,12 +1411,12 @@
|
||||||
|
})
|
||||||
|
|
||||||
|
(define_insn "*<avx512>_load<mode>_mask"
|
||||||
|
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
|
||||||
|
- (vec_merge:VI12_AVX512VL
|
||||||
|
- (unspec:VI12_AVX512VL
|
||||||
|
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
|
||||||
|
+ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
|
||||||
|
+ (vec_merge:VI12HF_AVX512VL
|
||||||
|
+ (unspec:VI12HF_AVX512VL
|
||||||
|
+ [(match_operand:VI12HF_AVX512VL 1 "memory_operand" "m")]
|
||||||
|
UNSPEC_MASKLOAD)
|
||||||
|
- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
|
||||||
|
+ (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand" "0C")
|
||||||
|
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
|
||||||
|
"TARGET_AVX512BW"
|
||||||
|
"vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
|
||||||
|
@@ -1425,9 +1425,9 @@
|
||||||
|
(set_attr "mode" "<sseinsnmode>")])
|
||||||
|
|
||||||
|
(define_insn_and_split "*<avx512>_load<mode>"
|
||||||
|
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
|
||||||
|
- (unspec:VI12_AVX512VL
|
||||||
|
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
|
||||||
|
+ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
|
||||||
|
+ (unspec:VI12HF_AVX512VL
|
||||||
|
+ [(match_operand:VI12HF_AVX512VL 1 "memory_operand" "m")]
|
||||||
|
UNSPEC_MASKLOAD))]
|
||||||
|
"TARGET_AVX512BW"
|
||||||
|
"#"
|
||||||
|
@@ -25973,17 +25973,21 @@
|
||||||
|
"TARGET_AVX")
|
||||||
|
|
||||||
|
(define_expand "maskload<mode><avx512fmaskmodelower>"
|
||||||
|
- [(set (match_operand:V48H_AVX512VL 0 "register_operand")
|
||||||
|
- (vec_merge:V48H_AVX512VL
|
||||||
|
- (match_operand:V48H_AVX512VL 1 "memory_operand")
|
||||||
|
+ [(set (match_operand:V48_AVX512VL 0 "register_operand")
|
||||||
|
+ (vec_merge:V48_AVX512VL
|
||||||
|
+ (unspec:V48_AVX512VL
|
||||||
|
+ [(match_operand:V48_AVX512VL 1 "memory_operand")]
|
||||||
|
+ UNSPEC_MASKLOAD)
|
||||||
|
(match_dup 0)
|
||||||
|
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
|
||||||
|
"TARGET_AVX512F")
|
||||||
|
|
||||||
|
(define_expand "maskload<mode><avx512fmaskmodelower>"
|
||||||
|
- [(set (match_operand:VI12_AVX512VL 0 "register_operand")
|
||||||
|
- (vec_merge:VI12_AVX512VL
|
||||||
|
- (match_operand:VI12_AVX512VL 1 "memory_operand")
|
||||||
|
+ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand")
|
||||||
|
+ (vec_merge:VI12HF_AVX512VL
|
||||||
|
+ (unspec:VI12HF_AVX512VL
|
||||||
|
+ [(match_operand:VI12HF_AVX512VL 1 "memory_operand")]
|
||||||
|
+ UNSPEC_MASKLOAD)
|
||||||
|
(match_dup 0)
|
||||||
|
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
|
||||||
|
"TARGET_AVX512BW")
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..f6e9e9c3c
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/pr110309.c
|
||||||
|
@@ -0,0 +1,10 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */
|
||||||
|
+/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+void foo (int * __restrict a, int *b)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 6; ++i)
|
||||||
|
+ a[i] = b[i] + 42;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.28.0.windows.1
|
||||||
|
|
||||||
126
0063-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch
Normal file
126
0063-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
From 5ad28ef4010c1248b4d94396d03f863705f7b0db Mon Sep 17 00:00:00 2001
|
||||||
|
From: liuhongt <hongtao.liu@intel.com>
|
||||||
|
Date: Mon, 26 Jun 2023 21:07:09 +0800
|
||||||
|
Subject: [PATCH 08/32] Refine maskstore patterns with UNSPEC_MASKMOV.
|
||||||
|
|
||||||
|
Similar to r14-2070-gc79476da46728e.
|
||||||
|
|
||||||
|
If mem_addr points to a memory region with less than whole vector size
|
||||||
|
bytes of accessible memory and k is a mask that would prevent writing
|
||||||
|
the inaccessible bytes at mem_addr, add UNSPEC_MASKMOV to prevent
|
||||||
|
it from being transformed to any other whole memory access instructions.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
PR rtl-optimization/110237
|
||||||
|
* config/i386/sse.md (<avx512>_store<mode>_mask): Refine with
|
||||||
|
UNSPEC_MASKMOV.
|
||||||
|
(maskstore<mode><avx512fmaskmodelower>): Ditto.
|
||||||
|
(*<avx512>_store<mode>_mask): New define_insn, it's renamed
|
||||||
|
from original <avx512>_store<mode>_mask.
|
||||||
|
---
|
||||||
|
gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++--------
|
||||||
|
1 file changed, 57 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||||||
|
index b30e96cb1..3af159896 100644
|
||||||
|
--- a/gcc/config/i386/sse.md
|
||||||
|
+++ b/gcc/config/i386/sse.md
|
||||||
|
@@ -1554,7 +1554,7 @@
|
||||||
|
(set_attr "prefix" "evex")
|
||||||
|
(set_attr "mode" "<sseinsnmode>")])
|
||||||
|
|
||||||
|
-(define_insn "<avx512>_store<mode>_mask"
|
||||||
|
+(define_insn "*<avx512>_store<mode>_mask"
|
||||||
|
[(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
|
||||||
|
(vec_merge:V48_AVX512VL
|
||||||
|
(match_operand:V48_AVX512VL 1 "register_operand" "v")
|
||||||
|
@@ -1582,7 +1582,7 @@
|
||||||
|
(set_attr "memory" "store")
|
||||||
|
(set_attr "mode" "<sseinsnmode>")])
|
||||||
|
|
||||||
|
-(define_insn "<avx512>_store<mode>_mask"
|
||||||
|
+(define_insn "*<avx512>_store<mode>_mask"
|
||||||
|
[(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
|
||||||
|
(vec_merge:VI12HF_AVX512VL
|
||||||
|
(match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
|
||||||
|
@@ -26002,21 +26002,66 @@
|
||||||
|
"TARGET_AVX")
|
||||||
|
|
||||||
|
(define_expand "maskstore<mode><avx512fmaskmodelower>"
|
||||||
|
- [(set (match_operand:V48H_AVX512VL 0 "memory_operand")
|
||||||
|
- (vec_merge:V48H_AVX512VL
|
||||||
|
- (match_operand:V48H_AVX512VL 1 "register_operand")
|
||||||
|
- (match_dup 0)
|
||||||
|
- (match_operand:<avx512fmaskmode> 2 "register_operand")))]
|
||||||
|
+ [(set (match_operand:V48_AVX512VL 0 "memory_operand")
|
||||||
|
+ (unspec:V48_AVX512VL
|
||||||
|
+ [(match_operand:V48_AVX512VL 1 "register_operand")
|
||||||
|
+ (match_dup 0)
|
||||||
|
+ (match_operand:<avx512fmaskmode> 2 "register_operand")]
|
||||||
|
+ UNSPEC_MASKMOV))]
|
||||||
|
"TARGET_AVX512F")
|
||||||
|
|
||||||
|
(define_expand "maskstore<mode><avx512fmaskmodelower>"
|
||||||
|
- [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
|
||||||
|
- (vec_merge:VI12_AVX512VL
|
||||||
|
- (match_operand:VI12_AVX512VL 1 "register_operand")
|
||||||
|
- (match_dup 0)
|
||||||
|
- (match_operand:<avx512fmaskmode> 2 "register_operand")))]
|
||||||
|
+ [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand")
|
||||||
|
+ (unspec:VI12HF_AVX512VL
|
||||||
|
+ [(match_operand:VI12HF_AVX512VL 1 "register_operand")
|
||||||
|
+ (match_dup 0)
|
||||||
|
+ (match_operand:<avx512fmaskmode> 2 "register_operand")]
|
||||||
|
+ UNSPEC_MASKMOV))]
|
||||||
|
"TARGET_AVX512BW")
|
||||||
|
|
||||||
|
+(define_insn "<avx512>_store<mode>_mask"
|
||||||
|
+ [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
|
||||||
|
+ (unspec:V48_AVX512VL
|
||||||
|
+ [(match_operand:V48_AVX512VL 1 "register_operand" "v")
|
||||||
|
+ (match_dup 0)
|
||||||
|
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
|
||||||
|
+ UNSPEC_MASKMOV))]
|
||||||
|
+ "TARGET_AVX512F"
|
||||||
|
+{
|
||||||
|
+ if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
|
||||||
|
+ {
|
||||||
|
+ if (misaligned_operand (operands[0], <MODE>mode))
|
||||||
|
+ return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
|
||||||
|
+ else
|
||||||
|
+ return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if (misaligned_operand (operands[0], <MODE>mode))
|
||||||
|
+ return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
|
||||||
|
+ else
|
||||||
|
+ return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+ [(set_attr "type" "ssemov")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "memory" "store")
|
||||||
|
+ (set_attr "mode" "<sseinsnmode>")])
|
||||||
|
+
|
||||||
|
+(define_insn "<avx512>_store<mode>_mask"
|
||||||
|
+ [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
|
||||||
|
+ (unspec:VI12HF_AVX512VL
|
||||||
|
+ [(match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
|
||||||
|
+ (match_dup 0)
|
||||||
|
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
|
||||||
|
+ UNSPEC_MASKMOV))]
|
||||||
|
+ "TARGET_AVX512BW"
|
||||||
|
+ "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
|
||||||
|
+ [(set_attr "type" "ssemov")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "memory" "store")
|
||||||
|
+ (set_attr "mode" "<sseinsnmode>")])
|
||||||
|
+
|
||||||
|
(define_expand "cbranch<mode>4"
|
||||||
|
[(set (reg:CC FLAGS_REG)
|
||||||
|
(compare:CC (match_operand:VI48_AVX 1 "register_operand")
|
||||||
|
--
|
||||||
|
2.28.0.windows.1
|
||||||
|
|
||||||
@ -0,0 +1,38 @@
|
|||||||
|
From 50757adc93ef32a97a8a1083f5d53a9c00da6ac8 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Cui, Lili" <lili.cui@intel.com>
|
||||||
|
Date: Thu, 29 Jun 2023 03:10:35 +0000
|
||||||
|
Subject: [PATCH 09/32] x86: Update model values for Alderlake and Rocketlake.
|
||||||
|
|
||||||
|
Update model values for Alderlake and Rocketlake according to SDM.
|
||||||
|
|
||||||
|
gcc/ChangeLog
|
||||||
|
|
||||||
|
* common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
|
||||||
|
from Rocketlake, remove model value 0xbf from Alderlake.
|
||||||
|
---
|
||||||
|
gcc/common/config/i386/cpuinfo.h | 2 --
|
||||||
|
1 file changed, 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||||
|
index 0333da56b..28b2ff0b0 100644
|
||||||
|
--- a/gcc/common/config/i386/cpuinfo.h
|
||||||
|
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||||
|
@@ -435,7 +435,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||||
|
cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE;
|
||||||
|
break;
|
||||||
|
case 0xa7:
|
||||||
|
- case 0xa8:
|
||||||
|
/* Rocket Lake. */
|
||||||
|
cpu = "rocketlake";
|
||||||
|
CHECK___builtin_cpu_is ("corei7");
|
||||||
|
@@ -508,7 +507,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||||
|
break;
|
||||||
|
case 0x97:
|
||||||
|
case 0x9a:
|
||||||
|
- case 0xbf:
|
||||||
|
/* Alder Lake. */
|
||||||
|
cpu = "alderlake";
|
||||||
|
CHECK___builtin_cpu_is ("corei7");
|
||||||
|
--
|
||||||
|
2.28.0.windows.1
|
||||||
|
|
||||||
78
0065-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch
Normal file
78
0065-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
From 60364b439a80c217174e1830e0b7507d6f4538c4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: liuhongt <hongtao.liu@intel.com>
|
||||||
|
Date: Fri, 4 Aug 2023 09:27:39 +0800
|
||||||
|
Subject: [PATCH 10/32] Workaround possible CPUID bug in Sandy Bridge.
|
||||||
|
|
||||||
|
Don't access leaf 7 subleaf 1 unless subleaf 0 says it is
|
||||||
|
supported via EAX.
|
||||||
|
|
||||||
|
Intel documentation says invalid subleaves return 0. We had been
|
||||||
|
relying on that behavior instead of checking the max subleaf number.
|
||||||
|
|
||||||
|
It appears that some Sandy Bridge CPUs return at least the subleaf 0
|
||||||
|
EDX value for subleaf 1. Best guess is that this is a bug in a
|
||||||
|
microcode patch since all of the bits we're seeing set in EDX were
|
||||||
|
introduced after Sandy Bridge was originally released.
|
||||||
|
|
||||||
|
This is causing avxvnniint16 to be incorrectly enabled with
|
||||||
|
-march=native on these CPUs.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* common/config/i386/cpuinfo.h (get_available_features): Check
|
||||||
|
max_subleaf_level for a valid subleaf before using CPUID.
|
||||||
|
---
|
||||||
|
gcc/common/config/i386/cpuinfo.h | 29 +++++++++++++++++------------
|
||||||
|
1 file changed, 17 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||||
|
index 28b2ff0b0..316ad3cb3 100644
|
||||||
|
--- a/gcc/common/config/i386/cpuinfo.h
|
||||||
|
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||||
|
@@ -647,7 +647,9 @@ get_available_features (struct __processor_model *cpu_model,
|
||||||
|
/* Get Advanced Features at level 7 (eax = 7, ecx = 0/1). */
|
||||||
|
if (max_cpuid_level >= 7)
|
||||||
|
{
|
||||||
|
- __cpuid_count (7, 0, eax, ebx, ecx, edx);
|
||||||
|
+ unsigned int max_subleaf_level;
|
||||||
|
+
|
||||||
|
+ __cpuid_count (7, 0, max_subleaf_level, ebx, ecx, edx);
|
||||||
|
if (ebx & bit_BMI)
|
||||||
|
set_feature (FEATURE_BMI);
|
||||||
|
if (ebx & bit_SGX)
|
||||||
|
@@ -759,18 +761,21 @@ get_available_features (struct __processor_model *cpu_model,
|
||||||
|
set_feature (FEATURE_AVX512FP16);
|
||||||
|
}
|
||||||
|
|
||||||
|
- __cpuid_count (7, 1, eax, ebx, ecx, edx);
|
||||||
|
- if (eax & bit_HRESET)
|
||||||
|
- set_feature (FEATURE_HRESET);
|
||||||
|
- if (avx_usable)
|
||||||
|
- {
|
||||||
|
- if (eax & bit_AVXVNNI)
|
||||||
|
- set_feature (FEATURE_AVXVNNI);
|
||||||
|
- }
|
||||||
|
- if (avx512_usable)
|
||||||
|
+ if (max_subleaf_level >= 1)
|
||||||
|
{
|
||||||
|
- if (eax & bit_AVX512BF16)
|
||||||
|
- set_feature (FEATURE_AVX512BF16);
|
||||||
|
+ __cpuid_count (7, 1, eax, ebx, ecx, edx);
|
||||||
|
+ if (eax & bit_HRESET)
|
||||||
|
+ set_feature (FEATURE_HRESET);
|
||||||
|
+ if (avx_usable)
|
||||||
|
+ {
|
||||||
|
+ if (eax & bit_AVXVNNI)
|
||||||
|
+ set_feature (FEATURE_AVXVNNI);
|
||||||
|
+ }
|
||||||
|
+ if (avx512_usable)
|
||||||
|
+ {
|
||||||
|
+ if (eax & bit_AVX512BF16)
|
||||||
|
+ set_feature (FEATURE_AVX512BF16);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.28.0.windows.1
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user