!418 Revert Intel patches.
From: @lesleyzheng1103 Reviewed-by: @huang-xiaoquan Signed-off-by: @huang-xiaoquan
This commit is contained in:
commit
1c11226629
@ -1,135 +0,0 @@
|
||||
From 1649f9fbbc5267de2a675336d3ac665528a03db8 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Wed, 10 May 2023 15:16:58 +0800
|
||||
Subject: [PATCH 03/32] x86: Add a new option -mdaz-ftz to enable FTZ and DAZ
|
||||
flags in MXCSR.
|
||||
|
||||
if (mdaz-ftz)
|
||||
link crtfastmath.o
|
||||
else if ((Ofast || ffast-math || funsafe-math-optimizations)
|
||||
&& !mno-daz-ftz)
|
||||
link crtfastmath.o
|
||||
else
|
||||
Don't link crtfastmath.o
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/i386/cygwin.h (ENDFILE_SPEC): Link crtfastmath.o
|
||||
whenever -mdaz-ftz is specified. Don't link crtfastmath.o
|
||||
when -mno-daz-ftz is specified.
|
||||
* config/i386/darwin.h (ENDFILE_SPEC): Ditto.
|
||||
* config/i386/gnu-user-common.h
|
||||
(GNU_USER_TARGET_MATHFILE_SPEC): Ditto.
|
||||
* config/i386/mingw32.h (ENDFILE_SPEC): Ditto.
|
||||
* config/i386/i386.opt (mdaz-ftz): New option.
|
||||
* doc/invoke.texi (x86 options): Document mdaz-ftz.
|
||||
---
|
||||
gcc/config/i386/cygwin.h | 2 +-
|
||||
gcc/config/i386/darwin.h | 4 ++--
|
||||
gcc/config/i386/gnu-user-common.h | 2 +-
|
||||
gcc/config/i386/i386.opt | 4 ++++
|
||||
gcc/config/i386/mingw32.h | 2 +-
|
||||
gcc/doc/invoke.texi | 11 ++++++++++-
|
||||
6 files changed, 19 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h
|
||||
index d06eda369..5412c5d44 100644
|
||||
--- a/gcc/config/i386/cygwin.h
|
||||
+++ b/gcc/config/i386/cygwin.h
|
||||
@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
|
||||
#undef ENDFILE_SPEC
|
||||
#define ENDFILE_SPEC \
|
||||
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}\
|
||||
+ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
|
||||
%{!shared:%:if-exists(default-manifest.o%s)}\
|
||||
%{fvtable-verify=none:%s; \
|
||||
fvtable-verify=preinit:vtv_end.o%s; \
|
||||
diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h
|
||||
index a55f6b2b8..2f773924d 100644
|
||||
--- a/gcc/config/i386/darwin.h
|
||||
+++ b/gcc/config/i386/darwin.h
|
||||
@@ -109,8 +109,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
"%{!force_cpusubtype_ALL:-force_cpusubtype_ALL} "
|
||||
|
||||
#undef ENDFILE_SPEC
|
||||
-#define ENDFILE_SPEC \
|
||||
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
|
||||
+#define ENDFILE_SPEC
|
||||
+\ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
|
||||
%{mpc32:crtprec32.o%s} \
|
||||
%{mpc64:crtprec64.o%s} \
|
||||
%{mpc80:crtprec80.o%s}" TM_DESTRUCTOR
|
||||
diff --git a/gcc/config/i386/gnu-user-common.h b/gcc/config/i386/gnu-user-common.h
|
||||
index 23b54c5be..3d2a33f17 100644
|
||||
--- a/gcc/config/i386/gnu-user-common.h
|
||||
+++ b/gcc/config/i386/gnu-user-common.h
|
||||
@@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
|
||||
/* Similar to standard GNU userspace, but adding -ffast-math support. */
|
||||
#define GNU_USER_TARGET_MATHFILE_SPEC \
|
||||
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
|
||||
+ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
|
||||
%{mpc32:crtprec32.o%s} \
|
||||
%{mpc64:crtprec64.o%s} \
|
||||
%{mpc80:crtprec80.o%s}"
|
||||
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
|
||||
index fc1b944ac..498fb454d 100644
|
||||
--- a/gcc/config/i386/i386.opt
|
||||
+++ b/gcc/config/i386/i386.opt
|
||||
@@ -420,6 +420,10 @@ mpc80
|
||||
Target RejectNegative
|
||||
Set 80387 floating-point precision to 80-bit.
|
||||
|
||||
+mdaz-ftz
|
||||
+Target
|
||||
+Set the FTZ and DAZ Flags.
|
||||
+
|
||||
mpreferred-stack-boundary=
|
||||
Target RejectNegative Joined UInteger Var(ix86_preferred_stack_boundary_arg)
|
||||
Attempt to keep stack aligned to this power of 2.
|
||||
diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h
|
||||
index d3ca0cd02..ddbe6a405 100644
|
||||
--- a/gcc/config/i386/mingw32.h
|
||||
+++ b/gcc/config/i386/mingw32.h
|
||||
@@ -197,7 +197,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
|
||||
#undef ENDFILE_SPEC
|
||||
#define ENDFILE_SPEC \
|
||||
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
|
||||
+ "%{mdaz-ftz:crtfastmath.o%s;Ofast|ffast-math|funsafe-math-optimizations:%{!mno-daz-ftz:crtfastmath.o%s}} \
|
||||
%{!shared:%:if-exists(default-manifest.o%s)}\
|
||||
%{fvtable-verify=none:%s; \
|
||||
fvtable-verify=preinit:vtv_end.o%s; \
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 2b376e0e9..3a48655e5 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -1437,7 +1437,7 @@ See RS/6000 and PowerPC Options.
|
||||
-m96bit-long-double -mlong-double-64 -mlong-double-80 -mlong-double-128 @gol
|
||||
-mregparm=@var{num} -msseregparm @gol
|
||||
-mveclibabi=@var{type} -mvect8-ret-in-mem @gol
|
||||
--mpc32 -mpc64 -mpc80 -mstackrealign @gol
|
||||
+-mpc32 -mpc64 -mpc80 -mdaz-ftz -mstackrealign @gol
|
||||
-momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol
|
||||
-mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol
|
||||
-m32 -m64 -mx32 -m16 -miamcu -mlarge-data-threshold=@var{num} @gol
|
||||
@@ -32122,6 +32122,15 @@ are enabled by default; routines in such libraries could suffer significant
|
||||
loss of accuracy, typically through so-called ``catastrophic cancellation'',
|
||||
when this option is used to set the precision to less than extended precision.
|
||||
|
||||
+@item -mdaz-ftz
|
||||
+@opindex mdaz-ftz
|
||||
+
|
||||
+The flush-to-zero (FTZ) and denormals-are-zero (DAZ) flags in the MXCSR register
|
||||
+are used to control floating-point calculations. SSE and AVX instructions
|
||||
+including scalar and vector instructions could benefit from enabling the FTZ
|
||||
+and DAZ flags when @option{-mdaz-ftz} is specified. Don't set FTZ/DAZ flags
|
||||
+when @option{-mno-daz-ftz} is specified.
|
||||
+
|
||||
@item -mstackrealign
|
||||
@opindex mstackrealign
|
||||
Realign the stack at entry. On the x86, the @option{-mstackrealign}
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,65 +0,0 @@
|
||||
From e70fa730dcfcb3a7b1d56a2e166752d4299f0504 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Mon, 5 Jun 2023 12:38:41 +0800
|
||||
Subject: [PATCH 04/32] Explicitly view_convert_expr mask to signed type when
|
||||
folding pblendvb builtins.
|
||||
|
||||
Since mask < 0 will be always false for vector char when
|
||||
-funsigned-char, but vpblendvb needs to check the most significant
|
||||
bit. The patch explicitly VCE to vector signed char.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR target/110108
|
||||
* config/i386/i386.cc (ix86_gimple_fold_builtin): Explicitly
|
||||
view_convert_expr mask to signed type when folding pblendvb
|
||||
builtins.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/i386/pr110108-2.c: New test.
|
||||
---
|
||||
gcc/config/i386/i386.cc | 4 +++-
|
||||
gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++
|
||||
2 files changed, 17 insertions(+), 1 deletion(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c
|
||||
|
||||
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
|
||||
index 462dce10e..479fc6010 100644
|
||||
--- a/gcc/config/i386/i386.cc
|
||||
+++ b/gcc/config/i386/i386.cc
|
||||
@@ -18396,8 +18396,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
|
||||
tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
|
||||
? intSI_type_node : intDI_type_node;
|
||||
type = get_same_sized_vectype (itype, type);
|
||||
- arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
|
||||
}
|
||||
+ else
|
||||
+ type = signed_type_for (type);
|
||||
+ arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
|
||||
tree zero_vec = build_zero_cst (type);
|
||||
tree cmp_type = truth_type_for (type);
|
||||
tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c
|
||||
new file mode 100644
|
||||
index 000000000..2d1d2fd49
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mavx2 -O2 -funsigned-char" } */
|
||||
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
|
||||
+
|
||||
+#include <immintrin.h>
|
||||
+__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) {
|
||||
+ __m128i Result = _mm_blendv_epi8(X0, X1, X2);
|
||||
+ return Result;
|
||||
+}
|
||||
+
|
||||
+__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) {
|
||||
+ __m256i Result = _mm256_blendv_epi8(X0, X1, X2);
|
||||
+ return Result;
|
||||
+}
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,138 +0,0 @@
|
||||
From 48715f03ad08f185153bfb0ff4c0802ab2d9579c Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Mon, 26 Jun 2023 09:50:25 +0800
|
||||
Subject: [PATCH 05/32] Make option mvzeroupper independent of optimization
|
||||
level.
|
||||
|
||||
pass_insert_vzeroupper is under condition
|
||||
|
||||
TARGET_AVX && TARGET_VZEROUPPER
|
||||
&& flag_expensive_optimizations && !optimize_size
|
||||
|
||||
But the document of mvzeroupper doesn't mention the insertion
|
||||
requires -O2 or above, which may confuse users when they explicitly
|
||||
use -Os -mvzeroupper.
|
||||
|
||||
------------
|
||||
mvzeroupper
|
||||
Target Mask(VZEROUPPER) Save
|
||||
Generate vzeroupper instruction before a transfer of control flow out of
|
||||
the function.
|
||||
------------
|
||||
|
||||
The patch moves flag_expensive_optimizations && !optimize_size to
|
||||
ix86_option_override_internal. It makes -mvzeroupper independent of
|
||||
optimization level, but still keeps the behavior of architecture
|
||||
tuning(emit_vzeroupper) unchanged.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/i386/i386-features.cc (pass_insert_vzeroupper:gate):
|
||||
Move flag_expensive_optimizations && !optimize_size to ..
|
||||
* config/i386/i386-options.cc (ix86_option_override_internal):
|
||||
.. this, it makes -mvzeroupper independent of optimization
|
||||
level, but still keeps the behavior of architecture
|
||||
tuning(emit_vzeroupper) unchanged.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/i386/avx-vzeroupper-29.c: New testcase.
|
||||
* gcc.target/i386/avx-vzeroupper-12.c: Adjust testcase.
|
||||
* gcc.target/i386/avx-vzeroupper-7.c: Ditto.
|
||||
* gcc.target/i386/avx-vzeroupper-9.c: Ditto.
|
||||
---
|
||||
gcc/config/i386/i386-features.cc | 3 +--
|
||||
gcc/config/i386/i386-options.cc | 4 +++-
|
||||
gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c | 3 ++-
|
||||
gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c | 14 ++++++++++++++
|
||||
gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c | 3 ++-
|
||||
gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c | 3 ++-
|
||||
6 files changed, 24 insertions(+), 6 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
|
||||
|
||||
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
|
||||
index 6fe41c3c2..6a2444eb6 100644
|
||||
--- a/gcc/config/i386/i386-features.cc
|
||||
+++ b/gcc/config/i386/i386-features.cc
|
||||
@@ -1875,8 +1875,7 @@ public:
|
||||
/* opt_pass methods: */
|
||||
virtual bool gate (function *)
|
||||
{
|
||||
- return TARGET_AVX && TARGET_VZEROUPPER
|
||||
- && flag_expensive_optimizations && !optimize_size;
|
||||
+ return TARGET_AVX && TARGET_VZEROUPPER;
|
||||
}
|
||||
|
||||
virtual unsigned int execute (function *)
|
||||
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||
index ff44ad4e0..74e969b68 100644
|
||||
--- a/gcc/config/i386/i386-options.cc
|
||||
+++ b/gcc/config/i386/i386-options.cc
|
||||
@@ -2702,7 +2702,9 @@ ix86_option_override_internal (bool main_args_p,
|
||||
sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH");
|
||||
|
||||
if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
|
||||
- && TARGET_EMIT_VZEROUPPER)
|
||||
+ && TARGET_EMIT_VZEROUPPER
|
||||
+ && flag_expensive_optimizations
|
||||
+ && !optimize_size)
|
||||
opts->x_target_flags |= MASK_VZEROUPPER;
|
||||
if (!(opts_set->x_target_flags & MASK_STV))
|
||||
opts->x_target_flags |= MASK_STV;
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
|
||||
index e694d4048..5a40e8783 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-12.c
|
||||
@@ -16,5 +16,6 @@ foo ()
|
||||
_mm256_zeroupper ();
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
|
||||
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
|
||||
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
|
||||
new file mode 100644
|
||||
index 000000000..4af637757
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-29.c
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O0 -mavx -mtune=generic -mvzeroupper -dp" } */
|
||||
+
|
||||
+#include <immintrin.h>
|
||||
+
|
||||
+extern __m256 x, y;
|
||||
+
|
||||
+void
|
||||
+foo ()
|
||||
+{
|
||||
+ x = y;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
|
||||
index ab6d68779..75fe58897 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-7.c
|
||||
@@ -12,4 +12,5 @@ foo ()
|
||||
_mm256_zeroupper ();
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 { target ia32 } } } */
|
||||
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 2 { target { ! ia32 } } } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
|
||||
index 974e1626a..fa0a6dfca 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-9.c
|
||||
@@ -15,4 +15,5 @@ foo ()
|
||||
_mm256_zeroupper ();
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 } } */
|
||||
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 4 { target ia32 } } } */
|
||||
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 5 { target { ! ia32 } } } } */
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,68 +0,0 @@
|
||||
From 8039d773354360ed8ff2f25c63843fc637eacc67 Mon Sep 17 00:00:00 2001
|
||||
From: Hongyu Wang <hongyu.wang@intel.com>
|
||||
Date: Sun, 25 Jun 2023 09:50:21 +0800
|
||||
Subject: [PATCH 06/32] i386: Sync tune_string with arch_string for target
|
||||
attribute
|
||||
|
||||
arch=*
|
||||
|
||||
For function with target attribute arch=*, current logic will set its
|
||||
tune to -mtune from command line so all target_clones will get same
|
||||
tuning flags which would affect the performance for each clone. Override
|
||||
tune with arch if tune was not explicitly specified to get proper tuning
|
||||
flags for target_clones.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/i386/i386-options.cc (ix86_valid_target_attribute_tree):
|
||||
Override tune_string with arch_string if tune_string is not
|
||||
explicitly specified.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/i386/mvc17.c: New test.
|
||||
|
||||
(cherry picked from commit 2916278d14e9ac28c361c396a67256acbebda6e8)
|
||||
---
|
||||
gcc/config/i386/i386-options.cc | 6 +++++-
|
||||
gcc/testsuite/gcc.target/i386/mvc17.c | 11 +++++++++++
|
||||
2 files changed, 16 insertions(+), 1 deletion(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/mvc17.c
|
||||
|
||||
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||
index 74e969b68..fb2ed942f 100644
|
||||
--- a/gcc/config/i386/i386-options.cc
|
||||
+++ b/gcc/config/i386/i386-options.cc
|
||||
@@ -1378,7 +1378,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
|
||||
if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
|
||||
opts->x_ix86_tune_string
|
||||
= ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
|
||||
- else if (orig_tune_defaulted)
|
||||
+ /* If we have explicit arch string and no tune string specified, set
|
||||
+ tune_string to NULL and later it will be overridden by arch_string
|
||||
+ so target clones can get proper optimization. */
|
||||
+ else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
|
||||
+ || orig_tune_defaulted)
|
||||
opts->x_ix86_tune_string = NULL;
|
||||
|
||||
/* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c
|
||||
new file mode 100644
|
||||
index 000000000..8b83c1aec
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/mvc17.c
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-ifunc "" } */
|
||||
+/* { dg-options "-O2 -march=x86-64" } */
|
||||
+/* { dg-final { scan-assembler-times "rep mov" 1 } } */
|
||||
+
|
||||
+__attribute__((target_clones("default","arch=icelake-server")))
|
||||
+void
|
||||
+foo (char *a, char *b, int size)
|
||||
+{
|
||||
+ __builtin_memcpy (a, b, size & 0x7F);
|
||||
+}
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,111 +0,0 @@
|
||||
From fbcb1a5899b1bd3964aed78ed74041121e618d36 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Tue, 20 Jun 2023 15:41:00 +0800
|
||||
Subject: [PATCH 07/32] Refine maskloadmn pattern with UNSPEC_MASKLOAD.
|
||||
|
||||
If mem_addr points to a memory region with less than whole vector size
|
||||
bytes of accessible memory and k is a mask that would prevent reading
|
||||
the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent
|
||||
it from being transformed to vpblendd.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR target/110309
|
||||
* config/i386/sse.md (maskload<mode><avx512fmaskmodelower>):
|
||||
Refine pattern with UNSPEC_MASKLOAD.
|
||||
(maskload<mode><avx512fmaskmodelower>): Ditto.
|
||||
(*<avx512>_load<mode>_mask): Extend mode iterator to
|
||||
VI12HF_AVX512VL.
|
||||
(*<avx512>_load<mode>): Ditto.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/i386/pr110309.c: New test.
|
||||
---
|
||||
gcc/config/i386/sse.md | 32 +++++++++++++-----------
|
||||
gcc/testsuite/gcc.target/i386/pr110309.c | 10 ++++++++
|
||||
2 files changed, 28 insertions(+), 14 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/pr110309.c
|
||||
|
||||
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||||
index eb767e56c..b30e96cb1 100644
|
||||
--- a/gcc/config/i386/sse.md
|
||||
+++ b/gcc/config/i386/sse.md
|
||||
@@ -1411,12 +1411,12 @@
|
||||
})
|
||||
|
||||
(define_insn "*<avx512>_load<mode>_mask"
|
||||
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
|
||||
- (vec_merge:VI12_AVX512VL
|
||||
- (unspec:VI12_AVX512VL
|
||||
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
|
||||
+ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
|
||||
+ (vec_merge:VI12HF_AVX512VL
|
||||
+ (unspec:VI12HF_AVX512VL
|
||||
+ [(match_operand:VI12HF_AVX512VL 1 "memory_operand" "m")]
|
||||
UNSPEC_MASKLOAD)
|
||||
- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
|
||||
+ (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand" "0C")
|
||||
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
|
||||
"TARGET_AVX512BW"
|
||||
"vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
|
||||
@@ -1425,9 +1425,9 @@
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
(define_insn_and_split "*<avx512>_load<mode>"
|
||||
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
|
||||
- (unspec:VI12_AVX512VL
|
||||
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
|
||||
+ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
|
||||
+ (unspec:VI12HF_AVX512VL
|
||||
+ [(match_operand:VI12HF_AVX512VL 1 "memory_operand" "m")]
|
||||
UNSPEC_MASKLOAD))]
|
||||
"TARGET_AVX512BW"
|
||||
"#"
|
||||
@@ -25973,17 +25973,21 @@
|
||||
"TARGET_AVX")
|
||||
|
||||
(define_expand "maskload<mode><avx512fmaskmodelower>"
|
||||
- [(set (match_operand:V48H_AVX512VL 0 "register_operand")
|
||||
- (vec_merge:V48H_AVX512VL
|
||||
- (match_operand:V48H_AVX512VL 1 "memory_operand")
|
||||
+ [(set (match_operand:V48_AVX512VL 0 "register_operand")
|
||||
+ (vec_merge:V48_AVX512VL
|
||||
+ (unspec:V48_AVX512VL
|
||||
+ [(match_operand:V48_AVX512VL 1 "memory_operand")]
|
||||
+ UNSPEC_MASKLOAD)
|
||||
(match_dup 0)
|
||||
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
|
||||
"TARGET_AVX512F")
|
||||
|
||||
(define_expand "maskload<mode><avx512fmaskmodelower>"
|
||||
- [(set (match_operand:VI12_AVX512VL 0 "register_operand")
|
||||
- (vec_merge:VI12_AVX512VL
|
||||
- (match_operand:VI12_AVX512VL 1 "memory_operand")
|
||||
+ [(set (match_operand:VI12HF_AVX512VL 0 "register_operand")
|
||||
+ (vec_merge:VI12HF_AVX512VL
|
||||
+ (unspec:VI12HF_AVX512VL
|
||||
+ [(match_operand:VI12HF_AVX512VL 1 "memory_operand")]
|
||||
+ UNSPEC_MASKLOAD)
|
||||
(match_dup 0)
|
||||
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
|
||||
"TARGET_AVX512BW")
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c
|
||||
new file mode 100644
|
||||
index 000000000..f6e9e9c3c
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/pr110309.c
|
||||
@@ -0,0 +1,10 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */
|
||||
+/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
|
||||
+
|
||||
+
|
||||
+void foo (int * __restrict a, int *b)
|
||||
+{
|
||||
+ for (int i = 0; i < 6; ++i)
|
||||
+ a[i] = b[i] + 42;
|
||||
+}
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,126 +0,0 @@
|
||||
From 5ad28ef4010c1248b4d94396d03f863705f7b0db Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Mon, 26 Jun 2023 21:07:09 +0800
|
||||
Subject: [PATCH 08/32] Refine maskstore patterns with UNSPEC_MASKMOV.
|
||||
|
||||
Similar to r14-2070-gc79476da46728e.
|
||||
|
||||
If mem_addr points to a memory region with less than whole vector size
|
||||
bytes of accessible memory and k is a mask that would prevent reading
|
||||
the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent
|
||||
it from being transformed to any other whole memory access instructions.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR rtl-optimization/110237
|
||||
* config/i386/sse.md (<avx512>_store<mode>_mask): Refine with
|
||||
UNSPEC_MASKMOV.
|
||||
(maskstore<mode><avx512fmaskmodelower>): Ditto.
|
||||
(*<avx512>_store<mode>_mask): New define_insn, it's renamed
|
||||
from original <avx512>_store<mode>_mask.
|
||||
---
|
||||
gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++--------
|
||||
1 file changed, 57 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||||
index b30e96cb1..3af159896 100644
|
||||
--- a/gcc/config/i386/sse.md
|
||||
+++ b/gcc/config/i386/sse.md
|
||||
@@ -1554,7 +1554,7 @@
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
-(define_insn "<avx512>_store<mode>_mask"
|
||||
+(define_insn "*<avx512>_store<mode>_mask"
|
||||
[(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
|
||||
(vec_merge:V48_AVX512VL
|
||||
(match_operand:V48_AVX512VL 1 "register_operand" "v")
|
||||
@@ -1582,7 +1582,7 @@
|
||||
(set_attr "memory" "store")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
-(define_insn "<avx512>_store<mode>_mask"
|
||||
+(define_insn "*<avx512>_store<mode>_mask"
|
||||
[(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
|
||||
(vec_merge:VI12HF_AVX512VL
|
||||
(match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
|
||||
@@ -26002,21 +26002,66 @@
|
||||
"TARGET_AVX")
|
||||
|
||||
(define_expand "maskstore<mode><avx512fmaskmodelower>"
|
||||
- [(set (match_operand:V48H_AVX512VL 0 "memory_operand")
|
||||
- (vec_merge:V48H_AVX512VL
|
||||
- (match_operand:V48H_AVX512VL 1 "register_operand")
|
||||
- (match_dup 0)
|
||||
- (match_operand:<avx512fmaskmode> 2 "register_operand")))]
|
||||
+ [(set (match_operand:V48_AVX512VL 0 "memory_operand")
|
||||
+ (unspec:V48_AVX512VL
|
||||
+ [(match_operand:V48_AVX512VL 1 "register_operand")
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:<avx512fmaskmode> 2 "register_operand")]
|
||||
+ UNSPEC_MASKMOV))]
|
||||
"TARGET_AVX512F")
|
||||
|
||||
(define_expand "maskstore<mode><avx512fmaskmodelower>"
|
||||
- [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
|
||||
- (vec_merge:VI12_AVX512VL
|
||||
- (match_operand:VI12_AVX512VL 1 "register_operand")
|
||||
- (match_dup 0)
|
||||
- (match_operand:<avx512fmaskmode> 2 "register_operand")))]
|
||||
+ [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand")
|
||||
+ (unspec:VI12HF_AVX512VL
|
||||
+ [(match_operand:VI12HF_AVX512VL 1 "register_operand")
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:<avx512fmaskmode> 2 "register_operand")]
|
||||
+ UNSPEC_MASKMOV))]
|
||||
"TARGET_AVX512BW")
|
||||
|
||||
+(define_insn "<avx512>_store<mode>_mask"
|
||||
+ [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
|
||||
+ (unspec:V48_AVX512VL
|
||||
+ [(match_operand:V48_AVX512VL 1 "register_operand" "v")
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
|
||||
+ UNSPEC_MASKMOV))]
|
||||
+ "TARGET_AVX512F"
|
||||
+{
|
||||
+ if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
|
||||
+ {
|
||||
+ if (misaligned_operand (operands[0], <MODE>mode))
|
||||
+ return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
|
||||
+ else
|
||||
+ return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (misaligned_operand (operands[0], <MODE>mode))
|
||||
+ return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
|
||||
+ else
|
||||
+ return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
|
||||
+ }
|
||||
+}
|
||||
+ [(set_attr "type" "ssemov")
|
||||
+ (set_attr "prefix" "evex")
|
||||
+ (set_attr "memory" "store")
|
||||
+ (set_attr "mode" "<sseinsnmode>")])
|
||||
+
|
||||
+(define_insn "<avx512>_store<mode>_mask"
|
||||
+ [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
|
||||
+ (unspec:VI12HF_AVX512VL
|
||||
+ [(match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
|
||||
+ UNSPEC_MASKMOV))]
|
||||
+ "TARGET_AVX512BW"
|
||||
+ "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
|
||||
+ [(set_attr "type" "ssemov")
|
||||
+ (set_attr "prefix" "evex")
|
||||
+ (set_attr "memory" "store")
|
||||
+ (set_attr "mode" "<sseinsnmode>")])
|
||||
+
|
||||
(define_expand "cbranch<mode>4"
|
||||
[(set (reg:CC FLAGS_REG)
|
||||
(compare:CC (match_operand:VI48_AVX 1 "register_operand")
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,38 +0,0 @@
|
||||
From 50757adc93ef32a97a8a1083f5d53a9c00da6ac8 Mon Sep 17 00:00:00 2001
|
||||
From: "Cui, Lili" <lili.cui@intel.com>
|
||||
Date: Thu, 29 Jun 2023 03:10:35 +0000
|
||||
Subject: [PATCH 09/32] x86: Update model values for Alderlake and Rocketlake.
|
||||
|
||||
Update model values for Alderlake and Rocketlake according to SDM.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
|
||||
from Rocketlake, remove model value 0xbf from Alderlake.
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 2 --
|
||||
1 file changed, 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 0333da56b..28b2ff0b0 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -435,7 +435,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
cpu_model->__cpu_subtype = INTEL_COREI7_SKYLAKE;
|
||||
break;
|
||||
case 0xa7:
|
||||
- case 0xa8:
|
||||
/* Rocket Lake. */
|
||||
cpu = "rocketlake";
|
||||
CHECK___builtin_cpu_is ("corei7");
|
||||
@@ -508,7 +507,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
break;
|
||||
case 0x97:
|
||||
case 0x9a:
|
||||
- case 0xbf:
|
||||
/* Alder Lake. */
|
||||
cpu = "alderlake";
|
||||
CHECK___builtin_cpu_is ("corei7");
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,78 +0,0 @@
|
||||
From 60364b439a80c217174e1830e0b7507d6f4538c4 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Fri, 4 Aug 2023 09:27:39 +0800
|
||||
Subject: [PATCH 10/32] Workaround possible CPUID bug in Sandy Bridge.
|
||||
|
||||
Don't access leaf 7 subleaf 1 unless subleaf 0 says it is
|
||||
supported via EAX.
|
||||
|
||||
Intel documentation says invalid subleaves return 0. We had been
|
||||
relying on that behavior instead of checking the max subleaf number.
|
||||
|
||||
It appears that some Sandy Bridge CPUs return at least the subleaf 0
|
||||
EDX value for subleaf 1. Best guess is that this is a bug in a
|
||||
microcode patch since all of the bits we're seeing set in EDX were
|
||||
introduced after Sandy Bridge was originally released.
|
||||
|
||||
This is causing avxvnniint16 to be incorrectly enabled with
|
||||
-march=native on these CPUs.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_available_features): Check
|
||||
max_subleaf_level for valid subleaf before using CPUID.
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 29 +++++++++++++++++------------
|
||||
1 file changed, 17 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 28b2ff0b0..316ad3cb3 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -647,7 +647,9 @@ get_available_features (struct __processor_model *cpu_model,
|
||||
/* Get Advanced Features at level 7 (eax = 7, ecx = 0/1). */
|
||||
if (max_cpuid_level >= 7)
|
||||
{
|
||||
- __cpuid_count (7, 0, eax, ebx, ecx, edx);
|
||||
+ unsigned int max_subleaf_level;
|
||||
+
|
||||
+ __cpuid_count (7, 0, max_subleaf_level, ebx, ecx, edx);
|
||||
if (ebx & bit_BMI)
|
||||
set_feature (FEATURE_BMI);
|
||||
if (ebx & bit_SGX)
|
||||
@@ -759,18 +761,21 @@ get_available_features (struct __processor_model *cpu_model,
|
||||
set_feature (FEATURE_AVX512FP16);
|
||||
}
|
||||
|
||||
- __cpuid_count (7, 1, eax, ebx, ecx, edx);
|
||||
- if (eax & bit_HRESET)
|
||||
- set_feature (FEATURE_HRESET);
|
||||
- if (avx_usable)
|
||||
- {
|
||||
- if (eax & bit_AVXVNNI)
|
||||
- set_feature (FEATURE_AVXVNNI);
|
||||
- }
|
||||
- if (avx512_usable)
|
||||
+ if (max_subleaf_level >= 1)
|
||||
{
|
||||
- if (eax & bit_AVX512BF16)
|
||||
- set_feature (FEATURE_AVX512BF16);
|
||||
+ __cpuid_count (7, 1, eax, ebx, ecx, edx);
|
||||
+ if (eax & bit_HRESET)
|
||||
+ set_feature (FEATURE_HRESET);
|
||||
+ if (avx_usable)
|
||||
+ {
|
||||
+ if (eax & bit_AVXVNNI)
|
||||
+ set_feature (FEATURE_AVXVNNI);
|
||||
+ }
|
||||
+ if (avx512_usable)
|
||||
+ {
|
||||
+ if (eax & bit_AVX512BF16)
|
||||
+ set_feature (FEATURE_AVX512BF16);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,220 +0,0 @@
|
||||
From cfffbec938afdc45c31db5ec282ce21ad1ba2dc7 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Thu, 10 Aug 2023 11:41:39 +0800
|
||||
Subject: [PATCH 11/32] Software mitigation: Disable gather generation in
|
||||
vectorization for GDS affected Intel Processors.
|
||||
|
||||
For more details of GDS (Gather Data Sampling), refer to
|
||||
https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/gather-data-sampling.html
|
||||
|
||||
After the microcode update, there's a performance regression. To avoid that,
|
||||
the patch disables gather generation in autovectorization but uses
|
||||
gather scalar emulation instead.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/i386/i386-options.cc (m_GDS): New macro.
|
||||
* config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Don't
|
||||
enable for m_GDS.
|
||||
(X86_TUNE_USE_GATHER_4PARTS): Ditto.
|
||||
(X86_TUNE_USE_GATHER): Ditto.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/i386/avx2-gather-2.c: Adjust options to keep
|
||||
gather vectorization.
|
||||
* gcc.target/i386/avx2-gather-6.c: Ditto.
|
||||
* gcc.target/i386/avx512f-pr88464-1.c: Ditto.
|
||||
* gcc.target/i386/avx512f-pr88464-5.c: Ditto.
|
||||
* gcc.target/i386/avx512vl-pr88464-1.c: Ditto.
|
||||
* gcc.target/i386/avx512vl-pr88464-11.c: Ditto.
|
||||
* gcc.target/i386/avx512vl-pr88464-3.c: Ditto.
|
||||
* gcc.target/i386/avx512vl-pr88464-9.c: Ditto.
|
||||
* gcc.target/i386/pr88531-1b.c: Ditto.
|
||||
* gcc.target/i386/pr88531-1c.c: Ditto.
|
||||
|
||||
(cherry picked from commit 3064d1f5c48cb6ce1b4133570dd08ecca8abb52d)
|
||||
---
|
||||
gcc/config/i386/i386-options.cc | 5 +++++
|
||||
gcc/config/i386/x86-tune.def | 9 ++++++---
|
||||
gcc/testsuite/gcc.target/i386/avx2-gather-2.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/avx2-gather-6.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/pr88531-1b.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/pr88531-1c.c | 2 +-
|
||||
12 files changed, 21 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||
index fb2ed942f..9617fc162 100644
|
||||
--- a/gcc/config/i386/i386-options.cc
|
||||
+++ b/gcc/config/i386/i386-options.cc
|
||||
@@ -137,6 +137,11 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS)
|
||||
#define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT)
|
||||
#define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL)
|
||||
+/* Gather Data Sampling / CVE-2022-40982 / INTEL-SA-00828.
|
||||
+ Software mitigation. */
|
||||
+#define m_GDS (m_SKYLAKE | m_SKYLAKE_AVX512 | m_CANNONLAKE \
|
||||
+ | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
|
||||
+ | m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE)
|
||||
|
||||
#define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
|
||||
#define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
|
||||
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
|
||||
index e6b9e2125..4392709fc 100644
|
||||
--- a/gcc/config/i386/x86-tune.def
|
||||
+++ b/gcc/config/i386/x86-tune.def
|
||||
@@ -467,7 +467,8 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
|
||||
/* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2
|
||||
elements. */
|
||||
DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
|
||||
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC))
|
||||
+ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE
|
||||
+ | m_GENERIC | m_GDS))
|
||||
|
||||
/* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2
|
||||
elements. */
|
||||
@@ -477,7 +478,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts",
|
||||
/* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4
|
||||
elements. */
|
||||
DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
|
||||
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC))
|
||||
+ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE
|
||||
+ | m_GENERIC | m_GDS))
|
||||
|
||||
/* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4
|
||||
elements. */
|
||||
@@ -487,7 +489,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
|
||||
/* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
|
||||
elements. */
|
||||
DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
|
||||
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC))
|
||||
+ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
|
||||
+ | m_GENERIC | m_GDS))
|
||||
|
||||
/* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
|
||||
elements. */
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
|
||||
index ad5ef7310..978924b0f 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */
|
||||
+/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake -mtune=haswell" } */
|
||||
|
||||
#include "avx2-gather-1.c"
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
|
||||
index b9119581a..067b251e3 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=skylake" } */
|
||||
+/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details -mtune=haswell" } */
|
||||
|
||||
#include "avx2-gather-5.c"
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
|
||||
index 06d21bb01..d1a229861 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c
|
||||
@@ -1,6 +1,6 @@
|
||||
/* PR tree-optimization/88464 */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
||||
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */
|
||||
/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
|
||||
index 462e951fd..d7b0b2b28 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c
|
||||
@@ -1,6 +1,6 @@
|
||||
/* PR tree-optimization/88464 */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
||||
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell -fdump-tree-vect-details" } */
|
||||
/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
|
||||
index 55a28dddb..07439185e 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
|
||||
@@ -1,6 +1,6 @@
|
||||
/* PR tree-optimization/88464 */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
||||
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */
|
||||
/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
|
||||
index 969600885..3a9810827 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c
|
||||
@@ -1,6 +1,6 @@
|
||||
/* PR tree-optimization/88464 */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
||||
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */
|
||||
/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
|
||||
index 6b0c8a859..ac669e048 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
|
||||
@@ -1,6 +1,6 @@
|
||||
/* PR tree-optimization/88464 */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
||||
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell -fdump-tree-vect-details" } */
|
||||
/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
|
||||
index 3af568ab3..14a1083b6 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c
|
||||
@@ -1,6 +1,6 @@
|
||||
/* PR tree-optimization/88464 */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
||||
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell -fdump-tree-vect-details" } */
|
||||
/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1b.c b/gcc/testsuite/gcc.target/i386/pr88531-1b.c
|
||||
index 812c8a10f..e6df789de 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/pr88531-1b.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/pr88531-1b.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O3 -march=skylake -mfpmath=sse" } */
|
||||
+/* { dg-options "-O3 -march=skylake -mfpmath=sse -mtune=haswell" } */
|
||||
|
||||
#include "pr88531-1a.c"
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1c.c b/gcc/testsuite/gcc.target/i386/pr88531-1c.c
|
||||
index 43fc5913e..a093c87c0 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/pr88531-1c.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/pr88531-1c.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O3 -march=skylake-avx512 -mfpmath=sse" } */
|
||||
+/* { dg-options "-O3 -march=skylake-avx512 -mfpmath=sse -mtune=haswell" } */
|
||||
|
||||
#include "pr88531-1a.c"
|
||||
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,187 +0,0 @@
|
||||
From c269629130cb23252da2db026ce9ed13f57f69f4 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Thu, 10 Aug 2023 16:26:13 +0800
|
||||
Subject: [PATCH 12/32] Support -m[no-]gather -m[no-]scatter to enable/disable
|
||||
vectorization for all gather/scatter instructions
|
||||
|
||||
Rename original use_gather to use_gather_8parts, Support
|
||||
-mtune-ctrl={,^}use_gather to set/clear tune features
|
||||
use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather
|
||||
as alias of -mtune-ctrl=, use_gather, ^use_gather.
|
||||
|
||||
Similar for use_scatter.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/i386/i386-builtins.cc
|
||||
(ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
|
||||
* config/i386/i386-options.cc (parse_mtune_ctrl_str):
|
||||
Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
|
||||
8parts} for -mtune-ctrl={,^}{use_gather,use_scatter}.
|
||||
* config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
|
||||
for use_scatter_8parts
|
||||
* config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
|
||||
(TARGET_USE_GATHER_8PARTS): .. this.
|
||||
(TARGET_USE_SCATTER): Rename to ..
|
||||
(TARGET_USE_SCATTER_8PARTS): .. this.
|
||||
* config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to
|
||||
(X86_TUNE_USE_GATHER_8PARTS): .. this.
|
||||
(X86_TUNE_USE_SCATTER): Rename to
|
||||
(X86_TUNE_USE_SCATTER_8PARTS): .. this.
|
||||
* config/i386/i386.opt: Add new options mgather, mscatter.
|
||||
|
||||
(cherry picked from commit b2a927fb5343db363ea4361da0d6bcee227b6737)
|
||||
---
|
||||
gcc/config/i386/i386-builtins.cc | 2 +-
|
||||
gcc/config/i386/i386-options.cc | 54 +++++++++++++++++++++++---------
|
||||
gcc/config/i386/i386.cc | 2 +-
|
||||
gcc/config/i386/i386.h | 8 ++---
|
||||
gcc/config/i386/i386.opt | 4 +++
|
||||
gcc/config/i386/x86-tune.def | 4 +--
|
||||
6 files changed, 52 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
|
||||
index 050c6228a..8ed32e14f 100644
|
||||
--- a/gcc/config/i386/i386-builtins.cc
|
||||
+++ b/gcc/config/i386/i386-builtins.cc
|
||||
@@ -1790,7 +1790,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
|
||||
? !TARGET_USE_GATHER_2PARTS
|
||||
: (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
|
||||
? !TARGET_USE_GATHER_4PARTS
|
||||
- : !TARGET_USE_GATHER)))
|
||||
+ : !TARGET_USE_GATHER_8PARTS)))
|
||||
return NULL_TREE;
|
||||
|
||||
if ((TREE_CODE (index_type) != INTEGER_TYPE
|
||||
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||
index 9617fc162..3df1f0c41 100644
|
||||
--- a/gcc/config/i386/i386-options.cc
|
||||
+++ b/gcc/config/i386/i386-options.cc
|
||||
@@ -1705,20 +1705,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool dump)
|
||||
curr_feature_string++;
|
||||
clear = true;
|
||||
}
|
||||
- for (i = 0; i < X86_TUNE_LAST; i++)
|
||||
- {
|
||||
- if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
|
||||
- {
|
||||
- ix86_tune_features[i] = !clear;
|
||||
- if (dump)
|
||||
- fprintf (stderr, "Explicitly %s feature %s\n",
|
||||
- clear ? "clear" : "set", ix86_tune_feature_names[i]);
|
||||
- break;
|
||||
- }
|
||||
- }
|
||||
- if (i == X86_TUNE_LAST)
|
||||
- error ("unknown parameter to option %<-mtune-ctrl%>: %s",
|
||||
- clear ? curr_feature_string - 1 : curr_feature_string);
|
||||
+
|
||||
+ if (!strcmp (curr_feature_string, "use_gather"))
|
||||
+ {
|
||||
+ ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
|
||||
+ ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
|
||||
+ ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
|
||||
+ if (dump)
|
||||
+ fprintf (stderr, "Explicitly %s features use_gather_2parts,"
|
||||
+ " use_gather_4parts, use_gather_8parts\n",
|
||||
+ clear ? "clear" : "set");
|
||||
+
|
||||
+ }
|
||||
+ else if (!strcmp (curr_feature_string, "use_scatter"))
|
||||
+ {
|
||||
+ ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
|
||||
+ ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
|
||||
+ ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
|
||||
+ if (dump)
|
||||
+ fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
|
||||
+ " use_scatter_4parts, use_scatter_8parts\n",
|
||||
+ clear ? "clear" : "set");
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ for (i = 0; i < X86_TUNE_LAST; i++)
|
||||
+ {
|
||||
+ if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
|
||||
+ {
|
||||
+ ix86_tune_features[i] = !clear;
|
||||
+ if (dump)
|
||||
+ fprintf (stderr, "Explicitly %s feature %s\n",
|
||||
+ clear ? "clear" : "set", ix86_tune_feature_names[i]);
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (i == X86_TUNE_LAST)
|
||||
+ error ("unknown parameter to option %<-mtune-ctrl%>: %s",
|
||||
+ clear ? curr_feature_string - 1 : curr_feature_string);
|
||||
+ }
|
||||
curr_feature_string = next_feature_string;
|
||||
}
|
||||
while (curr_feature_string);
|
||||
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
|
||||
index 479fc6010..e75d37023 100644
|
||||
--- a/gcc/config/i386/i386.cc
|
||||
+++ b/gcc/config/i386/i386.cc
|
||||
@@ -18937,7 +18937,7 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
|
||||
? !TARGET_USE_SCATTER_2PARTS
|
||||
: (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
|
||||
? !TARGET_USE_SCATTER_4PARTS
|
||||
- : !TARGET_USE_SCATTER))
|
||||
+ : !TARGET_USE_SCATTER_8PARTS))
|
||||
return NULL_TREE;
|
||||
|
||||
if ((TREE_CODE (index_type) != INTEGER_TYPE
|
||||
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
|
||||
index 688aaabd3..aaa136ba0 100644
|
||||
--- a/gcc/config/i386/i386.h
|
||||
+++ b/gcc/config/i386/i386.h
|
||||
@@ -403,10 +403,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
|
||||
ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS]
|
||||
#define TARGET_USE_SCATTER_4PARTS \
|
||||
ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS]
|
||||
-#define TARGET_USE_GATHER \
|
||||
- ix86_tune_features[X86_TUNE_USE_GATHER]
|
||||
-#define TARGET_USE_SCATTER \
|
||||
- ix86_tune_features[X86_TUNE_USE_SCATTER]
|
||||
+#define TARGET_USE_GATHER_8PARTS \
|
||||
+ ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS]
|
||||
+#define TARGET_USE_SCATTER_8PARTS \
|
||||
+ ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS]
|
||||
#define TARGET_FUSE_CMP_AND_BRANCH_32 \
|
||||
ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32]
|
||||
#define TARGET_FUSE_CMP_AND_BRANCH_64 \
|
||||
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
|
||||
index 498fb454d..b154110d8 100644
|
||||
--- a/gcc/config/i386/i386.opt
|
||||
+++ b/gcc/config/i386/i386.opt
|
||||
@@ -1222,3 +1222,7 @@ Instructions number above which STFL stall penalty can be compensated.
|
||||
munroll-only-small-loops
|
||||
Target Var(ix86_unroll_only_small_loops) Init(0) Save
|
||||
Enable conservative small loop unrolling.
|
||||
+
|
||||
+mscatter
|
||||
+Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
|
||||
+Enable vectorization for scatter instruction.
|
||||
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
|
||||
index 4392709fc..bdb455d20 100644
|
||||
--- a/gcc/config/i386/x86-tune.def
|
||||
+++ b/gcc/config/i386/x86-tune.def
|
||||
@@ -488,13 +488,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
|
||||
|
||||
/* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
|
||||
elements. */
|
||||
-DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
|
||||
+DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
|
||||
~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
|
||||
| m_GENERIC | m_GDS))
|
||||
|
||||
/* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
|
||||
elements. */
|
||||
-DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter",
|
||||
+DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
|
||||
~(m_ZNVER4))
|
||||
|
||||
/* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,129 +0,0 @@
|
||||
From 764518a35e90a3e13c469275da9c3c7002fe1982 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Fri, 8 Sep 2023 09:22:43 +0800
|
||||
Subject: [PATCH 13/32] Remove constraint modifier % for
|
||||
fcmaddcph/fmaddcph/fcmulcph since they're not commutative.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR target/111306
|
||||
PR target/111335
|
||||
* config/i386/sse.md (int_comm): New int_attr.
|
||||
(fma_<complexopname>_<mode><sdc_maskz_name><round_name>):
|
||||
Remove % for Complex conjugate operations since they're not
|
||||
commutative.
|
||||
(fma_<complexpairopname>_<mode>_pair): Ditto.
|
||||
(<avx512>_<complexopname>_<mode>_mask<round_name>): Ditto.
|
||||
(cmul<conj_op><mode>3): Ditto.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/i386/pr111306.c: New test.
|
||||
|
||||
(cherry picked from commit f197392a16ffb1327f1d12ff8ff05f9295e015cb)
|
||||
---
|
||||
gcc/config/i386/sse.md | 16 ++++++++---
|
||||
gcc/testsuite/gcc.target/i386/pr111306.c | 36 ++++++++++++++++++++++++
|
||||
2 files changed, 48 insertions(+), 4 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/pr111306.c
|
||||
|
||||
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||||
index 3af159896..f25dd5f2b 100644
|
||||
--- a/gcc/config/i386/sse.md
|
||||
+++ b/gcc/config/i386/sse.md
|
||||
@@ -6318,6 +6318,14 @@
|
||||
[(UNSPEC_COMPLEX_FMA_PAIR "fmaddc")
|
||||
(UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc")])
|
||||
|
||||
+(define_int_attr int_comm
|
||||
+ [(UNSPEC_COMPLEX_FMA "")
|
||||
+ (UNSPEC_COMPLEX_FMA_PAIR "")
|
||||
+ (UNSPEC_COMPLEX_FCMA "")
|
||||
+ (UNSPEC_COMPLEX_FCMA_PAIR "")
|
||||
+ (UNSPEC_COMPLEX_FMUL "%")
|
||||
+ (UNSPEC_COMPLEX_FCMUL "")])
|
||||
+
|
||||
(define_int_attr conj_op
|
||||
[(UNSPEC_COMPLEX_FMA "")
|
||||
(UNSPEC_COMPLEX_FCMA "_conj")
|
||||
@@ -6431,7 +6439,7 @@
|
||||
(define_insn "fma_<complexopname>_<mode><sdc_maskz_name><round_name>"
|
||||
[(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
|
||||
(unspec:VF_AVX512FP16VL
|
||||
- [(match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "%v")
|
||||
+ [(match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "<int_comm>v")
|
||||
(match_operand:VF_AVX512FP16VL 2 "<round_nimm_predicate>" "<round_constraint>")
|
||||
(match_operand:VF_AVX512FP16VL 3 "<round_nimm_predicate>" "0")]
|
||||
UNSPEC_COMPLEX_F_C_MA))]
|
||||
@@ -6495,7 +6503,7 @@
|
||||
(define_insn "fma_<complexpairopname>_<mode>_pair"
|
||||
[(set (match_operand:VF1_AVX512VL 0 "register_operand" "=&v")
|
||||
(unspec:VF1_AVX512VL
|
||||
- [(match_operand:VF1_AVX512VL 1 "vector_operand" "%v")
|
||||
+ [(match_operand:VF1_AVX512VL 1 "vector_operand" "<int_comm>v")
|
||||
(match_operand:VF1_AVX512VL 2 "bcst_vector_operand" "vmBr")
|
||||
(match_operand:VF1_AVX512VL 3 "vector_operand" "0")]
|
||||
UNSPEC_COMPLEX_F_C_MA_PAIR))]
|
||||
@@ -6562,7 +6570,7 @@
|
||||
[(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
|
||||
(vec_merge:VF_AVX512FP16VL
|
||||
(unspec:VF_AVX512FP16VL
|
||||
- [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v")
|
||||
+ [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "<int_comm>v")
|
||||
(match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>")
|
||||
(match_operand:VF_AVX512FP16VL 3 "register_operand" "0")]
|
||||
UNSPEC_COMPLEX_F_C_MA)
|
||||
@@ -6586,7 +6594,7 @@
|
||||
(define_insn "<avx512>_<complexopname>_<mode><maskc_name><round_name>"
|
||||
[(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
|
||||
(unspec:VF_AVX512FP16VL
|
||||
- [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v")
|
||||
+ [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "<int_comm>v")
|
||||
(match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>")]
|
||||
UNSPEC_COMPLEX_F_C_MUL))]
|
||||
"TARGET_AVX512FP16 && <round_mode512bit_condition>"
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/pr111306.c b/gcc/testsuite/gcc.target/i386/pr111306.c
|
||||
new file mode 100644
|
||||
index 000000000..541725ebd
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/pr111306.c
|
||||
@@ -0,0 +1,36 @@
|
||||
+/* { dg-do run } */
|
||||
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
|
||||
+/* { dg-require-effective-target avx512fp16 } */
|
||||
+
|
||||
+#define AVX512FP16
|
||||
+#include "avx512f-helper.h"
|
||||
+
|
||||
+__attribute__((optimize("O2"),noipa))
|
||||
+void func1(_Float16 *a, _Float16 *b, int n, _Float16 *c) {
|
||||
+ __m512h rA = _mm512_loadu_ph(a);
|
||||
+ for (int i = 0; i < n; i += 32) {
|
||||
+ __m512h rB = _mm512_loadu_ph(b + i);
|
||||
+ _mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA));
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test_512 (void)
|
||||
+{
|
||||
+ int n = 32;
|
||||
+ _Float16 a[n], b[n], c[n];
|
||||
+ _Float16 exp[n];
|
||||
+ for (int i = 1; i <= n; i++) {
|
||||
+ a[i - 1] = i & 1 ? -i : i;
|
||||
+ b[i - 1] = i;
|
||||
+ }
|
||||
+
|
||||
+ func1(a, b, n, c);
|
||||
+ for (int i = 0; i < n / 32; i += 2) {
|
||||
+ if (c[i] != a[i] * b[i] + a[i+1] * b[i+1]
|
||||
+ || c[i+1] != a[i] * b[i+1] - a[i+1]*b[i])
|
||||
+ __builtin_abort ();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,106 +0,0 @@
|
||||
From afd539adfe762adb57863299a11987b7e20e7987 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Wed, 5 Jul 2023 13:45:11 +0800
|
||||
Subject: [PATCH 14/32] Disparage slightly for the alternative which move
|
||||
DFmode between SSE_REGS and GENERAL_REGS.
|
||||
|
||||
For testcase
|
||||
|
||||
void __cond_swap(double* __x, double* __y) {
|
||||
bool __r = (*__x < *__y);
|
||||
auto __tmp = __r ? *__x : *__y;
|
||||
*__y = __r ? *__y : *__x;
|
||||
*__x = __tmp;
|
||||
}
|
||||
|
||||
GCC-14 with -O2 and -march=x86-64 options generates the following code:
|
||||
|
||||
__cond_swap(double*, double*):
|
||||
movsd xmm1, QWORD PTR [rdi]
|
||||
movsd xmm0, QWORD PTR [rsi]
|
||||
comisd xmm0, xmm1
|
||||
jbe .L2
|
||||
movq rax, xmm1
|
||||
movapd xmm1, xmm0
|
||||
movq xmm0, rax
|
||||
.L2:
|
||||
movsd QWORD PTR [rsi], xmm1
|
||||
movsd QWORD PTR [rdi], xmm0
|
||||
ret
|
||||
|
||||
rax is used to save and restore DFmode value. In RA both GENERAL_REGS
|
||||
and SSE_REGS cost zero since we didn't disparage the
|
||||
alternative in movdf_internal pattern, according to register
|
||||
allocation order, GENERAL_REGS is allocated. The patch adds ? for
|
||||
alternative (r,v) and (v,r) just like we did for movsf/hf/bf_internal
|
||||
pattern, after that we get optimal RA.
|
||||
|
||||
__cond_swap:
|
||||
.LFB0:
|
||||
.cfi_startproc
|
||||
movsd (%rdi), %xmm1
|
||||
movsd (%rsi), %xmm0
|
||||
comisd %xmm1, %xmm0
|
||||
jbe .L2
|
||||
movapd %xmm1, %xmm2
|
||||
movapd %xmm0, %xmm1
|
||||
movapd %xmm2, %xmm0
|
||||
.L2:
|
||||
movsd %xmm1, (%rsi)
|
||||
movsd %xmm0, (%rdi)
|
||||
ret
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR target/110170
|
||||
* config/i386/i386.md (movdf_internal): Disparage slightly for
|
||||
2 alternatives (r,v) and (v,r) by adding constraint modifier
|
||||
'?'.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/i386/pr110170-3.c: New test.
|
||||
|
||||
(cherry picked from commit 37a231cc7594d12ba0822077018aad751a6fb94e)
|
||||
---
|
||||
gcc/config/i386/i386.md | 4 ++--
|
||||
gcc/testsuite/gcc.target/i386/pr110170-3.c | 11 +++++++++++
|
||||
2 files changed, 13 insertions(+), 2 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/pr110170-3.c
|
||||
|
||||
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
|
||||
index be07be10d..71691f598 100644
|
||||
--- a/gcc/config/i386/i386.md
|
||||
+++ b/gcc/config/i386/i386.md
|
||||
@@ -3582,9 +3582,9 @@
|
||||
;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
|
||||
(define_insn "*movdf_internal"
|
||||
[(set (match_operand:DF 0 "nonimmediate_operand"
|
||||
- "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r ,o ,r ,m")
|
||||
+ "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m")
|
||||
(match_operand:DF 1 "general_operand"
|
||||
- "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))]
|
||||
+ "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))]
|
||||
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
|
||||
&& (lra_in_progress || reload_completed
|
||||
|| !CONST_DOUBLE_P (operands[1])
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/pr110170-3.c b/gcc/testsuite/gcc.target/i386/pr110170-3.c
|
||||
new file mode 100644
|
||||
index 000000000..70daa89e9
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/pr110170-3.c
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* { dg-do compile { target { ! ia32 } } } */
|
||||
+/* { dg-options "-O2 -fno-if-conversion -fno-if-conversion2" } */
|
||||
+/* { dg-final { scan-assembler-not {(?n)movq.*r} } } */
|
||||
+
|
||||
+void __cond_swap(double* __x, double* __y) {
|
||||
+ _Bool __r = (*__x < *__y);
|
||||
+ double __tmp = __r ? *__x : *__y;
|
||||
+ *__y = __r ? *__y : *__x;
|
||||
+ *__x = __tmp;
|
||||
+}
|
||||
+
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,163 +0,0 @@
|
||||
From 88516507757932c1e67ce99d240596935971d2d0 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Thu, 9 Nov 2023 13:20:05 +0800
|
||||
Subject: [PATCH 15/32] Fix wrong code due to vec_merge + pcmp to blendvb
|
||||
splitter.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR target/112443
|
||||
* config/i386/sse.md (*avx2_pcmp<mode>3_4): Fix swap condition
|
||||
from LT to GT since there's a NOT in the pattern.
|
||||
(*avx2_pcmp<mode>3_5): Ditto.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* g++.target/i386/pr112443.C: New test.
|
||||
|
||||
(cherry picked from commit 9a0cc04b9c9b02426762892b88efc5c44ba546bd)
|
||||
---
|
||||
gcc/config/i386/sse.md | 4 +-
|
||||
gcc/testsuite/g++.target/i386/pr112443.C | 108 +++++++++++++++++++++++
|
||||
2 files changed, 110 insertions(+), 2 deletions(-)
|
||||
create mode 100644 gcc/testsuite/g++.target/i386/pr112443.C
|
||||
|
||||
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||||
index f25dd5f2b..23b858ab2 100644
|
||||
--- a/gcc/config/i386/sse.md
|
||||
+++ b/gcc/config/i386/sse.md
|
||||
@@ -16358,7 +16358,7 @@
|
||||
(match_dup 4))]
|
||||
UNSPEC_BLENDV))]
|
||||
{
|
||||
- if (INTVAL (operands[5]) == 1)
|
||||
+ if (INTVAL (operands[5]) == 5)
|
||||
std::swap (operands[1], operands[2]);
|
||||
operands[3] = gen_lowpart (<MODE>mode, operands[3]);
|
||||
})
|
||||
@@ -16388,7 +16388,7 @@
|
||||
(match_dup 4))]
|
||||
UNSPEC_BLENDV))]
|
||||
{
|
||||
- if (INTVAL (operands[5]) == 1)
|
||||
+ if (INTVAL (operands[5]) == 5)
|
||||
std::swap (operands[1], operands[2]);
|
||||
})
|
||||
|
||||
diff --git a/gcc/testsuite/g++.target/i386/pr112443.C b/gcc/testsuite/g++.target/i386/pr112443.C
|
||||
new file mode 100644
|
||||
index 000000000..ebfa9b4a7
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/g++.target/i386/pr112443.C
|
||||
@@ -0,0 +1,108 @@
|
||||
+/* { dg-do run } */
|
||||
+/* { dg-require-effective-target avx512bw } */
|
||||
+/* { dg-require-effective-target avx512vl } */
|
||||
+/* { dg-options "-O2 -std=c++17 -mavx512bw -mavx512vl" } */
|
||||
+
|
||||
+#include <cstdint>
|
||||
+#include <x86intrin.h>
|
||||
+#include <functional>
|
||||
+#include <ostream>
|
||||
+
|
||||
+#define AVX512BW
|
||||
+#define AVX512VL
|
||||
+
|
||||
+#include "avx512f-helper.h"
|
||||
+
|
||||
+struct TensorIteratorBase{
|
||||
+ char* in;
|
||||
+ char* out;
|
||||
+
|
||||
+ void for_each(std::function<void(char*, char*, int64_t size)> loop){
|
||||
+ loop(out, in, 32);
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+class Vectorized {
|
||||
+protected:
|
||||
+ __m256i values;
|
||||
+
|
||||
+ static inline __m256i invert(const __m256i& v) {
|
||||
+ const auto ones = _mm256_set1_epi64x(-1);
|
||||
+ return _mm256_xor_si256(ones, v);
|
||||
+ }
|
||||
+public:
|
||||
+ operator __m256i() const {
|
||||
+ return values;
|
||||
+ }
|
||||
+
|
||||
+ static constexpr int size() {
|
||||
+ return 32;
|
||||
+ }
|
||||
+
|
||||
+ Vectorized() {}
|
||||
+ Vectorized(__m256i v) : values(v) {}
|
||||
+ Vectorized(uint8_t v) { values = _mm256_set1_epi8(v); }
|
||||
+ static Vectorized blendv(const Vectorized& a, const Vectorized& b,
|
||||
+ const Vectorized& mask) {
|
||||
+ return _mm256_blendv_epi8(a, b, mask);
|
||||
+ }
|
||||
+ static Vectorized loadu(const void* ptr) {
|
||||
+ return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
|
||||
+ }
|
||||
+ void store(void* ptr) const {
|
||||
+ _mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
|
||||
+ }
|
||||
+
|
||||
+ Vectorized operator<(const Vectorized& other) const {
|
||||
+ __m256i max = _mm256_max_epu8(values, other);
|
||||
+ return invert(_mm256_cmpeq_epi8(max, values));
|
||||
+ }
|
||||
+ Vectorized operator-(const Vectorized& b) {
|
||||
+ return _mm256_sub_epi8(values, b);
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+std::ostream& operator<<(std::ostream& stream, const Vectorized& vec) {
|
||||
+ uint8_t buf[Vectorized::size()];
|
||||
+ vec.store(buf);
|
||||
+ stream << "vec[";
|
||||
+ for (int i = 0; i != Vectorized::size(); i++) {
|
||||
+ if (i != 0)
|
||||
+ stream << ", ";
|
||||
+ stream << buf[i]*1;
|
||||
+ }
|
||||
+ stream << "]";
|
||||
+ return stream;
|
||||
+}
|
||||
+
|
||||
+void run(TensorIteratorBase iter){
|
||||
+ Vectorized zero_vec(0);
|
||||
+ Vectorized one_vec(1);
|
||||
+
|
||||
+ iter.for_each([=](char* out, char* in, int64_t size) {
|
||||
+ for (int64_t i = 0; i <= size - Vectorized::size(); i += Vectorized::size()) {
|
||||
+ auto self_vec = Vectorized::loadu(in + i);
|
||||
+ auto left = Vectorized::blendv(zero_vec, one_vec, zero_vec < self_vec);
|
||||
+ auto right = Vectorized::blendv(zero_vec, one_vec, self_vec < zero_vec);
|
||||
+ auto outv = left - right;
|
||||
+ outv.store(out + i);
|
||||
+ }
|
||||
+ });
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test_256 (){
|
||||
+ char in[32];
|
||||
+ char out[32];
|
||||
+ for(auto& x: in) x = 1;
|
||||
+ run(TensorIteratorBase{in, out});
|
||||
+ Vectorized::loadu (out);
|
||||
+ for (int i = 0; i != 32; i++)
|
||||
+ if (out[i] != 1)
|
||||
+ __builtin_abort ();
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test_128 ()
|
||||
+{
|
||||
+}
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,151 +0,0 @@
|
||||
From 204ffa7f503411ccac0161c951726274648b6374 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Thu, 7 Dec 2023 09:17:27 +0800
|
||||
Subject: [PATCH 16/32] Don't assume it's AVX_U128_CLEAN after call_insn whose
|
||||
abi.mode_clobber(V4DImode) doesn't contain all SSE_REGS.
|
||||
|
||||
If the function doesn't clobber any SSE registers or only clobbers the
|
||||
128-bit part, then vzeroupper isn't issued before the function exit.
|
||||
So the status is not CLEAN but ANY after the function.
|
||||
|
||||
Also for sibling_call, it's safe to issue a vzeroupper. Also there
|
||||
could be missing vzeroupper since there's no mode_exit for
|
||||
sibling_call_p.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR target/112891
|
||||
* config/i386/i386.cc (ix86_avx_u128_mode_after): Return
|
||||
AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
|
||||
align with ix86_avx_u128_mode_needed.
|
||||
(ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for
|
||||
sibling_call.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/i386/pr112891.c: New test.
|
||||
* gcc.target/i386/pr112891-2.c: New test.
|
||||
|
||||
(cherry picked from commit fc189a08f5b7ad5889bd4c6b320c1dd99dd5d642)
|
||||
---
|
||||
gcc/config/i386/i386.cc | 22 +++++++++++++---
|
||||
gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++
|
||||
gcc/testsuite/gcc.target/i386/pr112891.c | 29 +++++++++++++++++++++
|
||||
3 files changed, 78 insertions(+), 3 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c
|
||||
|
||||
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
|
||||
index e75d37023..60f3296b0 100644
|
||||
--- a/gcc/config/i386/i386.cc
|
||||
+++ b/gcc/config/i386/i386.cc
|
||||
@@ -14416,8 +14416,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
|
||||
modes wider than 256 bits. It's only safe to issue a
|
||||
vzeroupper if all SSE registers are clobbered. */
|
||||
const function_abi &abi = insn_callee_abi (insn);
|
||||
- if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
|
||||
- abi.mode_clobbers (V4DImode)))
|
||||
+ /* Should be safe to issue an vzeroupper before sibling_call_p.
|
||||
+ Also there not mode_exit for sibling_call, so there could be
|
||||
+ missing vzeroupper for that. */
|
||||
+ if (!(SIBLING_CALL_P (insn)
|
||||
+ || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
|
||||
+ abi.mode_clobbers (V4DImode))))
|
||||
return AVX_U128_ANY;
|
||||
|
||||
return AVX_U128_CLEAN;
|
||||
@@ -14555,7 +14559,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
|
||||
bool avx_upper_reg_found = false;
|
||||
note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
|
||||
|
||||
- return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
|
||||
+ if (avx_upper_reg_found)
|
||||
+ return AVX_U128_DIRTY;
|
||||
+
|
||||
+ /* If the function desn't clobber any sse registers or only clobber
|
||||
+ 128-bit part, Then vzeroupper isn't issued before the function exit.
|
||||
+ the status not CLEAN but ANY after the function. */
|
||||
+ const function_abi &abi = insn_callee_abi (insn);
|
||||
+ if (!(SIBLING_CALL_P (insn)
|
||||
+ || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
|
||||
+ abi.mode_clobbers (V4DImode))))
|
||||
+ return AVX_U128_ANY;
|
||||
+
|
||||
+ return AVX_U128_CLEAN;
|
||||
}
|
||||
|
||||
/* Otherwise, return current mode. Remember that if insn
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c
|
||||
new file mode 100644
|
||||
index 000000000..164c3985d
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
|
||||
@@ -0,0 +1,30 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mavx2 -O3" } */
|
||||
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
|
||||
+
|
||||
+void
|
||||
+__attribute__((noinline))
|
||||
+bar (double* a)
|
||||
+{
|
||||
+ a[0] = 1.0;
|
||||
+ a[1] = 2.0;
|
||||
+}
|
||||
+
|
||||
+double
|
||||
+__attribute__((noinline))
|
||||
+foo (double* __restrict a, double* b)
|
||||
+{
|
||||
+ a[0] += b[0];
|
||||
+ a[1] += b[1];
|
||||
+ a[2] += b[2];
|
||||
+ a[3] += b[3];
|
||||
+ bar (b);
|
||||
+ return a[5] + b[5];
|
||||
+}
|
||||
+
|
||||
+double
|
||||
+foo1 (double* __restrict a, double* b)
|
||||
+{
|
||||
+ double c = foo (a, b);
|
||||
+ return __builtin_exp (c);
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c
|
||||
new file mode 100644
|
||||
index 000000000..dbf6c6794
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/pr112891.c
|
||||
@@ -0,0 +1,29 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mavx2 -O3" } */
|
||||
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
|
||||
+
|
||||
+void
|
||||
+__attribute__((noinline))
|
||||
+bar (double* a)
|
||||
+{
|
||||
+ a[0] = 1.0;
|
||||
+ a[1] = 2.0;
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+__attribute__((noinline))
|
||||
+foo (double* __restrict a, double* b)
|
||||
+{
|
||||
+ a[0] += b[0];
|
||||
+ a[1] += b[1];
|
||||
+ a[2] += b[2];
|
||||
+ a[3] += b[3];
|
||||
+ bar (b);
|
||||
+}
|
||||
+
|
||||
+double
|
||||
+foo1 (double* __restrict a, double* b)
|
||||
+{
|
||||
+ foo (a, b);
|
||||
+ return __builtin_exp (b[1]);
|
||||
+}
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,142 +0,0 @@
|
||||
From 19ee37b11702c86d7ed271e9e1d00e23cc4ab93c Mon Sep 17 00:00:00 2001
|
||||
From: Jan Hubicka <jh@suse.cz>
|
||||
Date: Fri, 29 Dec 2023 23:51:03 +0100
|
||||
Subject: [PATCH 17/32] Disable FMADD in chains for Zen4 and generic
|
||||
|
||||
this patch disables use of FMA in matrix multiplication loop for generic (for
|
||||
x86-64-v3) and zen4. I tested this on zen4 and Xeon Gold 6212U.
|
||||
|
||||
For Intel this is neutral both on the matrix multiplication microbenchmark
|
||||
(attached) and spec2k17 where the difference was within noise for Core.
|
||||
|
||||
On core the micro-benchmark runs as follows:
|
||||
|
||||
With FMA:
|
||||
|
||||
578,500,241 cycles:u # 3.645 GHz
|
||||
( +- 0.12% )
|
||||
753,318,477 instructions:u # 1.30 insn per
|
||||
cycle ( +- 0.00% )
|
||||
125,417,701 branches:u # 790.227 M/sec
|
||||
( +- 0.00% )
|
||||
0.159146 +- 0.000363 seconds time elapsed ( +- 0.23% )
|
||||
|
||||
No FMA:
|
||||
|
||||
577,573,960 cycles:u # 3.514 GHz
|
||||
( +- 0.15% )
|
||||
878,318,479 instructions:u # 1.52 insn per
|
||||
cycle ( +- 0.00% )
|
||||
125,417,702 branches:u # 763.035 M/sec
|
||||
( +- 0.00% )
|
||||
0.164734 +- 0.000321 seconds time elapsed ( +- 0.19% )
|
||||
|
||||
So the cycle count is unchanged and discrete multiply+add takes same time as
|
||||
FMA.
|
||||
|
||||
While on zen:
|
||||
|
||||
With FMA:
|
||||
484875179 cycles:u # 3.599 GHz
|
||||
( +- 0.05% ) (82.11%)
|
||||
752031517 instructions:u # 1.55 insn per
|
||||
cycle
|
||||
125106525 branches:u # 928.712 M/sec
|
||||
( +- 0.03% ) (85.09%)
|
||||
128356 branch-misses:u # 0.10% of all
|
||||
branches ( +- 0.06% ) (83.58%)
|
||||
|
||||
No FMA:
|
||||
375875209 cycles:u # 3.592 GHz
|
||||
( +- 0.08% ) (80.74%)
|
||||
875725341 instructions:u # 2.33 insn per
|
||||
cycle
|
||||
124903825 branches:u # 1.194 G/sec
|
||||
( +- 0.04% ) (84.59%)
|
||||
0.105203 +- 0.000188 seconds time elapsed ( +- 0.18% )
|
||||
|
||||
The difference is that Cores understand the fact that fmadd does not need
|
||||
all three parameters to start computation, while Zen cores don't.
|
||||
|
||||
Since this seems a noticeable win on Zen and no loss on Core, it seems like a good
|
||||
default for generic.
|
||||
|
||||
float a[SIZE][SIZE];
|
||||
float b[SIZE][SIZE];
|
||||
float c[SIZE][SIZE];
|
||||
|
||||
void init(void)
|
||||
{
|
||||
int i, j, k;
|
||||
for(i=0; i<SIZE; ++i)
|
||||
{
|
||||
for(j=0; j<SIZE; ++j)
|
||||
{
|
||||
a[i][j] = (float)i + j;
|
||||
b[i][j] = (float)i - j;
|
||||
c[i][j] = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mult(void)
|
||||
{
|
||||
int i, j, k;
|
||||
|
||||
for(i=0; i<SIZE; ++i)
|
||||
{
|
||||
for(j=0; j<SIZE; ++j)
|
||||
{
|
||||
for(k=0; k<SIZE; ++k)
|
||||
{
|
||||
c[i][j] += a[i][k] * b[k][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
clock_t s, e;
|
||||
|
||||
init();
|
||||
s=clock();
|
||||
mult();
|
||||
e=clock();
|
||||
printf(" mult took %10d clocks\n", (int)(e-s));
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/i386/x86-tune.def (X86_TUNE_AVOID_128FMA_CHAINS,
|
||||
X86_TUNE_AVOID_256FMA_CHAINS): Enable for znver4 and generic.
|
||||
---
|
||||
gcc/config/i386/x86-tune.def | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
|
||||
index bdb455d20..fd095f3ec 100644
|
||||
--- a/gcc/config/i386/x86-tune.def
|
||||
+++ b/gcc/config/i386/x86-tune.def
|
||||
@@ -499,12 +499,13 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
|
||||
|
||||
/* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
|
||||
smaller FMA chain. */
|
||||
-DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
|
||||
+DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2
|
||||
+ | m_ZNVER3 | m_ZNVER4 | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
|
||||
smaller FMA chain. */
|
||||
DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3
|
||||
- | m_ALDERLAKE | m_SAPPHIRERAPIDS)
|
||||
+ | m_ZNVER4 | m_ALDERLAKE | m_SAPPHIRERAPIDS | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or
|
||||
smaller FMA chain. */
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,47 +0,0 @@
|
||||
From 411d1f0bcc0d1c8018fdf5fe84ad2404929556ec Mon Sep 17 00:00:00 2001
|
||||
From: Haochen Jiang <haochen.jiang@intel.com>
|
||||
Date: Fri, 16 Sep 2022 13:59:01 +0800
|
||||
Subject: [PATCH 18/32] Initial Raptorlake Support
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h:
|
||||
(get_intel_cpu): Handle Raptorlake.
|
||||
* common/config/i386/i386-common.cc:
|
||||
(processor_alias_table): Add Raptorlake.
|
||||
|
||||
(cherry picked from commit 470a0659b508d684148f362c4dc0eccf5a83a23e)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 2 ++
|
||||
gcc/common/config/i386/i386-common.cc | 2 ++
|
||||
2 files changed, 4 insertions(+)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 316ad3cb3..13d0f4cd8 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -508,6 +508,8 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
case 0x97:
|
||||
case 0x9a:
|
||||
/* Alder Lake. */
|
||||
+ case 0xb7:
|
||||
+ /* Raptor Lake. */
|
||||
cpu = "alderlake";
|
||||
CHECK___builtin_cpu_is ("corei7");
|
||||
CHECK___builtin_cpu_is ("alderlake");
|
||||
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||||
index f650e255f..c1d700f89 100644
|
||||
--- a/gcc/common/config/i386/i386-common.cc
|
||||
+++ b/gcc/common/config/i386/i386-common.cc
|
||||
@@ -1939,6 +1939,8 @@ const pta processor_alias_table[] =
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
|
||||
{"alderlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
+ {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
|
||||
+ M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
{"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
|
||||
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,49 +0,0 @@
|
||||
From 87cea29ede520f4a5af01dff7071ab1d23bd47b5 Mon Sep 17 00:00:00 2001
|
||||
From: "Hu, Lin1" <lin1.hu@intel.com>
|
||||
Date: Fri, 16 Sep 2022 11:25:13 +0800
|
||||
Subject: [PATCH 19/32] Initial Meteorlake Support
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h:
|
||||
(get_intel_cpu): Handle Meteorlake.
|
||||
* common/config/i386/i386-common.cc:
|
||||
(processor_alias_table): Add Meteorlake.
|
||||
|
||||
(cherry picked from commit fd206f0e95fb6f41b96eaaaab1dc0c30378e5e08)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 4 ++++
|
||||
gcc/common/config/i386/i386-common.cc | 2 ++
|
||||
2 files changed, 6 insertions(+)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 13d0f4cd8..37af92d6b 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -510,6 +510,10 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
/* Alder Lake. */
|
||||
case 0xb7:
|
||||
/* Raptor Lake. */
|
||||
+ case 0xb5:
|
||||
+ case 0xaa:
|
||||
+ case 0xac:
|
||||
+ /* Meteor Lake. */
|
||||
cpu = "alderlake";
|
||||
CHECK___builtin_cpu_is ("corei7");
|
||||
CHECK___builtin_cpu_is ("alderlake");
|
||||
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||||
index c1d700f89..cfee672fb 100644
|
||||
--- a/gcc/common/config/i386/i386-common.cc
|
||||
+++ b/gcc/common/config/i386/i386-common.cc
|
||||
@@ -1941,6 +1941,8 @@ const pta processor_alias_table[] =
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
{"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
+ {"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
|
||||
+ M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
{"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
|
||||
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,691 +0,0 @@
|
||||
From c11301c7780213ddf46a0bcdb06079af485f431c Mon Sep 17 00:00:00 2001
|
||||
From: Hongyu Wang <hongyu.wang@intel.com>
|
||||
Date: Fri, 4 Nov 2022 15:50:55 +0800
|
||||
Subject: [PATCH 20/32] Support Intel AMX-FP16 ISA
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_available_features): Detect
|
||||
amx-fp16.
|
||||
* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_FP16_SET,
|
||||
OPTION_MASK_ISA2_AMX_FP16_UNSET): New macros.
|
||||
(ix86_handle_option): Handle -mamx-fp16.
|
||||
* common/config/i386/i386-cpuinfo.h (enum processor_features):
|
||||
Add FEATURE_AMX_FP16.
|
||||
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
|
||||
amx-fp16.
|
||||
* config.gcc: Add amxfp16intrin.h.
|
||||
* config/i386/cpuid.h (bit_AMX_FP16): New.
|
||||
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
|
||||
__AMX_FP16__.
|
||||
* config/i386/i386-isa.def: Add DEF_PTA for AMX_FP16.
|
||||
* config/i386/i386-options.cc (isa2_opts): Add -mamx-fp16.
|
||||
(ix86_valid_target_attribute_inner_p): Add new ATTR.
|
||||
(ix86_option_override_internal): Handle AMX-FP16.
|
||||
* config/i386/i386.opt: Add -mamx-fp16.
|
||||
* config/i386/immintrin.h: Include amxfp16intrin.h.
|
||||
* doc/extend.texi: Document -mamx-fp16.
|
||||
* doc/invoke.texi: Document amx-fp16.
|
||||
* doc/sourcebuild.texi: Document amx_fp16.
|
||||
* config/i386/amxfp16intrin.h: New file.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* g++.dg/other/i386-2.C: Add -mamx-fp16.
|
||||
* g++.dg/other/i386-3.C: Ditto.
|
||||
* gcc.target/i386/sse-12.c: Ditto.
|
||||
* gcc.target/i386/sse-13.c: Ditto.
|
||||
* gcc.target/i386/sse-14.c: Ditto.
|
||||
* gcc.target/i386/sse-22.c: Ditto.
|
||||
* gcc.target/i386/sse-23.c: Ditto.
|
||||
* lib/target-supports.exp: (check_effective_target_amx_fp16):
|
||||
New proc.
|
||||
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
|
||||
* gcc.target/i386/amx-check.h: Add AMX_FP16.
|
||||
* gcc.target/i386/amx-helper.h: New file to support amx-fp16.
|
||||
* gcc.target/i386/amxfp16-asmatt-1.c: New test.
|
||||
* gcc.target/i386/amxfp16-asmintel-1.c: Ditto.
|
||||
* gcc.target/i386/amxfp16-dpfp16ps-2.c: Ditto.
|
||||
|
||||
Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
|
||||
|
||||
(cherry picked from commit 2b4a03962a0fe18cadc944d90f1fb85a40004226)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 5 ++
|
||||
gcc/common/config/i386/i386-common.cc | 15 +++++
|
||||
gcc/common/config/i386/i386-cpuinfo.h | 1 +
|
||||
gcc/common/config/i386/i386-isas.h | 1 +
|
||||
gcc/config.gcc | 3 +-
|
||||
gcc/config/i386/amxfp16intrin.h | 46 ++++++++++++++
|
||||
gcc/config/i386/cpuid.h | 1 +
|
||||
gcc/config/i386/i386-c.cc | 2 +
|
||||
gcc/config/i386/i386-isa.def | 1 +
|
||||
gcc/config/i386/i386-options.cc | 4 +-
|
||||
gcc/config/i386/i386.opt | 4 ++
|
||||
gcc/config/i386/immintrin.h | 2 +
|
||||
gcc/doc/extend.texi | 5 ++
|
||||
gcc/doc/invoke.texi | 9 ++-
|
||||
gcc/doc/sourcebuild.texi | 3 +
|
||||
gcc/testsuite/g++.dg/other/i386-2.C | 2 +-
|
||||
gcc/testsuite/g++.dg/other/i386-3.C | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/amx-check.h | 3 +
|
||||
gcc/testsuite/gcc.target/i386/amx-helper.h | 61 +++++++++++++++++++
|
||||
.../gcc.target/i386/amxfp16-asmatt-1.c | 13 ++++
|
||||
.../gcc.target/i386/amxfp16-asmintel-1.c | 10 +++
|
||||
.../gcc.target/i386/amxfp16-dpfp16ps-2.c | 57 +++++++++++++++++
|
||||
gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 +
|
||||
gcc/testsuite/gcc.target/i386/sse-12.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/sse-13.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/sse-14.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/sse-22.c | 4 +-
|
||||
gcc/testsuite/gcc.target/i386/sse-23.c | 2 +-
|
||||
gcc/testsuite/lib/target-supports.exp | 11 ++++
|
||||
29 files changed, 262 insertions(+), 13 deletions(-)
|
||||
create mode 100644 gcc/config/i386/amxfp16intrin.h
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/amx-helper.h
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 37af92d6b..5951a30aa 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -783,6 +783,11 @@ get_available_features (struct __processor_model *cpu_model,
|
||||
set_feature (FEATURE_AVX512BF16);
|
||||
}
|
||||
}
|
||||
+ if (amx_usable)
|
||||
+ {
|
||||
+ if (eax & bit_AMX_FP16)
|
||||
+ set_feature (FEATURE_AMX_FP16);
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Get Advanced Features at level 0xd (eax = 0xd, ecx = 1). */
|
||||
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||||
index cfee672fb..922db33ee 100644
|
||||
--- a/gcc/common/config/i386/i386-common.cc
|
||||
+++ b/gcc/common/config/i386/i386-common.cc
|
||||
@@ -107,6 +107,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
|
||||
#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
|
||||
#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
|
||||
+#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
|
||||
as -msse4.2. */
|
||||
@@ -275,6 +276,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_KL_UNSET \
|
||||
(OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
|
||||
#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
|
||||
+#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
|
||||
as -mno-sse4.1. */
|
||||
@@ -1125,6 +1127,19 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
}
|
||||
return true;
|
||||
|
||||
+ case OPT_mamx_fp16:
|
||||
+ if (value)
|
||||
+ {
|
||||
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_FP16_SET;
|
||||
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_SET;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_FP16_UNSET;
|
||||
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_UNSET;
|
||||
+ }
|
||||
+ return true;
|
||||
+
|
||||
case OPT_mfma:
|
||||
if (value)
|
||||
{
|
||||
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
index 82996ebb3..8f22897de 100644
|
||||
--- a/gcc/common/config/i386/i386-cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
@@ -240,6 +240,7 @@ enum processor_features
|
||||
FEATURE_X86_64_V2,
|
||||
FEATURE_X86_64_V3,
|
||||
FEATURE_X86_64_V4,
|
||||
+ FEATURE_AMX_FP16,
|
||||
CPU_FEATURE_MAX
|
||||
};
|
||||
|
||||
diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
|
||||
index 2d0646a68..95bab6da2 100644
|
||||
--- a/gcc/common/config/i386/i386-isas.h
|
||||
+++ b/gcc/common/config/i386/i386-isas.h
|
||||
@@ -175,4 +175,5 @@ ISA_NAMES_TABLE_START
|
||||
ISA_NAMES_TABLE_ENTRY("x86-64-v2", FEATURE_X86_64_V2, P_X86_64_V2, NULL)
|
||||
ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
|
||||
ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
|
||||
+ ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
|
||||
ISA_NAMES_TABLE_END
|
||||
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||
index 4a0ae9328..e2b4a23dc 100644
|
||||
--- a/gcc/config.gcc
|
||||
+++ b/gcc/config.gcc
|
||||
@@ -423,7 +423,8 @@ i[34567]86-*-* | x86_64-*-*)
|
||||
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
|
||||
amxbf16intrin.h x86gprintrin.h uintrintrin.h
|
||||
hresetintrin.h keylockerintrin.h avxvnniintrin.h
|
||||
- mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h"
|
||||
+ mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
|
||||
+ amxfp16intrin.h"
|
||||
;;
|
||||
ia64-*-*)
|
||||
extra_headers=ia64intrin.h
|
||||
diff --git a/gcc/config/i386/amxfp16intrin.h b/gcc/config/i386/amxfp16intrin.h
|
||||
new file mode 100644
|
||||
index 000000000..6a114741a
|
||||
--- /dev/null
|
||||
+++ b/gcc/config/i386/amxfp16intrin.h
|
||||
@@ -0,0 +1,46 @@
|
||||
+/* Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of GCC.
|
||||
+
|
||||
+ GCC is free software; you can redistribute it and/or modify
|
||||
+ it under the terms of the GNU General Public License as published by
|
||||
+ the Free Software Foundation; either version 3, or (at your option)
|
||||
+ any later version.
|
||||
+
|
||||
+ GCC is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+ GNU General Public License for more details.
|
||||
+
|
||||
+ Under Section 7 of GPL version 3, you are granted additional
|
||||
+ permissions described in the GCC Runtime Library Exception, version
|
||||
+ 3.1, as published by the Free Software Foundation.
|
||||
+
|
||||
+ You should have received a copy of the GNU General Public License and
|
||||
+ a copy of the GCC Runtime Library Exception along with this program;
|
||||
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#if !defined _IMMINTRIN_H_INCLUDED
|
||||
+#error "Never use <amxfp16intrin.h> directly; include <immintrin.h> instead."
|
||||
+#endif
|
||||
+
|
||||
+#ifndef _AMXFP16INTRIN_H_INCLUDED
|
||||
+#define _AMXFP16INTRIN_H_INCLUDED
|
||||
+
|
||||
+#if defined(__x86_64__)
|
||||
+#define _tile_dpfp16ps_internal(dst,src1,src2) \
|
||||
+ __asm__ volatile \
|
||||
+ ("{tdpfp16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpfp16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
|
||||
+
|
||||
+#define _tile_dpfp16ps(dst,src1,src2) \
|
||||
+ _tile_dpfp16ps_internal (dst,src1,src2)
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
+#ifdef __DISABLE_AMX_FP16__
|
||||
+#undef __DISABLE_AMX_FP16__
|
||||
+#pragma GCC pop_options
|
||||
+#endif /* __DISABLE_AMX_FP16__ */
|
||||
+
|
||||
+#endif /* _AMXFP16INTRIN_H_INCLUDED */
|
||||
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
|
||||
index 8b3dc2b1d..d6cd8d1bf 100644
|
||||
--- a/gcc/config/i386/cpuid.h
|
||||
+++ b/gcc/config/i386/cpuid.h
|
||||
@@ -27,6 +27,7 @@
|
||||
/* %eax */
|
||||
#define bit_AVXVNNI (1 << 4)
|
||||
#define bit_AVX512BF16 (1 << 5)
|
||||
+#define bit_AMX_FP16 (1 << 21)
|
||||
#define bit_HRESET (1 << 22)
|
||||
|
||||
/* %ecx */
|
||||
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
|
||||
index 3fec4c7e2..4269f29e6 100644
|
||||
--- a/gcc/config/i386/i386-c.cc
|
||||
+++ b/gcc/config/i386/i386-c.cc
|
||||
@@ -633,6 +633,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__WIDEKL__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI)
|
||||
def_or_undef (parse_in, "__AVXVNNI__");
|
||||
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP16)
|
||||
+ def_or_undef (parse_in, "__AMX_FP16__");
|
||||
if (TARGET_IAMCU)
|
||||
{
|
||||
def_or_undef (parse_in, "__iamcu");
|
||||
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
|
||||
index 83659d0be..c7305c01b 100644
|
||||
--- a/gcc/config/i386/i386-isa.def
|
||||
+++ b/gcc/config/i386/i386-isa.def
|
||||
@@ -109,3 +109,4 @@ DEF_PTA(KL)
|
||||
DEF_PTA(WIDEKL)
|
||||
DEF_PTA(AVXVNNI)
|
||||
DEF_PTA(AVX512FP16)
|
||||
+DEF_PTA(AMX_FP16)
|
||||
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||
index 3df1f0c41..3edb7094e 100644
|
||||
--- a/gcc/config/i386/i386-options.cc
|
||||
+++ b/gcc/config/i386/i386-options.cc
|
||||
@@ -230,7 +230,8 @@ static struct ix86_target_opts isa2_opts[] =
|
||||
{ "-mkl", OPTION_MASK_ISA2_KL },
|
||||
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
|
||||
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
|
||||
- { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }
|
||||
+ { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
|
||||
+ { "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 }
|
||||
};
|
||||
static struct ix86_target_opts isa_opts[] =
|
||||
{
|
||||
@@ -1074,6 +1075,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
||||
IX86_ATTR_ISA ("hreset", OPT_mhreset),
|
||||
IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
|
||||
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
|
||||
+ IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16),
|
||||
|
||||
/* enum options */
|
||||
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
|
||||
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
|
||||
index b154110d8..52c6f02ee 100644
|
||||
--- a/gcc/config/i386/i386.opt
|
||||
+++ b/gcc/config/i386/i386.opt
|
||||
@@ -1226,3 +1226,7 @@ Enable conservative small loop unrolling.
|
||||
mscatter
|
||||
Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
|
||||
Enable vectorization for scatter instruction.
|
||||
+
|
||||
+mamx-fp16
|
||||
+Target Mask(ISA2_AMX_FP16) Var(ix86_isa_flags2) Save
|
||||
+Support AMX-FP16 built-in functions and code generation.
|
||||
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
|
||||
index 6afd78c2b..0447ca4b2 100644
|
||||
--- a/gcc/config/i386/immintrin.h
|
||||
+++ b/gcc/config/i386/immintrin.h
|
||||
@@ -128,4 +128,6 @@
|
||||
|
||||
#include <keylockerintrin.h>
|
||||
|
||||
+#include <amxfp16intrin.h>
|
||||
+
|
||||
#endif /* _IMMINTRIN_H_INCLUDED */
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index 33a776a79..4ba9d34cd 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -7038,6 +7038,11 @@ Enable/disable the generation of the WIDEKL instructions.
|
||||
@cindex @code{target("avxvnni")} function attribute, x86
|
||||
Enable/disable the generation of the AVXVNNI instructions.
|
||||
|
||||
+@item amx-fp16
|
||||
+@itemx no-amx-fp16
|
||||
+@cindex @code{target("amx-fp16")} function attribute, x86
|
||||
+Enable/disable the generation of the AMX-FP16 instructions.
|
||||
+
|
||||
@item cld
|
||||
@itemx no-cld
|
||||
@cindex @code{target("cld")} function attribute, x86
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 3a48655e5..d25f13217 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -1428,7 +1428,7 @@ See RS/6000 and PowerPC Options.
|
||||
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
|
||||
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
|
||||
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
|
||||
--mavx512fp16 @gol
|
||||
+-mavx512fp16 -mamx-fp16 @gol
|
||||
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
|
||||
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
|
||||
-mkl -mwidekl @gol
|
||||
@@ -32442,6 +32442,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@need 200
|
||||
@itemx -mwidekl
|
||||
@opindex mwidekl
|
||||
+@need 200
|
||||
+@itemx -mamx-fp16
|
||||
+@opindex mamx-fp16
|
||||
These switches enable the use of instructions in the MMX, SSE,
|
||||
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
|
||||
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
|
||||
@@ -32451,8 +32454,8 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP,
|
||||
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
|
||||
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
|
||||
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
|
||||
-UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16
|
||||
-or CLDEMOTE extended instruction sets. Each has a corresponding
|
||||
+UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16,
|
||||
+AMX-FP16 or CLDEMOTE extended instruction sets. Each has a corresponding
|
||||
@option{-mno-} option to disable use of these instructions.
|
||||
|
||||
These extensions are also available as built-in functions: see
|
||||
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
|
||||
index 71c04841d..b64b62dee 100644
|
||||
--- a/gcc/doc/sourcebuild.texi
|
||||
+++ b/gcc/doc/sourcebuild.texi
|
||||
@@ -2472,6 +2472,9 @@ Target supports the execution of @code{amx-int8} instructions.
|
||||
@item amx_bf16
|
||||
Target supports the execution of @code{amx-bf16} instructions.
|
||||
|
||||
+@item amx_fp16
|
||||
+Target supports the execution of @code{amx-fp16} instructions.
|
||||
+
|
||||
@item cell_hw
|
||||
Test system can execute AltiVec and Cell PPU instructions.
|
||||
|
||||
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
|
||||
index fba3d1ac6..57a6357aa 100644
|
||||
--- a/gcc/testsuite/g++.dg/other/i386-2.C
|
||||
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
||||
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
|
||||
index 5cc0fa834..1947547d6 100644
|
||||
--- a/gcc/testsuite/g++.dg/other/i386-3.C
|
||||
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
||||
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h
|
||||
index 6fff5ff46..27dd37bf9 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/amx-check.h
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amx-check.h
|
||||
@@ -213,6 +213,9 @@ main ()
|
||||
#ifdef AMX_BF16
|
||||
&& __builtin_cpu_supports ("amx-bf16")
|
||||
#endif
|
||||
+#ifdef AMX_FP16
|
||||
+ && __builtin_cpu_supports ("amx-fp16")
|
||||
+#endif
|
||||
#ifdef __linux__
|
||||
&& request_perm_xtile_data ()
|
||||
#endif
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amx-helper.h b/gcc/testsuite/gcc.target/i386/amx-helper.h
|
||||
new file mode 100644
|
||||
index 000000000..fe24d7067
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amx-helper.h
|
||||
@@ -0,0 +1,61 @@
|
||||
+#ifndef AMX_HELPER_H_INCLUDED
|
||||
+#define AMX_HELPER_H_INCLUDED
|
||||
+#if defined(AMX_FP16)
|
||||
+#include <immintrin.h>
|
||||
+#include <xmmintrin.h>
|
||||
+#endif
|
||||
+#include "amx-check.h"
|
||||
+
|
||||
+typedef union
|
||||
+{
|
||||
+ _Float16 f16;
|
||||
+ uint16_t u;
|
||||
+} union16f_uw;
|
||||
+
|
||||
+#if defined(AMX_FP16)
|
||||
+/* Transformation functions between fp16/float */
|
||||
+static uint16_t make_f32_fp16 (float f)
|
||||
+{
|
||||
+ union16f_uw tmp;
|
||||
+ __m128 b = _mm_set_ss (f);
|
||||
+ __m128h a;
|
||||
+ tmp.f16 = _mm_cvtsh_h (_mm_cvtss_sh (a, b));
|
||||
+ return tmp.u;
|
||||
+}
|
||||
+
|
||||
+static float make_fp16_f32 (uint16_t fp)
|
||||
+{
|
||||
+ union16f_uw tmp;
|
||||
+ tmp.u = fp;
|
||||
+ __m128h b = _mm_set_sh (tmp.f16);
|
||||
+ __m128 a;
|
||||
+ return _mm_cvtss_f32 (_mm_cvtsh_ss (a, b));
|
||||
+}
|
||||
+
|
||||
+/* Init tile buffer with fp16 pairs */
|
||||
+void init_fp16_max_tile_buffer (uint8_t* buf)
|
||||
+{
|
||||
+ int i, j;
|
||||
+ uint16_t* ptr = (uint16_t *) buf;
|
||||
+
|
||||
+ for (i = 0; i < 16; i++)
|
||||
+ for (j = 0; j < 32; j++)
|
||||
+ {
|
||||
+ float f = 2.5f * i + 1.25f * j;
|
||||
+ ptr[i * 32 + j] = make_f32_fp16 (f);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Init tile fp16 pair buffer with zero */
|
||||
+void init_fp16_max_tile_zero_buffer (uint8_t* buf)
|
||||
+{
|
||||
+ int i, j;
|
||||
+ uint16_t* ptr = (uint16_t *) buf;
|
||||
+
|
||||
+ for (i = 0; i < 16; i++)
|
||||
+ for (j = 0; j < 32; j++)
|
||||
+ ptr[i * 32 + j] = make_f32_fp16 (0.0f);
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#endif
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c
|
||||
new file mode 100644
|
||||
index 000000000..09ae6d408
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c
|
||||
@@ -0,0 +1,13 @@
|
||||
+/* { dg-do compile { target { ! ia32 } } } */
|
||||
+/* { dg-options "-O2 -mamx-fp16" } */
|
||||
+/* { dg-final { scan-assembler "tdpfp16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
|
||||
+#include <immintrin.h>
|
||||
+
|
||||
+#define TMM1 1
|
||||
+#define TMM2 2
|
||||
+#define TMM3 3
|
||||
+
|
||||
+void TEST ()
|
||||
+{
|
||||
+ _tile_dpfp16ps (TMM1, TMM2, TMM3);
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c
|
||||
new file mode 100644
|
||||
index 000000000..a8dff945f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c
|
||||
@@ -0,0 +1,10 @@
|
||||
+/* { dg-do compile { target { ! ia32 } } } */
|
||||
+/* { dg-require-effective-target masm_intel } */
|
||||
+/* { dg-options "-O2 -mamx-fp16 -masm=intel" } */
|
||||
+/* { dg-final { scan-assembler "tdpfp16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
|
||||
+#include <immintrin.h>
|
||||
+
|
||||
+void TEST ()
|
||||
+{
|
||||
+ _tile_dpfp16ps (1, 2, 3);
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c b/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
|
||||
new file mode 100644
|
||||
index 000000000..2d359a689
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
|
||||
@@ -0,0 +1,57 @@
|
||||
+/* { dg-do run { target { ! ia32 } } } */
|
||||
+/* { dg-require-effective-target amx_tile } */
|
||||
+/* { dg-require-effective-target amx_fp16 } */
|
||||
+/* { dg-require-effective-target avx512fp16 } */
|
||||
+/* { dg-options "-O2 -mamx-tile -mamx-fp16 -mavx512fp16" } */
|
||||
+#define AMX_FP16
|
||||
+#define DO_TEST test_amx_fp16_dpfp16ps
|
||||
+void test_amx_fp16_dpfp16ps ();
|
||||
+#include "amx-helper.h"
|
||||
+
|
||||
+void calc_matrix_dpfp16ps (__tile *dst, __tile *src1, __tile *src2)
|
||||
+{
|
||||
+ uint16_t *src1_buf = (uint16_t *)src1->buf;
|
||||
+ uint16_t *src2_buf = (uint16_t *)src2->buf;
|
||||
+ float *dst_buf = (float *)dst->buf;
|
||||
+
|
||||
+ int M = src1->rows;
|
||||
+ int N = src1->colsb / 4;
|
||||
+ int K = src2->colsb / 4;
|
||||
+ int i, j, k, t;
|
||||
+
|
||||
+ for (i = 0; i < M; i++)
|
||||
+ for (j = 0; j < N; j++)
|
||||
+ for (k = 0; k < K; k++)
|
||||
+ for (t = 0; t < 2; t+=2)
|
||||
+ {
|
||||
+ dst_buf[i * K + k] +=
|
||||
+ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t]) *
|
||||
+ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t])) +
|
||||
+ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t + 1]) *
|
||||
+ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t + 1]));
|
||||
+ }
|
||||
+
|
||||
+}
|
||||
+
|
||||
+void test_amx_fp16_dpfp16ps ()
|
||||
+{
|
||||
+ __tilecfg_u cfg;
|
||||
+ __tile dst, dst_ref, src1, src2;
|
||||
+ uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024];
|
||||
+
|
||||
+ init_fp16_max_tile_buffer (tmp_dst_buf);
|
||||
+ init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf);
|
||||
+
|
||||
+ init_tile_config (&cfg);
|
||||
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf);
|
||||
+ init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf);
|
||||
+ init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf);
|
||||
+
|
||||
+ calc_matrix_dpfp16ps (&dst, &src1, &src2);
|
||||
+
|
||||
+ _tile_dpfp16ps (1, 2, 3);
|
||||
+ _tile_stored (1, dst_ref.buf, _STRIDE);
|
||||
+
|
||||
+ if (!check_float_tile_register (&dst_ref, &dst))
|
||||
+ abort ();
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
index f34e7a977..b00cfff03 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
@@ -80,6 +80,7 @@ extern void test_keylocker (void) __attribute__((__target__("kl")));
|
||||
extern void test_widekl (void) __attribute__((__target__("widekl")));
|
||||
extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
|
||||
extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
|
||||
+extern void test_amx_fp16 (void) __attribute__((__target__("amx-fp16")));
|
||||
|
||||
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
|
||||
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
|
||||
@@ -161,6 +162,7 @@ extern void test_no_keylocker (void) __attribute__((__target__("no-kl")));
|
||||
extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
|
||||
extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
|
||||
extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
|
||||
+extern void test_no_amx_fp16 (void) __attribute__((__target__("no-amx-fp16")));
|
||||
|
||||
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
|
||||
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
|
||||
index 375d4d1b4..9ab4a7e0c 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
|
||||
@@ -3,7 +3,7 @@
|
||||
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
|
||||
with -O -std=c89 -pedantic-errors. */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
|
||||
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mamx-fp16" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
|
||||
index e285c307d..a1e453a98 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
||||
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
|
||||
index f41493b93..eaa1a8d81 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
||||
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
|
||||
index 31492ef36..19afe639d 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
|
||||
@@ -103,7 +103,7 @@
|
||||
|
||||
|
||||
#ifndef DIFFERENT_PRAGMAS
|
||||
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
|
||||
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
||||
#endif
|
||||
|
||||
/* Following intrinsics require immediate arguments. They
|
||||
@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
|
||||
|
||||
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
|
||||
#ifdef DIFFERENT_PRAGMAS
|
||||
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
|
||||
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
test_1 (_cvtss_sh, unsigned short, float, 1)
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
|
||||
index b398fd144..151201d97 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
|
||||
@@ -843,6 +843,6 @@
|
||||
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
|
||||
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
|
||||
|
||||
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
|
||||
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
||||
|
||||
#include <x86intrin.h>
|
||||
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
|
||||
index c858bd93b..0d83c780c 100644
|
||||
--- a/gcc/testsuite/lib/target-supports.exp
|
||||
+++ b/gcc/testsuite/lib/target-supports.exp
|
||||
@@ -9972,6 +9972,17 @@ proc check_effective_target_amx_bf16 { } {
|
||||
} "-mamx-bf16" ]
|
||||
}
|
||||
|
||||
+# Return 1 if amx-fp16 instructions can be compiled.
|
||||
+proc check_effective_target_amx_fp16 { } {
|
||||
+ return [check_no_compiler_messages amx_fp16 object {
|
||||
+ void
|
||||
+ foo ()
|
||||
+ {
|
||||
+ __asm__ volatile ("tdpfp16ps\t%%tmm1, %%tmm2, %%tmm3" ::);
|
||||
+ }
|
||||
+ } "-mamx-fp16" ]
|
||||
+}
|
||||
+
|
||||
# Return 1 if vpclmulqdq instructions can be compiled.
|
||||
proc check_effective_target_vpclmulqdq { } {
|
||||
return [check_no_compiler_messages vpclmulqdq object {
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,902 +0,0 @@
|
||||
From 42a38c8abaa28f67e26b9af3f434fe0107894e7d Mon Sep 17 00:00:00 2001
|
||||
From: Haochen Jiang <haochen.jiang@intel.com>
|
||||
Date: Fri, 4 Nov 2022 15:01:05 +0800
|
||||
Subject: [PATCH 21/32] Support Intel prefetchit0/t1
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_available_features):
|
||||
Detect PREFETCHI.
|
||||
* common/config/i386/i386-common.cc
|
||||
(OPTION_MASK_ISA2_PREFETCHI_SET,
|
||||
OPTION_MASK_ISA2_PREFETCHI_UNSET): New.
|
||||
(ix86_handle_option): Handle -mprefetchi.
|
||||
* common/config/i386/i386-cpuinfo.h
|
||||
(enum processor_features): Add FEATURE_PREFETCHI.
|
||||
* common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY
|
||||
for prefetchi.
|
||||
* config.gcc: Add prfchiintrin.h.
|
||||
* config/i386/cpuid.h (bit_PREFETCHI): New.
|
||||
* config/i386/i386-builtin-types.def:
|
||||
Add DEF_FUNCTION_TYPE (VOID, PCVOID, INT)
|
||||
and DEF_FUNCTION_TYPE (VOID, PCVOID, INT, INT, INT).
|
||||
* config/i386/i386-builtin.def (BDESC): Add new builtins.
|
||||
* config/i386/i386-c.cc (ix86_target_macros_internal):
|
||||
Define __PREFETCHI__.
|
||||
* config/i386/i386-expand.cc: Handle new builtins.
|
||||
* config/i386/i386-isa.def (PREFETCHI):
|
||||
Add DEF_PTA(PREFETCHI).
|
||||
* config/i386/i386-options.cc
|
||||
(ix86_valid_target_attribute_inner_p): Handle prefetchi.
|
||||
* config/i386/i386.md (prefetchi): New define_insn.
|
||||
* config/i386/i386.opt: Add option -mprefetchi.
|
||||
* config/i386/predicates.md (local_func_symbolic_operand):
|
||||
New predicate.
|
||||
* config/i386/x86gprintrin.h: Include prfchiintrin.h.
|
||||
* config/i386/xmmintrin.h (enum _mm_hint): New enum for
|
||||
prefetchi.
|
||||
(_mm_prefetch): Handle the highest bit of enum.
|
||||
* doc/extend.texi: Document prefetchi.
|
||||
* doc/invoke.texi: Document -mprefetchi.
|
||||
* doc/sourcebuild.texi: Document target prefetchi.
|
||||
* config/i386/prfchiintrin.h: New file.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* g++.dg/other/i386-2.C: Add -mprefetchi.
|
||||
* g++.dg/other/i386-3.C: Ditto.
|
||||
* gcc.target/i386/avx-1.c: Ditto.
|
||||
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
|
||||
* gcc.target/i386/sse-13.c: Add -mprefetchi.
|
||||
* gcc.target/i386/sse-23.c: Ditto.
|
||||
* gcc.target/i386/x86gprintrin-1.c: Ditto.
|
||||
* gcc.target/i386/x86gprintrin-2.c: Ditto.
|
||||
* gcc.target/i386/x86gprintrin-3.c: Ditto.
|
||||
* gcc.target/i386/x86gprintrin-4.c: Ditto.
|
||||
* gcc.target/i386/x86gprintrin-5.c: Ditto.
|
||||
* gcc.target/i386/prefetchi-1.c: New test.
|
||||
* gcc.target/i386/prefetchi-2.c: Ditto.
|
||||
* gcc.target/i386/prefetchi-3.c: Ditto.
|
||||
* gcc.target/i386/prefetchi-4.c: Ditto.
|
||||
|
||||
Co-authored-by: Hongtao Liu <hongtao.liu@intel.com>
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 2 +
|
||||
gcc/common/config/i386/i386-common.cc | 15 ++++
|
||||
gcc/common/config/i386/i386-cpuinfo.h | 1 +
|
||||
gcc/common/config/i386/i386-isas.h | 1 +
|
||||
gcc/config.gcc | 2 +-
|
||||
gcc/config/i386/cpuid.h | 1 +
|
||||
gcc/config/i386/i386-builtin-types.def | 4 +
|
||||
gcc/config/i386/i386-builtin.def | 4 +
|
||||
gcc/config/i386/i386-c.cc | 2 +
|
||||
gcc/config/i386/i386-expand.cc | 77 +++++++++++++++++++
|
||||
gcc/config/i386/i386-isa.def | 1 +
|
||||
gcc/config/i386/i386-options.cc | 4 +-
|
||||
gcc/config/i386/i386.md | 23 ++++++
|
||||
gcc/config/i386/i386.opt | 4 +
|
||||
gcc/config/i386/predicates.md | 15 ++++
|
||||
gcc/config/i386/prfchiintrin.h | 49 ++++++++++++
|
||||
gcc/config/i386/x86gprintrin.h | 2 +
|
||||
gcc/config/i386/xmmintrin.h | 7 +-
|
||||
gcc/doc/extend.texi | 5 ++
|
||||
gcc/doc/invoke.texi | 7 +-
|
||||
gcc/doc/sourcebuild.texi | 3 +
|
||||
gcc/testsuite/g++.dg/other/i386-2.C | 2 +-
|
||||
gcc/testsuite/g++.dg/other/i386-3.C | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/avx-1.c | 4 +-
|
||||
gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 +
|
||||
gcc/testsuite/gcc.target/i386/prefetchi-1.c | 40 ++++++++++
|
||||
gcc/testsuite/gcc.target/i386/prefetchi-2.c | 26 +++++++
|
||||
gcc/testsuite/gcc.target/i386/prefetchi-3.c | 20 +++++
|
||||
gcc/testsuite/gcc.target/i386/prefetchi-4.c | 19 +++++
|
||||
gcc/testsuite/gcc.target/i386/sse-13.c | 4 +-
|
||||
gcc/testsuite/gcc.target/i386/sse-23.c | 4 +-
|
||||
.../gcc.target/i386/x86gprintrin-1.c | 2 +-
|
||||
.../gcc.target/i386/x86gprintrin-2.c | 2 +-
|
||||
.../gcc.target/i386/x86gprintrin-3.c | 2 +-
|
||||
.../gcc.target/i386/x86gprintrin-4.c | 2 +-
|
||||
.../gcc.target/i386/x86gprintrin-5.c | 2 +-
|
||||
36 files changed, 343 insertions(+), 19 deletions(-)
|
||||
create mode 100644 gcc/config/i386/prfchiintrin.h
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-2.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-3.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-4.c
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 5951a30aa..f17e88144 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -772,6 +772,8 @@ get_available_features (struct __processor_model *cpu_model,
|
||||
__cpuid_count (7, 1, eax, ebx, ecx, edx);
|
||||
if (eax & bit_HRESET)
|
||||
set_feature (FEATURE_HRESET);
|
||||
+ if (edx & bit_PREFETCHI)
|
||||
+ set_feature (FEATURE_PREFETCHI);
|
||||
if (avx_usable)
|
||||
{
|
||||
if (eax & bit_AVXVNNI)
|
||||
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||||
index 922db33ee..c8cf532cf 100644
|
||||
--- a/gcc/common/config/i386/i386-common.cc
|
||||
+++ b/gcc/common/config/i386/i386-common.cc
|
||||
@@ -108,6 +108,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
|
||||
#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
|
||||
#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
|
||||
+#define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
|
||||
as -msse4.2. */
|
||||
@@ -277,6 +278,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
(OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
|
||||
#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
|
||||
#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
|
||||
+#define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
|
||||
as -mno-sse4.1. */
|
||||
@@ -1140,6 +1142,19 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
}
|
||||
return true;
|
||||
|
||||
+ case OPT_mprefetchi:
|
||||
+ if (value)
|
||||
+ {
|
||||
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_PREFETCHI_SET;
|
||||
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_SET;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_PREFETCHI_UNSET;
|
||||
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_PREFETCHI_UNSET;
|
||||
+ }
|
||||
+ return true;
|
||||
+
|
||||
case OPT_mfma:
|
||||
if (value)
|
||||
{
|
||||
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
index 8f22897de..95b078acf 100644
|
||||
--- a/gcc/common/config/i386/i386-cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
@@ -241,6 +241,7 @@ enum processor_features
|
||||
FEATURE_X86_64_V3,
|
||||
FEATURE_X86_64_V4,
|
||||
FEATURE_AMX_FP16,
|
||||
+ FEATURE_PREFETCHI,
|
||||
CPU_FEATURE_MAX
|
||||
};
|
||||
|
||||
diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
|
||||
index 95bab6da2..6caf06249 100644
|
||||
--- a/gcc/common/config/i386/i386-isas.h
|
||||
+++ b/gcc/common/config/i386/i386-isas.h
|
||||
@@ -176,4 +176,5 @@ ISA_NAMES_TABLE_START
|
||||
ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
|
||||
ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
|
||||
ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
|
||||
+ ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
|
||||
ISA_NAMES_TABLE_END
|
||||
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||
index e2b4a23dc..81012c651 100644
|
||||
--- a/gcc/config.gcc
|
||||
+++ b/gcc/config.gcc
|
||||
@@ -424,7 +424,7 @@ i[34567]86-*-* | x86_64-*-*)
|
||||
amxbf16intrin.h x86gprintrin.h uintrintrin.h
|
||||
hresetintrin.h keylockerintrin.h avxvnniintrin.h
|
||||
mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
|
||||
- amxfp16intrin.h"
|
||||
+ amxfp16intrin.h prfchiintrin.h"
|
||||
;;
|
||||
ia64-*-*)
|
||||
extra_headers=ia64intrin.h
|
||||
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
|
||||
index d6cd8d1bf..21100149a 100644
|
||||
--- a/gcc/config/i386/cpuid.h
|
||||
+++ b/gcc/config/i386/cpuid.h
|
||||
@@ -50,6 +50,7 @@
|
||||
|
||||
/* %edx */
|
||||
#define bit_CMPXCHG8B (1 << 8)
|
||||
+#define bit_PREFETCHI (1 << 14)
|
||||
#define bit_CMOV (1 << 15)
|
||||
#define bit_MMX (1 << 23)
|
||||
#define bit_FXSAVE (1 << 24)
|
||||
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
|
||||
index e33f06ab3..ff3b0af84 100644
|
||||
--- a/gcc/config/i386/i386-builtin-types.def
|
||||
+++ b/gcc/config/i386/i386-builtin-types.def
|
||||
@@ -1387,3 +1387,7 @@ DEF_FUNCTION_TYPE (V32HF, V32HF)
|
||||
DEF_FUNCTION_TYPE_ALIAS (V8HF_FTYPE_V8HF, ROUND)
|
||||
DEF_FUNCTION_TYPE_ALIAS (V16HF_FTYPE_V16HF, ROUND)
|
||||
DEF_FUNCTION_TYPE_ALIAS (V32HF_FTYPE_V32HF, ROUND)
|
||||
+
|
||||
+# PREFETCHI builtins
|
||||
+DEF_FUNCTION_TYPE (VOID, PCVOID, INT)
|
||||
+DEF_FUNCTION_TYPE (VOID, PCVOID, INT, INT, INT)
|
||||
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
|
||||
index 2b1d6c733..d3ab21eea 100644
|
||||
--- a/gcc/config/i386/i386-builtin.def
|
||||
+++ b/gcc/config/i386/i386-builtin.def
|
||||
@@ -469,6 +469,10 @@ BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesdecwide2
|
||||
BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesencwide128kl_u8", IX86_BUILTIN_AESENCWIDE128KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
|
||||
BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesencwide256kl_u8", IX86_BUILTIN_AESENCWIDE256KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
|
||||
|
||||
+/* PREFETCHI */
|
||||
+BDESC (0, 0, CODE_FOR_prefetchi, "__builtin_ia32_prefetchi", IX86_BUILTIN_PREFETCHI, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT)
|
||||
+BDESC (0, 0, CODE_FOR_nothing, "__builtin_ia32_prefetch", IX86_BUILTIN_PREFETCH, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT_INT_INT)
|
||||
+
|
||||
BDESC_END (SPECIAL_ARGS, PURE_ARGS)
|
||||
|
||||
/* AVX */
|
||||
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
|
||||
index 4269f29e6..00880bd17 100644
|
||||
--- a/gcc/config/i386/i386-c.cc
|
||||
+++ b/gcc/config/i386/i386-c.cc
|
||||
@@ -635,6 +635,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__AVXVNNI__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP16)
|
||||
def_or_undef (parse_in, "__AMX_FP16__");
|
||||
+ if (isa_flag2 & OPTION_MASK_ISA2_PREFETCHI)
|
||||
+ def_or_undef (parse_in, "__PREFETCHI__");
|
||||
if (TARGET_IAMCU)
|
||||
{
|
||||
def_or_undef (parse_in, "__iamcu");
|
||||
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
|
||||
index 77dda5dd4..bc2e61980 100644
|
||||
--- a/gcc/config/i386/i386-expand.cc
|
||||
+++ b/gcc/config/i386/i386-expand.cc
|
||||
@@ -12850,6 +12850,83 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
|
||||
return target;
|
||||
}
|
||||
|
||||
+ case IX86_BUILTIN_PREFETCH:
|
||||
+ {
|
||||
+ arg0 = CALL_EXPR_ARG (exp, 0); // const void *
|
||||
+ arg1 = CALL_EXPR_ARG (exp, 1); // const int
|
||||
+ arg2 = CALL_EXPR_ARG (exp, 2); // const int
|
||||
+ arg3 = CALL_EXPR_ARG (exp, 3); // const int
|
||||
+
|
||||
+ op0 = expand_normal (arg0);
|
||||
+ op1 = expand_normal (arg1);
|
||||
+ op2 = expand_normal (arg2);
|
||||
+ op3 = expand_normal (arg3);
|
||||
+
|
||||
+ if (!CONST_INT_P (op1) || !CONST_INT_P (op2) || !CONST_INT_P (op3))
|
||||
+ {
|
||||
+ error ("second, third and fourth argument must be a const");
|
||||
+ return const0_rtx;
|
||||
+ }
|
||||
+
|
||||
+ if (INTVAL (op3) == 1)
|
||||
+ {
|
||||
+ if (TARGET_64BIT
|
||||
+ && local_func_symbolic_operand (op0, GET_MODE (op0)))
|
||||
+ emit_insn (gen_prefetchi (op0, op2));
|
||||
+ else
|
||||
+ {
|
||||
+ warning (0, "instruction prefetch applies when in 64-bit mode"
|
||||
+ " with RIP-relative addressing and"
|
||||
+ " option %<-mprefetchi%>;"
|
||||
+ " they stay NOPs otherwise");
|
||||
+ emit_insn (gen_nop ());
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (!address_operand (op0, VOIDmode))
|
||||
+ {
|
||||
+ op0 = convert_memory_address (Pmode, op0);
|
||||
+ op0 = copy_addr_to_reg (op0);
|
||||
+ }
|
||||
+ emit_insn (gen_prefetch (op0, op1, op2));
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ case IX86_BUILTIN_PREFETCHI:
|
||||
+ {
|
||||
+ arg0 = CALL_EXPR_ARG (exp, 0); // const void *
|
||||
+ arg1 = CALL_EXPR_ARG (exp, 1); // const int
|
||||
+
|
||||
+ op0 = expand_normal (arg0);
|
||||
+ op1 = expand_normal (arg1);
|
||||
+
|
||||
+ if (!CONST_INT_P (op1))
|
||||
+ {
|
||||
+ error ("second argument must be a const");
|
||||
+ return const0_rtx;
|
||||
+ }
|
||||
+
|
||||
+ /* GOT/PLT_PIC should not be available for instruction prefetch.
|
||||
+ It must be real instruction address. */
|
||||
+ if (TARGET_64BIT
|
||||
+ && local_func_symbolic_operand (op0, GET_MODE (op0)))
|
||||
+ emit_insn (gen_prefetchi (op0, op1));
|
||||
+ else
|
||||
+ {
|
||||
+ /* Ignore the hint. */
|
||||
+ warning (0, "instruction prefetch applies when in 64-bit mode"
|
||||
+ " with RIP-relative addressing and"
|
||||
+ " option %<-mprefetchi%>;"
|
||||
+ " they stay NOPs otherwise");
|
||||
+ emit_insn (gen_nop ());
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
case IX86_BUILTIN_VEC_INIT_V2SI:
|
||||
case IX86_BUILTIN_VEC_INIT_V4HI:
|
||||
case IX86_BUILTIN_VEC_INIT_V8QI:
|
||||
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
|
||||
index c7305c01b..744a7df85 100644
|
||||
--- a/gcc/config/i386/i386-isa.def
|
||||
+++ b/gcc/config/i386/i386-isa.def
|
||||
@@ -110,3 +110,4 @@ DEF_PTA(WIDEKL)
|
||||
DEF_PTA(AVXVNNI)
|
||||
DEF_PTA(AVX512FP16)
|
||||
DEF_PTA(AMX_FP16)
|
||||
+DEF_PTA(PREFETCHI)
|
||||
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||
index 3edb7094e..724375f02 100644
|
||||
--- a/gcc/config/i386/i386-options.cc
|
||||
+++ b/gcc/config/i386/i386-options.cc
|
||||
@@ -231,7 +231,8 @@ static struct ix86_target_opts isa2_opts[] =
|
||||
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
|
||||
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
|
||||
{ "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
|
||||
- { "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 }
|
||||
+ { "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 },
|
||||
+ { "-mprefetchi", OPTION_MASK_ISA2_PREFETCHI }
|
||||
};
|
||||
static struct ix86_target_opts isa_opts[] =
|
||||
{
|
||||
@@ -1076,6 +1077,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
||||
IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
|
||||
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
|
||||
IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16),
|
||||
+ IX86_ATTR_ISA ("prefetchi", OPT_mprefetchi),
|
||||
|
||||
/* enum options */
|
||||
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
|
||||
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
|
||||
index 71691f598..f08c2cfb1 100644
|
||||
--- a/gcc/config/i386/i386.md
|
||||
+++ b/gcc/config/i386/i386.md
|
||||
@@ -329,6 +329,9 @@
|
||||
|
||||
;; For HRESET support
|
||||
UNSPECV_HRESET
|
||||
+
|
||||
+ ;; For PREFETCHI support
|
||||
+ UNSPECV_PREFETCHI
|
||||
])
|
||||
|
||||
;; Constants to represent rounding modes in the ROUND instruction
|
||||
@@ -22907,6 +22910,26 @@
|
||||
(symbol_ref "memory_address_length (operands[0], false)"))
|
||||
(set_attr "memory" "none")])
|
||||
|
||||
+(define_insn "prefetchi"
|
||||
+ [(unspec_volatile [(match_operand 0 "local_func_symbolic_operand" "p")
|
||||
+ (match_operand:SI 1 "const_int_operand")]
|
||||
+ UNSPECV_PREFETCHI)]
|
||||
+ "TARGET_PREFETCHI && TARGET_64BIT"
|
||||
+{
|
||||
+ static const char * const patterns[2] = {
|
||||
+ "prefetchit1\t%0", "prefetchit0\t%0"
|
||||
+ };
|
||||
+
|
||||
+ int locality = INTVAL (operands[1]);
|
||||
+ gcc_assert (IN_RANGE (locality, 2, 3));
|
||||
+
|
||||
+ return patterns[locality - 2];
|
||||
+}
|
||||
+ [(set_attr "type" "sse")
|
||||
+ (set (attr "length_address")
|
||||
+ (symbol_ref "memory_address_length (operands[0], false)"))
|
||||
+ (set_attr "memory" "none")])
|
||||
+
|
||||
(define_expand "stack_protect_set"
|
||||
[(match_operand 0 "memory_operand")
|
||||
(match_operand 1 "memory_operand")]
|
||||
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
|
||||
index 52c6f02ee..50cd114f6 100644
|
||||
--- a/gcc/config/i386/i386.opt
|
||||
+++ b/gcc/config/i386/i386.opt
|
||||
@@ -1230,3 +1230,7 @@ Enable vectorization for scatter instruction.
|
||||
mamx-fp16
|
||||
Target Mask(ISA2_AMX_FP16) Var(ix86_isa_flags2) Save
|
||||
Support AMX-FP16 built-in functions and code generation.
|
||||
+
|
||||
+mprefetchi
|
||||
+Target Mask(ISA2_PREFETCHI) Var(ix86_isa_flags2) Save
|
||||
+Support PREFETCHI built-in functions and code generation.
|
||||
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
|
||||
index ac02c61ac..774178b78 100644
|
||||
--- a/gcc/config/i386/predicates.md
|
||||
+++ b/gcc/config/i386/predicates.md
|
||||
@@ -610,6 +610,21 @@
|
||||
return false;
|
||||
})
|
||||
|
||||
+(define_predicate "local_func_symbolic_operand"
|
||||
+ (match_operand 0 "local_symbolic_operand")
|
||||
+{
|
||||
+ if (GET_CODE (op) == CONST
|
||||
+ && GET_CODE (XEXP (op, 0)) == PLUS
|
||||
+ && CONST_INT_P (XEXP (XEXP (op, 0), 1)))
|
||||
+ op = XEXP (XEXP (op, 0), 0);
|
||||
+
|
||||
+ if (GET_CODE (op) == SYMBOL_REF
|
||||
+ && !SYMBOL_REF_FUNCTION_P (op))
|
||||
+ return false;
|
||||
+
|
||||
+ return true;
|
||||
+})
|
||||
+
|
||||
;; Test for a legitimate @GOTOFF operand.
|
||||
;;
|
||||
;; VxWorks does not impose a fixed gap between segments; the run-time
|
||||
diff --git a/gcc/config/i386/prfchiintrin.h b/gcc/config/i386/prfchiintrin.h
|
||||
new file mode 100644
|
||||
index 000000000..06deef488
|
||||
--- /dev/null
|
||||
+++ b/gcc/config/i386/prfchiintrin.h
|
||||
@@ -0,0 +1,49 @@
|
||||
+/* Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of GCC.
|
||||
+
|
||||
+ GCC is free software; you can redistribute it and/or modify
|
||||
+ it under the terms of the GNU General Public License as published by
|
||||
+ the Free Software Foundation; either version 3, or (at your option)
|
||||
+ any later version.
|
||||
+
|
||||
+ GCC is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+ GNU General Public License for more details.
|
||||
+
|
||||
+ Under Section 7 of GPL version 3, you are granted additional
|
||||
+ permissions described in the GCC Runtime Library Exception, version
|
||||
+ 3.1, as published by the Free Software Foundation.
|
||||
+
|
||||
+ You should have received a copy of the GNU General Public License and
|
||||
+ a copy of the GCC Runtime Library Exception along with this program;
|
||||
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#if !defined _X86GPRINTRIN_H_INCLUDED
|
||||
+# error "Never use <prfchiintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
+#endif
|
||||
+
|
||||
+#ifndef _PRFCHIINTRIN_H_INCLUDED
|
||||
+#define _PRFCHIINTRIN_H_INCLUDED
|
||||
+
|
||||
+#ifdef __x86_64__
|
||||
+
|
||||
+extern __inline void
|
||||
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
+_m_prefetchit0 (void* __P)
|
||||
+{
|
||||
+ __builtin_ia32_prefetchi (__P, 3);
|
||||
+}
|
||||
+
|
||||
+extern __inline void
|
||||
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
+_m_prefetchit1 (void* __P)
|
||||
+{
|
||||
+ __builtin_ia32_prefetchi (__P, 2);
|
||||
+}
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
+#endif /* _PRFCHIINTRIN_H_INCLUDED */
|
||||
diff --git a/gcc/config/i386/x86gprintrin.h b/gcc/config/i386/x86gprintrin.h
|
||||
index e0be01d5e..0768aa0d7 100644
|
||||
--- a/gcc/config/i386/x86gprintrin.h
|
||||
+++ b/gcc/config/i386/x86gprintrin.h
|
||||
@@ -72,6 +72,8 @@
|
||||
|
||||
#include <pkuintrin.h>
|
||||
|
||||
+#include <prfchiintrin.h>
|
||||
+
|
||||
#include <rdseedintrin.h>
|
||||
|
||||
#include <rtmintrin.h>
|
||||
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
|
||||
index f1c704a2d..7fb179430 100644
|
||||
--- a/gcc/config/i386/xmmintrin.h
|
||||
+++ b/gcc/config/i386/xmmintrin.h
|
||||
@@ -36,6 +36,8 @@
|
||||
/* Constants for use with _mm_prefetch. */
|
||||
enum _mm_hint
|
||||
{
|
||||
+ _MM_HINT_IT0 = 19,
|
||||
+ _MM_HINT_IT1 = 18,
|
||||
/* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. */
|
||||
_MM_HINT_ET0 = 7,
|
||||
_MM_HINT_ET1 = 6,
|
||||
@@ -51,11 +53,12 @@ enum _mm_hint
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_prefetch (const void *__P, enum _mm_hint __I)
|
||||
{
|
||||
- __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
|
||||
+ __builtin_ia32_prefetch (__P, (__I & 0x4) >> 2,
|
||||
+ __I & 0x3, (__I & 0x10) >> 4);
|
||||
}
|
||||
#else
|
||||
#define _mm_prefetch(P, I) \
|
||||
- __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3))
|
||||
+ __builtin_ia32_prefetch ((P), ((I) & 0x4) >> 2, ((I) & 0x3), ((I) & 0x10) >> 4)
|
||||
#endif
|
||||
|
||||
#ifndef __SSE__
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index 4ba9d34cd..cb987f469 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -7043,6 +7043,11 @@ Enable/disable the generation of the AVXVNNI instructions.
|
||||
@cindex @code{target("amx-fp16")} function attribute, x86
|
||||
Enable/disable the generation of the AMX-FP16 instructions.
|
||||
|
||||
+@item prefetchi
|
||||
+@itemx no-prefetchi
|
||||
+@cindex @code{target("prefetchi")} function attribute, x86
|
||||
+Enable/disable the generation of the PREFETCHI instructions.
|
||||
+
|
||||
@item cld
|
||||
@itemx no-cld
|
||||
@cindex @code{target("cld")} function attribute, x86
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index d25f13217..211b970c0 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -1428,7 +1428,7 @@ See RS/6000 and PowerPC Options.
|
||||
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
|
||||
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
|
||||
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
|
||||
--mavx512fp16 -mamx-fp16 @gol
|
||||
+-mavx512fp16 -mamx-fp16 -mprefetchi @gol
|
||||
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
|
||||
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
|
||||
-mkl -mwidekl @gol
|
||||
@@ -32445,6 +32445,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@need 200
|
||||
@itemx -mamx-fp16
|
||||
@opindex mamx-fp16
|
||||
+@need 200
|
||||
+@itemx -mprefetchi
|
||||
+@opindex mprefetchi
|
||||
These switches enable the use of instructions in the MMX, SSE,
|
||||
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
|
||||
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
|
||||
@@ -32455,7 +32458,7 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
|
||||
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
|
||||
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
|
||||
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16,
|
||||
-AMX-FP16 or CLDEMOTE extended instruction sets. Each has a corresponding
|
||||
+AMX-FP16, PREFETCHI or CLDEMOTE extended instruction sets. Each has a corresponding
|
||||
@option{-mno-} option to disable use of these instructions.
|
||||
|
||||
These extensions are also available as built-in functions: see
|
||||
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
|
||||
index b64b62dee..c68e492dc 100644
|
||||
--- a/gcc/doc/sourcebuild.texi
|
||||
+++ b/gcc/doc/sourcebuild.texi
|
||||
@@ -2496,6 +2496,9 @@ Target does not require strict alignment.
|
||||
@item pie_copyreloc
|
||||
The x86-64 target linker supports PIE with copy reloc.
|
||||
|
||||
+@item prefetchi
|
||||
+Target supports the execution of @code{prefetchi} instructions.
|
||||
+
|
||||
@item rdrand
|
||||
Target supports x86 @code{rdrand} instruction.
|
||||
|
||||
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
|
||||
index 57a6357aa..72ed5fed0 100644
|
||||
--- a/gcc/testsuite/g++.dg/other/i386-2.C
|
||||
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
||||
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
|
||||
index 1947547d6..9dd53653f 100644
|
||||
--- a/gcc/testsuite/g++.dg/other/i386-3.C
|
||||
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
||||
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
|
||||
index 154e7b3b1..2b46e1b87 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16 -mavx512vl" } */
|
||||
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16 -mavx512vl -mprefetchi" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
@@ -153,7 +153,7 @@
|
||||
#define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0)
|
||||
|
||||
/* xmmintrin.h */
|
||||
-#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, 0, _MM_HINT_NTA)
|
||||
+#define __builtin_ia32_prefetch(A, B, C, D) __builtin_ia32_prefetch(A, 0, 3, 0)
|
||||
#define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0)
|
||||
#define __builtin_ia32_vec_set_v4hi(A, D, N) \
|
||||
__builtin_ia32_vec_set_v4hi(A, D, 0)
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
index b00cfff03..9f073f78c 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
@@ -81,6 +81,7 @@ extern void test_widekl (void) __attribute__((__target__("widekl")));
|
||||
extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
|
||||
extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
|
||||
extern void test_amx_fp16 (void) __attribute__((__target__("amx-fp16")));
|
||||
+extern void test_prefetchi (void) __attribute__((__target__("prefetchi")));
|
||||
|
||||
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
|
||||
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
|
||||
@@ -163,6 +164,7 @@ extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
|
||||
extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
|
||||
extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
|
||||
extern void test_no_amx_fp16 (void) __attribute__((__target__("no-amx-fp16")));
|
||||
+extern void test_no_prefetchi (void) __attribute__((__target__("no-prefetchi")));
|
||||
|
||||
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
|
||||
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-1.c b/gcc/testsuite/gcc.target/i386/prefetchi-1.c
|
||||
new file mode 100644
|
||||
index 000000000..80f25e70e
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/prefetchi-1.c
|
||||
@@ -0,0 +1,40 @@
|
||||
+/* { dg-do compile { target { ! ia32 } } } */
|
||||
+/* { dg-options "-mprefetchi -O2" } */
|
||||
+/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+" 2 } } */
|
||||
+/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+" 2 } } */
|
||||
+
|
||||
+#include <x86intrin.h>
|
||||
+
|
||||
+int
|
||||
+bar (int a)
|
||||
+{
|
||||
+ return a + 1;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+foo1 (int b)
|
||||
+{
|
||||
+ _mm_prefetch (bar, _MM_HINT_IT0);
|
||||
+ return bar (b) + 1;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+foo2 (int b)
|
||||
+{
|
||||
+ _mm_prefetch (bar, _MM_HINT_IT1);
|
||||
+ return bar (b) + 1;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+foo3 (int b)
|
||||
+{
|
||||
+ _m_prefetchit0 (bar);
|
||||
+ return bar (b) + 1;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+foo4 (int b)
|
||||
+{
|
||||
+ _m_prefetchit1 (bar);
|
||||
+ return bar (b) + 1;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-2.c b/gcc/testsuite/gcc.target/i386/prefetchi-2.c
|
||||
new file mode 100644
|
||||
index 000000000..e05ce9c73
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/prefetchi-2.c
|
||||
@@ -0,0 +1,26 @@
|
||||
+/* { dg-do compile { target { ia32 } } } */
|
||||
+/* { dg-options "-mprefetchi -O2" } */
|
||||
+/* { dg-final { scan-assembler-not "\[ \\t\]+prefetchit0" } } */
|
||||
+/* { dg-final { scan-assembler-not "\[ \\t\]+prefetchit1" } } */
|
||||
+
|
||||
+#include <x86intrin.h>
|
||||
+
|
||||
+int
|
||||
+bar (int a)
|
||||
+{
|
||||
+ return a + 1;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+foo1 (int b)
|
||||
+{
|
||||
+ __builtin_ia32_prefetch (bar, 0, 3, 1); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
|
||||
+ return bar (b) + 1;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+foo2 (int b)
|
||||
+{
|
||||
+ __builtin_ia32_prefetchi (bar, 2); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
|
||||
+ return bar (b) + 1;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-3.c b/gcc/testsuite/gcc.target/i386/prefetchi-3.c
|
||||
new file mode 100644
|
||||
index 000000000..f0a4173d2
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/prefetchi-3.c
|
||||
@@ -0,0 +1,20 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mprefetchi -O2" } */
|
||||
+/* { dg-final { scan-assembler-not "prefetchit0" } } */
|
||||
+/* { dg-final { scan-assembler-not "prefetchit1" } } */
|
||||
+
|
||||
+#include <x86intrin.h>
|
||||
+
|
||||
+void* p;
|
||||
+
|
||||
+void extern
|
||||
+prefetchi_test1 (void)
|
||||
+{
|
||||
+ __builtin_ia32_prefetchi (p, 2); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
|
||||
+}
|
||||
+
|
||||
+void extern
|
||||
+prefetchi_test2 (void)
|
||||
+{
|
||||
+ __builtin_ia32_prefetch (p, 0, 3, 1); /* { dg-warning "instruction prefetch applies when in 64-bit mode with RIP-relative addressing and option '-mprefetchi'; they stay NOPs otherwise" } */
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-4.c b/gcc/testsuite/gcc.target/i386/prefetchi-4.c
|
||||
new file mode 100644
|
||||
index 000000000..73ae596d1
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/prefetchi-4.c
|
||||
@@ -0,0 +1,19 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O0" } */
|
||||
+
|
||||
+#include <x86intrin.h>
|
||||
+
|
||||
+void* p;
|
||||
+
|
||||
+void extern
|
||||
+prefetch_test (void)
|
||||
+{
|
||||
+ __builtin_ia32_prefetch (p, 0, 3, 0);
|
||||
+ __builtin_ia32_prefetch (p, 0, 2, 0);
|
||||
+ __builtin_ia32_prefetch (p, 0, 1, 0);
|
||||
+ __builtin_ia32_prefetch (p, 0, 0, 0);
|
||||
+ __builtin_ia32_prefetch (p, 1, 3, 0);
|
||||
+ __builtin_ia32_prefetch (p, 1, 2, 0);
|
||||
+ __builtin_ia32_prefetch (p, 1, 1, 0);
|
||||
+ __builtin_ia32_prefetch (p, 1, 0, 0);
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
|
||||
index a1e453a98..db7c0fc7a 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
||||
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
@@ -125,7 +125,7 @@
|
||||
#define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0)
|
||||
|
||||
/* xmmintrin.h */
|
||||
-#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, 0, _MM_HINT_NTA)
|
||||
+#define __builtin_ia32_prefetch(A, B, C, D) __builtin_ia32_prefetch(A, 0, 3, 0)
|
||||
#define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0)
|
||||
#define __builtin_ia32_vec_set_v4hi(A, D, N) \
|
||||
__builtin_ia32_vec_set_v4hi(A, D, 0)
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
|
||||
index 151201d97..741694e87 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
|
||||
@@ -94,7 +94,7 @@
|
||||
#define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0)
|
||||
|
||||
/* xmmintrin.h */
|
||||
-#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, 0, _MM_HINT_NTA)
|
||||
+#define __builtin_ia32_prefetch(A, B, C, D) __builtin_ia32_prefetch(A, 0, 3, 0)
|
||||
#define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0)
|
||||
#define __builtin_ia32_vec_set_v4hi(A, D, N) \
|
||||
__builtin_ia32_vec_set_v4hi(A, D, 0)
|
||||
@@ -843,6 +843,6 @@
|
||||
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
|
||||
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
|
||||
|
||||
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
||||
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,prefetchi")
|
||||
|
||||
#include <x86intrin.h>
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c
|
||||
index 293be094b..efe7df13b 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c
|
||||
@@ -1,7 +1,7 @@
|
||||
/* Test that <x86gprintrin.h> is usable with -O -std=c89 -pedantic-errors. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=x86-64 -madx -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdiri -mmwaitx -mpconfig -mpopcnt -mpku -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -mserialize -msgx -mshstk -mtbm -mtsxldtrk -mwaitpkg -mwbnoinvd -mxsave -mxsavec -mxsaveopt -mxsaves -mno-sse -mno-mmx" } */
|
||||
-/* { dg-additional-options "-muintr" { target { ! ia32 } } } */
|
||||
+/* { dg-additional-options "-muintr -mprefetchi" { target { ! ia32 } } } */
|
||||
|
||||
#include <x86gprintrin.h>
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c
|
||||
index c63302757..5f6970df6 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c
|
||||
@@ -1,7 +1,7 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=x86-64 -madx -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdiri -mmwaitx -mpconfig -mpopcnt -mpku -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -mserialize -msgx -mshstk -mtbm -mtsxldtrk -mwaitpkg -mwbnoinvd -mxsave -mxsavec -mxsaveopt -mxsaves -mno-sse -mno-mmx" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
-/* { dg-additional-options "-muintr" { target { ! ia32 } } } */
|
||||
+/* { dg-additional-options "-muintr -mprefetchi" { target { ! ia32 } } } */
|
||||
|
||||
/* Test that the intrinsics in <x86gprintrin.h> compile with optimization.
|
||||
All of them are defined as inline functions that reference the proper
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c
|
||||
index 3a7e1f4a1..5c075c375 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c
|
||||
@@ -1,7 +1,7 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=x86-64 -madx -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdiri -mmwaitx -mpconfig -mpopcnt -mpku -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -mserialize -msgx -mshstk -mtbm -mtsxldtrk -mwaitpkg -mwbnoinvd -mxsave -mxsavec -mxsaveopt -mxsaves -mno-sse -mno-mmx" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
-/* { dg-additional-options "-muintr" { target { ! ia32 } } } */
|
||||
+/* { dg-additional-options "-muintr -mprefetchi" { target { ! ia32 } } } */
|
||||
|
||||
/* Test that the intrinsics in <x86gprintrin.h> compile without optimization.
|
||||
All of them are defined as inline functions that reference the proper
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c
|
||||
index d8a6126e5..bda4ecea3 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
#ifndef DIFFERENT_PRAGMAS
|
||||
#ifdef __x86_64__
|
||||
-#pragma GCC target ("adx,bmi,bmi2,fsgsbase,fxsr,hreset,lwp,lzcnt,popcnt,rdrnd,rdseed,tbm,rtm,serialize,tsxldtrk,uintr,xsaveopt")
|
||||
+#pragma GCC target ("adx,bmi,bmi2,fsgsbase,fxsr,hreset,lwp,lzcnt,popcnt,prefetchi,rdrnd,rdseed,tbm,rtm,serialize,tsxldtrk,uintr,xsaveopt")
|
||||
#else
|
||||
#pragma GCC target ("adx,bmi,bmi2,fsgsbase,fxsr,hreset,lwp,lzcnt,popcnt,rdrnd,rdseed,tbm,rtm,serialize,tsxldtrk,xsaveopt")
|
||||
#endif
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c
|
||||
index 9ef66fdad..4aadfd0b3 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c
|
||||
@@ -28,7 +28,7 @@
|
||||
#define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1)
|
||||
|
||||
#ifdef __x86_64__
|
||||
-#pragma GCC target ("adx,bmi,bmi2,clflushopt,clwb,clzero,enqcmd,fsgsbase,fxsr,hreset,lwp,lzcnt,mwaitx,pconfig,pku,popcnt,rdpid,rdrnd,rdseed,tbm,rtm,serialize,sgx,tsxldtrk,uintr,xsavec,xsaveopt,xsaves,wbnoinvd")
|
||||
+#pragma GCC target ("adx,bmi,bmi2,clflushopt,clwb,clzero,enqcmd,fsgsbase,fxsr,hreset,lwp,lzcnt,mwaitx,pconfig,pku,popcnt,prefetchi,rdpid,rdrnd,rdseed,tbm,rtm,serialize,sgx,tsxldtrk,uintr,xsavec,xsaveopt,xsaves,wbnoinvd")
|
||||
#else
|
||||
#pragma GCC target ("adx,bmi,bmi2,clflushopt,clwb,clzero,enqcmd,fsgsbase,fxsr,hreset,lwp,lzcnt,mwaitx,pconfig,pku,popcnt,rdpid,rdrnd,rdseed,tbm,rtm,serialize,sgx,tsxldtrk,xsavec,xsaveopt,xsaves,wbnoinvd")
|
||||
#endif
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,277 +0,0 @@
|
||||
From 7f0f8b585cf60b4c09bca42b5339995c2cc74633 Mon Sep 17 00:00:00 2001
|
||||
From: Haochen Jiang <haochen.jiang@intel.com>
|
||||
Date: Mon, 7 Nov 2022 11:04:57 +0800
|
||||
Subject: [PATCH 22/32] Initial Granite Rapids Support
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h
|
||||
(get_intel_cpu): Handle Granite Rapids.
|
||||
* common/config/i386/i386-common.cc:
|
||||
(processor_names): Add graniterapids.
|
||||
(processor_alias_table): Ditto.
|
||||
* common/config/i386/i386-cpuinfo.h
|
||||
	(enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS.
|
||||
* config.gcc: Add -march=graniterapids.
|
||||
* config/i386/driver-i386.cc (host_detect_local_cpu):
|
||||
Handle graniterapids.
|
||||
* config/i386/i386-c.cc (ix86_target_macros_internal):
|
||||
Ditto.
|
||||
* config/i386/i386-options.cc (m_GRANITERAPIDS): New.
|
||||
(processor_cost_table): Add graniterapids.
|
||||
* config/i386/i386.h (enum processor_type):
|
||||
Add PROCESSOR_GRANITERAPIDS.
|
||||
(PTA_GRANITERAPIDS): Ditto.
|
||||
* doc/extend.texi: Add graniterapids.
|
||||
* doc/invoke.texi: Ditto.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* g++.target/i386/mv16.C: Add graniterapids.
|
||||
* gcc.target/i386/funcspec-56.inc: Handle new march.
|
||||
|
||||
(cherry picked from commit 339ffc5a792dd66647392a235f2f7f6344c5359e)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 9 +++++++++
|
||||
gcc/common/config/i386/i386-common.cc | 3 +++
|
||||
gcc/common/config/i386/i386-cpuinfo.h | 1 +
|
||||
gcc/config.gcc | 2 +-
|
||||
gcc/config/i386/driver-i386.cc | 5 ++++-
|
||||
gcc/config/i386/i386-c.cc | 7 +++++++
|
||||
gcc/config/i386/i386-options.cc | 4 +++-
|
||||
gcc/config/i386/i386.h | 3 +++
|
||||
gcc/doc/extend.texi | 3 +++
|
||||
gcc/doc/invoke.texi | 11 +++++++++++
|
||||
gcc/testsuite/g++.target/i386/mv16.C | 6 ++++++
|
||||
gcc/testsuite/gcc.target/i386/funcspec-56.inc | 1 +
|
||||
12 files changed, 52 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index f17e88144..1f75ff1ca 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -528,6 +528,15 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
cpu_model->__cpu_type = INTEL_COREI7;
|
||||
cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
|
||||
break;
|
||||
+ case 0xad:
|
||||
+ case 0xae:
|
||||
+ /* Granite Rapids. */
|
||||
+ cpu = "graniterapids";
|
||||
+ CHECK___builtin_cpu_is ("corei7");
|
||||
+ CHECK___builtin_cpu_is ("graniterapids");
|
||||
+ cpu_model->__cpu_type = INTEL_COREI7;
|
||||
+ cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
|
||||
+ break;
|
||||
case 0x17:
|
||||
case 0x1d:
|
||||
/* Penryn. */
|
||||
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||||
index c8cf532cf..1aa163463 100644
|
||||
--- a/gcc/common/config/i386/i386-common.cc
|
||||
+++ b/gcc/common/config/i386/i386-common.cc
|
||||
@@ -1855,6 +1855,7 @@ const char *const processor_names[] =
|
||||
"sapphirerapids",
|
||||
"alderlake",
|
||||
"rocketlake",
|
||||
+ "graniterapids",
|
||||
"intel",
|
||||
"geode",
|
||||
"k6",
|
||||
@@ -1973,6 +1974,8 @@ const pta processor_alias_table[] =
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
{"meteorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
+ {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
|
||||
+ M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
|
||||
{"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
|
||||
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
index 95b078acf..7b2d4d242 100644
|
||||
--- a/gcc/common/config/i386/i386-cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
@@ -92,6 +92,7 @@ enum processor_subtypes
|
||||
AMDFAM19H_ZNVER3,
|
||||
INTEL_COREI7_ROCKETLAKE,
|
||||
AMDFAM19H_ZNVER4,
|
||||
+ INTEL_COREI7_GRANITERAPIDS,
|
||||
CPU_SUBTYPE_MAX
|
||||
};
|
||||
|
||||
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||
index 81012c651..9bad238e3 100644
|
||||
--- a/gcc/config.gcc
|
||||
+++ b/gcc/config.gcc
|
||||
@@ -670,7 +670,7 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
|
||||
silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
|
||||
skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
|
||||
sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
|
||||
-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 native"
|
||||
+nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
|
||||
|
||||
# Additional x86 processors supported by --with-cpu=. Each processor
|
||||
# MUST be separated by exactly one space.
|
||||
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
|
||||
index 3b5161aed..ea8c3d8d1 100644
|
||||
--- a/gcc/config/i386/driver-i386.cc
|
||||
+++ b/gcc/config/i386/driver-i386.cc
|
||||
@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
||||
/* This is unknown family 0x6 CPU. */
|
||||
if (has_feature (FEATURE_AVX))
|
||||
{
|
||||
+ /* Assume Granite Rapids. */
|
||||
+ if (has_feature (FEATURE_AMX_FP16))
|
||||
+ cpu = "graniterapids";
|
||||
/* Assume Tiger Lake */
|
||||
- if (has_feature (FEATURE_AVX512VP2INTERSECT))
|
||||
+ else if (has_feature (FEATURE_AVX512VP2INTERSECT))
|
||||
cpu = "tigerlake";
|
||||
/* Assume Sapphire Rapids. */
|
||||
else if (has_feature (FEATURE_TSXLDTRK))
|
||||
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
|
||||
index 00880bd17..04f1dd682 100644
|
||||
--- a/gcc/config/i386/i386-c.cc
|
||||
+++ b/gcc/config/i386/i386-c.cc
|
||||
@@ -242,6 +242,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__sapphirerapids");
|
||||
def_or_undef (parse_in, "__sapphirerapids__");
|
||||
break;
|
||||
+ case PROCESSOR_GRANITERAPIDS:
|
||||
+ def_or_undef (parse_in, "__graniterapids");
|
||||
+ def_or_undef (parse_in, "__graniterapids__");
|
||||
+ break;
|
||||
case PROCESSOR_ALDERLAKE:
|
||||
def_or_undef (parse_in, "__alderlake");
|
||||
def_or_undef (parse_in, "__alderlake__");
|
||||
@@ -419,6 +423,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
case PROCESSOR_ROCKETLAKE:
|
||||
def_or_undef (parse_in, "__tune_rocketlake__");
|
||||
break;
|
||||
+ case PROCESSOR_GRANITERAPIDS:
|
||||
+ def_or_undef (parse_in, "__tune_graniterapids__");
|
||||
+ break;
|
||||
case PROCESSOR_INTEL:
|
||||
case PROCESSOR_GENERIC:
|
||||
break;
|
||||
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||
index 724375f02..6645e3259 100644
|
||||
--- a/gcc/config/i386/i386-options.cc
|
||||
+++ b/gcc/config/i386/i386-options.cc
|
||||
@@ -127,10 +127,11 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define m_SAPPHIRERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_SAPPHIRERAPIDS)
|
||||
#define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
|
||||
#define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
|
||||
+#define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
|
||||
#define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
|
||||
| m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
|
||||
| m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
|
||||
- | m_ROCKETLAKE)
|
||||
+ | m_ROCKETLAKE | m_GRANITERAPIDS)
|
||||
#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
|
||||
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
|
||||
#define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
|
||||
@@ -761,6 +762,7 @@ static const struct processor_costs *processor_cost_table[] =
|
||||
&icelake_cost,
|
||||
&alderlake_cost,
|
||||
&icelake_cost,
|
||||
+ &icelake_cost,
|
||||
&intel_cost,
|
||||
&geode_cost,
|
||||
&k6_cost,
|
||||
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
|
||||
index aaa136ba0..75953defc 100644
|
||||
--- a/gcc/config/i386/i386.h
|
||||
+++ b/gcc/config/i386/i386.h
|
||||
@@ -2250,6 +2250,7 @@ enum processor_type
|
||||
PROCESSOR_SAPPHIRERAPIDS,
|
||||
PROCESSOR_ALDERLAKE,
|
||||
PROCESSOR_ROCKETLAKE,
|
||||
+ PROCESSOR_GRANITERAPIDS,
|
||||
PROCESSOR_INTEL,
|
||||
PROCESSOR_GEODE,
|
||||
PROCESSOR_K6,
|
||||
@@ -2356,6 +2357,8 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
|
||||
| PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
|
||||
| PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
|
||||
| PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
|
||||
+constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
|
||||
+ | PTA_PREFETCHI;
|
||||
constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
|
||||
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
|
||||
constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index cb987f469..ba9faf4b2 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -21829,6 +21829,9 @@ Intel Core i7 Alderlake CPU.
|
||||
@item rocketlake
|
||||
Intel Core i7 Rocketlake CPU.
|
||||
|
||||
+@item graniterapids
|
||||
+Intel Core i7 graniterapids CPU.
|
||||
+
|
||||
@item bonnell
|
||||
Intel Atom Bonnell CPU.
|
||||
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 211b970c0..8ca831dc1 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -31615,6 +31615,17 @@ CLFLUSHOPT, XSAVEC, XSAVES, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD
|
||||
PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
|
||||
VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support.
|
||||
|
||||
+@item graniterapids
|
||||
+Intel graniterapids CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
|
||||
+SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
|
||||
+RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, HLE, RDSEED, ADCX, PREFETCHW,
|
||||
+AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
|
||||
+AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
|
||||
+VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
|
||||
+MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
|
||||
+SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
|
||||
+AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
|
||||
+
|
||||
@item k6
|
||||
AMD K6 CPU with MMX instruction set support.
|
||||
|
||||
diff --git a/gcc/testsuite/g++.target/i386/mv16.C b/gcc/testsuite/g++.target/i386/mv16.C
|
||||
index 683928729..65cc24f32 100644
|
||||
--- a/gcc/testsuite/g++.target/i386/mv16.C
|
||||
+++ b/gcc/testsuite/g++.target/i386/mv16.C
|
||||
@@ -92,6 +92,10 @@ int __attribute__ ((target("arch=rocketlake"))) foo () {
|
||||
return 24;
|
||||
}
|
||||
|
||||
+int __attribute__ ((target("arch=graniterapids"))) foo () {
|
||||
+ return 26;
|
||||
+}
|
||||
+
|
||||
int main ()
|
||||
{
|
||||
int val = foo ();
|
||||
@@ -130,6 +134,8 @@ int main ()
|
||||
assert (val == 23);
|
||||
else if (__builtin_cpu_is ("rocketlake"))
|
||||
assert (val == 24);
|
||||
+ else if (__builtin_cpu_is ("graniterapids"))
|
||||
+    assert (val == 26);
|
||||
else
|
||||
assert (val == 0);
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
index 9f073f78c..bdcfdbc88 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
@@ -188,6 +188,7 @@ extern void test_arch_cooperlake (void) __attribute__((__target__("arch=
|
||||
extern void test_arch_sapphirerapids (void) __attribute__((__target__("arch=sapphirerapids")));
|
||||
extern void test_arch_alderlake (void) __attribute__((__target__("arch=alderlake")));
|
||||
extern void test_arch_rocketlake (void) __attribute__((__target__("arch=rocketlake")));
|
||||
+extern void test_arch_graniterapids (void) __attribute__((__target__("arch=graniterapids")));
|
||||
extern void test_arch_k8 (void) __attribute__((__target__("arch=k8")));
|
||||
extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3")));
|
||||
extern void test_arch_opteron (void) __attribute__((__target__("arch=opteron")));
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,722 +0,0 @@
|
||||
From 4f1aff10d93cabe8dfbaf076b6d826a142efb6e1 Mon Sep 17 00:00:00 2001
|
||||
From: Haochen Jiang <haochen.jiang@intel.com>
|
||||
Date: Wed, 31 May 2023 10:45:00 +0800
|
||||
Subject: [PATCH 23/32] Support Intel AMX-COMPLEX
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_available_features):
|
||||
Detect AMX-COMPLEX.
|
||||
* common/config/i386/i386-common.cc
|
||||
(OPTION_MASK_ISA2_AMX_COMPLEX_SET,
|
||||
OPTION_MASK_ISA2_AMX_COMPLEX_UNSET): New.
|
||||
(ix86_handle_option): Handle -mamx-complex.
|
||||
* common/config/i386/i386-cpuinfo.h (enum processor_features):
|
||||
Add FEATURE_AMX_COMPLEX.
|
||||
	* common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for
|
||||
amx-complex.
|
||||
* config.gcc: Add amxcomplexintrin.h.
|
||||
* config/i386/cpuid.h (bit_AMX_COMPLEX): New.
|
||||
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
|
||||
__AMX_COMPLEX__.
|
||||
* config/i386/i386-isa.def (AMX_COMPLEX): Add DEF_PTA(AMX_COMPLEX).
|
||||
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
|
||||
Handle amx-complex.
|
||||
* config/i386/i386.opt: Add option -mamx-complex.
|
||||
* config/i386/immintrin.h: Include amxcomplexintrin.h.
|
||||
* doc/extend.texi: Document amx-complex.
|
||||
* doc/invoke.texi: Document -mamx-complex.
|
||||
* doc/sourcebuild.texi: Document target amx-complex.
|
||||
* config/i386/amxcomplexintrin.h: New file.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* g++.dg/other/i386-2.C: Add -mamx-complex.
|
||||
* g++.dg/other/i386-3.C: Ditto.
|
||||
* gcc.target/i386/amx-check.h: Add cpu check for AMX-COMPLEX.
|
||||
* gcc.target/i386/amx-helper.h: Add amx-complex support.
|
||||
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
|
||||
* gcc.target/i386/sse-12.c: Add -mamx-complex.
|
||||
* gcc.target/i386/sse-13.c: Ditto.
|
||||
* gcc.target/i386/sse-14.c: Ditto.
|
||||
* gcc.target/i386/sse-22.c: Add amx-complex.
|
||||
* gcc.target/i386/sse-23.c: Ditto.
|
||||
* lib/target-supports.exp (check_effective_target_amx_complex): New.
|
||||
* gcc.target/i386/amxcomplex-asmatt-1.c: New test.
|
||||
* gcc.target/i386/amxcomplex-asmintel-1.c: Ditto.
|
||||
* gcc.target/i386/amxcomplex-cmmimfp16ps-2.c: Ditto.
|
||||
* gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c: Ditto.
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 2 +
|
||||
gcc/common/config/i386/i386-common.cc | 19 +++++-
|
||||
gcc/common/config/i386/i386-cpuinfo.h | 1 +
|
||||
gcc/common/config/i386/i386-isas.h | 2 +
|
||||
gcc/config.gcc | 2 +-
|
||||
gcc/config/i386/amxcomplexintrin.h | 59 +++++++++++++++++++
|
||||
gcc/config/i386/cpuid.h | 1 +
|
||||
gcc/config/i386/i386-c.cc | 2 +
|
||||
gcc/config/i386/i386-isa.def | 1 +
|
||||
gcc/config/i386/i386-options.cc | 4 +-
|
||||
gcc/config/i386/i386.opt | 4 ++
|
||||
gcc/config/i386/immintrin.h | 2 +
|
||||
gcc/doc/extend.texi | 5 ++
|
||||
gcc/doc/invoke.texi | 7 ++-
|
||||
gcc/doc/sourcebuild.texi | 3 +
|
||||
gcc/testsuite/g++.dg/other/i386-2.C | 2 +-
|
||||
gcc/testsuite/g++.dg/other/i386-3.C | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/amx-check.h | 3 +
|
||||
gcc/testsuite/gcc.target/i386/amx-helper.h | 4 +-
|
||||
.../gcc.target/i386/amxcomplex-asmatt-1.c | 15 +++++
|
||||
.../gcc.target/i386/amxcomplex-asmintel-1.c | 12 ++++
|
||||
.../i386/amxcomplex-cmmimfp16ps-2.c | 53 +++++++++++++++++
|
||||
.../i386/amxcomplex-cmmrlfp16ps-2.c | 53 +++++++++++++++++
|
||||
gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 +
|
||||
gcc/testsuite/gcc.target/i386/sse-12.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/sse-13.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/sse-14.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/sse-22.c | 4 +-
|
||||
gcc/testsuite/gcc.target/i386/sse-23.c | 2 +-
|
||||
gcc/testsuite/lib/target-supports.exp | 11 ++++
|
||||
30 files changed, 268 insertions(+), 15 deletions(-)
|
||||
create mode 100644 gcc/config/i386/amxcomplexintrin.h
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 1f75ff1ca..39d3351db 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -798,6 +798,8 @@ get_available_features (struct __processor_model *cpu_model,
|
||||
{
|
||||
if (eax & bit_AMX_FP16)
|
||||
set_feature (FEATURE_AMX_FP16);
|
||||
+ if (edx & bit_AMX_COMPLEX)
|
||||
+ set_feature (FEATURE_AMX_COMPLEX);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||||
index 1aa163463..87e8afe9b 100644
|
||||
--- a/gcc/common/config/i386/i386-common.cc
|
||||
+++ b/gcc/common/config/i386/i386-common.cc
|
||||
@@ -109,6 +109,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
|
||||
#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
|
||||
#define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
|
||||
+#define OPTION_MASK_ISA2_AMX_COMPLEX_SET \
|
||||
+ (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX)
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
|
||||
as -msse4.2. */
|
||||
@@ -269,7 +271,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE
|
||||
#define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT
|
||||
#define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK
|
||||
-#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE
|
||||
+#define OPTION_MASK_ISA2_AMX_TILE_UNSET \
|
||||
+ (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET)
|
||||
#define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
|
||||
#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
|
||||
#define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
|
||||
@@ -279,6 +282,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
|
||||
#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
|
||||
#define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI
|
||||
+#define OPTION_MASK_ISA2_AMX_COMPLEX_UNSET OPTION_MASK_ISA2_AMX_COMPLEX
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
|
||||
as -mno-sse4.1. */
|
||||
@@ -1155,6 +1159,19 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
}
|
||||
return true;
|
||||
|
||||
+ case OPT_mamx_complex:
|
||||
+ if (value)
|
||||
+ {
|
||||
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_COMPLEX_SET;
|
||||
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_SET;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_COMPLEX_UNSET;
|
||||
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_UNSET;
|
||||
+ }
|
||||
+ return true;
|
||||
+
|
||||
case OPT_mfma:
|
||||
if (value)
|
||||
{
|
||||
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
index 7b2d4d242..56020faac 100644
|
||||
--- a/gcc/common/config/i386/i386-cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
@@ -243,6 +243,7 @@ enum processor_features
|
||||
FEATURE_X86_64_V4,
|
||||
FEATURE_AMX_FP16,
|
||||
FEATURE_PREFETCHI,
|
||||
+ FEATURE_AMX_COMPLEX,
|
||||
CPU_FEATURE_MAX
|
||||
};
|
||||
|
||||
diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
|
||||
index 6caf06249..cbef68479 100644
|
||||
--- a/gcc/common/config/i386/i386-isas.h
|
||||
+++ b/gcc/common/config/i386/i386-isas.h
|
||||
@@ -177,4 +177,6 @@ ISA_NAMES_TABLE_START
|
||||
ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
|
||||
ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
|
||||
ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
|
||||
+ ISA_NAMES_TABLE_ENTRY("amx-complex", FEATURE_AMX_COMPLEX,
|
||||
+ P_NONE, "-mamx-complex")
|
||||
ISA_NAMES_TABLE_END
|
||||
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||
index 9bad238e3..ca5c8f8a0 100644
|
||||
--- a/gcc/config.gcc
|
||||
+++ b/gcc/config.gcc
|
||||
@@ -424,7 +424,7 @@ i[34567]86-*-* | x86_64-*-*)
|
||||
amxbf16intrin.h x86gprintrin.h uintrintrin.h
|
||||
hresetintrin.h keylockerintrin.h avxvnniintrin.h
|
||||
mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
|
||||
- amxfp16intrin.h prfchiintrin.h"
|
||||
+ amxfp16intrin.h prfchiintrin.h amxcomplexintrin.h"
|
||||
;;
|
||||
ia64-*-*)
|
||||
extra_headers=ia64intrin.h
|
||||
diff --git a/gcc/config/i386/amxcomplexintrin.h b/gcc/config/i386/amxcomplexintrin.h
|
||||
new file mode 100644
|
||||
index 000000000..6ea1eca04
|
||||
--- /dev/null
|
||||
+++ b/gcc/config/i386/amxcomplexintrin.h
|
||||
@@ -0,0 +1,59 @@
|
||||
+/* Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+ This file is part of GCC.
|
||||
+
|
||||
+ GCC is free software; you can redistribute it and/or modify
|
||||
+ it under the terms of the GNU General Public License as published by
|
||||
+ the Free Software Foundation; either version 3, or (at your option)
|
||||
+ any later version.
|
||||
+
|
||||
+ GCC is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+ GNU General Public License for more details.
|
||||
+
|
||||
+ Under Section 7 of GPL version 3, you are granted additional
|
||||
+ permissions described in the GCC Runtime Library Exception, version
|
||||
+ 3.1, as published by the Free Software Foundation.
|
||||
+
|
||||
+ You should have received a copy of the GNU General Public License and
|
||||
+ a copy of the GCC Runtime Library Exception along with this program;
|
||||
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#if !defined _IMMINTRIN_H_INCLUDED
|
||||
+#error "Never use <amxcomplexintrin.h> directly; include <immintrin.h> instead."
|
||||
+#endif
|
||||
+
|
||||
+#ifndef _AMXCOMPLEXINTRIN_H_INCLUDED
|
||||
+#define _AMXCOMPLEXINTRIN_H_INCLUDED
|
||||
+
|
||||
+#if !defined(__AMX_COMPLEX__)
|
||||
+#pragma GCC push_options
|
||||
+#pragma GCC target("amx-complex")
|
||||
+#define __DISABLE_AMX_COMPLEX__
|
||||
+#endif /* __AMX_COMPLEX__ */
|
||||
+
|
||||
+#if defined(__x86_64__)
|
||||
+#define _tile_cmmimfp16ps_internal(src1_dst,src2,src3) \
|
||||
+ __asm__ volatile\
|
||||
+ ("{tcmmimfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tcmmimfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
|
||||
+
|
||||
+#define _tile_cmmrlfp16ps_internal(src1_dst,src2,src3) \
|
||||
+ __asm__ volatile\
|
||||
+ ("{tcmmrlfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tcmmrlfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
|
||||
+
|
||||
+#define _tile_cmmimfp16ps(src1_dst,src2,src3) \
|
||||
+ _tile_cmmimfp16ps_internal (src1_dst, src2, src3)
|
||||
+
|
||||
+#define _tile_cmmrlfp16ps(src1_dst,src2,src3) \
|
||||
+ _tile_cmmrlfp16ps_internal (src1_dst, src2, src3)
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
+#ifdef __DISABLE_AMX_COMPLEX__
|
||||
+#undef __DISABLE_AMX_COMPLEX__
|
||||
+#pragma GCC pop_options
|
||||
+#endif /* __DISABLE_AMX_COMPLEX__ */
|
||||
+
|
||||
+#endif /* _AMXCOMPLEXINTRIN_H_INCLUDED */
|
||||
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
|
||||
index 21100149a..530a45fad 100644
|
||||
--- a/gcc/config/i386/cpuid.h
|
||||
+++ b/gcc/config/i386/cpuid.h
|
||||
@@ -136,6 +136,7 @@
|
||||
#define bit_AMX_BF16 (1 << 22)
|
||||
#define bit_AMX_TILE (1 << 24)
|
||||
#define bit_AMX_INT8 (1 << 25)
|
||||
+#define bit_AMX_COMPLEX (1 << 8)
|
||||
|
||||
/* Extended State Enumeration Sub-leaf (%eax == 0xd, %ecx == 1) */
|
||||
#define bit_XSAVEOPT (1 << 0)
|
||||
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
|
||||
index 04f1dd682..5e0ac278c 100644
|
||||
--- a/gcc/config/i386/i386-c.cc
|
||||
+++ b/gcc/config/i386/i386-c.cc
|
||||
@@ -644,6 +644,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__AMX_FP16__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_PREFETCHI)
|
||||
def_or_undef (parse_in, "__PREFETCHI__");
|
||||
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_COMPLEX)
|
||||
+ def_or_undef (parse_in, "__AMX_COMPLEX__");
|
||||
if (TARGET_IAMCU)
|
||||
{
|
||||
def_or_undef (parse_in, "__iamcu");
|
||||
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
|
||||
index 744a7df85..7445b1bf7 100644
|
||||
--- a/gcc/config/i386/i386-isa.def
|
||||
+++ b/gcc/config/i386/i386-isa.def
|
||||
@@ -111,3 +111,4 @@ DEF_PTA(AVXVNNI)
|
||||
DEF_PTA(AVX512FP16)
|
||||
DEF_PTA(AMX_FP16)
|
||||
DEF_PTA(PREFETCHI)
|
||||
+DEF_PTA(AMX_COMPLEX)
|
||||
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||
index 6645e3259..7efd25084 100644
|
||||
--- a/gcc/config/i386/i386-options.cc
|
||||
+++ b/gcc/config/i386/i386-options.cc
|
||||
@@ -233,7 +233,8 @@ static struct ix86_target_opts isa2_opts[] =
|
||||
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
|
||||
{ "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
|
||||
{ "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 },
|
||||
- { "-mprefetchi", OPTION_MASK_ISA2_PREFETCHI }
|
||||
+ { "-mprefetchi", OPTION_MASK_ISA2_PREFETCHI },
|
||||
+ { "-mamx-complex", OPTION_MASK_ISA2_AMX_COMPLEX }
|
||||
};
|
||||
static struct ix86_target_opts isa_opts[] =
|
||||
{
|
||||
@@ -1080,6 +1081,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
||||
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
|
||||
IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16),
|
||||
IX86_ATTR_ISA ("prefetchi", OPT_mprefetchi),
|
||||
+ IX86_ATTR_ISA ("amx-complex", OPT_mamx_complex),
|
||||
|
||||
/* enum options */
|
||||
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
|
||||
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
|
||||
index 50cd114f6..fba94f3f6 100644
|
||||
--- a/gcc/config/i386/i386.opt
|
||||
+++ b/gcc/config/i386/i386.opt
|
||||
@@ -1234,3 +1234,7 @@ Support AMX-FP16 built-in functions and code generation.
|
||||
mprefetchi
|
||||
Target Mask(ISA2_PREFETCHI) Var(ix86_isa_flags2) Save
|
||||
Support PREFETCHI built-in functions and code generation.
|
||||
+
|
||||
+mamx-complex
|
||||
+Target Mask(ISA2_AMX_COMPLEX) Var(ix86_isa_flags2) Save
|
||||
+Support AMX-COMPLEX built-in functions and code generation.
|
||||
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
|
||||
index 0447ca4b2..bd819c7f4 100644
|
||||
--- a/gcc/config/i386/immintrin.h
|
||||
+++ b/gcc/config/i386/immintrin.h
|
||||
@@ -124,6 +124,8 @@
|
||||
|
||||
#include <amxbf16intrin.h>
|
||||
|
||||
+#include <amxcomplexintrin.h>
|
||||
+
|
||||
#include <prfchwintrin.h>
|
||||
|
||||
#include <keylockerintrin.h>
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index ba9faf4b2..d7b0bc802 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -7048,6 +7048,11 @@ Enable/disable the generation of the AMX-FP16 instructions.
|
||||
@cindex @code{target("prefetchi")} function attribute, x86
|
||||
Enable/disable the generation of the PREFETCHI instructions.
|
||||
|
||||
+@cindex @code{target("amx-complex")} function attribute, x86
|
||||
+@item amx-complex
|
||||
+@itemx no-amx-complex
|
||||
+Enable/disable the generation of the AMX-COMPLEX instructions.
|
||||
+
|
||||
@item cld
|
||||
@itemx no-cld
|
||||
@cindex @code{target("cld")} function attribute, x86
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 8ca831dc1..186b33481 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -1428,7 +1428,7 @@ See RS/6000 and PowerPC Options.
|
||||
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
|
||||
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
|
||||
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
|
||||
--mavx512fp16 -mamx-fp16 -mprefetchi @gol
|
||||
+-mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex @gol
|
||||
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
|
||||
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
|
||||
-mkl -mwidekl @gol
|
||||
@@ -32459,6 +32459,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@need 200
|
||||
@itemx -mprefetchi
|
||||
@opindex mprefetchi
|
||||
+@need 200
|
||||
+@opindex mamx-complex
|
||||
+@itemx -mamx-complex
|
||||
These switches enable the use of instructions in the MMX, SSE,
|
||||
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
|
||||
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
|
||||
@@ -32469,7 +32472,7 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
|
||||
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
|
||||
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
|
||||
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16,
|
||||
-AMX-FP16, PREFETCHI or CLDEMOTE extended instruction sets. Each has a corresponding
|
||||
+AMX-FP16, PREFETCHI, AMX-COMPLEX or CLDEMOTE extended instruction sets. Each has a corresponding
|
||||
@option{-mno-} option to disable use of these instructions.
|
||||
|
||||
These extensions are also available as built-in functions: see
|
||||
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
|
||||
index c68e492dc..454fae11a 100644
|
||||
--- a/gcc/doc/sourcebuild.texi
|
||||
+++ b/gcc/doc/sourcebuild.texi
|
||||
@@ -2472,6 +2472,9 @@ Target supports the execution of @code{amx-int8} instructions.
|
||||
@item amx_bf16
|
||||
Target supports the execution of @code{amx-bf16} instructions.
|
||||
|
||||
+@item amx_complex
|
||||
+Target supports the execution of @code{amx-complex} instructions.
|
||||
+
|
||||
@item amx_fp16
|
||||
Target supports the execution of @code{amx-fp16} instructions.
|
||||
|
||||
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
|
||||
index 72ed5fed0..ae1b8f632 100644
|
||||
--- a/gcc/testsuite/g++.dg/other/i386-2.C
|
||||
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */
|
||||
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
|
||||
index 9dd53653f..783e35774 100644
|
||||
--- a/gcc/testsuite/g++.dg/other/i386-3.C
|
||||
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */
|
||||
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h
|
||||
index 27dd37bf9..f1a04cf1f 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/amx-check.h
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amx-check.h
|
||||
@@ -216,6 +216,9 @@ main ()
|
||||
#ifdef AMX_FP16
|
||||
&& __builtin_cpu_supports ("amx-fp16")
|
||||
#endif
|
||||
+#ifdef AMX_COMPLEX
|
||||
+ && __builtin_cpu_supports ("amx-complex")
|
||||
+#endif
|
||||
#ifdef __linux__
|
||||
&& request_perm_xtile_data ()
|
||||
#endif
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amx-helper.h b/gcc/testsuite/gcc.target/i386/amx-helper.h
|
||||
index fe24d7067..6ed9f5eb3 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/amx-helper.h
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amx-helper.h
|
||||
@@ -1,6 +1,6 @@
|
||||
#ifndef AMX_HELPER_H_INCLUDED
|
||||
#define AMX_HELPER_H_INCLUDED
|
||||
-#if defined(AMX_FP16)
|
||||
+#if defined(AMX_FP16) || defined(AMX_COMPLEX)
|
||||
#include <immintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
@@ -12,7 +12,7 @@ typedef union
|
||||
uint16_t u;
|
||||
} union16f_uw;
|
||||
|
||||
-#if defined(AMX_FP16)
|
||||
+#if defined(AMX_FP16) || defined(AMX_COMPLEX)
|
||||
/* Transformation functions between fp16/float */
|
||||
static uint16_t make_f32_fp16 (float f)
|
||||
{
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c
|
||||
new file mode 100644
|
||||
index 000000000..b6745e34b
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile { target { ! ia32 } } } */
|
||||
+/* { dg-options "-O2 -mamx-complex" } */
|
||||
+/* { dg-final { scan-assembler "tcmmimfp16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
|
||||
+/* { dg-final { scan-assembler "tcmmrlfp16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
|
||||
+#include <immintrin.h>
|
||||
+
|
||||
+#define TMM1 1
|
||||
+#define TMM2 2
|
||||
+#define TMM3 3
|
||||
+
|
||||
+void TEST()
|
||||
+{
|
||||
+ _tile_cmmimfp16ps (TMM1, TMM2, TMM3);
|
||||
+ _tile_cmmrlfp16ps (TMM1, TMM2, TMM3);
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c
|
||||
new file mode 100644
|
||||
index 000000000..305465e88
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile { target { ! ia32 } } } */
|
||||
+/* { dg-require-effective-target masm_intel } */
|
||||
+/* { dg-options "-O2 -mamx-complex -masm=intel" } */
|
||||
+/* { dg-final { scan-assembler "tcmmimfp16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
|
||||
+/* { dg-final { scan-assembler "tcmmrlfp16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
|
||||
+#include <immintrin.h>
|
||||
+
|
||||
+void TEST()
|
||||
+{
|
||||
+ _tile_cmmimfp16ps (1, 2, 3);
|
||||
+ _tile_cmmrlfp16ps (1, 2, 3);
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c
|
||||
new file mode 100644
|
||||
index 000000000..6e3762c9f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c
|
||||
@@ -0,0 +1,53 @@
|
||||
+/* { dg-do run { target { ! ia32 } } } */
|
||||
+/* { dg-require-effective-target amx_complex } */
|
||||
+/* { dg-require-effective-target avx512fp16 } */
|
||||
+/* { dg-options "-O2 -mamx-complex -mavx512fp16" } */
|
||||
+#define AMX_COMPLEX
|
||||
+#define DO_TEST test_amx_complex_cmmimfp16ps
|
||||
+void test_amx_complex_cmmimfp16ps ();
|
||||
+#include "amx-helper.h"
|
||||
+
|
||||
+void calc_matrix_cmmimfp16ps (__tile *dst, __tile *src1, __tile *src2)
|
||||
+{
|
||||
+ uint16_t *src1_buf = (uint16_t *) src1->buf;
|
||||
+ uint16_t *src2_buf = (uint16_t *) src2->buf;
|
||||
+ float *dst_buf = (float *) dst->buf;
|
||||
+
|
||||
+ int M = src1->rows;
|
||||
+ int N = src1->colsb / 4;
|
||||
+ int K = src2->colsb / 4;
|
||||
+ int i, j, k, t;
|
||||
+
|
||||
+ for (i = 0; i < M; i++)
|
||||
+ for (j = 0; j < N; j++)
|
||||
+ for (k = 0; k < K; k++)
|
||||
+ for (t = 0; t < 2; t+=2)
|
||||
+ dst_buf[i * N + k] +=
|
||||
+ (make_fp16_f32(src1_buf[i * 2 * N + 2 * j + t]) *
|
||||
+ make_fp16_f32(src2_buf[j * 2 * K + 2 * k + t + 1])) +
|
||||
+ (make_fp16_f32(src1_buf[i * 2 * N + 2 * j + t + 1]) *
|
||||
+ make_fp16_f32(src2_buf[j * 2 * K + 2 * k + t]));
|
||||
+}
|
||||
+
|
||||
+void test_amx_complex_cmmimfp16ps ()
|
||||
+{
|
||||
+ __tilecfg_u cfg;
|
||||
+ __tile dst, dst_ref, src1, src2;
|
||||
+ uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024];
|
||||
+
|
||||
+ init_fp16_max_tile_buffer (tmp_dst_buf);
|
||||
+ init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf);
|
||||
+
|
||||
+ init_tile_config (&cfg);
|
||||
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf);
|
||||
+ init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf);
|
||||
+ init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf);
|
||||
+
|
||||
+ calc_matrix_cmmimfp16ps (&dst, &src1, &src2);
|
||||
+
|
||||
+ _tile_cmmimfp16ps (1, 2, 3);
|
||||
+ _tile_stored (1, dst_ref.buf, _STRIDE);
|
||||
+
|
||||
+ if (!check_tile_register (&dst_ref, &dst))
|
||||
+ abort ();
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c
|
||||
new file mode 100644
|
||||
index 000000000..15940708a
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c
|
||||
@@ -0,0 +1,53 @@
|
||||
+/* { dg-do run { target { ! ia32 } } } */
|
||||
+/* { dg-require-effective-target amx_complex } */
|
||||
+/* { dg-require-effective-target avx512fp16 } */
|
||||
+/* { dg-options "-O2 -mamx-complex -mavx512fp16" } */
|
||||
+#define AMX_COMPLEX
|
||||
+#define DO_TEST test_amx_complex_cmmrlfp16ps
|
||||
+void test_amx_complex_cmmrlfp16ps();
|
||||
+#include "amx-helper.h"
|
||||
+
|
||||
+void calc_matrix_cmmrlfp16ps (__tile *dst, __tile *src1, __tile *src2)
|
||||
+{
|
||||
+ uint16_t *src1_buf = (uint16_t *) src1->buf;
|
||||
+ uint16_t *src2_buf = (uint16_t *) src2->buf;
|
||||
+ float *dst_buf = (float *) dst->buf;
|
||||
+
|
||||
+ int M = src1->rows;
|
||||
+ int N = src1->colsb / 4;
|
||||
+ int K = src2->colsb / 4;
|
||||
+ int i, j, k, t;
|
||||
+
|
||||
+ for (i = 0; i < M; i++)
|
||||
+ for (j = 0; j < N; j++)
|
||||
+ for (k = 0; k < K; k++)
|
||||
+ for (t = 0; t < 2; t+=2)
|
||||
+ dst_buf[i * N + k] +=
|
||||
+ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t]) *
|
||||
+ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t])) -
|
||||
+ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t + 1]) *
|
||||
+ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t + 1]));
|
||||
+}
|
||||
+
|
||||
+void test_amx_complex_cmmrlfp16ps ()
|
||||
+{
|
||||
+ __tilecfg_u cfg;
|
||||
+ __tile dst, dst_ref, src1, src2;
|
||||
+ uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024];
|
||||
+
|
||||
+ init_fp16_max_tile_buffer (tmp_dst_buf);
|
||||
+ init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf);
|
||||
+
|
||||
+ init_tile_config (&cfg);
|
||||
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf);
|
||||
+ init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf);
|
||||
+ init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf);
|
||||
+
|
||||
+ calc_matrix_cmmrlfp16ps (&dst, &src1, &src2);
|
||||
+
|
||||
+ _tile_cmmrlfp16ps (1, 2, 3);
|
||||
+ _tile_stored (1, dst_ref.buf, _STRIDE);
|
||||
+
|
||||
+ if (!check_tile_register (&dst_ref, &dst))
|
||||
+ abort ();
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
index bdcfdbc88..1a2f3b83d 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
@@ -82,6 +82,7 @@ extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
|
||||
extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
|
||||
extern void test_amx_fp16 (void) __attribute__((__target__("amx-fp16")));
|
||||
extern void test_prefetchi (void) __attribute__((__target__("prefetchi")));
|
||||
+extern void test_amx_complex (void) __attribute__((__target__("amx-complex")));
|
||||
|
||||
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
|
||||
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
|
||||
@@ -165,6 +166,7 @@ extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
|
||||
extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
|
||||
extern void test_no_amx_fp16 (void) __attribute__((__target__("no-amx-fp16")));
|
||||
extern void test_no_prefetchi (void) __attribute__((__target__("no-prefetchi")));
|
||||
+extern void test_no_amx_complex (void) __attribute__((__target__("no-amx-complex")));
|
||||
|
||||
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
|
||||
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
|
||||
index 9ab4a7e0c..d2aadd506 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
|
||||
@@ -3,7 +3,7 @@
|
||||
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
|
||||
with -O -std=c89 -pedantic-errors. */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mamx-fp16" } */
|
||||
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mamx-fp16 -mamx-complex" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
|
||||
index db7c0fc7a..c39382836 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */
|
||||
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
|
||||
index eaa1a8d81..c34ac1aec 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
||||
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mamx-complex" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
|
||||
index 19afe639d..c3667b829 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
|
||||
@@ -103,7 +103,7 @@
|
||||
|
||||
|
||||
#ifndef DIFFERENT_PRAGMAS
|
||||
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
||||
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,amx-complex")
|
||||
#endif
|
||||
|
||||
/* Following intrinsics require immediate arguments. They
|
||||
@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
|
||||
|
||||
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
|
||||
#ifdef DIFFERENT_PRAGMAS
|
||||
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
||||
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,amx-complex")
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
test_1 (_cvtss_sh, unsigned short, float, 1)
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
|
||||
index 741694e87..756b6eb9c 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
|
||||
@@ -843,6 +843,6 @@
|
||||
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
|
||||
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
|
||||
|
||||
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,prefetchi")
|
||||
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,prefetchi,amx-complex")
|
||||
|
||||
#include <x86intrin.h>
|
||||
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
|
||||
index 0d83c780c..d404058fd 100644
|
||||
--- a/gcc/testsuite/lib/target-supports.exp
|
||||
+++ b/gcc/testsuite/lib/target-supports.exp
|
||||
@@ -9421,6 +9421,17 @@ proc check_effective_target_avxvnni { } {
|
||||
} "-mavxvnni" ]
|
||||
}
|
||||
|
||||
+# Return 1 if amx-complex instructions can be compiled.
|
||||
+proc check_effective_target_amx_complex { } {
|
||||
+ return [check_no_compiler_messages amx_complex object {
|
||||
+ void
|
||||
+ foo ()
|
||||
+ {
|
||||
+ __asm__ volatile ("tcmmimfp16ps\t%%tmm1, %%tmm2, %%tmm3" ::);
|
||||
+ }
|
||||
+ } "-mamx-complex" ]
|
||||
+}
|
||||
+
|
||||
# Return 1 if sse instructions can be compiled.
|
||||
proc check_effective_target_sse { } {
|
||||
return [check_no_compiler_messages sse object {
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
From 40469a6119085e4c4741bcaeb9418606d28b40c4 Mon Sep 17 00:00:00 2001
|
||||
From: Haochen Jiang <haochen.jiang@intel.com>
|
||||
Date: Fri, 31 Mar 2023 10:49:14 +0800
|
||||
Subject: [PATCH 24/32] i386: Add AMX-COMPLEX to Granite Rapids
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/i386/i386.h (PTA_GRANITERAPIDS): Add PTA_AMX_COMPLEX.
|
||||
|
||||
(cherry picked from commit afa87bd5f7b126e20268aa959441cde2e02bba0e)
|
||||
---
|
||||
gcc/config/i386/i386.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
|
||||
index 75953defc..56d7794dc 100644
|
||||
--- a/gcc/config/i386/i386.h
|
||||
+++ b/gcc/config/i386/i386.h
|
||||
@@ -2358,7 +2358,7 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
|
||||
| PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
|
||||
| PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
|
||||
constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
|
||||
- | PTA_PREFETCHI;
|
||||
+ | PTA_PREFETCHI | PTA_AMX_COMPLEX;
|
||||
constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
|
||||
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
|
||||
constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,212 +0,0 @@
|
||||
From 125e5d448538f7534e0fe3df9b7947cf41605b51 Mon Sep 17 00:00:00 2001
|
||||
From: "Mo, Zewei" <zewei.mo@intel.com>
|
||||
Date: Mon, 3 Jul 2023 11:00:26 +0800
|
||||
Subject: [PATCH 25/32] Initial Granite Rapids D Support
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h
|
||||
(get_intel_cpu): Handle Granite Rapids D.
|
||||
* common/config/i386/i386-common.cc
|
||||
(processor_alias_table): Add graniterapids-d.
|
||||
* common/config/i386/i386-cpuinfo.h
|
||||
(enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS_D.
|
||||
* config.gcc: Add -march=graniterapids-d.
|
||||
* config/i386/driver-i386.cc (host_detect_local_cpu):
|
||||
Handle graniterapids-d.
|
||||
* config/i386/i386.h (PTA_GRANITERAPIDS_D): New.
|
||||
* doc/extend.texi: Add graniterapids-d.
|
||||
* doc/invoke.texi: Ditto.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* g++.target/i386/mv16.C: Add graniterapids-d.
|
||||
* gcc.target/i386/funcspec-56.inc: Handle new march.
|
||||
|
||||
(cherry picked from commit a0cb65d34cc141571e870fb3b53b3ff47ae3338d)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 9 ++++++++-
|
||||
gcc/common/config/i386/i386-common.cc | 2 ++
|
||||
gcc/common/config/i386/i386-cpuinfo.h | 1 +
|
||||
gcc/config.gcc | 3 ++-
|
||||
gcc/config/i386/driver-i386.cc | 5 ++++-
|
||||
gcc/config/i386/i386.h | 4 +++-
|
||||
gcc/doc/extend.texi | 3 +++
|
||||
gcc/doc/invoke.texi | 11 +++++++++++
|
||||
gcc/testsuite/g++.target/i386/mv16.C | 6 ++++++
|
||||
gcc/testsuite/gcc.target/i386/funcspec-56.inc | 1 +
|
||||
10 files changed, 41 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 39d3351db..1e53248ef 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -529,7 +529,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
|
||||
break;
|
||||
case 0xad:
|
||||
- case 0xae:
|
||||
/* Granite Rapids. */
|
||||
cpu = "graniterapids";
|
||||
CHECK___builtin_cpu_is ("corei7");
|
||||
@@ -537,6 +536,14 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
cpu_model->__cpu_type = INTEL_COREI7;
|
||||
cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
|
||||
break;
|
||||
+ case 0xae:
|
||||
+ /* Granite Rapids D. */
|
||||
+ cpu = "graniterapids-d";
|
||||
+ CHECK___builtin_cpu_is ("corei7");
|
||||
+ CHECK___builtin_cpu_is ("graniterapids-d");
|
||||
+ cpu_model->__cpu_type = INTEL_COREI7;
|
||||
+ cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS_D;
|
||||
+ break;
|
||||
case 0x17:
|
||||
case 0x1d:
|
||||
/* Penryn. */
|
||||
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||||
index 87e8afe9b..28f468f48 100644
|
||||
--- a/gcc/common/config/i386/i386-common.cc
|
||||
+++ b/gcc/common/config/i386/i386-common.cc
|
||||
@@ -1993,6 +1993,8 @@ const pta processor_alias_table[] =
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
{"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
|
||||
+ {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
|
||||
+ M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
|
||||
{"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
|
||||
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
index 56020faac..a32f32c97 100644
|
||||
--- a/gcc/common/config/i386/i386-cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
@@ -93,6 +93,7 @@ enum processor_subtypes
|
||||
INTEL_COREI7_ROCKETLAKE,
|
||||
AMDFAM19H_ZNVER4,
|
||||
INTEL_COREI7_GRANITERAPIDS,
|
||||
+ INTEL_COREI7_GRANITERAPIDS_D,
|
||||
CPU_SUBTYPE_MAX
|
||||
};
|
||||
|
||||
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||
index ca5c8f8a0..3108ac4eb 100644
|
||||
--- a/gcc/config.gcc
|
||||
+++ b/gcc/config.gcc
|
||||
@@ -670,7 +670,8 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
|
||||
silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
|
||||
skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
|
||||
sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
|
||||
-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
|
||||
+nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids \
|
||||
+graniterapids-d native"
|
||||
|
||||
# Additional x86 processors supported by --with-cpu=. Each processor
|
||||
# MUST be separated by exactly one space.
|
||||
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
|
||||
index ea8c3d8d1..e3bca4b49 100644
|
||||
--- a/gcc/config/i386/driver-i386.cc
|
||||
+++ b/gcc/config/i386/driver-i386.cc
|
||||
@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
||||
/* This is unknown family 0x6 CPU. */
|
||||
if (has_feature (FEATURE_AVX))
|
||||
{
|
||||
+ /* Assume Granite Rapids D. */
|
||||
+ if (has_feature (FEATURE_AMX_COMPLEX))
|
||||
+ cpu = "graniterapids-d";
|
||||
/* Assume Granite Rapids. */
|
||||
- if (has_feature (FEATURE_AMX_FP16))
|
||||
+ else if (has_feature (FEATURE_AMX_FP16))
|
||||
cpu = "graniterapids";
|
||||
/* Assume Tiger Lake */
|
||||
else if (has_feature (FEATURE_AVX512VP2INTERSECT))
|
||||
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
|
||||
index 56d7794dc..eda3e5e5b 100644
|
||||
--- a/gcc/config/i386/i386.h
|
||||
+++ b/gcc/config/i386/i386.h
|
||||
@@ -2358,7 +2358,9 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
|
||||
| PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
|
||||
| PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
|
||||
constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
|
||||
- | PTA_PREFETCHI | PTA_AMX_COMPLEX;
|
||||
+ | PTA_PREFETCHI;
|
||||
+constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
|
||||
+ | PTA_AMX_COMPLEX;
|
||||
constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
|
||||
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
|
||||
constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index d7b0bc802..674db2f1a 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -21837,6 +21837,9 @@ Intel Core i7 Rocketlake CPU.
|
||||
@item graniterapids
|
||||
Intel Core i7 graniterapids CPU.
|
||||
|
||||
+@item graniterapids-d
|
||||
+Intel Core i7 graniterapids D CPU.
|
||||
+
|
||||
@item bonnell
|
||||
Intel Atom Bonnell CPU.
|
||||
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 186b33481..a2ec060fd 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -31626,6 +31626,17 @@ MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
|
||||
SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
|
||||
AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
|
||||
|
||||
+@item graniterapids-d
|
||||
+Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
|
||||
+SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
|
||||
+RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
|
||||
+AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
|
||||
+AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
|
||||
+VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
|
||||
+MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
|
||||
+SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
|
||||
+AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
|
||||
+
|
||||
@item k6
|
||||
AMD K6 CPU with MMX instruction set support.
|
||||
|
||||
diff --git a/gcc/testsuite/g++.target/i386/mv16.C b/gcc/testsuite/g++.target/i386/mv16.C
|
||||
index 65cc24f32..17b1fc722 100644
|
||||
--- a/gcc/testsuite/g++.target/i386/mv16.C
|
||||
+++ b/gcc/testsuite/g++.target/i386/mv16.C
|
||||
@@ -96,6 +96,10 @@ int __attribute__ ((target("arch=graniterapids"))) foo () {
|
||||
return 26;
|
||||
}
|
||||
|
||||
+int __attribute__ ((target("arch=graniterapids-d"))) foo () {
|
||||
+ return 28;
|
||||
+}
|
||||
+
|
||||
int main ()
|
||||
{
|
||||
int val = foo ();
|
||||
@@ -136,6 +140,8 @@ int main ()
|
||||
assert (val == 24);
|
||||
else if (__builtin_cpu_is ("graniterapids"))
|
||||
assert (val == 25);
|
||||
+ else if (__builtin_cpu_is ("graniterapids-d"))
|
||||
+ assert (val == 26);
|
||||
else
|
||||
assert (val == 0);
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
index 1a2f3b83d..f0f3397a7 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
@@ -191,6 +191,7 @@ extern void test_arch_sapphirerapids (void) __attribute__((__target__("arch=sapp
|
||||
extern void test_arch_alderlake (void) __attribute__((__target__("arch=alderlake")));
|
||||
extern void test_arch_rocketlake (void) __attribute__((__target__("arch=rocketlake")));
|
||||
extern void test_arch_graniterapids (void) __attribute__((__target__("arch=graniterapids")));
|
||||
+extern void test_arch_graniterapids_d (void) __attribute__((__target__("arch=graniterapids-d")));
|
||||
extern void test_arch_k8 (void) __attribute__((__target__("arch=k8")));
|
||||
extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3")));
|
||||
extern void test_arch_opteron (void) __attribute__((__target__("arch=opteron")));
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,48 +0,0 @@
|
||||
From a809a6a416af4d08f7feeadfdd5d1f5a76a830b5 Mon Sep 17 00:00:00 2001
|
||||
From: Haochen Jiang <haochen.jiang@intel.com>
|
||||
Date: Thu, 20 Jul 2023 10:47:18 +0800
|
||||
Subject: [PATCH 26/32] Correct Granite Rapids{, D} documentation
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* doc/invoke.texi: Remove AVX512VP2INTERSECT in
|
||||
Granite Rapids{, D} from documentation.
|
||||
|
||||
(cherry picked from commit 38daaaa91438d3f635a10bf5d5181c3b29f07df9)
|
||||
---
|
||||
gcc/doc/invoke.texi | 12 ++++++------
|
||||
1 file changed, 6 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index a2ec060fd..4d3eccdb2 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -31622,9 +31622,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
|
||||
AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
|
||||
AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
|
||||
VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
|
||||
-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
|
||||
-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
|
||||
-AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
|
||||
+MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
|
||||
+UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, AMX-FP16
|
||||
+and PREFETCHI instruction set support.
|
||||
|
||||
@item graniterapids-d
|
||||
Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
|
||||
@@ -31633,9 +31633,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
|
||||
AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
|
||||
AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
|
||||
VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
|
||||
-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
|
||||
-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
|
||||
-AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
|
||||
+MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
|
||||
+UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, AMX-FP16,
|
||||
+PREFETCHI and AMX-COMPLEX instruction set support.
|
||||
|
||||
@item k6
|
||||
AMD K6 CPU with MMX instruction set support.
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
From 62852213bc6d3e56804ca05826bb95a3a2fe4eba Mon Sep 17 00:00:00 2001
|
||||
From: "Hu, Lin1" <lin1.hu@intel.com>
|
||||
Date: Thu, 15 Dec 2022 15:51:18 +0800
|
||||
Subject: [PATCH 27/32] i386: Remove Meteorlake's family_model
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_intel_cpu): Remove case 0xb5
|
||||
for meteorlake.
|
||||
|
||||
(cherry picked from commit 9e74b7ec0b218364905e3e7de5c41e8148ffc61b)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 1e53248ef..348bc0c12 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -510,7 +510,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
/* Alder Lake. */
|
||||
case 0xb7:
|
||||
/* Raptor Lake. */
|
||||
- case 0xb5:
|
||||
case 0xaa:
|
||||
case 0xac:
|
||||
/* Meteor Lake. */
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
From 73042aa18fe70aa30a9c7c760b08e642560ecccd Mon Sep 17 00:00:00 2001
|
||||
From: "Cui, Lili" <lili.cui@intel.com>
|
||||
Date: Thu, 29 Jun 2023 03:10:35 +0000
|
||||
Subject: [PATCH 28/32] x86: Update model values for Alderlake, Rocketlake and
|
||||
Raptorlake.
|
||||
|
||||
Update model values for Alderlake, Rocketlake and Raptorlake according to SDM.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
|
||||
from Rocketlake, move model value 0xbf from Alderlake to Raptorlake.
|
||||
|
||||
(cherry picked from commit e510c3be13a8ccdf1fc1b27c2501c126d493f335)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 348bc0c12..f9bcb6fad 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
case 0x9a:
|
||||
/* Alder Lake. */
|
||||
case 0xb7:
|
||||
+ case 0xbf:
|
||||
/* Raptor Lake. */
|
||||
case 0xaa:
|
||||
case 0xac:
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,32 +0,0 @@
|
||||
From 3dbe28984e0f9c24d6670cfba42983bc32c08b0a Mon Sep 17 00:00:00 2001
|
||||
From: "Cui, Lili" <lili.cui@intel.com>
|
||||
Date: Mon, 14 Aug 2023 02:06:00 +0000
|
||||
Subject: [PATCH 29/32] x86: Update model values for Raptorlake.
|
||||
|
||||
Update model values for Raptorlake according to SDM.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_intel_cpu): Add model value 0xba
|
||||
to Raptorlake.
|
||||
|
||||
(cherry picked from commit 614052dd4ea083e086712809c754ffebd9361316)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index f9bcb6fad..da1568fd1 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
case 0x9a:
|
||||
/* Alder Lake. */
|
||||
case 0xb7:
|
||||
+ case 0xba:
|
||||
case 0xbf:
|
||||
/* Raptor Lake. */
|
||||
case 0xaa:
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -1,159 +0,0 @@
|
||||
From 8db0f3cd29bd7f937ffa01dd1100360fbbf5b6f4 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Tue, 22 Aug 2023 18:18:31 +0800
|
||||
Subject: [PATCH 30/32] Fix target_clone ("arch=graniterapids-d")
|
||||
|
||||
Both "graniterapids-d" and "graniterapids" are attached with
|
||||
PROCESSOR_GRANITERAPIDS in processor_alias_table but mapped to
|
||||
different __cpu_subtype in get_intel_cpu.
|
||||
|
||||
And get_builtin_code_for_version will try to match the first
|
||||
PROCESSOR_GRANITERAPIDS in processor_alias_table which maps to
|
||||
"graniterapids" here.
|
||||
|
||||
1861 else if (new_target->arch_specified && new_target->arch > 0)
|
||||
1862 for (i = 0; i < pta_size; i++)
|
||||
1863 if (processor_alias_table[i].processor == new_target->arch)
|
||||
1864 {
|
||||
1865 const pta *arch_info = &processor_alias_table[i];
|
||||
1866 switch (arch_info->priority)
|
||||
1867 {
|
||||
1868 default:
|
||||
1869 arg_str = arch_info->name;
|
||||
|
||||
This mismatch makes dispatch_function_versions check the predicate
|
||||
of __builtin_cpu_is ("graniterapids") for "graniterapids-d" and causes
|
||||
the issue.
|
||||
The patch explicitly adds PROCESSOR_GRANITERAPIDS_D to make a distinction.
|
||||
|
||||
For "alderlake","raptorlake", "meteorlake" they share same isa, cost,
|
||||
tuning, and mapped to the same __cpu_type/__cpu_subtype in
|
||||
get_intel_cpu, so no need to add PROCESSOR_RAPTORLAKE and others.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/i386-common.cc (processor_names): Add new
|
||||
member graniterapids-d.
|
||||
* config/i386/i386-options.cc (processor_alias_table): Update
|
||||
table with PROCESSOR_GRANITERAPIDS_D.
|
||||
(m_GRANITERAPIDS_D): New macro.
|
||||
(m_CORE_AVX512): Add m_GRANITERAPIDS_D.
|
||||
(processor_cost_table): Add icelake_cost for
|
||||
PROCESSOR_GRANITERAPIDS_D.
|
||||
* config/i386/i386.h (enum processor_type): Add new member
|
||||
PROCESSOR_GRANITERAPIDS_D.
|
||||
* config/i386/i386-c.cc (ix86_target_macros_internal): Handle
|
||||
PROCESSOR_GRANITERAPIDS_D
|
||||
---
|
||||
gcc/common/config/i386/i386-common.cc | 6 ++++--
|
||||
gcc/config/i386/i386-c.cc | 8 ++++++++
|
||||
gcc/config/i386/i386-options.cc | 4 +++-
|
||||
gcc/config/i386/i386.h | 3 ++-
|
||||
4 files changed, 17 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||||
index 28f468f48..bec6801ce 100644
|
||||
--- a/gcc/common/config/i386/i386-common.cc
|
||||
+++ b/gcc/common/config/i386/i386-common.cc
|
||||
@@ -1873,6 +1873,7 @@ const char *const processor_names[] =
|
||||
"alderlake",
|
||||
"rocketlake",
|
||||
"graniterapids",
|
||||
+ "graniterapids-d",
|
||||
"intel",
|
||||
"geode",
|
||||
"k6",
|
||||
@@ -1993,8 +1994,9 @@ const pta processor_alias_table[] =
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
{"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
|
||||
- {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
|
||||
- M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
|
||||
+ {"graniterapids-d", PROCESSOR_GRANITERAPIDS_D, CPU_HASWELL,
|
||||
+ PTA_GRANITERAPIDS_D, M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D),
|
||||
+ P_PROC_AVX512F},
|
||||
{"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
|
||||
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
|
||||
index 5e0ac278c..49f0db2b8 100644
|
||||
--- a/gcc/config/i386/i386-c.cc
|
||||
+++ b/gcc/config/i386/i386-c.cc
|
||||
@@ -246,6 +246,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__graniterapids");
|
||||
def_or_undef (parse_in, "__graniterapids__");
|
||||
break;
|
||||
+ case PROCESSOR_GRANITERAPIDS_D:
|
||||
+ def_or_undef (parse_in, "__graniterapids_d");
|
||||
+ def_or_undef (parse_in, "__graniterapids_d__");
|
||||
+ break;
|
||||
case PROCESSOR_ALDERLAKE:
|
||||
def_or_undef (parse_in, "__alderlake");
|
||||
def_or_undef (parse_in, "__alderlake__");
|
||||
@@ -254,6 +258,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__rocketlake");
|
||||
def_or_undef (parse_in, "__rocketlake__");
|
||||
break;
|
||||
+
|
||||
/* use PROCESSOR_max to not set/unset the arch macro. */
|
||||
case PROCESSOR_max:
|
||||
break;
|
||||
@@ -426,6 +431,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
case PROCESSOR_GRANITERAPIDS:
|
||||
def_or_undef (parse_in, "__tune_graniterapids__");
|
||||
break;
|
||||
+ case PROCESSOR_GRANITERAPIDS_D:
|
||||
+ def_or_undef (parse_in, "__tune_graniterapids_d__");
|
||||
+ break;
|
||||
case PROCESSOR_INTEL:
|
||||
case PROCESSOR_GENERIC:
|
||||
break;
|
||||
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||
index 7efd25084..86932d719 100644
|
||||
--- a/gcc/config/i386/i386-options.cc
|
||||
+++ b/gcc/config/i386/i386-options.cc
|
||||
@@ -128,10 +128,11 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
|
||||
#define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
|
||||
#define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
|
||||
+#define m_GRANITERAPIDS_D (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS_D)
|
||||
#define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
|
||||
| m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
|
||||
| m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
|
||||
- | m_ROCKETLAKE | m_GRANITERAPIDS)
|
||||
+ | m_ROCKETLAKE | m_GRANITERAPIDS | m_GRANITERAPIDS_D)
|
||||
#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
|
||||
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
|
||||
#define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
|
||||
@@ -764,6 +765,7 @@ static const struct processor_costs *processor_cost_table[] =
|
||||
&alderlake_cost,
|
||||
&icelake_cost,
|
||||
&icelake_cost,
|
||||
+ &icelake_cost,
|
||||
&intel_cost,
|
||||
&geode_cost,
|
||||
&k6_cost,
|
||||
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
|
||||
index eda3e5e5b..5052f878d 100644
|
||||
--- a/gcc/config/i386/i386.h
|
||||
+++ b/gcc/config/i386/i386.h
|
||||
@@ -2216,7 +2216,7 @@ extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER];
|
||||
#define DEFAULT_LARGE_SECTION_THRESHOLD 65536
|
||||
|
||||
/* Which processor to tune code generation for. These must be in sync
|
||||
- with processor_target_table in i386.cc. */
|
||||
+ with processor_cost_table in i386-options.cc. */
|
||||
|
||||
enum processor_type
|
||||
{
|
||||
@@ -2251,6 +2251,7 @@ enum processor_type
|
||||
PROCESSOR_ALDERLAKE,
|
||||
PROCESSOR_ROCKETLAKE,
|
||||
PROCESSOR_GRANITERAPIDS,
|
||||
+ PROCESSOR_GRANITERAPIDS_D,
|
||||
PROCESSOR_INTEL,
|
||||
PROCESSOR_GEODE,
|
||||
PROCESSOR_K6,
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
62
gcc.spec
62
gcc.spec
@ -2,7 +2,7 @@
|
||||
%global gcc_major 12
|
||||
# Note, gcc_release must be integer, if you want to add suffixes to
|
||||
# %%{release}, append them after %%{gcc_release} on Release: line.
|
||||
%global gcc_release 25
|
||||
%global gcc_release 26
|
||||
|
||||
%global _unpackaged_files_terminate_build 0
|
||||
%global _performance_build 1
|
||||
@ -195,34 +195,6 @@ Patch54: 0054-Struct-Reorg-Bugfix-for-structure-pointer-compressio.patch
|
||||
Patch55: 0055-Struct-Reorg-Port-bugfixes-to-GCC-12.3.1.patch
|
||||
Patch56: 0056-Fix-bug-that-verifying-gimple-failed-when-reorg-leve.patch
|
||||
Patch57: 0057-AutoFdo-Fix-memory-leaks-in-autofdo.patch
|
||||
Patch58: 0058-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch
|
||||
Patch59: 0059-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch
|
||||
Patch60: 0060-Make-option-mvzeroupper-independent-of-optimization-.patch
|
||||
Patch61: 0061-i386-Sync-tune_string-with-arch_string-for-target-at.patch
|
||||
Patch62: 0062-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch
|
||||
Patch63: 0063-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch
|
||||
Patch64: 0064-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch
|
||||
Patch65: 0065-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch
|
||||
Patch66: 0066-Software-mitigation-Disable-gather-generation-in-vec.patch
|
||||
Patch67: 0067-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch
|
||||
Patch68: 0068-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch
|
||||
Patch69: 0069-Disparage-slightly-for-the-alternative-which-move-DF.patch
|
||||
Patch70: 0070-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch
|
||||
Patch71: 0071-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch
|
||||
Patch72: 0072-Disable-FMADD-in-chains-for-Zen4-and-generic.patch
|
||||
Patch73: 0073-Initial-Raptorlake-Support.patch
|
||||
Patch74: 0074-Initial-Meteorlake-Support.patch
|
||||
Patch75: 0075-Support-Intel-AMX-FP16-ISA.patch
|
||||
Patch76: 0076-Support-Intel-prefetchit0-t1.patch
|
||||
Patch77: 0077-Initial-Granite-Rapids-Support.patch
|
||||
Patch78: 0078-Support-Intel-AMX-COMPLEX.patch
|
||||
Patch79: 0079-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch
|
||||
Patch80: 0080-Initial-Granite-Rapids-D-Support.patch
|
||||
Patch81: 0081-Correct-Granite-Rapids-D-documentation.patch
|
||||
Patch82: 0082-i386-Remove-Meteorlake-s-family_model.patch
|
||||
Patch83: 0083-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch
|
||||
Patch84: 0084-x86-Update-model-values-for-Raptorlake.patch
|
||||
Patch85: 0085-Fix-target_clone-arch-graniterapids-d.patch
|
||||
Patch86: 0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch
|
||||
Patch87: 0087-Add-cost-calculation-for-reg-equivalence-invariants.patch
|
||||
|
||||
@ -881,34 +853,6 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
||||
%patch55 -p1
|
||||
%patch56 -p1
|
||||
%patch57 -p1
|
||||
%patch58 -p1
|
||||
%patch59 -p1
|
||||
%patch60 -p1
|
||||
%patch61 -p1
|
||||
%patch62 -p1
|
||||
%patch63 -p1
|
||||
%patch64 -p1
|
||||
%patch65 -p1
|
||||
%patch66 -p1
|
||||
%patch67 -p1
|
||||
%patch68 -p1
|
||||
%patch69 -p1
|
||||
%patch70 -p1
|
||||
%patch71 -p1
|
||||
%patch72 -p1
|
||||
%patch73 -p1
|
||||
%patch74 -p1
|
||||
%patch75 -p1
|
||||
%patch76 -p1
|
||||
%patch77 -p1
|
||||
%patch78 -p1
|
||||
%patch79 -p1
|
||||
%patch80 -p1
|
||||
%patch81 -p1
|
||||
%patch82 -p1
|
||||
%patch83 -p1
|
||||
%patch84 -p1
|
||||
%patch85 -p1
|
||||
%patch86 -p1
|
||||
%patch87 -p1
|
||||
|
||||
@ -3302,6 +3246,10 @@ end
|
||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||
|
||||
%changelog
|
||||
* Fri Apr 26 2024 Zheng Chenhui <zhengchenhui1@huawei.com> - 12.3.1-26
|
||||
- Type: Revert
|
||||
- DESC: Revert Intel patches.
|
||||
|
||||
* Wed Apr 24 2024 Wang Ding <wangding16@huawei.com> - 12.3.1-25
|
||||
- Type: Sync
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user