692 lines
34 KiB
Diff
692 lines
34 KiB
Diff
From c11301c7780213ddf46a0bcdb06079af485f431c Mon Sep 17 00:00:00 2001
|
|
From: Hongyu Wang <hongyu.wang@intel.com>
|
|
Date: Fri, 4 Nov 2022 15:50:55 +0800
|
|
Subject: [PATCH 20/32] Support Intel AMX-FP16 ISA
|
|
|
|
gcc/ChangeLog:
|
|
|
|
* common/config/i386/cpuinfo.h (get_available_features): Detect
|
|
amx-fp16.
|
|
* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_FP16_SET,
|
|
OPTION_MASK_ISA2_AMX_FP16_UNSET): New macros.
|
|
(ix86_handle_option): Handle -mamx-fp16.
|
|
* common/config/i386/i386-cpuinfo.h (enum processor_features):
|
|
Add FEATURE_AMX_FP16.
|
|
* common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for
|
|
amx-fp16.
|
|
* config.gcc: Add amxfp16intrin.h.
|
|
* config/i386/cpuid.h (bit_AMX_FP16): New.
|
|
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
|
|
__AMX_FP16__.
|
|
* config/i386/i386-isa.def: Add DEF_PTA for AMX_FP16.
|
|
* config/i386/i386-options.cc (isa2_opts): Add -mamx-fp16.
|
|
(ix86_valid_target_attribute_inner_p): Add new ATTR.
|
|
(ix86_option_override_internal): Handle AMX-FP16.
|
|
* config/i386/i386.opt: Add -mamx-fp16.
|
|
* config/i386/immintrin.h: Include amxfp16intrin.h.
|
|
* doc/extend.texi: Document amx-fp16.
|
|
* doc/invoke.texi: Document -mamx-fp16.
|
|
* doc/sourcebuild.texi: Document amx_fp16.
|
|
* config/i386/amxfp16intrin.h: New file.
|
|
|
|
gcc/testsuite/ChangeLog:
|
|
|
|
* g++.dg/other/i386-2.C: Add -mamx-fp16.
|
|
* g++.dg/other/i386-3.C: Ditto.
|
|
* gcc.target/i386/sse-12.c: Ditto.
|
|
* gcc.target/i386/sse-13.c: Ditto.
|
|
* gcc.target/i386/sse-14.c: Ditto.
|
|
* gcc.target/i386/sse-22.c: Ditto.
|
|
* gcc.target/i386/sse-23.c: Ditto.
|
|
* lib/target-supports.exp (check_effective_target_amx_fp16):
|
|
New proc.
|
|
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
|
|
* gcc.target/i386/amx-check.h: Add AMX_FP16.
|
|
* gcc.target/i386/amx-helper.h: New file to support amx-fp16.
|
|
* gcc.target/i386/amxfp16-asmatt-1.c: New test.
|
|
* gcc.target/i386/amxfp16-asmintel-1.c: Ditto.
|
|
* gcc.target/i386/amxfp16-dpfp16ps-2.c: Ditto.
|
|
|
|
Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
|
|
|
|
(cherry picked from commit 2b4a03962a0fe18cadc944d90f1fb85a40004226)
|
|
---
|
|
gcc/common/config/i386/cpuinfo.h | 5 ++
|
|
gcc/common/config/i386/i386-common.cc | 15 +++++
|
|
gcc/common/config/i386/i386-cpuinfo.h | 1 +
|
|
gcc/common/config/i386/i386-isas.h | 1 +
|
|
gcc/config.gcc | 3 +-
|
|
gcc/config/i386/amxfp16intrin.h | 46 ++++++++++++++
|
|
gcc/config/i386/cpuid.h | 1 +
|
|
gcc/config/i386/i386-c.cc | 2 +
|
|
gcc/config/i386/i386-isa.def | 1 +
|
|
gcc/config/i386/i386-options.cc | 4 +-
|
|
gcc/config/i386/i386.opt | 4 ++
|
|
gcc/config/i386/immintrin.h | 2 +
|
|
gcc/doc/extend.texi | 5 ++
|
|
gcc/doc/invoke.texi | 9 ++-
|
|
gcc/doc/sourcebuild.texi | 3 +
|
|
gcc/testsuite/g++.dg/other/i386-2.C | 2 +-
|
|
gcc/testsuite/g++.dg/other/i386-3.C | 2 +-
|
|
gcc/testsuite/gcc.target/i386/amx-check.h | 3 +
|
|
gcc/testsuite/gcc.target/i386/amx-helper.h | 61 +++++++++++++++++++
|
|
.../gcc.target/i386/amxfp16-asmatt-1.c | 13 ++++
|
|
.../gcc.target/i386/amxfp16-asmintel-1.c | 10 +++
|
|
.../gcc.target/i386/amxfp16-dpfp16ps-2.c | 57 +++++++++++++++++
|
|
gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 +
|
|
gcc/testsuite/gcc.target/i386/sse-12.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/sse-13.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/sse-14.c | 2 +-
|
|
gcc/testsuite/gcc.target/i386/sse-22.c | 4 +-
|
|
gcc/testsuite/gcc.target/i386/sse-23.c | 2 +-
|
|
gcc/testsuite/lib/target-supports.exp | 11 ++++
|
|
29 files changed, 262 insertions(+), 13 deletions(-)
|
|
create mode 100644 gcc/config/i386/amxfp16intrin.h
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/amx-helper.h
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
|
|
|
|
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
|
index 37af92d6b..5951a30aa 100644
|
|
--- a/gcc/common/config/i386/cpuinfo.h
|
|
+++ b/gcc/common/config/i386/cpuinfo.h
|
|
@@ -783,6 +783,11 @@ get_available_features (struct __processor_model *cpu_model,
|
|
set_feature (FEATURE_AVX512BF16);
|
|
}
|
|
}
|
|
+ if (amx_usable)
|
|
+ {
|
|
+ if (eax & bit_AMX_FP16)
|
|
+ set_feature (FEATURE_AMX_FP16);
|
|
+ }
|
|
}
|
|
|
|
/* Get Advanced Features at level 0xd (eax = 0xd, ecx = 1). */
|
|
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
|
index cfee672fb..922db33ee 100644
|
|
--- a/gcc/common/config/i386/i386-common.cc
|
|
+++ b/gcc/common/config/i386/i386-common.cc
|
|
@@ -107,6 +107,7 @@ along with GCC; see the file COPYING3. If not see
|
|
#define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
|
|
#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
|
|
#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
|
|
+#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
|
|
|
|
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
|
|
as -msse4.2. */
|
|
@@ -275,6 +276,7 @@ along with GCC; see the file COPYING3. If not see
|
|
#define OPTION_MASK_ISA2_KL_UNSET \
|
|
(OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
|
|
#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
|
|
+#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
|
|
|
|
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
|
|
as -mno-sse4.1. */
|
|
@@ -1125,6 +1127,19 @@ ix86_handle_option (struct gcc_options *opts,
|
|
}
|
|
return true;
|
|
|
|
+ case OPT_mamx_fp16:
|
|
+ if (value)
|
|
+ {
|
|
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_FP16_SET;
|
|
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_SET;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_FP16_UNSET;
|
|
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_FP16_UNSET;
|
|
+ }
|
|
+ return true;
|
|
+
|
|
case OPT_mfma:
|
|
if (value)
|
|
{
|
|
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
|
|
index 82996ebb3..8f22897de 100644
|
|
--- a/gcc/common/config/i386/i386-cpuinfo.h
|
|
+++ b/gcc/common/config/i386/i386-cpuinfo.h
|
|
@@ -240,6 +240,7 @@ enum processor_features
|
|
FEATURE_X86_64_V2,
|
|
FEATURE_X86_64_V3,
|
|
FEATURE_X86_64_V4,
|
|
+ FEATURE_AMX_FP16,
|
|
CPU_FEATURE_MAX
|
|
};
|
|
|
|
diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
|
|
index 2d0646a68..95bab6da2 100644
|
|
--- a/gcc/common/config/i386/i386-isas.h
|
|
+++ b/gcc/common/config/i386/i386-isas.h
|
|
@@ -175,4 +175,5 @@ ISA_NAMES_TABLE_START
|
|
ISA_NAMES_TABLE_ENTRY("x86-64-v2", FEATURE_X86_64_V2, P_X86_64_V2, NULL)
|
|
ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
|
|
ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
|
|
+ ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
|
|
ISA_NAMES_TABLE_END
|
|
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
|
index 4a0ae9328..e2b4a23dc 100644
|
|
--- a/gcc/config.gcc
|
|
+++ b/gcc/config.gcc
|
|
@@ -423,7 +423,8 @@ i[34567]86-*-* | x86_64-*-*)
|
|
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
|
|
amxbf16intrin.h x86gprintrin.h uintrintrin.h
|
|
hresetintrin.h keylockerintrin.h avxvnniintrin.h
|
|
- mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h"
|
|
+ mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
|
|
+ amxfp16intrin.h"
|
|
;;
|
|
ia64-*-*)
|
|
extra_headers=ia64intrin.h
|
|
diff --git a/gcc/config/i386/amxfp16intrin.h b/gcc/config/i386/amxfp16intrin.h
|
|
new file mode 100644
|
|
index 000000000..6a114741a
|
|
--- /dev/null
|
|
+++ b/gcc/config/i386/amxfp16intrin.h
|
|
@@ -0,0 +1,46 @@
|
|
+/* Copyright (C) 2020 Free Software Foundation, Inc.
|
|
+
|
|
+ This file is part of GCC.
|
|
+
|
|
+ GCC is free software; you can redistribute it and/or modify
|
|
+ it under the terms of the GNU General Public License as published by
|
|
+ the Free Software Foundation; either version 3, or (at your option)
|
|
+ any later version.
|
|
+
|
|
+ GCC is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ GNU General Public License for more details.
|
|
+
|
|
+ Under Section 7 of GPL version 3, you are granted additional
|
|
+ permissions described in the GCC Runtime Library Exception, version
|
|
+ 3.1, as published by the Free Software Foundation.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License and
|
|
+ a copy of the GCC Runtime Library Exception along with this program;
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
+ <http://www.gnu.org/licenses/>. */
|
|
+
|
|
+#if !defined _IMMINTRIN_H_INCLUDED
|
|
+#error "Never use <amxfp16intrin.h> directly; include <immintrin.h> instead."
|
|
+#endif
|
|
+
|
|
+#ifndef _AMXFP16INTRIN_H_INCLUDED
|
|
+#define _AMXFP16INTRIN_H_INCLUDED
|
|
+
|
|
+#if defined(__x86_64__)
|
|
+#define _tile_dpfp16ps_internal(dst,src1,src2) \
|
|
+ __asm__ volatile \
|
|
+ ("{tdpfp16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpfp16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
|
|
+
|
|
+#define _tile_dpfp16ps(dst,src1,src2) \
|
|
+ _tile_dpfp16ps_internal (dst,src1,src2)
|
|
+
|
|
+#endif
|
|
+
|
|
+#ifdef __DISABLE_AMX_FP16__
|
|
+#undef __DISABLE_AMX_FP16__
|
|
+#pragma GCC pop_options
|
|
+#endif /* __DISABLE_AMX_FP16__ */
|
|
+
|
|
+#endif /* _AMXFP16INTRIN_H_INCLUDED */
|
|
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
|
|
index 8b3dc2b1d..d6cd8d1bf 100644
|
|
--- a/gcc/config/i386/cpuid.h
|
|
+++ b/gcc/config/i386/cpuid.h
|
|
@@ -27,6 +27,7 @@
|
|
/* %eax */
|
|
#define bit_AVXVNNI (1 << 4)
|
|
#define bit_AVX512BF16 (1 << 5)
|
|
+#define bit_AMX_FP16 (1 << 21)
|
|
#define bit_HRESET (1 << 22)
|
|
|
|
/* %ecx */
|
|
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
|
|
index 3fec4c7e2..4269f29e6 100644
|
|
--- a/gcc/config/i386/i386-c.cc
|
|
+++ b/gcc/config/i386/i386-c.cc
|
|
@@ -633,6 +633,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
|
def_or_undef (parse_in, "__WIDEKL__");
|
|
if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI)
|
|
def_or_undef (parse_in, "__AVXVNNI__");
|
|
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP16)
|
|
+ def_or_undef (parse_in, "__AMX_FP16__");
|
|
if (TARGET_IAMCU)
|
|
{
|
|
def_or_undef (parse_in, "__iamcu");
|
|
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
|
|
index 83659d0be..c7305c01b 100644
|
|
--- a/gcc/config/i386/i386-isa.def
|
|
+++ b/gcc/config/i386/i386-isa.def
|
|
@@ -109,3 +109,4 @@ DEF_PTA(KL)
|
|
DEF_PTA(WIDEKL)
|
|
DEF_PTA(AVXVNNI)
|
|
DEF_PTA(AVX512FP16)
|
|
+DEF_PTA(AMX_FP16)
|
|
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
|
index 3df1f0c41..3edb7094e 100644
|
|
--- a/gcc/config/i386/i386-options.cc
|
|
+++ b/gcc/config/i386/i386-options.cc
|
|
@@ -230,7 +230,8 @@ static struct ix86_target_opts isa2_opts[] =
|
|
{ "-mkl", OPTION_MASK_ISA2_KL },
|
|
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
|
|
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
|
|
- { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }
|
|
+ { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
|
|
+ { "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 }
|
|
};
|
|
static struct ix86_target_opts isa_opts[] =
|
|
{
|
|
@@ -1074,6 +1075,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
|
IX86_ATTR_ISA ("hreset", OPT_mhreset),
|
|
IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
|
|
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
|
|
+ IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16),
|
|
|
|
/* enum options */
|
|
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
|
|
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
|
|
index b154110d8..52c6f02ee 100644
|
|
--- a/gcc/config/i386/i386.opt
|
|
+++ b/gcc/config/i386/i386.opt
|
|
@@ -1226,3 +1226,7 @@ Enable conservative small loop unrolling.
|
|
mscatter
|
|
Target Alias(mtune-ctrl=, use_scatter, ^use_scatter)
|
|
Enable vectorization for scatter instruction.
|
|
+
|
|
+mamx-fp16
|
|
+Target Mask(ISA2_AMX_FP16) Var(ix86_isa_flags2) Save
|
|
+Support AMX-FP16 built-in functions and code generation.
|
|
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
|
|
index 6afd78c2b..0447ca4b2 100644
|
|
--- a/gcc/config/i386/immintrin.h
|
|
+++ b/gcc/config/i386/immintrin.h
|
|
@@ -128,4 +128,6 @@
|
|
|
|
#include <keylockerintrin.h>
|
|
|
|
+#include <amxfp16intrin.h>
|
|
+
|
|
#endif /* _IMMINTRIN_H_INCLUDED */
|
|
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
|
index 33a776a79..4ba9d34cd 100644
|
|
--- a/gcc/doc/extend.texi
|
|
+++ b/gcc/doc/extend.texi
|
|
@@ -7038,6 +7038,11 @@ Enable/disable the generation of the WIDEKL instructions.
|
|
@cindex @code{target("avxvnni")} function attribute, x86
|
|
Enable/disable the generation of the AVXVNNI instructions.
|
|
|
|
+@item amx-fp16
|
|
+@itemx no-amx-fp16
|
|
+@cindex @code{target("amx-fp16")} function attribute, x86
|
|
+Enable/disable the generation of the AMX-FP16 instructions.
|
|
+
|
|
@item cld
|
|
@itemx no-cld
|
|
@cindex @code{target("cld")} function attribute, x86
|
|
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
|
index 3a48655e5..d25f13217 100644
|
|
--- a/gcc/doc/invoke.texi
|
|
+++ b/gcc/doc/invoke.texi
|
|
@@ -1428,7 +1428,7 @@ See RS/6000 and PowerPC Options.
|
|
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
|
|
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
|
|
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
|
|
--mavx512fp16 @gol
|
|
+-mavx512fp16 -mamx-fp16 @gol
|
|
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
|
|
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
|
|
-mkl -mwidekl @gol
|
|
@@ -32442,6 +32442,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
|
@need 200
|
|
@itemx -mwidekl
|
|
@opindex mwidekl
|
|
+@need 200
|
|
+@itemx -mamx-fp16
|
|
+@opindex mamx-fp16
|
|
These switches enable the use of instructions in the MMX, SSE,
|
|
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
|
|
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
|
|
@@ -32451,8 +32454,8 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP,
|
|
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
|
|
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
|
|
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
|
|
-UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16
|
|
-or CLDEMOTE extended instruction sets. Each has a corresponding
|
|
+UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16,
|
|
+AMX-FP16 or CLDEMOTE extended instruction sets. Each has a corresponding
|
|
@option{-mno-} option to disable use of these instructions.
|
|
|
|
These extensions are also available as built-in functions: see
|
|
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
|
|
index 71c04841d..b64b62dee 100644
|
|
--- a/gcc/doc/sourcebuild.texi
|
|
+++ b/gcc/doc/sourcebuild.texi
|
|
@@ -2472,6 +2472,9 @@ Target supports the execution of @code{amx-int8} instructions.
|
|
@item amx_bf16
|
|
Target supports the execution of @code{amx-bf16} instructions.
|
|
|
|
+@item amx_fp16
|
|
+Target supports the execution of @code{amx-fp16} instructions.
|
|
+
|
|
@item cell_hw
|
|
Test system can execute AltiVec and Cell PPU instructions.
|
|
|
|
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
|
|
index fba3d1ac6..57a6357aa 100644
|
|
--- a/gcc/testsuite/g++.dg/other/i386-2.C
|
|
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
|
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
|
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
|
|
|
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
|
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
|
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
|
|
index 5cc0fa834..1947547d6 100644
|
|
--- a/gcc/testsuite/g++.dg/other/i386-3.C
|
|
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
|
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
|
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
|
|
|
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
|
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h
|
|
index 6fff5ff46..27dd37bf9 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/amx-check.h
|
|
+++ b/gcc/testsuite/gcc.target/i386/amx-check.h
|
|
@@ -213,6 +213,9 @@ main ()
|
|
#ifdef AMX_BF16
|
|
&& __builtin_cpu_supports ("amx-bf16")
|
|
#endif
|
|
+#ifdef AMX_FP16
|
|
+ && __builtin_cpu_supports ("amx-fp16")
|
|
+#endif
|
|
#ifdef __linux__
|
|
&& request_perm_xtile_data ()
|
|
#endif
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amx-helper.h b/gcc/testsuite/gcc.target/i386/amx-helper.h
|
|
new file mode 100644
|
|
index 000000000..fe24d7067
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/i386/amx-helper.h
|
|
@@ -0,0 +1,61 @@
|
|
+#ifndef AMX_HELPER_H_INCLUDED
|
|
+#define AMX_HELPER_H_INCLUDED
|
|
+#if defined(AMX_FP16)
|
|
+#include <immintrin.h>
|
|
+#include <xmmintrin.h>
|
|
+#endif
|
|
+#include "amx-check.h"
|
|
+
|
|
+typedef union
|
|
+{
|
|
+ _Float16 f16;
|
|
+ uint16_t u;
|
|
+} union16f_uw;
|
|
+
|
|
+#if defined(AMX_FP16)
|
|
+/* Transformation functions between fp16/float */
|
|
+static uint16_t make_f32_fp16 (float f)
|
|
+{
|
|
+ union16f_uw tmp;
|
|
+ __m128 b = _mm_set_ss (f);
|
|
+ __m128h a;
|
|
+ tmp.f16 = _mm_cvtsh_h (_mm_cvtss_sh (a, b));
|
|
+ return tmp.u;
|
|
+}
|
|
+
|
|
+static float make_fp16_f32 (uint16_t fp)
|
|
+{
|
|
+ union16f_uw tmp;
|
|
+ tmp.u = fp;
|
|
+ __m128h b = _mm_set_sh (tmp.f16);
|
|
+ __m128 a;
|
|
+ return _mm_cvtss_f32 (_mm_cvtsh_ss (a, b));
|
|
+}
|
|
+
|
|
+/* Init tile buffer with fp16 pairs */
|
|
+void init_fp16_max_tile_buffer (uint8_t* buf)
|
|
+{
|
|
+ int i, j;
|
|
+ uint16_t* ptr = (uint16_t *) buf;
|
|
+
|
|
+ for (i = 0; i < 16; i++)
|
|
+ for (j = 0; j < 32; j++)
|
|
+ {
|
|
+ float f = 2.5f * i + 1.25f * j;
|
|
+ ptr[i * 32 + j] = make_f32_fp16 (f);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Init tile fp16 pair buffer with zero */
|
|
+void init_fp16_max_tile_zero_buffer (uint8_t* buf)
|
|
+{
|
|
+ int i, j;
|
|
+ uint16_t* ptr = (uint16_t *) buf;
|
|
+
|
|
+ for (i = 0; i < 16; i++)
|
|
+ for (j = 0; j < 32; j++)
|
|
+ ptr[i * 32 + j] = make_f32_fp16 (0.0f);
|
|
+}
|
|
+#endif
|
|
+
|
|
+#endif
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c
|
|
new file mode 100644
|
|
index 000000000..09ae6d408
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/i386/amxfp16-asmatt-1.c
|
|
@@ -0,0 +1,13 @@
|
|
+/* { dg-do compile { target { ! ia32 } } } */
|
|
+/* { dg-options "-O2 -mamx-fp16" } */
|
|
+/* { dg-final { scan-assembler "tdpfp16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
|
|
+#include <immintrin.h>
|
|
+
|
|
+#define TMM1 1
|
|
+#define TMM2 2
|
|
+#define TMM3 3
|
|
+
|
|
+void TEST ()
|
|
+{
|
|
+ _tile_dpfp16ps (TMM1, TMM2, TMM3);
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c
|
|
new file mode 100644
|
|
index 000000000..a8dff945f
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/i386/amxfp16-asmintel-1.c
|
|
@@ -0,0 +1,10 @@
|
|
+/* { dg-do compile { target { ! ia32 } } } */
|
|
+/* { dg-require-effective-target masm_intel } */
|
|
+/* { dg-options "-O2 -mamx-fp16 -masm=intel" } */
|
|
+/* { dg-final { scan-assembler "tdpfp16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
|
|
+#include <immintrin.h>
|
|
+
|
|
+void TEST ()
|
|
+{
|
|
+ _tile_dpfp16ps (1, 2, 3);
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c b/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
|
|
new file mode 100644
|
|
index 000000000..2d359a689
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/i386/amxfp16-dpfp16ps-2.c
|
|
@@ -0,0 +1,57 @@
|
|
+/* { dg-do run { target { ! ia32 } } } */
|
|
+/* { dg-require-effective-target amx_tile } */
|
|
+/* { dg-require-effective-target amx_fp16 } */
|
|
+/* { dg-require-effective-target avx512fp16 } */
|
|
+/* { dg-options "-O2 -mamx-tile -mamx-fp16 -mavx512fp16" } */
|
|
+#define AMX_FP16
|
|
+#define DO_TEST test_amx_fp16_dpfp16ps
|
|
+void test_amx_fp16_dpfp16ps ();
|
|
+#include "amx-helper.h"
|
|
+
|
|
+void calc_matrix_dpfp16ps (__tile *dst, __tile *src1, __tile *src2)
|
|
+{
|
|
+ uint16_t *src1_buf = (uint16_t *)src1->buf;
|
|
+ uint16_t *src2_buf = (uint16_t *)src2->buf;
|
|
+ float *dst_buf = (float *)dst->buf;
|
|
+
|
|
+ int M = src1->rows;
|
|
+ int N = src1->colsb / 4;
|
|
+ int K = src2->colsb / 4;
|
|
+ int i, j, k, t;
|
|
+
|
|
+ for (i = 0; i < M; i++)
|
|
+ for (j = 0; j < N; j++)
|
|
+ for (k = 0; k < K; k++)
|
|
+ for (t = 0; t < 2; t+=2)
|
|
+ {
|
|
+ dst_buf[i * K + k] +=
|
|
+ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t]) *
|
|
+ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t])) +
|
|
+ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t + 1]) *
|
|
+ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t + 1]));
|
|
+ }
|
|
+
|
|
+}
|
|
+
|
|
+void test_amx_fp16_dpfp16ps ()
|
|
+{
|
|
+ __tilecfg_u cfg;
|
|
+ __tile dst, dst_ref, src1, src2;
|
|
+ uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024];
|
|
+
|
|
+ init_fp16_max_tile_buffer (tmp_dst_buf);
|
|
+ init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf);
|
|
+
|
|
+ init_tile_config (&cfg);
|
|
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf);
|
|
+ init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf);
|
|
+ init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf);
|
|
+
|
|
+ calc_matrix_dpfp16ps (&dst, &src1, &src2);
|
|
+
|
|
+ _tile_dpfp16ps (1, 2, 3);
|
|
+ _tile_stored (1, dst_ref.buf, _STRIDE);
|
|
+
|
|
+ if (!check_float_tile_register (&dst_ref, &dst))
|
|
+ abort ();
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
|
index f34e7a977..b00cfff03 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
|
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
|
@@ -80,6 +80,7 @@ extern void test_keylocker (void) __attribute__((__target__("kl")));
|
|
extern void test_widekl (void) __attribute__((__target__("widekl")));
|
|
extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
|
|
extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
|
|
+extern void test_amx_fp16 (void) __attribute__((__target__("amx-fp16")));
|
|
|
|
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
|
|
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
|
|
@@ -161,6 +162,7 @@ extern void test_no_keylocker (void) __attribute__((__target__("no-kl")));
|
|
extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
|
|
extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
|
|
extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
|
|
+extern void test_no_amx_fp16 (void) __attribute__((__target__("no-amx-fp16")));
|
|
|
|
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
|
|
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
|
|
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
|
|
index 375d4d1b4..9ab4a7e0c 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
|
|
@@ -3,7 +3,7 @@
|
|
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
|
|
with -O -std=c89 -pedantic-errors. */
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */
|
|
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mamx-fp16" } */
|
|
|
|
#include <x86intrin.h>
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
|
|
index e285c307d..a1e453a98 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
|
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
|
|
#include <mm_malloc.h>
|
|
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
|
|
index f41493b93..eaa1a8d81 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
|
|
@@ -1,5 +1,5 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */
|
|
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
|
|
#include <mm_malloc.h>
|
|
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
|
|
index 31492ef36..19afe639d 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
|
|
@@ -103,7 +103,7 @@
|
|
|
|
|
|
#ifndef DIFFERENT_PRAGMAS
|
|
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
|
|
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
|
#endif
|
|
|
|
/* Following intrinsics require immediate arguments. They
|
|
@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
|
|
|
|
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
|
|
#ifdef DIFFERENT_PRAGMAS
|
|
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
|
|
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
|
#endif
|
|
#include <immintrin.h>
|
|
test_1 (_cvtss_sh, unsigned short, float, 1)
|
|
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
|
|
index b398fd144..151201d97 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
|
|
@@ -843,6 +843,6 @@
|
|
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
|
|
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
|
|
|
|
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16")
|
|
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
|
|
|
#include <x86intrin.h>
|
|
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
|
|
index c858bd93b..0d83c780c 100644
|
|
--- a/gcc/testsuite/lib/target-supports.exp
|
|
+++ b/gcc/testsuite/lib/target-supports.exp
|
|
@@ -9972,6 +9972,17 @@ proc check_effective_target_amx_bf16 { } {
|
|
} "-mamx-bf16" ]
|
|
}
|
|
|
|
+# Return 1 if amx-fp16 instructions can be compiled.
|
|
+proc check_effective_target_amx_fp16 { } {
|
|
+ return [check_no_compiler_messages amx_fp16 object {
|
|
+ void
|
|
+ foo ()
|
|
+ {
|
|
+ __asm__ volatile ("tdpfp16ps\t%%tmm1, %%tmm2, %%tmm3" ::);
|
|
+ }
|
|
+ } "-mamx-fp16" ]
|
|
+}
|
|
+
|
|
# Return 1 if vpclmulqdq instructions can be compiled.
|
|
proc check_effective_target_vpclmulqdq { } {
|
|
return [check_no_compiler_messages vpclmulqdq object {
|
|
--
|
|
2.28.0.windows.1
|
|
|