723 lines
37 KiB
Diff
723 lines
37 KiB
Diff
|
|
From 4f1aff10d93cabe8dfbaf076b6d826a142efb6e1 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Haochen Jiang <haochen.jiang@intel.com>
|
||
|
|
Date: Wed, 31 May 2023 10:45:00 +0800
|
||
|
|
Subject: [PATCH 23/32] Support Intel AMX-COMPLEX
|
||
|
|
|
||
|
|
gcc/ChangeLog:
|
||
|
|
|
||
|
|
* common/config/i386/cpuinfo.h (get_available_features):
|
||
|
|
Detect AMX-COMPLEX.
|
||
|
|
* common/config/i386/i386-common.cc
|
||
|
|
(OPTION_MASK_ISA2_AMX_COMPLEX_SET,
|
||
|
|
OPTION_MASK_ISA2_AMX_COMPLEX_UNSET): New.
|
||
|
|
(ix86_handle_option): Handle -mamx-complex.
|
||
|
|
* common/config/i386/i386-cpuinfo.h (enum processor_features):
|
||
|
|
Add FEATURE_AMX_COMPLEX.
|
||
|
|
* common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for
|
||
|
|
amx-complex.
|
||
|
|
* config.gcc: Add amxcomplexintrin.h.
|
||
|
|
* config/i386/cpuid.h (bit_AMX_COMPLEX): New.
|
||
|
|
* config/i386/i386-c.cc (ix86_target_macros_internal): Define
|
||
|
|
__AMX_COMPLEX__.
|
||
|
|
* config/i386/i386-isa.def (AMX_COMPLEX): Add DEF_PTA(AMX_COMPLEX).
|
||
|
|
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
|
||
|
|
Handle amx-complex.
|
||
|
|
* config/i386/i386.opt: Add option -mamx-complex.
|
||
|
|
* config/i386/immintrin.h: Include amxcomplexintrin.h.
|
||
|
|
* doc/extend.texi: Document amx-complex.
|
||
|
|
* doc/invoke.texi: Document -mamx-complex.
|
||
|
|
* doc/sourcebuild.texi: Document target amx-complex.
|
||
|
|
* config/i386/amxcomplexintrin.h: New file.
|
||
|
|
|
||
|
|
gcc/testsuite/ChangeLog:
|
||
|
|
|
||
|
|
* g++.dg/other/i386-2.C: Add -mamx-complex.
|
||
|
|
* g++.dg/other/i386-3.C: Ditto.
|
||
|
|
* gcc.target/i386/amx-check.h: Add cpu check for AMX-COMPLEX.
|
||
|
|
* gcc.target/i386/amx-helper.h: Add amx-complex support.
|
||
|
|
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
|
||
|
|
* gcc.target/i386/sse-12.c: Add -mamx-complex.
|
||
|
|
* gcc.target/i386/sse-13.c: Ditto.
|
||
|
|
* gcc.target/i386/sse-14.c: Ditto.
|
||
|
|
* gcc.target/i386/sse-22.c: Add amx-complex.
|
||
|
|
* gcc.target/i386/sse-23.c: Ditto.
|
||
|
|
* lib/target-supports.exp (check_effective_target_amx_complex): New.
|
||
|
|
* gcc.target/i386/amxcomplex-asmatt-1.c: New test.
|
||
|
|
* gcc.target/i386/amxcomplex-asmintel-1.c: Ditto.
|
||
|
|
* gcc.target/i386/amxcomplex-cmmimfp16ps-2.c: Ditto.
|
||
|
|
* gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c: Ditto.
|
||
|
|
---
|
||
|
|
gcc/common/config/i386/cpuinfo.h | 2 +
|
||
|
|
gcc/common/config/i386/i386-common.cc | 19 +++++-
|
||
|
|
gcc/common/config/i386/i386-cpuinfo.h | 1 +
|
||
|
|
gcc/common/config/i386/i386-isas.h | 2 +
|
||
|
|
gcc/config.gcc | 2 +-
|
||
|
|
gcc/config/i386/amxcomplexintrin.h | 59 +++++++++++++++++++
|
||
|
|
gcc/config/i386/cpuid.h | 1 +
|
||
|
|
gcc/config/i386/i386-c.cc | 2 +
|
||
|
|
gcc/config/i386/i386-isa.def | 1 +
|
||
|
|
gcc/config/i386/i386-options.cc | 4 +-
|
||
|
|
gcc/config/i386/i386.opt | 4 ++
|
||
|
|
gcc/config/i386/immintrin.h | 2 +
|
||
|
|
gcc/doc/extend.texi | 5 ++
|
||
|
|
gcc/doc/invoke.texi | 7 ++-
|
||
|
|
gcc/doc/sourcebuild.texi | 3 +
|
||
|
|
gcc/testsuite/g++.dg/other/i386-2.C | 2 +-
|
||
|
|
gcc/testsuite/g++.dg/other/i386-3.C | 2 +-
|
||
|
|
gcc/testsuite/gcc.target/i386/amx-check.h | 3 +
|
||
|
|
gcc/testsuite/gcc.target/i386/amx-helper.h | 4 +-
|
||
|
|
.../gcc.target/i386/amxcomplex-asmatt-1.c | 15 +++++
|
||
|
|
.../gcc.target/i386/amxcomplex-asmintel-1.c | 12 ++++
|
||
|
|
.../i386/amxcomplex-cmmimfp16ps-2.c | 53 +++++++++++++++++
|
||
|
|
.../i386/amxcomplex-cmmrlfp16ps-2.c | 53 +++++++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 +
|
||
|
|
gcc/testsuite/gcc.target/i386/sse-12.c | 2 +-
|
||
|
|
gcc/testsuite/gcc.target/i386/sse-13.c | 2 +-
|
||
|
|
gcc/testsuite/gcc.target/i386/sse-14.c | 2 +-
|
||
|
|
gcc/testsuite/gcc.target/i386/sse-22.c | 4 +-
|
||
|
|
gcc/testsuite/gcc.target/i386/sse-23.c | 2 +-
|
||
|
|
gcc/testsuite/lib/target-supports.exp | 11 ++++
|
||
|
|
30 files changed, 268 insertions(+), 15 deletions(-)
|
||
|
|
create mode 100644 gcc/config/i386/amxcomplexintrin.h
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c
|
||
|
|
|
||
|
|
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||
|
|
index 1f75ff1ca..39d3351db 100644
|
||
|
|
--- a/gcc/common/config/i386/cpuinfo.h
|
||
|
|
+++ b/gcc/common/config/i386/cpuinfo.h
|
||
|
|
@@ -798,6 +798,8 @@ get_available_features (struct __processor_model *cpu_model,
|
||
|
|
{
|
||
|
|
if (eax & bit_AMX_FP16)
|
||
|
|
set_feature (FEATURE_AMX_FP16);
|
||
|
|
+ if (edx & bit_AMX_COMPLEX)
|
||
|
|
+ set_feature (FEATURE_AMX_COMPLEX);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||
|
|
index 1aa163463..87e8afe9b 100644
|
||
|
|
--- a/gcc/common/config/i386/i386-common.cc
|
||
|
|
+++ b/gcc/common/config/i386/i386-common.cc
|
||
|
|
@@ -109,6 +109,8 @@ along with GCC; see the file COPYING3. If not see
|
||
|
|
#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
|
||
|
|
#define OPTION_MASK_ISA2_AMX_FP16_SET OPTION_MASK_ISA2_AMX_FP16
|
||
|
|
#define OPTION_MASK_ISA2_PREFETCHI_SET OPTION_MASK_ISA2_PREFETCHI
|
||
|
|
+#define OPTION_MASK_ISA2_AMX_COMPLEX_SET \
|
||
|
|
+ (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX)
|
||
|
|
|
||
|
|
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
|
||
|
|
as -msse4.2. */
|
||
|
|
@@ -269,7 +271,8 @@ along with GCC; see the file COPYING3. If not see
|
||
|
|
#define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE
|
||
|
|
#define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT
|
||
|
|
#define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK
|
||
|
|
-#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE
|
||
|
|
+#define OPTION_MASK_ISA2_AMX_TILE_UNSET \
|
||
|
|
+ (OPTION_MASK_ISA2_AMX_TILE | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET)
|
||
|
|
#define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
|
||
|
|
#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
|
||
|
|
#define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
|
||
|
|
@@ -279,6 +282,7 @@ along with GCC; see the file COPYING3. If not see
|
||
|
|
#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
|
||
|
|
#define OPTION_MASK_ISA2_AMX_FP16_UNSET OPTION_MASK_ISA2_AMX_FP16
|
||
|
|
#define OPTION_MASK_ISA2_PREFETCHI_UNSET OPTION_MASK_ISA2_PREFETCHI
|
||
|
|
+#define OPTION_MASK_ISA2_AMX_COMPLEX_UNSET OPTION_MASK_ISA2_AMX_COMPLEX
|
||
|
|
|
||
|
|
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
|
||
|
|
as -mno-sse4.1. */
|
||
|
|
@@ -1155,6 +1159,19 @@ ix86_handle_option (struct gcc_options *opts,
|
||
|
|
}
|
||
|
|
return true;
|
||
|
|
|
||
|
|
+ case OPT_mamx_complex:
|
||
|
|
+ if (value)
|
||
|
|
+ {
|
||
|
|
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_COMPLEX_SET;
|
||
|
|
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_SET;
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_COMPLEX_UNSET;
|
||
|
|
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_COMPLEX_UNSET;
|
||
|
|
+ }
|
||
|
|
+ return true;
|
||
|
|
+
|
||
|
|
case OPT_mfma:
|
||
|
|
if (value)
|
||
|
|
{
|
||
|
|
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
|
||
|
|
index 7b2d4d242..56020faac 100644
|
||
|
|
--- a/gcc/common/config/i386/i386-cpuinfo.h
|
||
|
|
+++ b/gcc/common/config/i386/i386-cpuinfo.h
|
||
|
|
@@ -243,6 +243,7 @@ enum processor_features
|
||
|
|
FEATURE_X86_64_V4,
|
||
|
|
FEATURE_AMX_FP16,
|
||
|
|
FEATURE_PREFETCHI,
|
||
|
|
+ FEATURE_AMX_COMPLEX,
|
||
|
|
CPU_FEATURE_MAX
|
||
|
|
};
|
||
|
|
|
||
|
|
diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h
|
||
|
|
index 6caf06249..cbef68479 100644
|
||
|
|
--- a/gcc/common/config/i386/i386-isas.h
|
||
|
|
+++ b/gcc/common/config/i386/i386-isas.h
|
||
|
|
@@ -177,4 +177,6 @@ ISA_NAMES_TABLE_START
|
||
|
|
ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
|
||
|
|
ISA_NAMES_TABLE_ENTRY("amx-fp16", FEATURE_AMX_FP16, P_NONE, "-mamx-fp16")
|
||
|
|
ISA_NAMES_TABLE_ENTRY("prefetchi", FEATURE_PREFETCHI, P_NONE, "-mprefetchi")
|
||
|
|
+ ISA_NAMES_TABLE_ENTRY("amx-complex", FEATURE_AMX_COMPLEX,
|
||
|
|
+ P_NONE, "-mamx-complex")
|
||
|
|
ISA_NAMES_TABLE_END
|
||
|
|
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||
|
|
index 9bad238e3..ca5c8f8a0 100644
|
||
|
|
--- a/gcc/config.gcc
|
||
|
|
+++ b/gcc/config.gcc
|
||
|
|
@@ -424,7 +424,7 @@ i[34567]86-*-* | x86_64-*-*)
|
||
|
|
amxbf16intrin.h x86gprintrin.h uintrintrin.h
|
||
|
|
hresetintrin.h keylockerintrin.h avxvnniintrin.h
|
||
|
|
mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
|
||
|
|
- amxfp16intrin.h prfchiintrin.h"
|
||
|
|
+ amxfp16intrin.h prfchiintrin.h amxcomplexintrin.h"
|
||
|
|
;;
|
||
|
|
ia64-*-*)
|
||
|
|
extra_headers=ia64intrin.h
|
||
|
|
diff --git a/gcc/config/i386/amxcomplexintrin.h b/gcc/config/i386/amxcomplexintrin.h
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..6ea1eca04
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/config/i386/amxcomplexintrin.h
|
||
|
|
@@ -0,0 +1,59 @@
|
||
|
|
+/* Copyright (C) 2023 Free Software Foundation, Inc.
|
||
|
|
+
|
||
|
|
+ This file is part of GCC.
|
||
|
|
+
|
||
|
|
+ GCC is free software; you can redistribute it and/or modify
|
||
|
|
+ it under the terms of the GNU General Public License as published by
|
||
|
|
+ the Free Software Foundation; either version 3, or (at your option)
|
||
|
|
+ any later version.
|
||
|
|
+
|
||
|
|
+ GCC is distributed in the hope that it will be useful,
|
||
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
|
+ GNU General Public License for more details.
|
||
|
|
+
|
||
|
|
+ Under Section 7 of GPL version 3, you are granted additional
|
||
|
|
+ permissions described in the GCC Runtime Library Exception, version
|
||
|
|
+ 3.1, as published by the Free Software Foundation.
|
||
|
|
+
|
||
|
|
+ You should have received a copy of the GNU General Public License and
|
||
|
|
+ a copy of the GCC Runtime Library Exception along with this program;
|
||
|
|
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||
|
|
+ <http://www.gnu.org/licenses/>. */
|
||
|
|
+
|
||
|
|
+#if !defined _IMMINTRIN_H_INCLUDED
|
||
|
|
+#error "Never use <amxcomplexintrin.h> directly; include <immintrin.h> instead."
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+#ifndef _AMXCOMPLEXINTRIN_H_INCLUDED
|
||
|
|
+#define _AMXCOMPLEXINTRIN_H_INCLUDED
|
||
|
|
+
|
||
|
|
+#if !defined(__AMX_COMPLEX__)
|
||
|
|
+#pragma GCC push_options
|
||
|
|
+#pragma GCC target("amx-complex")
|
||
|
|
+#define __DISABLE_AMX_COMPLEX__
|
||
|
|
+#endif /* __AMX_COMPLEX__ */
|
||
|
|
+
|
||
|
|
+#if defined(__x86_64__)
|
||
|
|
+#define _tile_cmmimfp16ps_internal(src1_dst,src2,src3) \
|
||
|
|
+ __asm__ volatile\
|
||
|
|
+ ("{tcmmimfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tcmmimfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
|
||
|
|
+
|
||
|
|
+#define _tile_cmmrlfp16ps_internal(src1_dst,src2,src3) \
|
||
|
|
+ __asm__ volatile\
|
||
|
|
+ ("{tcmmrlfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tcmmrlfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
|
||
|
|
+
|
||
|
|
+#define _tile_cmmimfp16ps(src1_dst,src2,src3) \
|
||
|
|
+ _tile_cmmimfp16ps_internal (src1_dst, src2, src3)
|
||
|
|
+
|
||
|
|
+#define _tile_cmmrlfp16ps(src1_dst,src2,src3) \
|
||
|
|
+ _tile_cmmrlfp16ps_internal (src1_dst, src2, src3)
|
||
|
|
+
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+#ifdef __DISABLE_AMX_COMPLEX__
|
||
|
|
+#undef __DISABLE_AMX_COMPLEX__
|
||
|
|
+#pragma GCC pop_options
|
||
|
|
+#endif /* __DISABLE_AMX_COMPLEX__ */
|
||
|
|
+
|
||
|
|
+#endif /* _AMXCOMPLEXINTRIN_H_INCLUDED */
|
||
|
|
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
|
||
|
|
index 21100149a..530a45fad 100644
|
||
|
|
--- a/gcc/config/i386/cpuid.h
|
||
|
|
+++ b/gcc/config/i386/cpuid.h
|
||
|
|
@@ -136,6 +136,7 @@
|
||
|
|
#define bit_AMX_BF16 (1 << 22)
|
||
|
|
#define bit_AMX_TILE (1 << 24)
|
||
|
|
#define bit_AMX_INT8 (1 << 25)
|
||
|
|
+#define bit_AMX_COMPLEX (1 << 8)
|
||
|
|
|
||
|
|
/* Extended State Enumeration Sub-leaf (%eax == 0xd, %ecx == 1) */
|
||
|
|
#define bit_XSAVEOPT (1 << 0)
|
||
|
|
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
|
||
|
|
index 04f1dd682..5e0ac278c 100644
|
||
|
|
--- a/gcc/config/i386/i386-c.cc
|
||
|
|
+++ b/gcc/config/i386/i386-c.cc
|
||
|
|
@@ -644,6 +644,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||
|
|
def_or_undef (parse_in, "__AMX_FP16__");
|
||
|
|
if (isa_flag2 & OPTION_MASK_ISA2_PREFETCHI)
|
||
|
|
def_or_undef (parse_in, "__PREFETCHI__");
|
||
|
|
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_COMPLEX)
|
||
|
|
+ def_or_undef (parse_in, "__AMX_COMPLEX__");
|
||
|
|
if (TARGET_IAMCU)
|
||
|
|
{
|
||
|
|
def_or_undef (parse_in, "__iamcu");
|
||
|
|
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
|
||
|
|
index 744a7df85..7445b1bf7 100644
|
||
|
|
--- a/gcc/config/i386/i386-isa.def
|
||
|
|
+++ b/gcc/config/i386/i386-isa.def
|
||
|
|
@@ -111,3 +111,4 @@ DEF_PTA(AVXVNNI)
|
||
|
|
DEF_PTA(AVX512FP16)
|
||
|
|
DEF_PTA(AMX_FP16)
|
||
|
|
DEF_PTA(PREFETCHI)
|
||
|
|
+DEF_PTA(AMX_COMPLEX)
|
||
|
|
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||
|
|
index 6645e3259..7efd25084 100644
|
||
|
|
--- a/gcc/config/i386/i386-options.cc
|
||
|
|
+++ b/gcc/config/i386/i386-options.cc
|
||
|
|
@@ -233,7 +233,8 @@ static struct ix86_target_opts isa2_opts[] =
|
||
|
|
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
|
||
|
|
{ "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
|
||
|
|
{ "-mamx-fp16", OPTION_MASK_ISA2_AMX_FP16 },
|
||
|
|
- { "-mprefetchi", OPTION_MASK_ISA2_PREFETCHI }
|
||
|
|
+ { "-mprefetchi", OPTION_MASK_ISA2_PREFETCHI },
|
||
|
|
+ { "-mamx-complex", OPTION_MASK_ISA2_AMX_COMPLEX }
|
||
|
|
};
|
||
|
|
static struct ix86_target_opts isa_opts[] =
|
||
|
|
{
|
||
|
|
@@ -1080,6 +1081,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
||
|
|
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
|
||
|
|
IX86_ATTR_ISA ("amx-fp16", OPT_mamx_fp16),
|
||
|
|
IX86_ATTR_ISA ("prefetchi", OPT_mprefetchi),
|
||
|
|
+ IX86_ATTR_ISA ("amx-complex", OPT_mamx_complex),
|
||
|
|
|
||
|
|
/* enum options */
|
||
|
|
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
|
||
|
|
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
|
||
|
|
index 50cd114f6..fba94f3f6 100644
|
||
|
|
--- a/gcc/config/i386/i386.opt
|
||
|
|
+++ b/gcc/config/i386/i386.opt
|
||
|
|
@@ -1234,3 +1234,7 @@ Support AMX-FP16 built-in functions and code generation.
|
||
|
|
mprefetchi
|
||
|
|
Target Mask(ISA2_PREFETCHI) Var(ix86_isa_flags2) Save
|
||
|
|
Support PREFETCHI built-in functions and code generation.
|
||
|
|
+
|
||
|
|
+mamx-complex
|
||
|
|
+Target Mask(ISA2_AMX_COMPLEX) Var(ix86_isa_flags2) Save
|
||
|
|
+Support AMX-COMPLEX built-in functions and code generation.
|
||
|
|
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
|
||
|
|
index 0447ca4b2..bd819c7f4 100644
|
||
|
|
--- a/gcc/config/i386/immintrin.h
|
||
|
|
+++ b/gcc/config/i386/immintrin.h
|
||
|
|
@@ -124,6 +124,8 @@
|
||
|
|
|
||
|
|
#include <amxbf16intrin.h>
|
||
|
|
|
||
|
|
+#include <amxcomplexintrin.h>
|
||
|
|
+
|
||
|
|
#include <prfchwintrin.h>
|
||
|
|
|
||
|
|
#include <keylockerintrin.h>
|
||
|
|
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||
|
|
index ba9faf4b2..d7b0bc802 100644
|
||
|
|
--- a/gcc/doc/extend.texi
|
||
|
|
+++ b/gcc/doc/extend.texi
|
||
|
|
@@ -7048,6 +7048,11 @@ Enable/disable the generation of the AMX-FP16 instructions.
|
||
|
|
@cindex @code{target("prefetchi")} function attribute, x86
|
||
|
|
Enable/disable the generation of the PREFETCHI instructions.
|
||
|
|
|
||
|
|
+@cindex @code{target("amx-complex")} function attribute, x86
|
||
|
|
+@item amx-complex
|
||
|
|
+@itemx no-amx-complex
|
||
|
|
+Enable/disable the generation of the AMX-COMPLEX instructions.
|
||
|
|
+
|
||
|
|
@item cld
|
||
|
|
@itemx no-cld
|
||
|
|
@cindex @code{target("cld")} function attribute, x86
|
||
|
|
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||
|
|
index 8ca831dc1..186b33481 100644
|
||
|
|
--- a/gcc/doc/invoke.texi
|
||
|
|
+++ b/gcc/doc/invoke.texi
|
||
|
|
@@ -1428,7 +1428,7 @@ See RS/6000 and PowerPC Options.
|
||
|
|
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
|
||
|
|
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
|
||
|
|
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
|
||
|
|
--mavx512fp16 -mamx-fp16 -mprefetchi @gol
|
||
|
|
+-mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex @gol
|
||
|
|
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
|
||
|
|
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
|
||
|
|
-mkl -mwidekl @gol
|
||
|
|
@@ -32459,6 +32459,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||
|
|
@need 200
|
||
|
|
@itemx -mprefetchi
|
||
|
|
@opindex mprefetchi
|
||
|
|
+@need 200
|
||
|
|
+@opindex mamx-complex
|
||
|
|
+@itemx -mamx-complex
|
||
|
|
These switches enable the use of instructions in the MMX, SSE,
|
||
|
|
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
|
||
|
|
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
|
||
|
|
@@ -32469,7 +32472,7 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
|
||
|
|
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
|
||
|
|
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
|
||
|
|
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16,
|
||
|
|
-AMX-FP16, PREFETCHI or CLDEMOTE extended instruction sets. Each has a corresponding
|
||
|
|
+AMX-FP16, PREFETCHI, AMX-COMPLEX or CLDEMOTE extended instruction sets. Each has a corresponding
|
||
|
|
@option{-mno-} option to disable use of these instructions.
|
||
|
|
|
||
|
|
These extensions are also available as built-in functions: see
|
||
|
|
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
|
||
|
|
index c68e492dc..454fae11a 100644
|
||
|
|
--- a/gcc/doc/sourcebuild.texi
|
||
|
|
+++ b/gcc/doc/sourcebuild.texi
|
||
|
|
@@ -2472,6 +2472,9 @@ Target supports the execution of @code{amx-int8} instructions.
|
||
|
|
@item amx_bf16
|
||
|
|
Target supports the execution of @code{amx-bf16} instructions.
|
||
|
|
|
||
|
|
+@item amx_complex
|
||
|
|
+Target supports the execution of @code{amx-complex} instructions.
|
||
|
|
+
|
||
|
|
@item amx_fp16
|
||
|
|
Target supports the execution of @code{amx-fp16} instructions.
|
||
|
|
|
||
|
|
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
|
||
|
|
index 72ed5fed0..ae1b8f632 100644
|
||
|
|
--- a/gcc/testsuite/g++.dg/other/i386-2.C
|
||
|
|
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
|
||
|
|
@@ -1,5 +1,5 @@
|
||
|
|
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||
|
|
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */
|
||
|
|
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex" } */
|
||
|
|
|
||
|
|
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||
|
|
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||
|
|
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
|
||
|
|
index 9dd53653f..783e35774 100644
|
||
|
|
--- a/gcc/testsuite/g++.dg/other/i386-3.C
|
||
|
|
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
|
||
|
|
@@ -1,5 +1,5 @@
|
||
|
|
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||
|
|
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */
|
||
|
|
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex" } */
|
||
|
|
|
||
|
|
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||
|
|
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h
|
||
|
|
index 27dd37bf9..f1a04cf1f 100644
|
||
|
|
--- a/gcc/testsuite/gcc.target/i386/amx-check.h
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/amx-check.h
|
||
|
|
@@ -216,6 +216,9 @@ main ()
|
||
|
|
#ifdef AMX_FP16
|
||
|
|
&& __builtin_cpu_supports ("amx-fp16")
|
||
|
|
#endif
|
||
|
|
+#ifdef AMX_COMPLEX
|
||
|
|
+ && __builtin_cpu_supports ("amx-complex")
|
||
|
|
+#endif
|
||
|
|
#ifdef __linux__
|
||
|
|
&& request_perm_xtile_data ()
|
||
|
|
#endif
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amx-helper.h b/gcc/testsuite/gcc.target/i386/amx-helper.h
|
||
|
|
index fe24d7067..6ed9f5eb3 100644
|
||
|
|
--- a/gcc/testsuite/gcc.target/i386/amx-helper.h
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/amx-helper.h
|
||
|
|
@@ -1,6 +1,6 @@
|
||
|
|
#ifndef AMX_HELPER_H_INCLUDED
|
||
|
|
#define AMX_HELPER_H_INCLUDED
|
||
|
|
-#if defined(AMX_FP16)
|
||
|
|
+#if defined(AMX_FP16) || defined(AMX_COMPLEX)
|
||
|
|
#include <immintrin.h>
|
||
|
|
#include <xmmintrin.h>
|
||
|
|
#endif
|
||
|
|
@@ -12,7 +12,7 @@ typedef union
|
||
|
|
uint16_t u;
|
||
|
|
} union16f_uw;
|
||
|
|
|
||
|
|
-#if defined(AMX_FP16)
|
||
|
|
+#if defined(AMX_FP16) || defined(AMX_COMPLEX)
|
||
|
|
/* Transformation functions between fp16/float */
|
||
|
|
static uint16_t make_f32_fp16 (float f)
|
||
|
|
{
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..b6745e34b
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/amxcomplex-asmatt-1.c
|
||
|
|
@@ -0,0 +1,15 @@
|
||
|
|
+/* { dg-do compile { target { ! ia32 } } } */
|
||
|
|
+/* { dg-options "-O2 -mamx-complex" } */
|
||
|
|
+/* { dg-final { scan-assembler "tcmmimfp16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
|
||
|
|
+/* { dg-final { scan-assembler "tcmmrlfp16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */
|
||
|
|
+#include <immintrin.h>
|
||
|
|
+
|
||
|
|
+#define TMM1 1
|
||
|
|
+#define TMM2 2
|
||
|
|
+#define TMM3 3
|
||
|
|
+
|
||
|
|
+void TEST()
|
||
|
|
+{
|
||
|
|
+ _tile_cmmimfp16ps (TMM1, TMM2, TMM3);
|
||
|
|
+ _tile_cmmrlfp16ps (TMM1, TMM2, TMM3);
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..305465e88
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/amxcomplex-asmintel-1.c
|
||
|
|
@@ -0,0 +1,12 @@
|
||
|
|
+/* { dg-do compile { target { ! ia32 } } } */
|
||
|
|
+/* { dg-require-effective-target masm_intel } */
|
||
|
|
+/* { dg-options "-O2 -mamx-complex -masm=intel" } */
|
||
|
|
+/* { dg-final { scan-assembler "tcmmimfp16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
|
||
|
|
+/* { dg-final { scan-assembler "tcmmrlfp16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */
|
||
|
|
+#include <immintrin.h>
|
||
|
|
+
|
||
|
|
+void TEST()
|
||
|
|
+{
|
||
|
|
+ _tile_cmmimfp16ps (1, 2, 3);
|
||
|
|
+ _tile_cmmrlfp16ps (1, 2, 3);
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..6e3762c9f
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmimfp16ps-2.c
|
||
|
|
@@ -0,0 +1,53 @@
|
||
|
|
+/* { dg-do run { target { ! ia32 } } } */
|
||
|
|
+/* { dg-require-effective-target amx_complex } */
|
||
|
|
+/* { dg-require-effective-target avx512fp16 } */
|
||
|
|
+/* { dg-options "-O2 -mamx-complex -mavx512fp16" } */
|
||
|
|
+#define AMX_COMPLEX
|
||
|
|
+#define DO_TEST test_amx_complex_cmmimfp16ps
|
||
|
|
+void test_amx_complex_cmmimfp16ps ();
|
||
|
|
+#include "amx-helper.h"
|
||
|
|
+
|
||
|
|
+void calc_matrix_cmmimfp16ps (__tile *dst, __tile *src1, __tile *src2)
|
||
|
|
+{
|
||
|
|
+ uint16_t *src1_buf = (uint16_t *) src1->buf;
|
||
|
|
+ uint16_t *src2_buf = (uint16_t *) src2->buf;
|
||
|
|
+ float *dst_buf = (float *) dst->buf;
|
||
|
|
+
|
||
|
|
+ int M = src1->rows;
|
||
|
|
+ int N = src1->colsb / 4;
|
||
|
|
+ int K = src2->colsb / 4;
|
||
|
|
+ int i, j, k, t;
|
||
|
|
+
|
||
|
|
+ for (i = 0; i < M; i++)
|
||
|
|
+ for (j = 0; j < N; j++)
|
||
|
|
+ for (k = 0; k < K; k++)
|
||
|
|
+ for (t = 0; t < 2; t+=2)
|
||
|
|
+ dst_buf[i * N + k] +=
|
||
|
|
+ (make_fp16_f32(src1_buf[i * 2 * N + 2 * j + t]) *
|
||
|
|
+ make_fp16_f32(src2_buf[j * 2 * K + 2 * k + t + 1])) +
|
||
|
|
+ (make_fp16_f32(src1_buf[i * 2 * N + 2 * j + t + 1]) *
|
||
|
|
+ make_fp16_f32(src2_buf[j * 2 * K + 2 * k + t]));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+void test_amx_complex_cmmimfp16ps ()
|
||
|
|
+{
|
||
|
|
+ __tilecfg_u cfg;
|
||
|
|
+ __tile dst, dst_ref, src1, src2;
|
||
|
|
+ uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024];
|
||
|
|
+
|
||
|
|
+ init_fp16_max_tile_buffer (tmp_dst_buf);
|
||
|
|
+ init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf);
|
||
|
|
+
|
||
|
|
+ init_tile_config (&cfg);
|
||
|
|
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf);
|
||
|
|
+ init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf);
|
||
|
|
+ init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf);
|
||
|
|
+
|
||
|
|
+ calc_matrix_cmmimfp16ps (&dst, &src1, &src2);
|
||
|
|
+
|
||
|
|
+ _tile_cmmimfp16ps (1, 2, 3);
|
||
|
|
+ _tile_stored (1, dst_ref.buf, _STRIDE);
|
||
|
|
+
|
||
|
|
+ if (!check_tile_register (&dst_ref, &dst))
|
||
|
|
+ abort ();
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..15940708a
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/amxcomplex-cmmrlfp16ps-2.c
|
||
|
|
@@ -0,0 +1,53 @@
|
||
|
|
+/* { dg-do run { target { ! ia32 } } } */
|
||
|
|
+/* { dg-require-effective-target amx_complex } */
|
||
|
|
+/* { dg-require-effective-target avx512fp16 } */
|
||
|
|
+/* { dg-options "-O2 -mamx-complex -mavx512fp16" } */
|
||
|
|
+#define AMX_COMPLEX
|
||
|
|
+#define DO_TEST test_amx_complex_cmmrlfp16ps
|
||
|
|
+void test_amx_complex_cmmrlfp16ps();
|
||
|
|
+#include "amx-helper.h"
|
||
|
|
+
|
||
|
|
+void calc_matrix_cmmrlfp16ps (__tile *dst, __tile *src1, __tile *src2)
|
||
|
|
+{
|
||
|
|
+ uint16_t *src1_buf = (uint16_t *) src1->buf;
|
||
|
|
+ uint16_t *src2_buf = (uint16_t *) src2->buf;
|
||
|
|
+ float *dst_buf = (float *) dst->buf;
|
||
|
|
+
|
||
|
|
+ int M = src1->rows;
|
||
|
|
+ int N = src1->colsb / 4;
|
||
|
|
+ int K = src2->colsb / 4;
|
||
|
|
+ int i, j, k, t;
|
||
|
|
+
|
||
|
|
+ for (i = 0; i < M; i++)
|
||
|
|
+ for (j = 0; j < N; j++)
|
||
|
|
+ for (k = 0; k < K; k++)
|
||
|
|
+ for (t = 0; t < 2; t+=2)
|
||
|
|
+ dst_buf[i * N + k] +=
|
||
|
|
+ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t]) *
|
||
|
|
+ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t])) -
|
||
|
|
+ (make_fp16_f32 (src1_buf[i * 2 * N + 2 * j + t + 1]) *
|
||
|
|
+ make_fp16_f32 (src2_buf[j * 2 * K + 2 * k + t + 1]));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+void test_amx_complex_cmmrlfp16ps ()
|
||
|
|
+{
|
||
|
|
+ __tilecfg_u cfg;
|
||
|
|
+ __tile dst, dst_ref, src1, src2;
|
||
|
|
+ uint8_t tmp_dst_buf[1024], tmp_dst_zero_buf[1024];
|
||
|
|
+
|
||
|
|
+ init_fp16_max_tile_buffer (tmp_dst_buf);
|
||
|
|
+ init_fp16_max_tile_zero_buffer (tmp_dst_zero_buf);
|
||
|
|
+
|
||
|
|
+ init_tile_config (&cfg);
|
||
|
|
+ init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_zero_buf);
|
||
|
|
+ init_tile_reg_and_src_with_buffer (2, src1, tmp_dst_buf);
|
||
|
|
+ init_tile_reg_and_src_with_buffer (3, src2, tmp_dst_buf);
|
||
|
|
+
|
||
|
|
+ calc_matrix_cmmrlfp16ps (&dst, &src1, &src2);
|
||
|
|
+
|
||
|
|
+ _tile_cmmrlfp16ps (1, 2, 3);
|
||
|
|
+ _tile_stored (1, dst_ref.buf, _STRIDE);
|
||
|
|
+
|
||
|
|
+ if (!check_tile_register (&dst_ref, &dst))
|
||
|
|
+ abort ();
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||
|
|
index bdcfdbc88..1a2f3b83d 100644
|
||
|
|
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||
|
|
@@ -82,6 +82,7 @@ extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
|
||
|
|
extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
|
||
|
|
extern void test_amx_fp16 (void) __attribute__((__target__("amx-fp16")));
|
||
|
|
extern void test_prefetchi (void) __attribute__((__target__("prefetchi")));
|
||
|
|
+extern void test_amx_complex (void) __attribute__((__target__("amx-complex")));
|
||
|
|
|
||
|
|
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
|
||
|
|
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
|
||
|
|
@@ -165,6 +166,7 @@ extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
|
||
|
|
extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
|
||
|
|
extern void test_no_amx_fp16 (void) __attribute__((__target__("no-amx-fp16")));
|
||
|
|
extern void test_no_prefetchi (void) __attribute__((__target__("no-prefetchi")));
|
||
|
|
+extern void test_no_amx_complex (void) __attribute__((__target__("no-amx-complex")));
|
||
|
|
|
||
|
|
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
|
||
|
|
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
|
||
|
|
index 9ab4a7e0c..d2aadd506 100644
|
||
|
|
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
|
||
|
|
@@ -3,7 +3,7 @@
|
||
|
|
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
|
||
|
|
with -O -std=c89 -pedantic-errors. */
|
||
|
|
/* { dg-do compile } */
|
||
|
|
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mamx-fp16" } */
|
||
|
|
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mamx-fp16 -mamx-complex" } */
|
||
|
|
|
||
|
|
#include <x86intrin.h>
|
||
|
|
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
|
||
|
|
index db7c0fc7a..c39382836 100644
|
||
|
|
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
|
||
|
|
@@ -1,5 +1,5 @@
|
||
|
|
/* { dg-do compile } */
|
||
|
|
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi" } */
|
||
|
|
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mprefetchi -mamx-complex" } */
|
||
|
|
/* { dg-add-options bind_pic_locally } */
|
||
|
|
|
||
|
|
#include <mm_malloc.h>
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
|
||
|
|
index eaa1a8d81..c34ac1aec 100644
|
||
|
|
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
|
||
|
|
@@ -1,5 +1,5 @@
|
||
|
|
/* { dg-do compile } */
|
||
|
|
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16" } */
|
||
|
|
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mamx-fp16 -mamx-complex" } */
|
||
|
|
/* { dg-add-options bind_pic_locally } */
|
||
|
|
|
||
|
|
#include <mm_malloc.h>
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
|
||
|
|
index 19afe639d..c3667b829 100644
|
||
|
|
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
|
||
|
|
@@ -103,7 +103,7 @@
|
||
|
|
|
||
|
|
|
||
|
|
#ifndef DIFFERENT_PRAGMAS
|
||
|
|
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
||
|
|
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,amx-complex")
|
||
|
|
#endif
|
||
|
|
|
||
|
|
/* Following intrinsics require immediate arguments. They
|
||
|
|
@@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
|
||
|
|
|
||
|
|
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
|
||
|
|
#ifdef DIFFERENT_PRAGMAS
|
||
|
|
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16")
|
||
|
|
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,amx-complex")
|
||
|
|
#endif
|
||
|
|
#include <immintrin.h>
|
||
|
|
test_1 (_cvtss_sh, unsigned short, float, 1)
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
|
||
|
|
index 741694e87..756b6eb9c 100644
|
||
|
|
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
|
||
|
|
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
|
||
|
|
@@ -843,6 +843,6 @@
|
||
|
|
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
|
||
|
|
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
|
||
|
|
|
||
|
|
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,prefetchi")
|
||
|
|
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,amx-fp16,prefetchi,amx-complex")
|
||
|
|
|
||
|
|
#include <x86intrin.h>
|
||
|
|
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
|
||
|
|
index 0d83c780c..d404058fd 100644
|
||
|
|
--- a/gcc/testsuite/lib/target-supports.exp
|
||
|
|
+++ b/gcc/testsuite/lib/target-supports.exp
|
||
|
|
@@ -9421,6 +9421,17 @@ proc check_effective_target_avxvnni { } {
|
||
|
|
} "-mavxvnni" ]
|
||
|
|
}
|
||
|
|
|
||
|
|
+# Return 1 if amx-complex instructions can be compiled: the probe is a tcmmimfp16ps asm statement built as an object with -mamx-complex.
|
||
|
|
+proc check_effective_target_amx_complex { } {
|
||
|
|
+ return [check_no_compiler_messages amx_complex object {
|
||
|
|
+ void
|
||
|
|
+ foo ()
|
||
|
|
+ {
|
||
|
|
+ __asm__ volatile ("tcmmimfp16ps\t%%tmm1, %%tmm2, %%tmm3" ::);
|
||
|
|
+ }
|
||
|
|
+ } "-mamx-complex" ]
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
# Return 1 if sse instructions can be compiled.
|
||
|
|
proc check_effective_target_sse { } {
|
||
|
|
return [check_no_compiler_messages sse object {
|
||
|
|
--
|
||
|
|
2.28.0.windows.1
|
||
|
|
|