[Sync] Sync patch from openeuler/gcc

This commit is contained in:
wangding16 2024-04-24 12:47:45 +08:00
parent baeac561ff
commit 79c5f73966
9 changed files with 914 additions and 0 deletions

View File

@ -0,0 +1,30 @@
From 40469a6119085e4c4741bcaeb9418606d28b40c4 Mon Sep 17 00:00:00 2001
From: Haochen Jiang <haochen.jiang@intel.com>
Date: Fri, 31 Mar 2023 10:49:14 +0800
Subject: [PATCH 24/32] i386: Add AMX-COMPLEX to Granite Rapids
gcc/ChangeLog:
* config/i386/i386.h (PTA_GRANITERAPIDS): Add PTA_AMX_COMPLEX.
(cherry picked from commit afa87bd5f7b126e20268aa959441cde2e02bba0e)
---
gcc/config/i386/i386.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 75953defc..56d7794dc 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2358,7 +2358,7 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
| PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
| PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
- | PTA_PREFETCHI;
+ | PTA_PREFETCHI | PTA_AMX_COMPLEX;
constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
--
2.28.0.windows.1

View File

@ -0,0 +1,212 @@
From 125e5d448538f7534e0fe3df9b7947cf41605b51 Mon Sep 17 00:00:00 2001
From: "Mo, Zewei" <zewei.mo@intel.com>
Date: Mon, 3 Jul 2023 11:00:26 +0800
Subject: [PATCH 25/32] Initial Granite Rapids D Support
gcc/ChangeLog:
* common/config/i386/cpuinfo.h
(get_intel_cpu): Handle Granite Rapids D.
* common/config/i386/i386-common.cc:
(processor_alias_table): Add graniterapids-d.
* common/config/i386/i386-cpuinfo.h
(enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS_D.
* config.gcc: Add -march=graniterapids-d.
* config/i386/driver-i386.cc (host_detect_local_cpu):
Handle graniterapids-d.
* config/i386/i386.h: (PTA_GRANITERAPIDS_D): New.
* doc/extend.texi: Add graniterapids-d.
* doc/invoke.texi: Ditto.
gcc/testsuite/ChangeLog:
* g++.target/i386/mv16.C: Add graniterapids-d.
* gcc.target/i386/funcspec-56.inc: Handle new march.
(cherry picked from commit a0cb65d34cc141571e870fb3b53b3ff47ae3338d)
---
gcc/common/config/i386/cpuinfo.h | 9 ++++++++-
gcc/common/config/i386/i386-common.cc | 2 ++
gcc/common/config/i386/i386-cpuinfo.h | 1 +
gcc/config.gcc | 3 ++-
gcc/config/i386/driver-i386.cc | 5 ++++-
gcc/config/i386/i386.h | 4 +++-
gcc/doc/extend.texi | 3 +++
gcc/doc/invoke.texi | 11 +++++++++++
gcc/testsuite/g++.target/i386/mv16.C | 6 ++++++
gcc/testsuite/gcc.target/i386/funcspec-56.inc | 1 +
10 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 39d3351db..1e53248ef 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -529,7 +529,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
break;
case 0xad:
- case 0xae:
/* Granite Rapids. */
cpu = "graniterapids";
CHECK___builtin_cpu_is ("corei7");
@@ -537,6 +536,14 @@ get_intel_cpu (struct __processor_model *cpu_model,
cpu_model->__cpu_type = INTEL_COREI7;
cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
break;
+ case 0xae:
+ /* Granite Rapids D. */
+ cpu = "graniterapids-d";
+ CHECK___builtin_cpu_is ("corei7");
+ CHECK___builtin_cpu_is ("graniterapids-d");
+ cpu_model->__cpu_type = INTEL_COREI7;
+ cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS_D;
+ break;
case 0x17:
case 0x1d:
/* Penryn. */
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
index 87e8afe9b..28f468f48 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1993,6 +1993,8 @@ const pta processor_alias_table[] =
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
{"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
+ {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
+ M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
{"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
index 56020faac..a32f32c97 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -93,6 +93,7 @@ enum processor_subtypes
INTEL_COREI7_ROCKETLAKE,
AMDFAM19H_ZNVER4,
INTEL_COREI7_GRANITERAPIDS,
+ INTEL_COREI7_GRANITERAPIDS_D,
CPU_SUBTYPE_MAX
};
diff --git a/gcc/config.gcc b/gcc/config.gcc
index ca5c8f8a0..3108ac4eb 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -670,7 +670,8 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
+nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids \
+graniterapids-d native"
# Additional x86 processors supported by --with-cpu=. Each processor
# MUST be separated by exactly one space.
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index ea8c3d8d1..e3bca4b49 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
/* This is unknown family 0x6 CPU. */
if (has_feature (FEATURE_AVX))
{
+ /* Assume Granite Rapids D. */
+ if (has_feature (FEATURE_AMX_COMPLEX))
+ cpu = "graniterapids-d";
/* Assume Granite Rapids. */
- if (has_feature (FEATURE_AMX_FP16))
+ else if (has_feature (FEATURE_AMX_FP16))
cpu = "graniterapids";
/* Assume Tiger Lake */
else if (has_feature (FEATURE_AVX512VP2INTERSECT))
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 56d7794dc..eda3e5e5b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2358,7 +2358,9 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
| PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
| PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
- | PTA_PREFETCHI | PTA_AMX_COMPLEX;
+ | PTA_PREFETCHI;
+constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
+ | PTA_AMX_COMPLEX;
constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index d7b0bc802..674db2f1a 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21837,6 +21837,9 @@ Intel Core i7 Rocketlake CPU.
@item graniterapids
Intel Core i7 graniterapids CPU.
+@item graniterapids-d
+Intel Core i7 graniterapids D CPU.
+
@item bonnell
Intel Atom Bonnell CPU.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 186b33481..a2ec060fd 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -31626,6 +31626,17 @@ MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
+@item graniterapids-d
+Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
+SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
+RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
+AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
+AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
+VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
+MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
+SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
+AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
+
@item k6
AMD K6 CPU with MMX instruction set support.
diff --git a/gcc/testsuite/g++.target/i386/mv16.C b/gcc/testsuite/g++.target/i386/mv16.C
index 65cc24f32..17b1fc722 100644
--- a/gcc/testsuite/g++.target/i386/mv16.C
+++ b/gcc/testsuite/g++.target/i386/mv16.C
@@ -96,6 +96,10 @@ int __attribute__ ((target("arch=graniterapids"))) foo () {
return 26;
}
+int __attribute__ ((target("arch=graniterapids-d"))) foo () {
+ return 28;
+}
+
int main ()
{
int val = foo ();
@@ -136,6 +140,8 @@ int main ()
assert (val == 24);
else if (__builtin_cpu_is ("graniterapids"))
assert (val == 25);
+ else if (__builtin_cpu_is ("graniterapids-d"))
+ assert (val == 26);
else
assert (val == 0);
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
index 1a2f3b83d..f0f3397a7 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
@@ -191,6 +191,7 @@ extern void test_arch_sapphirerapids (void) __attribute__((__target__("arch=sapp
extern void test_arch_alderlake (void) __attribute__((__target__("arch=alderlake")));
extern void test_arch_rocketlake (void) __attribute__((__target__("arch=rocketlake")));
extern void test_arch_graniterapids (void) __attribute__((__target__("arch=graniterapids")));
+extern void test_arch_graniterapids_d (void) __attribute__((__target__("arch=graniterapids-d")));
extern void test_arch_k8 (void) __attribute__((__target__("arch=k8")));
extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3")));
extern void test_arch_opteron (void) __attribute__((__target__("arch=opteron")));
--
2.28.0.windows.1

View File

@ -0,0 +1,48 @@
From a809a6a416af4d08f7feeadfdd5d1f5a76a830b5 Mon Sep 17 00:00:00 2001
From: Haochen Jiang <haochen.jiang@intel.com>
Date: Thu, 20 Jul 2023 10:47:18 +0800
Subject: [PATCH 26/32] Correct Granite Rapids{, D} documentation
gcc/ChangeLog:
* doc/invoke.texi: Remove AVX512VP2INTERSECT in
Granite Rapids{, D} from documentation.
(cherry picked from commit 38daaaa91438d3f635a10bf5d5181c3b29f07df9)
---
gcc/doc/invoke.texi | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a2ec060fd..4d3eccdb2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -31622,9 +31622,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
-AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
+MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
+UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, AMX-FP16
+and PREFETCHI instruction set support.
@item graniterapids-d
Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
@@ -31633,9 +31633,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
-AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
+MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
+UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, AMX-FP16,
+PREFETCHI and AMX-COMPLEX instruction set support.
@item k6
AMD K6 CPU with MMX instruction set support.
--
2.28.0.windows.1

View File

@ -0,0 +1,30 @@
From 62852213bc6d3e56804ca05826bb95a3a2fe4eba Mon Sep 17 00:00:00 2001
From: "Hu, Lin1" <lin1.hu@intel.com>
Date: Thu, 15 Dec 2022 15:51:18 +0800
Subject: [PATCH 27/32] i386: Remove Meteorlake's family_model
gcc/ChangeLog:
* common/config/i386/cpuinfo.h (get_intel_cpu): Remove case 0xb5
for meteorlake.
(cherry picked from commit 9e74b7ec0b218364905e3e7de5c41e8148ffc61b)
---
gcc/common/config/i386/cpuinfo.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 1e53248ef..348bc0c12 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -510,7 +510,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
/* Alder Lake. */
case 0xb7:
/* Raptor Lake. */
- case 0xb5:
case 0xaa:
case 0xac:
/* Meteor Lake. */
--
2.28.0.windows.1

View File

@ -0,0 +1,33 @@
From 73042aa18fe70aa30a9c7c760b08e642560ecccd Mon Sep 17 00:00:00 2001
From: "Cui, Lili" <lili.cui@intel.com>
Date: Thu, 29 Jun 2023 03:10:35 +0000
Subject: [PATCH 28/32] x86: Update model values for Alderlake, Rocketlake and
Raptorlake.
Update model values for Alderlake, Rocketlake and Raptorlake according to SDM.
gcc/ChangeLog:
* common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
from Rocketlake, move model value 0xbf from Alderlake to Raptorlake.
(cherry picked from commit e510c3be13a8ccdf1fc1b27c2501c126d493f335)
---
gcc/common/config/i386/cpuinfo.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 348bc0c12..f9bcb6fad 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
case 0x9a:
/* Alder Lake. */
case 0xb7:
+ case 0xbf:
/* Raptor Lake. */
case 0xaa:
case 0xac:
--
2.28.0.windows.1

View File

@ -0,0 +1,32 @@
From 3dbe28984e0f9c24d6670cfba42983bc32c08b0a Mon Sep 17 00:00:00 2001
From: "Cui, Lili" <lili.cui@intel.com>
Date: Mon, 14 Aug 2023 02:06:00 +0000
Subject: [PATCH 29/32] x86: Update model values for Raptorlake.
Update model values for Raptorlake according to SDM.
gcc/ChangeLog:
* common/config/i386/cpuinfo.h (get_intel_cpu): Add model value 0xba
to Raptorlake.
(cherry picked from commit 614052dd4ea083e086712809c754ffebd9361316)
---
gcc/common/config/i386/cpuinfo.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index f9bcb6fad..da1568fd1 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
case 0x9a:
/* Alder Lake. */
case 0xb7:
+ case 0xba:
case 0xbf:
/* Raptor Lake. */
case 0xaa:
--
2.28.0.windows.1

View File

@ -0,0 +1,159 @@
From 8db0f3cd29bd7f937ffa01dd1100360fbbf5b6f4 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Tue, 22 Aug 2023 18:18:31 +0800
Subject: [PATCH 30/32] Fix target_clone ("arch=graniterapids-d")
Both "graniterapids-d" and "graniterapids" are attached to
PROCESSOR_GRANITERAPIDS in processor_alias_table but mapped to
different __cpu_subtype values in get_intel_cpu.
And get_builtin_code_for_version will try to match the first
PROCESSOR_GRANITERAPIDS entry in processor_alias_table, which maps to
"graniterapids" here.
1861 else if (new_target->arch_specified && new_target->arch > 0)
1862 for (i = 0; i < pta_size; i++)
1863 if (processor_alias_table[i].processor == new_target->arch)
1864 {
1865 const pta *arch_info = &processor_alias_table[i];
1866 switch (arch_info->priority)
1867 {
1868 default:
1869 arg_str = arch_info->name;
This mismatch makes dispatch_function_versions check the predicate
of __builtin_cpu_is ("graniterapids") for "graniterapids-d" and causes
the issue.
The patch explicitly adds PROCESSOR_GRANITERAPIDS_D to make a distinction.
For "alderlake","raptorlake", "meteorlake" they share same isa, cost,
tuning, and mapped to the same __cpu_type/__cpu_subtype in
get_intel_cpu, so no need to add PROCESSOR_RAPTORLAKE and others.
gcc/ChangeLog:
* common/config/i386/i386-common.cc (processor_names): Add new
member graniterapids-d.
(processor_alias_table): Update table with
PROCESSOR_GRANITERAPIDS_D.
* config/i386/i386-options.cc (m_GRANITERAPIDS_D): New macro.
(m_CORE_AVX512): Add m_GRANITERAPIDS_D.
(processor_cost_table): Add icelake_cost for
PROCESSOR_GRANITERAPIDS_D.
* config/i386/i386.h (enum processor_type): Add new member
PROCESSOR_GRANITERAPIDS_D.
* config/i386/i386-c.cc (ix86_target_macros_internal): Handle
PROCESSOR_GRANITERAPIDS_D
---
gcc/common/config/i386/i386-common.cc | 6 ++++--
gcc/config/i386/i386-c.cc | 8 ++++++++
gcc/config/i386/i386-options.cc | 4 +++-
gcc/config/i386/i386.h | 3 ++-
4 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
index 28f468f48..bec6801ce 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1873,6 +1873,7 @@ const char *const processor_names[] =
"alderlake",
"rocketlake",
"graniterapids",
+ "graniterapids-d",
"intel",
"geode",
"k6",
@@ -1993,8 +1994,9 @@ const pta processor_alias_table[] =
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
{"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
- {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
- M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
+ {"graniterapids-d", PROCESSOR_GRANITERAPIDS_D, CPU_HASWELL,
+ PTA_GRANITERAPIDS_D, M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D),
+ P_PROC_AVX512F},
{"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 5e0ac278c..49f0db2b8 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -246,6 +246,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__graniterapids");
def_or_undef (parse_in, "__graniterapids__");
break;
+ case PROCESSOR_GRANITERAPIDS_D:
+ def_or_undef (parse_in, "__graniterapids_d");
+ def_or_undef (parse_in, "__graniterapids_d__");
+ break;
case PROCESSOR_ALDERLAKE:
def_or_undef (parse_in, "__alderlake");
def_or_undef (parse_in, "__alderlake__");
@@ -254,6 +258,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__rocketlake");
def_or_undef (parse_in, "__rocketlake__");
break;
+
/* use PROCESSOR_max to not set/unset the arch macro. */
case PROCESSOR_max:
break;
@@ -426,6 +431,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
case PROCESSOR_GRANITERAPIDS:
def_or_undef (parse_in, "__tune_graniterapids__");
break;
+ case PROCESSOR_GRANITERAPIDS_D:
+ def_or_undef (parse_in, "__tune_graniterapids_d__");
+ break;
case PROCESSOR_INTEL:
case PROCESSOR_GENERIC:
break;
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 7efd25084..86932d719 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -128,10 +128,11 @@ along with GCC; see the file COPYING3. If not see
#define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
#define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
#define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
+#define m_GRANITERAPIDS_D (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS_D)
#define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
| m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
| m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
- | m_ROCKETLAKE | m_GRANITERAPIDS)
+ | m_ROCKETLAKE | m_GRANITERAPIDS | m_GRANITERAPIDS_D)
#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
#define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
@@ -764,6 +765,7 @@ static const struct processor_costs *processor_cost_table[] =
&alderlake_cost,
&icelake_cost,
&icelake_cost,
+ &icelake_cost,
&intel_cost,
&geode_cost,
&k6_cost,
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index eda3e5e5b..5052f878d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2216,7 +2216,7 @@ extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER];
#define DEFAULT_LARGE_SECTION_THRESHOLD 65536
/* Which processor to tune code generation for. These must be in sync
- with processor_target_table in i386.cc. */
+ with processor_cost_table in i386-options.cc. */
enum processor_type
{
@@ -2251,6 +2251,7 @@ enum processor_type
PROCESSOR_ALDERLAKE,
PROCESSOR_ROCKETLAKE,
PROCESSOR_GRANITERAPIDS,
+ PROCESSOR_GRANITERAPIDS_D,
PROCESSOR_INTEL,
PROCESSOR_GEODE,
PROCESSOR_K6,
--
2.28.0.windows.1

View File

@ -0,0 +1,321 @@
From c546aad5d38165e2962456525a0f6a427e03583b Mon Sep 17 00:00:00 2001
From: "Vladimir N. Makarov" <vmakarov@redhat.com>
Date: Thu, 26 Oct 2023 09:50:40 -0400
Subject: [PATCH 31/32] Modify cost calculation for dealing with equivalences
RISCV target developers reported that pseudos with equivalence used in
a loop can be spilled. Simple changes of heuristics of cost
calculation of pseudos with equivalence or even ignoring equivalences
resulted in numerous testsuite failures on different targets or worse
spec2017 performance. This patch implements more sophisticated cost
calculations of pseudos with equivalences. The patch does not change
RA behaviour for targets still using the old reload pass instead of
LRA. The patch solves the reported problem and improves x86-64
specint2017 a bit (specfp2017 performance stays the same). The patch
takes into account how the equivalence will be used: will it be
integrated into the user insns or require an input reload insn. It
requires additional pass over insns. To compensate RA slow down, the
patch removes a pass over insns in the reload pass used by IRA before.
This also decouples IRA from reload more and will help to remove the
reload pass in the future if it ever happens.
gcc/ChangeLog:
* dwarf2out.cc (reg_loc_descriptor): Use lra_eliminate_regs when
LRA is used.
* ira-costs.cc: Include regset.h.
(equiv_can_be_consumed_p, get_equiv_regno, calculate_equiv_gains):
New functions.
(find_costs_and_classes): Call calculate_equiv_gains and redefine
mem_cost of pseudos with equivs when LRA is used.
* var-tracking.cc: Include ira.h and lra.h.
(vt_initialize): Use lra_eliminate_regs when LRA is used.
---
gcc/dwarf2out.cc | 4 +-
gcc/ira-costs.cc | 169 ++++++++++++++++++++++++++++++++++++++++++--
gcc/var-tracking.cc | 14 +++-
3 files changed, 179 insertions(+), 8 deletions(-)
diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 0a5c081d8..f0f6f4fd4 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -14263,7 +14263,9 @@ reg_loc_descriptor (rtx rtl, enum var_init_status initialized)
argument pointer and soft frame pointer rtx's.
Use DW_OP_fbreg offset DW_OP_stack_value in this case. */
if ((rtl == arg_pointer_rtx || rtl == frame_pointer_rtx)
- && eliminate_regs (rtl, VOIDmode, NULL_RTX) != rtl)
+ && (ira_use_lra_p
+ ? lra_eliminate_regs (rtl, VOIDmode, NULL_RTX)
+ : eliminate_regs (rtl, VOIDmode, NULL_RTX)) != rtl)
{
dw_loc_descr_ref result = NULL;
diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
index 642fda529..c79311783 100644
--- a/gcc/ira-costs.cc
+++ b/gcc/ira-costs.cc
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
#include "tm_p.h"
#include "insn-config.h"
#include "regs.h"
+#include "regset.h"
#include "ira.h"
#include "ira-int.h"
#include "addresses.h"
@@ -1750,6 +1751,145 @@ process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node)
process_bb_for_costs (bb);
}
+/* Check that reg REGNO can be replaced by TO in INSN. Return true in case the
+ resulting insn would be a valid one. */
+static bool
+equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn)
+{
+ validate_replace_src_group (regno_reg_rtx[regno], to, insn);
+ bool res = verify_changes (0);
+ cancel_changes (0);
+ return res;
+}
+
+/* Return true if X contains a pseudo with equivalence. In this case also
+ return the pseudo's regno through parameter REGNO. If the pseudo is part of
+ a subreg, return the subreg through parameter SUBREG. */
+
+static bool
+get_equiv_regno (rtx x, int &regno, rtx &subreg)
+{
+ subreg = NULL_RTX;
+ if (GET_CODE (x) == SUBREG)
+ {
+ subreg = x;
+ x = SUBREG_REG (x);
+ }
+ if (REG_P (x)
+ && (ira_reg_equiv[REGNO (x)].memory != NULL
+ || ira_reg_equiv[REGNO (x)].constant != NULL))
+ {
+ regno = REGNO (x);
+ return true;
+ }
+ RTX_CODE code = GET_CODE (x);
+ const char *fmt = GET_RTX_FORMAT (code);
+
+ for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ if (fmt[i] == 'e')
+ {
+ if (get_equiv_regno (XEXP (x, i), regno, subreg))
+ return true;
+ }
+ else if (fmt[i] == 'E')
+ {
+ for (int j = 0; j < XVECLEN (x, i); j++)
+ if (get_equiv_regno (XVECEXP (x, i, j), regno, subreg))
+ return true;
+ }
+ return false;
+}
+
+/* A pass through the current function insns. Calculate costs of using
+ equivalences for pseudos and store them in regno_equiv_gains. */
+
+static void
+calculate_equiv_gains (void)
+{
+ basic_block bb;
+ int regno, freq, cost;
+ rtx subreg;
+ rtx_insn *insn;
+ machine_mode mode;
+ enum reg_class rclass;
+ bitmap_head equiv_pseudos;
+
+ ira_assert (allocno_p);
+ bitmap_initialize (&equiv_pseudos, &reg_obstack);
+ for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
+ if (ira_reg_equiv[regno].init_insns != NULL
+ && (ira_reg_equiv[regno].memory != NULL
+ || (ira_reg_equiv[regno].constant != NULL
+ /* Ignore complicated constants which probably will be placed
+ in memory: */
+ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_DOUBLE
+ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_VECTOR
+ && GET_CODE (ira_reg_equiv[regno].constant) != LABEL_REF)))
+ {
+ rtx_insn_list *x;
+ for (x = ira_reg_equiv[regno].init_insns; x != NULL; x = x->next ())
+ {
+ insn = x->insn ();
+ rtx set = single_set (insn);
+
+ if (set == NULL_RTX || SET_DEST (set) != regno_reg_rtx[regno])
+ break;
+ bb = BLOCK_FOR_INSN (insn);
+ ira_curr_regno_allocno_map
+ = ira_bb_nodes[bb->index].parent->regno_allocno_map;
+ mode = PSEUDO_REGNO_MODE (regno);
+ rclass = pref[COST_INDEX (regno)];
+ ira_init_register_move_cost_if_necessary (mode);
+ if (ira_reg_equiv[regno].memory != NULL)
+ cost = ira_memory_move_cost[mode][rclass][1];
+ else
+ cost = ira_register_move_cost[mode][rclass][rclass];
+ freq = REG_FREQ_FROM_BB (bb);
+ regno_equiv_gains[regno] += cost * freq;
+ }
+ if (x != NULL)
+ /* We found complicated equiv or reverse equiv mem=reg. Ignore
+ them. */
+ regno_equiv_gains[regno] = 0;
+ else
+ bitmap_set_bit (&equiv_pseudos, regno);
+ }
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ freq = REG_FREQ_FROM_BB (bb);
+ ira_curr_regno_allocno_map
+ = ira_bb_nodes[bb->index].parent->regno_allocno_map;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!INSN_P (insn) || !get_equiv_regno (PATTERN (insn), regno, subreg)
+ || !bitmap_bit_p (&equiv_pseudos, regno))
+ continue;
+ rtx subst = ira_reg_equiv[regno].memory;
+
+ if (subst == NULL)
+ subst = ira_reg_equiv[regno].constant;
+ ira_assert (subst != NULL);
+ mode = PSEUDO_REGNO_MODE (regno);
+ ira_init_register_move_cost_if_necessary (mode);
+ bool consumed_p = equiv_can_be_consumed_p (regno, subst, insn);
+
+ rclass = pref[COST_INDEX (regno)];
+ if (MEM_P (subst)
+ /* If it is a change of constant into double for example, the
+ result constant probably will be placed in memory. */
+ || (subreg != NULL_RTX && !INTEGRAL_MODE_P (GET_MODE (subreg))))
+ cost = ira_memory_move_cost[mode][rclass][1] + (consumed_p ? 0 : 1);
+ else if (consumed_p)
+ continue;
+ else
+ cost = ira_register_move_cost[mode][rclass][rclass];
+ regno_equiv_gains[regno] -= cost * freq;
+ }
+ }
+ bitmap_clear (&equiv_pseudos);
+}
+
/* Find costs of register classes and memory for allocnos or pseudos
and their best costs. Set up preferred, alternative and allocno
classes for pseudos. */
@@ -1848,6 +1988,12 @@ find_costs_and_classes (FILE *dump_file)
if (pass == 0)
pref = pref_buffer;
+ if (ira_use_lra_p && allocno_p && pass == 1)
+ /* It is a pass through all insns. So do it once and only for RA (not
+ for insn scheduler) when we already found preferable pseudo register
+ classes on the previous pass. */
+ calculate_equiv_gains ();
+
/* Now for each allocno look at how desirable each class is and
find which class is preferred. */
for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--)
@@ -1940,6 +2086,17 @@ find_costs_and_classes (FILE *dump_file)
}
if (i >= first_moveable_pseudo && i < last_moveable_pseudo)
i_mem_cost = 0;
+ else if (ira_use_lra_p)
+ {
+ if (equiv_savings > 0)
+ {
+ i_mem_cost = 0;
+ if (ira_dump_file != NULL && internal_flag_ira_verbose > 5)
+ fprintf (ira_dump_file,
+ " Use MEM for r%d as the equiv savings is %d\n",
+ i, equiv_savings);
+ }
+ }
else if (equiv_savings < 0)
i_mem_cost = -equiv_savings;
else if (equiv_savings > 0)
@@ -2378,7 +2535,10 @@ ira_costs (void)
total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size
* ira_allocnos_num);
initiate_regno_cost_classes ();
- calculate_elim_costs_all_insns ();
+ if (!ira_use_lra_p)
+ /* Process equivs in reload to update costs through hook
+ ira_adjust_equiv_reg_cost. */
+ calculate_elim_costs_all_insns ();
find_costs_and_classes (ira_dump_file);
setup_allocno_class_and_costs ();
finish_regno_cost_classes ();
@@ -2503,13 +2663,14 @@ ira_tune_allocno_costs (void)
}
}
-/* Add COST to the estimated gain for eliminating REGNO with its
- equivalence. If COST is zero, record that no such elimination is
- possible. */
+/* A hook from the reload pass. Add COST to the estimated gain for eliminating
+ REGNO with its equivalence. If COST is zero, record that no such
+ elimination is possible. */
void
ira_adjust_equiv_reg_cost (unsigned regno, int cost)
{
+ ira_assert (!ira_use_lra_p);
if (cost == 0)
regno_equiv_gains[regno] = 0;
else
diff --git a/gcc/var-tracking.cc b/gcc/var-tracking.cc
index 7c3ad0a55..b10c8c1eb 100644
--- a/gcc/var-tracking.cc
+++ b/gcc/var-tracking.cc
@@ -107,6 +107,8 @@
#include "cfgrtl.h"
#include "cfganal.h"
#include "reload.h"
+#include "ira.h"
+#include "lra.h"
#include "calls.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
@@ -10133,7 +10135,9 @@ vt_initialize (void)
#else
reg = arg_pointer_rtx;
#endif
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
+ elim = (ira_use_lra_p
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
if (elim != reg)
{
if (GET_CODE (elim) == PLUS)
@@ -10153,7 +10157,9 @@ vt_initialize (void)
reg = arg_pointer_rtx;
fp_cfa_offset = ARG_POINTER_CFA_OFFSET (current_function_decl);
#endif
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
+ elim = (ira_use_lra_p
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
if (elim != reg)
{
if (GET_CODE (elim) == PLUS)
@@ -10185,7 +10191,9 @@ vt_initialize (void)
#else
reg = arg_pointer_rtx;
#endif
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
+ elim = (ira_use_lra_p
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
if (elim != reg)
{
if (GET_CODE (elim) == PLUS)
--
2.28.0.windows.1

View File

@ -0,0 +1,49 @@
From 4965473a4211a9feb46a0d168180ab450cb18bcc Mon Sep 17 00:00:00 2001
From: "Vladimir N. Makarov" <vmakarov@redhat.com>
Date: Fri, 27 Oct 2023 08:28:24 -0400
Subject: [PATCH 32/32] Add cost calculation for reg equivalence invariants
My recent patch improving cost calculation for pseudos with equivalence
resulted in failure of gcc.target/arm/eliminate.c on aarch64. This patch
fixes this failure.
gcc/ChangeLog:
* ira-costs.cc: (get_equiv_regno, calculate_equiv_gains):
Process reg equivalence invariants.
---
gcc/ira-costs.cc | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
index c79311783..d33104a30 100644
--- a/gcc/ira-costs.cc
+++ b/gcc/ira-costs.cc
@@ -1777,6 +1777,7 @@ get_equiv_regno (rtx x, int &regno, rtx &subreg)
}
if (REG_P (x)
&& (ira_reg_equiv[REGNO (x)].memory != NULL
+ || ira_reg_equiv[REGNO (x)].invariant != NULL
|| ira_reg_equiv[REGNO (x)].constant != NULL))
{
regno = REGNO (x);
@@ -1819,6 +1820,7 @@ calculate_equiv_gains (void)
for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
if (ira_reg_equiv[regno].init_insns != NULL
&& (ira_reg_equiv[regno].memory != NULL
+ || ira_reg_equiv[regno].invariant != NULL
|| (ira_reg_equiv[regno].constant != NULL
/* Ignore complicated constants which probably will be placed
in memory: */
@@ -1869,6 +1871,8 @@ calculate_equiv_gains (void)
if (subst == NULL)
subst = ira_reg_equiv[regno].constant;
+ if (subst == NULL)
+ subst = ira_reg_equiv[regno].invariant;
ira_assert (subst != NULL);
mode = PSEUDO_REGNO_MODE (regno);
ira_init_register_move_cost_if_necessary (mode);
--
2.28.0.windows.1