[Sync] Sync patch from openeuler/gcc
This commit is contained in:
parent
baeac561ff
commit
79c5f73966
30
0079-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch
Normal file
30
0079-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch
Normal file
@ -0,0 +1,30 @@
|
||||
From 40469a6119085e4c4741bcaeb9418606d28b40c4 Mon Sep 17 00:00:00 2001
|
||||
From: Haochen Jiang <haochen.jiang@intel.com>
|
||||
Date: Fri, 31 Mar 2023 10:49:14 +0800
|
||||
Subject: [PATCH 24/32] i386: Add AMX-COMPLEX to Granite Rapids
|
||||
|
||||
gcc/Changelog:
|
||||
|
||||
* config/i386/i386.h (PTA_GRANITERAPIDS): Add PTA_AMX_COMPLEX.
|
||||
|
||||
(cherry picked from commit afa87bd5f7b126e20268aa959441cde2e02bba0e)
|
||||
---
|
||||
gcc/config/i386/i386.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
|
||||
index 75953defc..56d7794dc 100644
|
||||
--- a/gcc/config/i386/i386.h
|
||||
+++ b/gcc/config/i386/i386.h
|
||||
@@ -2358,7 +2358,7 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
|
||||
| PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
|
||||
| PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
|
||||
constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
|
||||
- | PTA_PREFETCHI;
|
||||
+ | PTA_PREFETCHI | PTA_AMX_COMPLEX;
|
||||
constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
|
||||
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
|
||||
constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
212
0080-Initial-Granite-Rapids-D-Support.patch
Normal file
212
0080-Initial-Granite-Rapids-D-Support.patch
Normal file
@ -0,0 +1,212 @@
|
||||
From 125e5d448538f7534e0fe3df9b7947cf41605b51 Mon Sep 17 00:00:00 2001
|
||||
From: "Mo, Zewei" <zewei.mo@intel.com>
|
||||
Date: Mon, 3 Jul 2023 11:00:26 +0800
|
||||
Subject: [PATCH 25/32] Initial Granite Rapids D Support
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h
|
||||
(get_intel_cpu): Handle Granite Rapids D.
|
||||
* common/config/i386/i386-common.cc:
|
||||
(processor_alias_table): Add graniterapids-d.
|
||||
* common/config/i386/i386-cpuinfo.h
|
||||
(enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS_D.
|
||||
* config.gcc: Add -march=graniterapids-d.
|
||||
* config/i386/driver-i386.cc (host_detect_local_cpu):
|
||||
Handle graniterapids-d.
|
||||
* config/i386/i386.h: (PTA_GRANITERAPIDS_D): New.
|
||||
* doc/extend.texi: Add graniterapids-d.
|
||||
* doc/invoke.texi: Ditto.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* g++.target/i386/mv16.C: Add graniterapids-d.
|
||||
* gcc.target/i386/funcspec-56.inc: Handle new march.
|
||||
|
||||
(cherry picked from commit a0cb65d34cc141571e870fb3b53b3ff47ae3338d)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 9 ++++++++-
|
||||
gcc/common/config/i386/i386-common.cc | 2 ++
|
||||
gcc/common/config/i386/i386-cpuinfo.h | 1 +
|
||||
gcc/config.gcc | 3 ++-
|
||||
gcc/config/i386/driver-i386.cc | 5 ++++-
|
||||
gcc/config/i386/i386.h | 4 +++-
|
||||
gcc/doc/extend.texi | 3 +++
|
||||
gcc/doc/invoke.texi | 11 +++++++++++
|
||||
gcc/testsuite/g++.target/i386/mv16.C | 6 ++++++
|
||||
gcc/testsuite/gcc.target/i386/funcspec-56.inc | 1 +
|
||||
10 files changed, 41 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 39d3351db..1e53248ef 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -529,7 +529,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS;
|
||||
break;
|
||||
case 0xad:
|
||||
- case 0xae:
|
||||
/* Granite Rapids. */
|
||||
cpu = "graniterapids";
|
||||
CHECK___builtin_cpu_is ("corei7");
|
||||
@@ -537,6 +536,14 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
cpu_model->__cpu_type = INTEL_COREI7;
|
||||
cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS;
|
||||
break;
|
||||
+ case 0xae:
|
||||
+ /* Granite Rapids D. */
|
||||
+ cpu = "graniterapids-d";
|
||||
+ CHECK___builtin_cpu_is ("corei7");
|
||||
+ CHECK___builtin_cpu_is ("graniterapids-d");
|
||||
+ cpu_model->__cpu_type = INTEL_COREI7;
|
||||
+ cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS_D;
|
||||
+ break;
|
||||
case 0x17:
|
||||
case 0x1d:
|
||||
/* Penryn. */
|
||||
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||||
index 87e8afe9b..28f468f48 100644
|
||||
--- a/gcc/common/config/i386/i386-common.cc
|
||||
+++ b/gcc/common/config/i386/i386-common.cc
|
||||
@@ -1993,6 +1993,8 @@ const pta processor_alias_table[] =
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
{"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
|
||||
+ {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
|
||||
+ M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
|
||||
{"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
|
||||
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
index 56020faac..a32f32c97 100644
|
||||
--- a/gcc/common/config/i386/i386-cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/i386-cpuinfo.h
|
||||
@@ -93,6 +93,7 @@ enum processor_subtypes
|
||||
INTEL_COREI7_ROCKETLAKE,
|
||||
AMDFAM19H_ZNVER4,
|
||||
INTEL_COREI7_GRANITERAPIDS,
|
||||
+ INTEL_COREI7_GRANITERAPIDS_D,
|
||||
CPU_SUBTYPE_MAX
|
||||
};
|
||||
|
||||
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||
index ca5c8f8a0..3108ac4eb 100644
|
||||
--- a/gcc/config.gcc
|
||||
+++ b/gcc/config.gcc
|
||||
@@ -670,7 +670,8 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
|
||||
silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
|
||||
skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
|
||||
sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
|
||||
-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native"
|
||||
+nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids \
|
||||
+graniterapids-d native"
|
||||
|
||||
# Additional x86 processors supported by --with-cpu=. Each processor
|
||||
# MUST be separated by exactly one space.
|
||||
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
|
||||
index ea8c3d8d1..e3bca4b49 100644
|
||||
--- a/gcc/config/i386/driver-i386.cc
|
||||
+++ b/gcc/config/i386/driver-i386.cc
|
||||
@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
||||
/* This is unknown family 0x6 CPU. */
|
||||
if (has_feature (FEATURE_AVX))
|
||||
{
|
||||
+ /* Assume Granite Rapids D. */
|
||||
+ if (has_feature (FEATURE_AMX_COMPLEX))
|
||||
+ cpu = "graniterapids-d";
|
||||
/* Assume Granite Rapids. */
|
||||
- if (has_feature (FEATURE_AMX_FP16))
|
||||
+ else if (has_feature (FEATURE_AMX_FP16))
|
||||
cpu = "graniterapids";
|
||||
/* Assume Tiger Lake */
|
||||
else if (has_feature (FEATURE_AVX512VP2INTERSECT))
|
||||
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
|
||||
index 56d7794dc..eda3e5e5b 100644
|
||||
--- a/gcc/config/i386/i386.h
|
||||
+++ b/gcc/config/i386/i386.h
|
||||
@@ -2358,7 +2358,9 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
|
||||
| PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
|
||||
| PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
|
||||
constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
|
||||
- | PTA_PREFETCHI | PTA_AMX_COMPLEX;
|
||||
+ | PTA_PREFETCHI;
|
||||
+constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
|
||||
+ | PTA_AMX_COMPLEX;
|
||||
constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
|
||||
| PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
|
||||
constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index d7b0bc802..674db2f1a 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -21837,6 +21837,9 @@ Intel Core i7 Rocketlake CPU.
|
||||
@item graniterapids
|
||||
Intel Core i7 graniterapids CPU.
|
||||
|
||||
+@item graniterapids-d
|
||||
+Intel Core i7 graniterapids D CPU.
|
||||
+
|
||||
@item bonnell
|
||||
Intel Atom Bonnell CPU.
|
||||
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 186b33481..a2ec060fd 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -31626,6 +31626,17 @@ MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
|
||||
SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
|
||||
AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
|
||||
|
||||
+@item graniterapids-d
|
||||
+Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
|
||||
+SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE,
|
||||
+RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
|
||||
+AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
|
||||
+AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
|
||||
+VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
|
||||
+MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
|
||||
+SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
|
||||
+AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
|
||||
+
|
||||
@item k6
|
||||
AMD K6 CPU with MMX instruction set support.
|
||||
|
||||
diff --git a/gcc/testsuite/g++.target/i386/mv16.C b/gcc/testsuite/g++.target/i386/mv16.C
|
||||
index 65cc24f32..17b1fc722 100644
|
||||
--- a/gcc/testsuite/g++.target/i386/mv16.C
|
||||
+++ b/gcc/testsuite/g++.target/i386/mv16.C
|
||||
@@ -96,6 +96,10 @@ int __attribute__ ((target("arch=graniterapids"))) foo () {
|
||||
return 26;
|
||||
}
|
||||
|
||||
+int __attribute__ ((target("arch=graniterapids-d"))) foo () {
|
||||
+ return 28;
|
||||
+}
|
||||
+
|
||||
int main ()
|
||||
{
|
||||
int val = foo ();
|
||||
@@ -136,6 +140,8 @@ int main ()
|
||||
assert (val == 24);
|
||||
else if (__builtin_cpu_is ("graniterapids"))
|
||||
assert (val == 25);
|
||||
+ else if (__builtin_cpu_is ("graniterapids-d"))
|
||||
+ assert (val == 26);
|
||||
else
|
||||
assert (val == 0);
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
index 1a2f3b83d..f0f3397a7 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
|
||||
@@ -191,6 +191,7 @@ extern void test_arch_sapphirerapids (void) __attribute__((__target__("arch=sapp
|
||||
extern void test_arch_alderlake (void) __attribute__((__target__("arch=alderlake")));
|
||||
extern void test_arch_rocketlake (void) __attribute__((__target__("arch=rocketlake")));
|
||||
extern void test_arch_graniterapids (void) __attribute__((__target__("arch=graniterapids")));
|
||||
+extern void test_arch_graniterapids_d (void) __attribute__((__target__("arch=graniterapids-d")));
|
||||
extern void test_arch_k8 (void) __attribute__((__target__("arch=k8")));
|
||||
extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3")));
|
||||
extern void test_arch_opteron (void) __attribute__((__target__("arch=opteron")));
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
48
0081-Correct-Granite-Rapids-D-documentation.patch
Normal file
48
0081-Correct-Granite-Rapids-D-documentation.patch
Normal file
@ -0,0 +1,48 @@
|
||||
From a809a6a416af4d08f7feeadfdd5d1f5a76a830b5 Mon Sep 17 00:00:00 2001
|
||||
From: Haochen Jiang <haochen.jiang@intel.com>
|
||||
Date: Thu, 20 Jul 2023 10:47:18 +0800
|
||||
Subject: [PATCH 26/32] Correct Granite Rapids{, D} documentation
|
||||
|
||||
gcc/Changelog:
|
||||
|
||||
* doc/invoke.texi: Remove AVX512VP2INTERSECT in
|
||||
Granite Rapids{, D} from documentation.
|
||||
|
||||
(cherry picked from commit 38daaaa91438d3f635a10bf5d5181c3b29f07df9)
|
||||
---
|
||||
gcc/doc/invoke.texi | 12 ++++++------
|
||||
1 file changed, 6 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index a2ec060fd..4d3eccdb2 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -31622,9 +31622,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
|
||||
AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
|
||||
AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
|
||||
VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
|
||||
-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
|
||||
-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
|
||||
-AVX512BF16, AMX-FP16 and PREFETCHI instruction set support.
|
||||
+MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
|
||||
+UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, AMX-FP16
|
||||
+and PREFETCHI instruction set support.
|
||||
|
||||
@item graniterapids-d
|
||||
Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
|
||||
@@ -31633,9 +31633,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW,
|
||||
AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ,
|
||||
AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2,
|
||||
VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB,
|
||||
-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG,
|
||||
-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16,
|
||||
-AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support.
|
||||
+MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK,
|
||||
+UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, AMX-FP16,
|
||||
+PREFETCHI and AMX-COMPLEX instruction set support.
|
||||
|
||||
@item k6
|
||||
AMD K6 CPU with MMX instruction set support.
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
30
0082-i386-Remove-Meteorlake-s-family_model.patch
Normal file
30
0082-i386-Remove-Meteorlake-s-family_model.patch
Normal file
@ -0,0 +1,30 @@
|
||||
From 62852213bc6d3e56804ca05826bb95a3a2fe4eba Mon Sep 17 00:00:00 2001
|
||||
From: "Hu, Lin1" <lin1.hu@intel.com>
|
||||
Date: Thu, 15 Dec 2022 15:51:18 +0800
|
||||
Subject: [PATCH 27/32] i386: Remove Meteorlake's family_model
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_intel_cpu): Remove case 0xb5
|
||||
for meteorlake.
|
||||
|
||||
(cherry picked from commit 9e74b7ec0b218364905e3e7de5c41e8148ffc61b)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 1e53248ef..348bc0c12 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -510,7 +510,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
/* Alder Lake. */
|
||||
case 0xb7:
|
||||
/* Raptor Lake. */
|
||||
- case 0xb5:
|
||||
case 0xaa:
|
||||
case 0xac:
|
||||
/* Meteor Lake. */
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -0,0 +1,33 @@
|
||||
From 73042aa18fe70aa30a9c7c760b08e642560ecccd Mon Sep 17 00:00:00 2001
|
||||
From: "Cui, Lili" <lili.cui@intel.com>
|
||||
Date: Thu, 29 Jun 2023 03:10:35 +0000
|
||||
Subject: [PATCH 28/32] x86: Update model values for Alderlake, Rocketlake and
|
||||
Raptorlake.
|
||||
|
||||
Update model values for Alderlake, Rocketlake and Raptorlake according to SDM.
|
||||
|
||||
gcc/ChangeLog
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8
|
||||
from Rocketlake, move model value 0xbf from Alderlake to Raptorlake.
|
||||
|
||||
(cherry picked from commit e510c3be13a8ccdf1fc1b27c2501c126d493f335)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index 348bc0c12..f9bcb6fad 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
case 0x9a:
|
||||
/* Alder Lake. */
|
||||
case 0xb7:
|
||||
+ case 0xbf:
|
||||
/* Raptor Lake. */
|
||||
case 0xaa:
|
||||
case 0xac:
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
32
0084-x86-Update-model-values-for-Raptorlake.patch
Normal file
32
0084-x86-Update-model-values-for-Raptorlake.patch
Normal file
@ -0,0 +1,32 @@
|
||||
From 3dbe28984e0f9c24d6670cfba42983bc32c08b0a Mon Sep 17 00:00:00 2001
|
||||
From: "Cui, Lili" <lili.cui@intel.com>
|
||||
Date: Mon, 14 Aug 2023 02:06:00 +0000
|
||||
Subject: [PATCH 29/32] x86: Update model values for Raptorlake.
|
||||
|
||||
Update model values for Raptorlake according to SDM.
|
||||
|
||||
gcc/ChangeLog
|
||||
|
||||
* common/config/i386/cpuinfo.h (get_intel_cpu): Add model value 0xba
|
||||
to Raptorlake.
|
||||
|
||||
(cherry picked from commit 614052dd4ea083e086712809c754ffebd9361316)
|
||||
---
|
||||
gcc/common/config/i386/cpuinfo.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
|
||||
index f9bcb6fad..da1568fd1 100644
|
||||
--- a/gcc/common/config/i386/cpuinfo.h
|
||||
+++ b/gcc/common/config/i386/cpuinfo.h
|
||||
@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
|
||||
case 0x9a:
|
||||
/* Alder Lake. */
|
||||
case 0xb7:
|
||||
+ case 0xba:
|
||||
case 0xbf:
|
||||
/* Raptor Lake. */
|
||||
case 0xaa:
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
159
0085-Fix-target_clone-arch-graniterapids-d.patch
Normal file
159
0085-Fix-target_clone-arch-graniterapids-d.patch
Normal file
@ -0,0 +1,159 @@
|
||||
From 8db0f3cd29bd7f937ffa01dd1100360fbbf5b6f4 Mon Sep 17 00:00:00 2001
|
||||
From: liuhongt <hongtao.liu@intel.com>
|
||||
Date: Tue, 22 Aug 2023 18:18:31 +0800
|
||||
Subject: [PATCH 30/32] Fix target_clone ("arch=graniterapids-d")
|
||||
|
||||
Both "graniterapids-d" and "graniterapids" are attached with
|
||||
PROCESSOR_GRANITERAPIDS in processor_alias_table but mapped to
|
||||
different __cpu_subtype in get_intel_cpu.
|
||||
|
||||
And get_builtin_code_for_version will try to match the first
|
||||
PROCESSOR_GRANITERAPIDS in processor_alias_table which maps to
|
||||
"graniterapids" here.
|
||||
|
||||
1861 else if (new_target->arch_specified && new_target->arch > 0)
|
||||
1862 for (i = 0; i < pta_size; i++)
|
||||
1863 if (processor_alias_table[i].processor == new_target->arch)
|
||||
1864 {
|
||||
1865 const pta *arch_info = &processor_alias_table[i];
|
||||
1866 switch (arch_info->priority)
|
||||
1867 {
|
||||
1868 default:
|
||||
1869 arg_str = arch_info->name;
|
||||
|
||||
This mismatch makes dispatch_function_versions check the predicate
|
||||
of __builtin_cpu_is ("graniterapids") for "graniterapids-d" and causes
|
||||
the issue.
|
||||
The patch explicitly adds PROCESSOR_GRANITERAPIDS_D to make a distinction.
|
||||
|
||||
For "alderlake","raptorlake", "meteorlake" they share same isa, cost,
|
||||
tuning, and mapped to the same __cpu_type/__cpu_subtype in
|
||||
get_intel_cpu, so no need to add PROCESSOR_RAPTORLAKE and others.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/i386/i386-common.cc (processor_names): Add new
|
||||
member graniterapids-d.
|
||||
* config/i386/i386-options.cc (processor_alias_table): Update
|
||||
table with PROCESSOR_GRANITERAPIDS_D.
|
||||
(m_GRANITERAPIDS_D): New macro.
|
||||
(m_CORE_AVX512): Add m_GRANITERAPIDS_D.
|
||||
(processor_cost_table): Add icelake_cost for
|
||||
PROCESSOR_GRANITERAPIDS_D.
|
||||
* config/i386/i386.h (enum processor_type): Add new member
|
||||
PROCESSOR_GRANITERAPIDS_D.
|
||||
* config/i386/i386-c.cc (ix86_target_macros_internal): Handle
|
||||
PROCESSOR_GRANITERAPIDS_D
|
||||
---
|
||||
gcc/common/config/i386/i386-common.cc | 6 ++++--
|
||||
gcc/config/i386/i386-c.cc | 8 ++++++++
|
||||
gcc/config/i386/i386-options.cc | 4 +++-
|
||||
gcc/config/i386/i386.h | 3 ++-
|
||||
4 files changed, 17 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
|
||||
index 28f468f48..bec6801ce 100644
|
||||
--- a/gcc/common/config/i386/i386-common.cc
|
||||
+++ b/gcc/common/config/i386/i386-common.cc
|
||||
@@ -1873,6 +1873,7 @@ const char *const processor_names[] =
|
||||
"alderlake",
|
||||
"rocketlake",
|
||||
"graniterapids",
|
||||
+ "graniterapids-d",
|
||||
"intel",
|
||||
"geode",
|
||||
"k6",
|
||||
@@ -1993,8 +1994,9 @@ const pta processor_alias_table[] =
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
|
||||
{"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS,
|
||||
M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F},
|
||||
- {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D,
|
||||
- M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F},
|
||||
+ {"graniterapids-d", PROCESSOR_GRANITERAPIDS_D, CPU_HASWELL,
|
||||
+ PTA_GRANITERAPIDS_D, M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D),
|
||||
+ P_PROC_AVX512F},
|
||||
{"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3},
|
||||
{"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL,
|
||||
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
|
||||
index 5e0ac278c..49f0db2b8 100644
|
||||
--- a/gcc/config/i386/i386-c.cc
|
||||
+++ b/gcc/config/i386/i386-c.cc
|
||||
@@ -246,6 +246,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__graniterapids");
|
||||
def_or_undef (parse_in, "__graniterapids__");
|
||||
break;
|
||||
+ case PROCESSOR_GRANITERAPIDS_D:
|
||||
+ def_or_undef (parse_in, "__graniterapids_d");
|
||||
+ def_or_undef (parse_in, "__graniterapids_d__");
|
||||
+ break;
|
||||
case PROCESSOR_ALDERLAKE:
|
||||
def_or_undef (parse_in, "__alderlake");
|
||||
def_or_undef (parse_in, "__alderlake__");
|
||||
@@ -254,6 +258,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__rocketlake");
|
||||
def_or_undef (parse_in, "__rocketlake__");
|
||||
break;
|
||||
+
|
||||
/* use PROCESSOR_max to not set/unset the arch macro. */
|
||||
case PROCESSOR_max:
|
||||
break;
|
||||
@@ -426,6 +431,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
case PROCESSOR_GRANITERAPIDS:
|
||||
def_or_undef (parse_in, "__tune_graniterapids__");
|
||||
break;
|
||||
+ case PROCESSOR_GRANITERAPIDS_D:
|
||||
+ def_or_undef (parse_in, "__tune_graniterapids_d__");
|
||||
+ break;
|
||||
case PROCESSOR_INTEL:
|
||||
case PROCESSOR_GENERIC:
|
||||
break;
|
||||
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
|
||||
index 7efd25084..86932d719 100644
|
||||
--- a/gcc/config/i386/i386-options.cc
|
||||
+++ b/gcc/config/i386/i386-options.cc
|
||||
@@ -128,10 +128,11 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define m_ALDERLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ALDERLAKE)
|
||||
#define m_ROCKETLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ROCKETLAKE)
|
||||
#define m_GRANITERAPIDS (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS)
|
||||
+#define m_GRANITERAPIDS_D (HOST_WIDE_INT_1U<<PROCESSOR_GRANITERAPIDS_D)
|
||||
#define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \
|
||||
| m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \
|
||||
| m_TIGERLAKE | m_COOPERLAKE | m_SAPPHIRERAPIDS \
|
||||
- | m_ROCKETLAKE | m_GRANITERAPIDS)
|
||||
+ | m_ROCKETLAKE | m_GRANITERAPIDS | m_GRANITERAPIDS_D)
|
||||
#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512)
|
||||
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2)
|
||||
#define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT)
|
||||
@@ -764,6 +765,7 @@ static const struct processor_costs *processor_cost_table[] =
|
||||
&alderlake_cost,
|
||||
&icelake_cost,
|
||||
&icelake_cost,
|
||||
+ &icelake_cost,
|
||||
&intel_cost,
|
||||
&geode_cost,
|
||||
&k6_cost,
|
||||
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
|
||||
index eda3e5e5b..5052f878d 100644
|
||||
--- a/gcc/config/i386/i386.h
|
||||
+++ b/gcc/config/i386/i386.h
|
||||
@@ -2216,7 +2216,7 @@ extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER];
|
||||
#define DEFAULT_LARGE_SECTION_THRESHOLD 65536
|
||||
|
||||
/* Which processor to tune code generation for. These must be in sync
|
||||
- with processor_target_table in i386.cc. */
|
||||
+ with processor_cost_table in i386-options.cc. */
|
||||
|
||||
enum processor_type
|
||||
{
|
||||
@@ -2251,6 +2251,7 @@ enum processor_type
|
||||
PROCESSOR_ALDERLAKE,
|
||||
PROCESSOR_ROCKETLAKE,
|
||||
PROCESSOR_GRANITERAPIDS,
|
||||
+ PROCESSOR_GRANITERAPIDS_D,
|
||||
PROCESSOR_INTEL,
|
||||
PROCESSOR_GEODE,
|
||||
PROCESSOR_K6,
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
321
0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch
Normal file
321
0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch
Normal file
@ -0,0 +1,321 @@
|
||||
From c546aad5d38165e2962456525a0f6a427e03583b Mon Sep 17 00:00:00 2001
|
||||
From: "Vladimir N. Makarov" <vmakarov@redhat.com>
|
||||
Date: Thu, 26 Oct 2023 09:50:40 -0400
|
||||
Subject: [PATCH 31/32] Modify cost calculation for dealing with equivalences
|
||||
|
||||
RISCV target developers reported that pseudos with equivalence used in
|
||||
a loop can be spilled. Simple changes of heuristics of cost
|
||||
calculation of pseudos with equivalence or even ignoring equivalences
|
||||
resulted in numerous testsuite failures on different targets or worse
|
||||
spec2017 performance. This patch implements more sophisticated cost
|
||||
calculations of pseudos with equivalences. The patch does not change
|
||||
RA behaviour for targets still using the old reload pass instead of
|
||||
LRA. The patch solves the reported problem and improves x86-64
|
||||
specint2017 a bit (specfp2017 performance stays the same). The patch
|
||||
takes into account how the equivalence will be used: will it be
|
||||
integrated into the user insns or require an input reload insn. It
|
||||
requires additional pass over insns. To compensate RA slow down, the
|
||||
patch removes a pass over insns in the reload pass used by IRA before.
|
||||
This also decouples IRA from reload more and will help to remove the
|
||||
reload pass in the future if it ever happens.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* dwarf2out.cc (reg_loc_descriptor): Use lra_eliminate_regs when
|
||||
LRA is used.
|
||||
* ira-costs.cc: Include regset.h.
|
||||
(equiv_can_be_consumed_p, get_equiv_regno, calculate_equiv_gains):
|
||||
New functions.
|
||||
(find_costs_and_classes): Call calculate_equiv_gains and redefine
|
||||
mem_cost of pseudos with equivs when LRA is used.
|
||||
* var-tracking.cc: Include ira.h and lra.h.
|
||||
(vt_initialize): Use lra_eliminate_regs when LRA is used.
|
||||
---
|
||||
gcc/dwarf2out.cc | 4 +-
|
||||
gcc/ira-costs.cc | 169 ++++++++++++++++++++++++++++++++++++++++++--
|
||||
gcc/var-tracking.cc | 14 +++-
|
||||
3 files changed, 179 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
|
||||
index 0a5c081d8..f0f6f4fd4 100644
|
||||
--- a/gcc/dwarf2out.cc
|
||||
+++ b/gcc/dwarf2out.cc
|
||||
@@ -14263,7 +14263,9 @@ reg_loc_descriptor (rtx rtl, enum var_init_status initialized)
|
||||
argument pointer and soft frame pointer rtx's.
|
||||
Use DW_OP_fbreg offset DW_OP_stack_value in this case. */
|
||||
if ((rtl == arg_pointer_rtx || rtl == frame_pointer_rtx)
|
||||
- && eliminate_regs (rtl, VOIDmode, NULL_RTX) != rtl)
|
||||
+ && (ira_use_lra_p
|
||||
+ ? lra_eliminate_regs (rtl, VOIDmode, NULL_RTX)
|
||||
+ : eliminate_regs (rtl, VOIDmode, NULL_RTX)) != rtl)
|
||||
{
|
||||
dw_loc_descr_ref result = NULL;
|
||||
|
||||
diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
|
||||
index 642fda529..c79311783 100644
|
||||
--- a/gcc/ira-costs.cc
|
||||
+++ b/gcc/ira-costs.cc
|
||||
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tm_p.h"
|
||||
#include "insn-config.h"
|
||||
#include "regs.h"
|
||||
+#include "regset.h"
|
||||
#include "ira.h"
|
||||
#include "ira-int.h"
|
||||
#include "addresses.h"
|
||||
@@ -1750,6 +1751,145 @@ process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node)
|
||||
process_bb_for_costs (bb);
|
||||
}
|
||||
|
||||
+/* Check that reg REGNO can be changed by TO in INSN. Return true in case the
|
||||
+ result insn would be valid one. */
|
||||
+static bool
|
||||
+equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn)
|
||||
+{
|
||||
+ validate_replace_src_group (regno_reg_rtx[regno], to, insn);
|
||||
+ bool res = verify_changes (0);
|
||||
+ cancel_changes (0);
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+/* Return true if X contains a pseudo with equivalence. In this case also
|
||||
+ return the pseudo through parameter REG. If the pseudo is a part of subreg,
|
||||
+ return the subreg through parameter SUBREG. */
|
||||
+
|
||||
+static bool
|
||||
+get_equiv_regno (rtx x, int &regno, rtx &subreg)
|
||||
+{
|
||||
+ subreg = NULL_RTX;
|
||||
+ if (GET_CODE (x) == SUBREG)
|
||||
+ {
|
||||
+ subreg = x;
|
||||
+ x = SUBREG_REG (x);
|
||||
+ }
|
||||
+ if (REG_P (x)
|
||||
+ && (ira_reg_equiv[REGNO (x)].memory != NULL
|
||||
+ || ira_reg_equiv[REGNO (x)].constant != NULL))
|
||||
+ {
|
||||
+ regno = REGNO (x);
|
||||
+ return true;
|
||||
+ }
|
||||
+ RTX_CODE code = GET_CODE (x);
|
||||
+ const char *fmt = GET_RTX_FORMAT (code);
|
||||
+
|
||||
+ for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
||||
+ if (fmt[i] == 'e')
|
||||
+ {
|
||||
+ if (get_equiv_regno (XEXP (x, i), regno, subreg))
|
||||
+ return true;
|
||||
+ }
|
||||
+ else if (fmt[i] == 'E')
|
||||
+ {
|
||||
+ for (int j = 0; j < XVECLEN (x, i); j++)
|
||||
+ if (get_equiv_regno (XVECEXP (x, i, j), regno, subreg))
|
||||
+ return true;
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+/* A pass through the current function insns. Calculate costs of using
|
||||
+ equivalences for pseudos and store them in regno_equiv_gains. */
|
||||
+
|
||||
+static void
|
||||
+calculate_equiv_gains (void)
|
||||
+{
|
||||
+ basic_block bb;
|
||||
+ int regno, freq, cost;
|
||||
+ rtx subreg;
|
||||
+ rtx_insn *insn;
|
||||
+ machine_mode mode;
|
||||
+ enum reg_class rclass;
|
||||
+ bitmap_head equiv_pseudos;
|
||||
+
|
||||
+ ira_assert (allocno_p);
|
||||
+ bitmap_initialize (&equiv_pseudos, &reg_obstack);
|
||||
+ for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
|
||||
+ if (ira_reg_equiv[regno].init_insns != NULL
|
||||
+ && (ira_reg_equiv[regno].memory != NULL
|
||||
+ || (ira_reg_equiv[regno].constant != NULL
|
||||
+ /* Ignore complicated constants which probably will be placed
|
||||
+ in memory: */
|
||||
+ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_DOUBLE
|
||||
+ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_VECTOR
|
||||
+ && GET_CODE (ira_reg_equiv[regno].constant) != LABEL_REF)))
|
||||
+ {
|
||||
+ rtx_insn_list *x;
|
||||
+ for (x = ira_reg_equiv[regno].init_insns; x != NULL; x = x->next ())
|
||||
+ {
|
||||
+ insn = x->insn ();
|
||||
+ rtx set = single_set (insn);
|
||||
+
|
||||
+ if (set == NULL_RTX || SET_DEST (set) != regno_reg_rtx[regno])
|
||||
+ break;
|
||||
+ bb = BLOCK_FOR_INSN (insn);
|
||||
+ ira_curr_regno_allocno_map
|
||||
+ = ira_bb_nodes[bb->index].parent->regno_allocno_map;
|
||||
+ mode = PSEUDO_REGNO_MODE (regno);
|
||||
+ rclass = pref[COST_INDEX (regno)];
|
||||
+ ira_init_register_move_cost_if_necessary (mode);
|
||||
+ if (ira_reg_equiv[regno].memory != NULL)
|
||||
+ cost = ira_memory_move_cost[mode][rclass][1];
|
||||
+ else
|
||||
+ cost = ira_register_move_cost[mode][rclass][rclass];
|
||||
+ freq = REG_FREQ_FROM_BB (bb);
|
||||
+ regno_equiv_gains[regno] += cost * freq;
|
||||
+ }
|
||||
+ if (x != NULL)
|
||||
+ /* We found complicated equiv or reverse equiv mem=reg. Ignore
|
||||
+ them. */
|
||||
+ regno_equiv_gains[regno] = 0;
|
||||
+ else
|
||||
+ bitmap_set_bit (&equiv_pseudos, regno);
|
||||
+ }
|
||||
+
|
||||
+ FOR_EACH_BB_FN (bb, cfun)
|
||||
+ {
|
||||
+ freq = REG_FREQ_FROM_BB (bb);
|
||||
+ ira_curr_regno_allocno_map
|
||||
+ = ira_bb_nodes[bb->index].parent->regno_allocno_map;
|
||||
+ FOR_BB_INSNS (bb, insn)
|
||||
+ {
|
||||
+ if (!INSN_P (insn) || !get_equiv_regno (PATTERN (insn), regno, subreg)
|
||||
+ || !bitmap_bit_p (&equiv_pseudos, regno))
|
||||
+ continue;
|
||||
+ rtx subst = ira_reg_equiv[regno].memory;
|
||||
+
|
||||
+ if (subst == NULL)
|
||||
+ subst = ira_reg_equiv[regno].constant;
|
||||
+ ira_assert (subst != NULL);
|
||||
+ mode = PSEUDO_REGNO_MODE (regno);
|
||||
+ ira_init_register_move_cost_if_necessary (mode);
|
||||
+ bool consumed_p = equiv_can_be_consumed_p (regno, subst, insn);
|
||||
+
|
||||
+ rclass = pref[COST_INDEX (regno)];
|
||||
+ if (MEM_P (subst)
|
||||
+ /* If it is a change of constant into double for example, the
|
||||
+ result constant probably will be placed in memory. */
|
||||
+ || (subreg != NULL_RTX && !INTEGRAL_MODE_P (GET_MODE (subreg))))
|
||||
+ cost = ira_memory_move_cost[mode][rclass][1] + (consumed_p ? 0 : 1);
|
||||
+ else if (consumed_p)
|
||||
+ continue;
|
||||
+ else
|
||||
+ cost = ira_register_move_cost[mode][rclass][rclass];
|
||||
+ regno_equiv_gains[regno] -= cost * freq;
|
||||
+ }
|
||||
+ }
|
||||
+ bitmap_clear (&equiv_pseudos);
|
||||
+}
|
||||
+
|
||||
/* Find costs of register classes and memory for allocnos or pseudos
|
||||
and their best costs. Set up preferred, alternative and allocno
|
||||
classes for pseudos. */
|
||||
@@ -1848,6 +1988,12 @@ find_costs_and_classes (FILE *dump_file)
|
||||
if (pass == 0)
|
||||
pref = pref_buffer;
|
||||
|
||||
+ if (ira_use_lra_p && allocno_p && pass == 1)
|
||||
+ /* It is a pass through all insns. So do it once and only for RA (not
|
||||
+ for insn scheduler) when we already found preferable pseudo register
|
||||
+ classes on the previous pass. */
|
||||
+ calculate_equiv_gains ();
|
||||
+
|
||||
/* Now for each allocno look at how desirable each class is and
|
||||
find which class is preferred. */
|
||||
for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--)
|
||||
@@ -1940,6 +2086,17 @@ find_costs_and_classes (FILE *dump_file)
|
||||
}
|
||||
if (i >= first_moveable_pseudo && i < last_moveable_pseudo)
|
||||
i_mem_cost = 0;
|
||||
+ else if (ira_use_lra_p)
|
||||
+ {
|
||||
+ if (equiv_savings > 0)
|
||||
+ {
|
||||
+ i_mem_cost = 0;
|
||||
+ if (ira_dump_file != NULL && internal_flag_ira_verbose > 5)
|
||||
+ fprintf (ira_dump_file,
|
||||
+ " Use MEM for r%d as the equiv savings is %d\n",
|
||||
+ i, equiv_savings);
|
||||
+ }
|
||||
+ }
|
||||
else if (equiv_savings < 0)
|
||||
i_mem_cost = -equiv_savings;
|
||||
else if (equiv_savings > 0)
|
||||
@@ -2378,7 +2535,10 @@ ira_costs (void)
|
||||
total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size
|
||||
* ira_allocnos_num);
|
||||
initiate_regno_cost_classes ();
|
||||
- calculate_elim_costs_all_insns ();
|
||||
+ if (!ira_use_lra_p)
|
||||
+ /* Process equivs in reload to update costs through hook
|
||||
+ ira_adjust_equiv_reg_cost. */
|
||||
+ calculate_elim_costs_all_insns ();
|
||||
find_costs_and_classes (ira_dump_file);
|
||||
setup_allocno_class_and_costs ();
|
||||
finish_regno_cost_classes ();
|
||||
@@ -2503,13 +2663,14 @@ ira_tune_allocno_costs (void)
|
||||
}
|
||||
}
|
||||
|
||||
-/* Add COST to the estimated gain for eliminating REGNO with its
|
||||
- equivalence. If COST is zero, record that no such elimination is
|
||||
- possible. */
|
||||
+/* A hook from the reload pass. Add COST to the estimated gain for eliminating
|
||||
+ REGNO with its equivalence. If COST is zero, record that no such
|
||||
+ elimination is possible. */
|
||||
|
||||
void
|
||||
ira_adjust_equiv_reg_cost (unsigned regno, int cost)
|
||||
{
|
||||
+ ira_assert (!ira_use_lra_p);
|
||||
if (cost == 0)
|
||||
regno_equiv_gains[regno] = 0;
|
||||
else
|
||||
diff --git a/gcc/var-tracking.cc b/gcc/var-tracking.cc
|
||||
index 7c3ad0a55..b10c8c1eb 100644
|
||||
--- a/gcc/var-tracking.cc
|
||||
+++ b/gcc/var-tracking.cc
|
||||
@@ -107,6 +107,8 @@
|
||||
#include "cfgrtl.h"
|
||||
#include "cfganal.h"
|
||||
#include "reload.h"
|
||||
+#include "ira.h"
|
||||
+#include "lra.h"
|
||||
#include "calls.h"
|
||||
#include "tree-dfa.h"
|
||||
#include "tree-ssa.h"
|
||||
@@ -10133,7 +10135,9 @@ vt_initialize (void)
|
||||
#else
|
||||
reg = arg_pointer_rtx;
|
||||
#endif
|
||||
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
|
||||
+ elim = (ira_use_lra_p
|
||||
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
|
||||
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
|
||||
if (elim != reg)
|
||||
{
|
||||
if (GET_CODE (elim) == PLUS)
|
||||
@@ -10153,7 +10157,9 @@ vt_initialize (void)
|
||||
reg = arg_pointer_rtx;
|
||||
fp_cfa_offset = ARG_POINTER_CFA_OFFSET (current_function_decl);
|
||||
#endif
|
||||
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
|
||||
+ elim = (ira_use_lra_p
|
||||
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
|
||||
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
|
||||
if (elim != reg)
|
||||
{
|
||||
if (GET_CODE (elim) == PLUS)
|
||||
@@ -10185,7 +10191,9 @@ vt_initialize (void)
|
||||
#else
|
||||
reg = arg_pointer_rtx;
|
||||
#endif
|
||||
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
|
||||
+ elim = (ira_use_lra_p
|
||||
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
|
||||
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
|
||||
if (elim != reg)
|
||||
{
|
||||
if (GET_CODE (elim) == PLUS)
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
From 4965473a4211a9feb46a0d168180ab450cb18bcc Mon Sep 17 00:00:00 2001
|
||||
From: "Vladimir N. Makarov" <vmakarov@redhat.com>
|
||||
Date: Fri, 27 Oct 2023 08:28:24 -0400
|
||||
Subject: [PATCH 32/32] Add cost calculation for reg equivalence invariants
|
||||
|
||||
My recent patch improving cost calculation for pseudos with equivalence
|
||||
resulted in failure of gcc.target/arm/eliminate.c on aarch64. This patch
|
||||
fixes this failure.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* ira-costs.cc (get_equiv_regno, calculate_equiv_gains):
|
||||
Process reg equivalence invariants.
|
||||
---
|
||||
gcc/ira-costs.cc | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
|
||||
index c79311783..d33104a30 100644
|
||||
--- a/gcc/ira-costs.cc
|
||||
+++ b/gcc/ira-costs.cc
|
||||
@@ -1777,6 +1777,7 @@ get_equiv_regno (rtx x, int &regno, rtx &subreg)
|
||||
}
|
||||
if (REG_P (x)
|
||||
&& (ira_reg_equiv[REGNO (x)].memory != NULL
|
||||
+ || ira_reg_equiv[REGNO (x)].invariant != NULL
|
||||
|| ira_reg_equiv[REGNO (x)].constant != NULL))
|
||||
{
|
||||
regno = REGNO (x);
|
||||
@@ -1819,6 +1820,7 @@ calculate_equiv_gains (void)
|
||||
for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
|
||||
if (ira_reg_equiv[regno].init_insns != NULL
|
||||
&& (ira_reg_equiv[regno].memory != NULL
|
||||
+ || ira_reg_equiv[regno].invariant != NULL
|
||||
|| (ira_reg_equiv[regno].constant != NULL
|
||||
/* Ignore complicated constants which probably will be placed
|
||||
in memory: */
|
||||
@@ -1869,6 +1871,8 @@ calculate_equiv_gains (void)
|
||||
|
||||
if (subst == NULL)
|
||||
subst = ira_reg_equiv[regno].constant;
|
||||
+ if (subst == NULL)
|
||||
+ subst = ira_reg_equiv[regno].invariant;
|
||||
ira_assert (subst != NULL);
|
||||
mode = PSEUDO_REGNO_MODE (regno);
|
||||
ira_init_register_move_cost_if_necessary (mode);
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user