diff --git a/0079-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch b/0079-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch new file mode 100644 index 0000000..2fc6198 --- /dev/null +++ b/0079-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch @@ -0,0 +1,30 @@ +From 40469a6119085e4c4741bcaeb9418606d28b40c4 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Fri, 31 Mar 2023 10:49:14 +0800 +Subject: [PATCH 24/32] i386: Add AMX-COMPLEX to Granite Rapids + +gcc/Changelog: + + * config/i386/i386.h (PTA_GRANITERAPIDS): Add PTA_AMX_COMPLEX. + +(cherry picked from commit afa87bd5f7b126e20268aa959441cde2e02bba0e) +--- + gcc/config/i386/i386.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 75953defc..56d7794dc 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2358,7 +2358,7 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX + | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE + | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; + constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 +- | PTA_PREFETCHI; ++ | PTA_PREFETCHI | PTA_AMX_COMPLEX; + constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW + | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ; + constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 +-- +2.28.0.windows.1 + diff --git a/0080-Initial-Granite-Rapids-D-Support.patch b/0080-Initial-Granite-Rapids-D-Support.patch new file mode 100644 index 0000000..c1248eb --- /dev/null +++ b/0080-Initial-Granite-Rapids-D-Support.patch @@ -0,0 +1,212 @@ +From 125e5d448538f7534e0fe3df9b7947cf41605b51 Mon Sep 17 00:00:00 2001 +From: "Mo, Zewei" +Date: Mon, 3 Jul 2023 11:00:26 +0800 +Subject: [PATCH 25/32] Initial Granite Rapids D Support + +gcc/ChangeLog: + + * common/config/i386/cpuinfo.h + (get_intel_cpu): Handle Granite Rapids D. 
+ * common/config/i386/i386-common.cc: + (processor_alias_table): Add graniterapids-d. + * common/config/i386/i386-cpuinfo.h + (enum processor_subtypes): Add INTEL_COREI7_GRANITERAPIDS_D. + * config.gcc: Add -march=graniterapids-d. + * config/i386/driver-i386.cc (host_detect_local_cpu): + Handle graniterapids-d. + * config/i386/i386.h: (PTA_GRANITERAPIDS_D): New. + * doc/extend.texi: Add graniterapids-d. + * doc/invoke.texi: Ditto. + +gcc/testsuite/ChangeLog: + + * g++.target/i386/mv16.C: Add graniterapids-d. + * gcc.target/i386/funcspec-56.inc: Handle new march. + +(cherry picked from commit a0cb65d34cc141571e870fb3b53b3ff47ae3338d) +--- + gcc/common/config/i386/cpuinfo.h | 9 ++++++++- + gcc/common/config/i386/i386-common.cc | 2 ++ + gcc/common/config/i386/i386-cpuinfo.h | 1 + + gcc/config.gcc | 3 ++- + gcc/config/i386/driver-i386.cc | 5 ++++- + gcc/config/i386/i386.h | 4 +++- + gcc/doc/extend.texi | 3 +++ + gcc/doc/invoke.texi | 11 +++++++++++ + gcc/testsuite/g++.target/i386/mv16.C | 6 ++++++ + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 1 + + 10 files changed, 41 insertions(+), 4 deletions(-) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 39d3351db..1e53248ef 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -529,7 +529,6 @@ get_intel_cpu (struct __processor_model *cpu_model, + cpu_model->__cpu_subtype = INTEL_COREI7_SAPPHIRERAPIDS; + break; + case 0xad: +- case 0xae: + /* Granite Rapids. */ + cpu = "graniterapids"; + CHECK___builtin_cpu_is ("corei7"); +@@ -537,6 +536,14 @@ get_intel_cpu (struct __processor_model *cpu_model, + cpu_model->__cpu_type = INTEL_COREI7; + cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS; + break; ++ case 0xae: ++ /* Granite Rapids D. 
*/ ++ cpu = "graniterapids-d"; ++ CHECK___builtin_cpu_is ("corei7"); ++ CHECK___builtin_cpu_is ("graniterapids-d"); ++ cpu_model->__cpu_type = INTEL_COREI7; ++ cpu_model->__cpu_subtype = INTEL_COREI7_GRANITERAPIDS_D; ++ break; + case 0x17: + case 0x1d: + /* Penryn. */ +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index 87e8afe9b..28f468f48 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1993,6 +1993,8 @@ const pta processor_alias_table[] = + M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2}, + {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS, + M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F}, ++ {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D, ++ M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F}, + {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL, + M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3}, + {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL, +diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h +index 56020faac..a32f32c97 100644 +--- a/gcc/common/config/i386/i386-cpuinfo.h ++++ b/gcc/common/config/i386/i386-cpuinfo.h +@@ -93,6 +93,7 @@ enum processor_subtypes + INTEL_COREI7_ROCKETLAKE, + AMDFAM19H_ZNVER4, + INTEL_COREI7_GRANITERAPIDS, ++ INTEL_COREI7_GRANITERAPIDS_D, + CPU_SUBTYPE_MAX + }; + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index ca5c8f8a0..3108ac4eb 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -670,7 +670,8 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \ + silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ + skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \ + sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \ +-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids native" ++nano-x2 eden-x4 
nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids \ ++graniterapids-d native" + + # Additional x86 processors supported by --with-cpu=. Each processor + # MUST be separated by exactly one space. +diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc +index ea8c3d8d1..e3bca4b49 100644 +--- a/gcc/config/i386/driver-i386.cc ++++ b/gcc/config/i386/driver-i386.cc +@@ -576,8 +576,11 @@ const char *host_detect_local_cpu (int argc, const char **argv) + /* This is unknown family 0x6 CPU. */ + if (has_feature (FEATURE_AVX)) + { ++ /* Assume Granite Rapids D. */ ++ if (has_feature (FEATURE_AMX_COMPLEX)) ++ cpu = "graniterapids-d"; + /* Assume Granite Rapids. */ +- if (has_feature (FEATURE_AMX_FP16)) ++ else if (has_feature (FEATURE_AMX_FP16)) + cpu = "graniterapids"; + /* Assume Tiger Lake */ + else if (has_feature (FEATURE_AVX512VP2INTERSECT)) +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 56d7794dc..eda3e5e5b 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2358,7 +2358,9 @@ constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX + | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE + | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; + constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 +- | PTA_PREFETCHI | PTA_AMX_COMPLEX; ++ | PTA_PREFETCHI; ++constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS ++ | PTA_AMX_COMPLEX; + constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW + | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ; + constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index d7b0bc802..674db2f1a 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -21837,6 +21837,9 @@ Intel Core i7 Rocketlake CPU. + @item graniterapids + Intel Core i7 graniterapids CPU. + ++@item graniterapids-d ++Intel Core i7 graniterapids D CPU. 
++ + @item bonnell + Intel Atom Bonnell CPU. + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 186b33481..a2ec060fd 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -31626,6 +31626,17 @@ MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, + SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, + AVX512BF16, AMX-FP16 and PREFETCHI instruction set support. + ++@item graniterapids-d ++Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, ++SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, ++RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, ++AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, ++AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, ++VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, ++MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, ++SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, ++AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support. ++ + @item k6 + AMD K6 CPU with MMX instruction set support. 
+ +diff --git a/gcc/testsuite/g++.target/i386/mv16.C b/gcc/testsuite/g++.target/i386/mv16.C +index 65cc24f32..17b1fc722 100644 +--- a/gcc/testsuite/g++.target/i386/mv16.C ++++ b/gcc/testsuite/g++.target/i386/mv16.C +@@ -96,6 +96,10 @@ int __attribute__ ((target("arch=graniterapids"))) foo () { + return 26; + } + ++int __attribute__ ((target("arch=graniterapids-d"))) foo () { ++ return 28; ++} ++ + int main () + { + int val = foo (); +@@ -136,6 +140,8 @@ int main () + assert (val == 24); + else if (__builtin_cpu_is ("graniterapids")) + assert (val == 25); ++ else if (__builtin_cpu_is ("graniterapids-d")) ++ assert (val == 26); + else + assert (val == 0); + +diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +index 1a2f3b83d..f0f3397a7 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc ++++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +@@ -191,6 +191,7 @@ extern void test_arch_sapphirerapids (void) __attribute__((__target__("arch=sapp + extern void test_arch_alderlake (void) __attribute__((__target__("arch=alderlake"))); + extern void test_arch_rocketlake (void) __attribute__((__target__("arch=rocketlake"))); + extern void test_arch_graniterapids (void) __attribute__((__target__("arch=graniterapids"))); ++extern void test_arch_graniterapids_d (void) __attribute__((__target__("arch=graniterapids-d"))); + extern void test_arch_k8 (void) __attribute__((__target__("arch=k8"))); + extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3"))); + extern void test_arch_opteron (void) __attribute__((__target__("arch=opteron"))); +-- +2.28.0.windows.1 + diff --git a/0081-Correct-Granite-Rapids-D-documentation.patch b/0081-Correct-Granite-Rapids-D-documentation.patch new file mode 100644 index 0000000..92e7154 --- /dev/null +++ b/0081-Correct-Granite-Rapids-D-documentation.patch @@ -0,0 +1,48 @@ +From a809a6a416af4d08f7feeadfdd5d1f5a76a830b5 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang 
+Date: Thu, 20 Jul 2023 10:47:18 +0800 +Subject: [PATCH 26/32] Correct Granite Rapids{, D} documentation + +gcc/Changelog: + + * doc/invoke.texi: Remove AVX512VP2INTERSECT in + Granite Rapids{, D} from documentation. + +(cherry picked from commit 38daaaa91438d3f635a10bf5d5181c3b29f07df9) +--- + gcc/doc/invoke.texi | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index a2ec060fd..4d3eccdb2 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -31622,9 +31622,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, + AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, + AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, + VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, +-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, +-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, +-AVX512BF16, AMX-FP16 and PREFETCHI instruction set support. ++MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, ++UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, AMX-FP16 ++and PREFETCHI instruction set support. + + @item graniterapids-d + Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +@@ -31633,9 +31633,9 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, + AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, + AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, + VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, +-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, +-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, +-AVX512BF16, AMX-FP16, PREFETCHI and AMX-COMPLEX instruction set support. 
++MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, ++UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, AMX-FP16, ++PREFETCHI and AMX-COMPLEX instruction set support. + + @item k6 + AMD K6 CPU with MMX instruction set support. +-- +2.28.0.windows.1 + diff --git a/0082-i386-Remove-Meteorlake-s-family_model.patch b/0082-i386-Remove-Meteorlake-s-family_model.patch new file mode 100644 index 0000000..6d075cd --- /dev/null +++ b/0082-i386-Remove-Meteorlake-s-family_model.patch @@ -0,0 +1,30 @@ +From 62852213bc6d3e56804ca05826bb95a3a2fe4eba Mon Sep 17 00:00:00 2001 +From: "Hu, Lin1" +Date: Thu, 15 Dec 2022 15:51:18 +0800 +Subject: [PATCH 27/32] i386: Remove Meteorlake's family_model + +gcc/ChangeLog: + + * common/config/i386/cpuinfo.h (get_intel_cpu): Remove case 0xb5 + for meteorlake. + +(cherry picked from commit 9e74b7ec0b218364905e3e7de5c41e8148ffc61b) +--- + gcc/common/config/i386/cpuinfo.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 1e53248ef..348bc0c12 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -510,7 +510,6 @@ get_intel_cpu (struct __processor_model *cpu_model, + /* Alder Lake. */ + case 0xb7: + /* Raptor Lake. */ +- case 0xb5: + case 0xaa: + case 0xac: + /* Meteor Lake. */ +-- +2.28.0.windows.1 + diff --git a/0083-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch b/0083-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch new file mode 100644 index 0000000..cf8ee64 --- /dev/null +++ b/0083-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch @@ -0,0 +1,33 @@ +From 73042aa18fe70aa30a9c7c760b08e642560ecccd Mon Sep 17 00:00:00 2001 +From: "Cui, Lili" +Date: Thu, 29 Jun 2023 03:10:35 +0000 +Subject: [PATCH 28/32] x86: Update model values for Alderlake, Rocketlake and + Raptorlake. + +Update model values for Alderlake, Rocketlake and Raptorlake according to SDM. 
+ +gcc/ChangeLog + + * common/config/i386/cpuinfo.h (get_intel_cpu): Remove model value 0xa8 + from Rocketlake, move model value 0xbf from Alderlake to Raptorlake. + +(cherry picked from commit e510c3be13a8ccdf1fc1b27c2501c126d493f335) +--- + gcc/common/config/i386/cpuinfo.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 348bc0c12..f9bcb6fad 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model, + case 0x9a: + /* Alder Lake. */ + case 0xb7: ++ case 0xbf: + /* Raptor Lake. */ + case 0xaa: + case 0xac: +-- +2.28.0.windows.1 + diff --git a/0084-x86-Update-model-values-for-Raptorlake.patch b/0084-x86-Update-model-values-for-Raptorlake.patch new file mode 100644 index 0000000..9a33e31 --- /dev/null +++ b/0084-x86-Update-model-values-for-Raptorlake.patch @@ -0,0 +1,32 @@ +From 3dbe28984e0f9c24d6670cfba42983bc32c08b0a Mon Sep 17 00:00:00 2001 +From: "Cui, Lili" +Date: Mon, 14 Aug 2023 02:06:00 +0000 +Subject: [PATCH 29/32] x86: Update model values for Raptorlake. + +Update model values for Raptorlake according to SDM. + +gcc/ChangeLog + + * common/config/i386/cpuinfo.h (get_intel_cpu): Add model value 0xba + to Raptorlake. + +(cherry picked from commit 614052dd4ea083e086712809c754ffebd9361316) +--- + gcc/common/config/i386/cpuinfo.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index f9bcb6fad..da1568fd1 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -509,6 +509,7 @@ get_intel_cpu (struct __processor_model *cpu_model, + case 0x9a: + /* Alder Lake. */ + case 0xb7: ++ case 0xba: + case 0xbf: + /* Raptor Lake. 
*/ + case 0xaa: +-- +2.28.0.windows.1 + diff --git a/0085-Fix-target_clone-arch-graniterapids-d.patch b/0085-Fix-target_clone-arch-graniterapids-d.patch new file mode 100644 index 0000000..175c09d --- /dev/null +++ b/0085-Fix-target_clone-arch-graniterapids-d.patch @@ -0,0 +1,159 @@ +From 8db0f3cd29bd7f937ffa01dd1100360fbbf5b6f4 Mon Sep 17 00:00:00 2001 +From: liuhongt +Date: Tue, 22 Aug 2023 18:18:31 +0800 +Subject: [PATCH 30/32] Fix target_clone ("arch=graniterapids-d") + +Both "graniterapids-d" and "graniterapids" are attached with +PROCESSOR_GRANITERAPIDS in processor_alias_table but mapped to +different __cpu_subtype in get_intel_cpu. + +And get_builtin_code_for_version will try to match the first +PROCESSOR_GRANITERAPIDS in processor_alias_table which maps to +"graniterapids" here. + +1861 else if (new_target->arch_specified && new_target->arch > 0) +1862 for (i = 0; i < pta_size; i++) +1863 if (processor_alias_table[i].processor == new_target->arch) +1864 { +1865 const pta *arch_info = &processor_alias_table[i]; +1866 switch (arch_info->priority) +1867 { +1868 default: +1869 arg_str = arch_info->name; + +This mismatch makes dispatch_function_versions check the predicate +of __builtin_cpu_is ("graniterapids") for "graniterapids-d" and causes +the issue. +The patch explicitly adds PROCESSOR_GRANITERAPIDS_D to make a distinction. + +For "alderlake","raptorlake", "meteorlake" they share same isa, cost, +tuning, and mapped to the same __cpu_type/__cpu_subtype in +get_intel_cpu, so no need to add PROCESSOR_RAPTORLAKE and others. + +gcc/ChangeLog: + + * common/config/i386/i386-common.cc (processor_names): Add new + member graniterapids-d. + * config/i386/i386-options.cc (processor_alias_table): Update + table with PROCESSOR_GRANITERAPIDS_D. + (m_GRANITERAPIDS_D): New macro. + (m_CORE_AVX512): Add m_GRANITERAPIDS_D. + (processor_cost_table): Add icelake_cost for + PROCESSOR_GRANITERAPIDS_D. 
+ * config/i386/i386.h (enum processor_type): Add new member + PROCESSOR_GRANITERAPIDS_D. + * config/i386/i386-c.cc (ix86_target_macros_internal): Handle + PROCESSOR_GRANITERAPIDS_D +--- + gcc/common/config/i386/i386-common.cc | 6 ++++-- + gcc/config/i386/i386-c.cc | 8 ++++++++ + gcc/config/i386/i386-options.cc | 4 +++- + gcc/config/i386/i386.h | 3 ++- + 4 files changed, 17 insertions(+), 4 deletions(-) + +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index 28f468f48..bec6801ce 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1873,6 +1873,7 @@ const char *const processor_names[] = + "alderlake", + "rocketlake", + "graniterapids", ++ "graniterapids-d", + "intel", + "geode", + "k6", +@@ -1993,8 +1994,9 @@ const pta processor_alias_table[] = + M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2}, + {"graniterapids", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS, + M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS), P_PROC_AVX512F}, +- {"graniterapids-d", PROCESSOR_GRANITERAPIDS, CPU_HASWELL, PTA_GRANITERAPIDS_D, +- M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), P_PROC_AVX512F}, ++ {"graniterapids-d", PROCESSOR_GRANITERAPIDS_D, CPU_HASWELL, ++ PTA_GRANITERAPIDS_D, M_CPU_SUBTYPE (INTEL_COREI7_GRANITERAPIDS_D), ++ P_PROC_AVX512F}, + {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL, + M_CPU_TYPE (INTEL_BONNELL), P_PROC_SSSE3}, + {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL, +diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc +index 5e0ac278c..49f0db2b8 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -246,6 +246,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__graniterapids"); + def_or_undef (parse_in, "__graniterapids__"); + break; ++ case PROCESSOR_GRANITERAPIDS_D: ++ def_or_undef (parse_in, "__graniterapids_d"); ++ def_or_undef (parse_in, "__graniterapids_d__"); ++ break; + case 
PROCESSOR_ALDERLAKE: + def_or_undef (parse_in, "__alderlake"); + def_or_undef (parse_in, "__alderlake__"); +@@ -254,6 +258,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__rocketlake"); + def_or_undef (parse_in, "__rocketlake__"); + break; ++ + /* use PROCESSOR_max to not set/unset the arch macro. */ + case PROCESSOR_max: + break; +@@ -426,6 +431,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + case PROCESSOR_GRANITERAPIDS: + def_or_undef (parse_in, "__tune_graniterapids__"); + break; ++ case PROCESSOR_GRANITERAPIDS_D: ++ def_or_undef (parse_in, "__tune_graniterapids_d__"); ++ break; + case PROCESSOR_INTEL: + case PROCESSOR_GENERIC: + break; +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 7efd25084..86932d719 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -128,10 +128,11 @@ along with GCC; see the file COPYING3. If not see + #define m_ALDERLAKE (HOST_WIDE_INT_1U< +Date: Thu, 26 Oct 2023 09:50:40 -0400 +Subject: [PATCH 31/32] Modfify cost calculation for dealing with equivalences + +RISCV target developers reported that pseudos with equivalence used in +a loop can be spilled. Simple changes of heuristics of cost +calculation of pseudos with equivalence or even ignoring equivalences +resulted in numerous testsuite failures on different targets or worse +spec2017 performance. This patch implements more sophisticated cost +calculations of pseudos with equivalences. The patch does not change +RA behaviour for targets still using the old reload pass instead of +LRA. The patch solves the reported problem and improves x86-64 +specint2017 a bit (specfp2017 performance stays the same). The patch +takes into account how the equivalence will be used: will it be +integrated into the user insns or require an input reload insn. It +requires additional pass over insns. 
To compensate RA slow down, the +patch removes a pass over insns in the reload pass used by IRA before. +This also decouples IRA from reload more and will help to remove the +reload pass in the future if it ever happens. + +gcc/ChangeLog: + + * dwarf2out.cc (reg_loc_descriptor): Use lra_eliminate_regs when + LRA is used. + * ira-costs.cc: Include regset.h. + (equiv_can_be_consumed_p, get_equiv_regno, calculate_equiv_gains): + New functions. + (find_costs_and_classes): Call calculate_equiv_gains and redefine + mem_cost of pseudos with equivs when LRA is used. + * var-tracking.cc: Include ira.h and lra.h. + (vt_initialize): Use lra_eliminate_regs when LRA is used. +--- + gcc/dwarf2out.cc | 4 +- + gcc/ira-costs.cc | 169 ++++++++++++++++++++++++++++++++++++++++++-- + gcc/var-tracking.cc | 14 +++- + 3 files changed, 179 insertions(+), 8 deletions(-) + +diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc +index 0a5c081d8..f0f6f4fd4 100644 +--- a/gcc/dwarf2out.cc ++++ b/gcc/dwarf2out.cc +@@ -14263,7 +14263,9 @@ reg_loc_descriptor (rtx rtl, enum var_init_status initialized) + argument pointer and soft frame pointer rtx's. + Use DW_OP_fbreg offset DW_OP_stack_value in this case. */ + if ((rtl == arg_pointer_rtx || rtl == frame_pointer_rtx) +- && eliminate_regs (rtl, VOIDmode, NULL_RTX) != rtl) ++ && (ira_use_lra_p ++ ? lra_eliminate_regs (rtl, VOIDmode, NULL_RTX) ++ : eliminate_regs (rtl, VOIDmode, NULL_RTX)) != rtl) + { + dw_loc_descr_ref result = NULL; + +diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc +index 642fda529..c79311783 100644 +--- a/gcc/ira-costs.cc ++++ b/gcc/ira-costs.cc +@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. 
If not see + #include "tm_p.h" + #include "insn-config.h" + #include "regs.h" ++#include "regset.h" + #include "ira.h" + #include "ira-int.h" + #include "addresses.h" +@@ -1750,6 +1751,145 @@ process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node) + process_bb_for_costs (bb); + } + ++/* Check that reg REGNO can be changed by TO in INSN. Return true in case the ++ result insn would be valid one. */ ++static bool ++equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn) ++{ ++ validate_replace_src_group (regno_reg_rtx[regno], to, insn); ++ bool res = verify_changes (0); ++ cancel_changes (0); ++ return res; ++} ++ ++/* Return true if X contains a pseudo with equivalence. In this case also ++ return the pseudo through parameter REG. If the pseudo is a part of subreg, ++ return the subreg through parameter SUBREG. */ ++ ++static bool ++get_equiv_regno (rtx x, int &regno, rtx &subreg) ++{ ++ subreg = NULL_RTX; ++ if (GET_CODE (x) == SUBREG) ++ { ++ subreg = x; ++ x = SUBREG_REG (x); ++ } ++ if (REG_P (x) ++ && (ira_reg_equiv[REGNO (x)].memory != NULL ++ || ira_reg_equiv[REGNO (x)].constant != NULL)) ++ { ++ regno = REGNO (x); ++ return true; ++ } ++ RTX_CODE code = GET_CODE (x); ++ const char *fmt = GET_RTX_FORMAT (code); ++ ++ for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) ++ if (fmt[i] == 'e') ++ { ++ if (get_equiv_regno (XEXP (x, i), regno, subreg)) ++ return true; ++ } ++ else if (fmt[i] == 'E') ++ { ++ for (int j = 0; j < XVECLEN (x, i); j++) ++ if (get_equiv_regno (XVECEXP (x, i, j), regno, subreg)) ++ return true; ++ } ++ return false; ++} ++ ++/* A pass through the current function insns. Calculate costs of using ++ equivalences for pseudos and store them in regno_equiv_gains. 
*/ ++ ++static void ++calculate_equiv_gains (void) ++{ ++ basic_block bb; ++ int regno, freq, cost; ++ rtx subreg; ++ rtx_insn *insn; ++ machine_mode mode; ++ enum reg_class rclass; ++ bitmap_head equiv_pseudos; ++ ++ ira_assert (allocno_p); ++ bitmap_initialize (&equiv_pseudos, &reg_obstack); ++ for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--) ++ if (ira_reg_equiv[regno].init_insns != NULL ++ && (ira_reg_equiv[regno].memory != NULL ++ || (ira_reg_equiv[regno].constant != NULL ++ /* Ignore complicated constants which probably will be placed ++ in memory: */ ++ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_DOUBLE ++ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_VECTOR ++ && GET_CODE (ira_reg_equiv[regno].constant) != LABEL_REF))) ++ { ++ rtx_insn_list *x; ++ for (x = ira_reg_equiv[regno].init_insns; x != NULL; x = x->next ()) ++ { ++ insn = x->insn (); ++ rtx set = single_set (insn); ++ ++ if (set == NULL_RTX || SET_DEST (set) != regno_reg_rtx[regno]) ++ break; ++ bb = BLOCK_FOR_INSN (insn); ++ ira_curr_regno_allocno_map ++ = ira_bb_nodes[bb->index].parent->regno_allocno_map; ++ mode = PSEUDO_REGNO_MODE (regno); ++ rclass = pref[COST_INDEX (regno)]; ++ ira_init_register_move_cost_if_necessary (mode); ++ if (ira_reg_equiv[regno].memory != NULL) ++ cost = ira_memory_move_cost[mode][rclass][1]; ++ else ++ cost = ira_register_move_cost[mode][rclass][rclass]; ++ freq = REG_FREQ_FROM_BB (bb); ++ regno_equiv_gains[regno] += cost * freq; ++ } ++ if (x != NULL) ++ /* We found complicated equiv or reverse equiv mem=reg. Ignore ++ them. 
*/ ++ regno_equiv_gains[regno] = 0; ++ else ++ bitmap_set_bit (&equiv_pseudos, regno); ++ } ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ freq = REG_FREQ_FROM_BB (bb); ++ ira_curr_regno_allocno_map ++ = ira_bb_nodes[bb->index].parent->regno_allocno_map; ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!INSN_P (insn) || !get_equiv_regno (PATTERN (insn), regno, subreg) ++ || !bitmap_bit_p (&equiv_pseudos, regno)) ++ continue; ++ rtx subst = ira_reg_equiv[regno].memory; ++ ++ if (subst == NULL) ++ subst = ira_reg_equiv[regno].constant; ++ ira_assert (subst != NULL); ++ mode = PSEUDO_REGNO_MODE (regno); ++ ira_init_register_move_cost_if_necessary (mode); ++ bool consumed_p = equiv_can_be_consumed_p (regno, subst, insn); ++ ++ rclass = pref[COST_INDEX (regno)]; ++ if (MEM_P (subst) ++ /* If it is a change of constant into double for example, the ++ result constant probably will be placed in memory. */ ++ || (subreg != NULL_RTX && !INTEGRAL_MODE_P (GET_MODE (subreg)))) ++ cost = ira_memory_move_cost[mode][rclass][1] + (consumed_p ? 0 : 1); ++ else if (consumed_p) ++ continue; ++ else ++ cost = ira_register_move_cost[mode][rclass][rclass]; ++ regno_equiv_gains[regno] -= cost * freq; ++ } ++ } ++ bitmap_clear (&equiv_pseudos); ++} ++ + /* Find costs of register classes and memory for allocnos or pseudos + and their best costs. Set up preferred, alternative and allocno + classes for pseudos. */ +@@ -1848,6 +1988,12 @@ find_costs_and_classes (FILE *dump_file) + if (pass == 0) + pref = pref_buffer; + ++ if (ira_use_lra_p && allocno_p && pass == 1) ++ /* It is a pass through all insns. So do it once and only for RA (not ++ for insn scheduler) when we already found preferable pseudo register ++ classes on the previous pass. */ ++ calculate_equiv_gains (); ++ + /* Now for each allocno look at how desirable each class is and + find which class is preferred. 
*/ + for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--) +@@ -1940,6 +2086,17 @@ find_costs_and_classes (FILE *dump_file) + } + if (i >= first_moveable_pseudo && i < last_moveable_pseudo) + i_mem_cost = 0; ++ else if (ira_use_lra_p) ++ { ++ if (equiv_savings > 0) ++ { ++ i_mem_cost = 0; ++ if (ira_dump_file != NULL && internal_flag_ira_verbose > 5) ++ fprintf (ira_dump_file, ++ " Use MEM for r%d as the equiv savings is %d\n", ++ i, equiv_savings); ++ } ++ } + else if (equiv_savings < 0) + i_mem_cost = -equiv_savings; + else if (equiv_savings > 0) +@@ -2378,7 +2535,10 @@ ira_costs (void) + total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size + * ira_allocnos_num); + initiate_regno_cost_classes (); +- calculate_elim_costs_all_insns (); ++ if (!ira_use_lra_p) ++ /* Process equivs in reload to update costs through hook ++ ira_adjust_equiv_reg_cost. */ ++ calculate_elim_costs_all_insns (); + find_costs_and_classes (ira_dump_file); + setup_allocno_class_and_costs (); + finish_regno_cost_classes (); +@@ -2503,13 +2663,14 @@ ira_tune_allocno_costs (void) + } + } + +-/* Add COST to the estimated gain for eliminating REGNO with its +- equivalence. If COST is zero, record that no such elimination is +- possible. */ ++/* A hook from the reload pass. Add COST to the estimated gain for eliminating ++ REGNO with its equivalence. If COST is zero, record that no such ++ elimination is possible. 
*/ + + void + ira_adjust_equiv_reg_cost (unsigned regno, int cost) + { ++ ira_assert (!ira_use_lra_p); + if (cost == 0) + regno_equiv_gains[regno] = 0; + else +diff --git a/gcc/var-tracking.cc b/gcc/var-tracking.cc +index 7c3ad0a55..b10c8c1eb 100644 +--- a/gcc/var-tracking.cc ++++ b/gcc/var-tracking.cc +@@ -107,6 +107,8 @@ + #include "cfgrtl.h" + #include "cfganal.h" + #include "reload.h" ++#include "ira.h" ++#include "lra.h" + #include "calls.h" + #include "tree-dfa.h" + #include "tree-ssa.h" +@@ -10133,7 +10135,9 @@ vt_initialize (void) + #else + reg = arg_pointer_rtx; + #endif +- elim = eliminate_regs (reg, VOIDmode, NULL_RTX); ++ elim = (ira_use_lra_p ++ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX) ++ : eliminate_regs (reg, VOIDmode, NULL_RTX)); + if (elim != reg) + { + if (GET_CODE (elim) == PLUS) +@@ -10153,7 +10157,9 @@ vt_initialize (void) + reg = arg_pointer_rtx; + fp_cfa_offset = ARG_POINTER_CFA_OFFSET (current_function_decl); + #endif +- elim = eliminate_regs (reg, VOIDmode, NULL_RTX); ++ elim = (ira_use_lra_p ++ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX) ++ : eliminate_regs (reg, VOIDmode, NULL_RTX)); + if (elim != reg) + { + if (GET_CODE (elim) == PLUS) +@@ -10185,7 +10191,9 @@ vt_initialize (void) + #else + reg = arg_pointer_rtx; + #endif +- elim = eliminate_regs (reg, VOIDmode, NULL_RTX); ++ elim = (ira_use_lra_p ++ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX) ++ : eliminate_regs (reg, VOIDmode, NULL_RTX)); + if (elim != reg) + { + if (GET_CODE (elim) == PLUS) +-- +2.28.0.windows.1 + diff --git a/0087-Add-cost-calculation-for-reg-equivalence-invariants.patch b/0087-Add-cost-calculation-for-reg-equivalence-invariants.patch new file mode 100644 index 0000000..9315409 --- /dev/null +++ b/0087-Add-cost-calculation-for-reg-equivalence-invariants.patch @@ -0,0 +1,49 @@ +From 4965473a4211a9feb46a0d168180ab450cb18bcc Mon Sep 17 00:00:00 2001 +From: "Vladimir N. 
Makarov" +Date: Fri, 27 Oct 2023 08:28:24 -0400 +Subject: [PATCH 32/32] Add cost calculation for reg equivalence invariants + +My recent patch improving cost calculation for pseudos with equivalence +resulted in failure of gcc.target/arm/eliminate.c on aarch64. This patch +fixes this failure. + +gcc/ChangeLog: + + * ira-costs.cc: (get_equiv_regno, calculate_equiv_gains): + Process reg equivalence invariants. +--- + gcc/ira-costs.cc | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc +index c79311783..d33104a30 100644 +--- a/gcc/ira-costs.cc ++++ b/gcc/ira-costs.cc +@@ -1777,6 +1777,7 @@ get_equiv_regno (rtx x, int &regno, rtx &subreg) + } + if (REG_P (x) + && (ira_reg_equiv[REGNO (x)].memory != NULL ++ || ira_reg_equiv[REGNO (x)].invariant != NULL + || ira_reg_equiv[REGNO (x)].constant != NULL)) + { + regno = REGNO (x); +@@ -1819,6 +1820,7 @@ calculate_equiv_gains (void) + for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--) + if (ira_reg_equiv[regno].init_insns != NULL + && (ira_reg_equiv[regno].memory != NULL ++ || ira_reg_equiv[regno].invariant != NULL + || (ira_reg_equiv[regno].constant != NULL + /* Ignore complicated constants which probably will be placed + in memory: */ +@@ -1869,6 +1871,8 @@ calculate_equiv_gains (void) + + if (subst == NULL) + subst = ira_reg_equiv[regno].constant; ++ if (subst == NULL) ++ subst = ira_reg_equiv[regno].invariant; + ira_assert (subst != NULL); + mode = PSEUDO_REGNO_MODE (regno); + ira_init_register_move_cost_if_necessary (mode); +-- +2.28.0.windows.1 +