!324 [sync] PR-323: [Sync] Sync patch from openeuler/gcc
From: @openeuler-sync-bot Reviewed-by: @li-yancheng Signed-off-by: @li-yancheng
This commit is contained in:
commit
00d8ee9c5a
@ -1,7 +1,7 @@
|
||||
From 73ee6351353b036f466ba1aab9a9e7d7865bf972 Mon Sep 17 00:00:00 2001
|
||||
From: eastb233 <xiezhiheng@huawei.com>
|
||||
Date: Tue, 11 Jul 2023 16:07:51 +0800
|
||||
Subject: [PATCH] [Version] Set version to 12.3.1
|
||||
Subject: [PATCH 01/22] [Version] Set version to 12.3.1
|
||||
|
||||
---
|
||||
gcc/BASE-VER | 2 +-
|
||||
2042
0002-RISCV-Backport-inline-subword-atomic-patches.patch
Normal file
2042
0002-RISCV-Backport-inline-subword-atomic-patches.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
||||
From 37ef787e743d98f9f6e53005d99709fb8e284964 Mon Sep 17 00:00:00 2001
|
||||
From: eastb233 <xiezhiheng@huawei.com>
|
||||
Date: Fri, 14 Jul 2023 11:07:05 +0800
|
||||
Subject: [PATCH 1/2] [CONFIG] Regenerate configure file
|
||||
Subject: [PATCH 03/22] [CONFIG] Regenerate configure file
|
||||
|
||||
Regenerate configure file under libquadmath directory
|
||||
since it is out of date.
|
||||
@ -1,7 +1,7 @@
|
||||
From 52a810b4d8a725a7edb2988f6c3813a9938362a5 Mon Sep 17 00:00:00 2001
|
||||
From: eastb233 <xiezhiheng@huawei.com>
|
||||
Date: Fri, 14 Jul 2023 11:10:24 +0800
|
||||
Subject: [PATCH 2/2] [libquadmath] Enable libquadmath on kunpeng
|
||||
Subject: [PATCH 04/22] [libquadmath] Enable libquadmath on kunpeng
|
||||
|
||||
This enables libquadmath on the kunpeng platform for the convenience of
|
||||
users who are migrating from the x86 platform. libquadmath uses "__float128"
|
||||
@ -0,0 +1,89 @@
|
||||
From e7013d2640d82e928ebdaf830b6833051ac65296 Mon Sep 17 00:00:00 2001
|
||||
From: zhongyunde <zhongyunde@huawei.com>
|
||||
Date: Sat, 5 Nov 2022 13:22:33 +0800
|
||||
Subject: [PATCH 06/22] [MULL64 1/3] Add A ? B op CST : B match and simplify
|
||||
optimizations
|
||||
|
||||
Refer to commit b6bdd7a4; use pattern matching to implement the
|
||||
A ? B op CST : B (where CST is a power of 2) simplifications.
|
||||
Fixes the 1st issue of https://gitee.com/openeuler/gcc/issues/I5TSG0?from=project-issue.
|
||||
|
||||
gcc/
|
||||
* match.pd (A ? B op CST : B): Add simplifications for A ? B op POW2 : B
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/pr107190.c: New test.
|
||||
---
|
||||
gcc/match.pd | 21 +++++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/pr107190.c | 27 +++++++++++++++++++++++++++
|
||||
2 files changed, 48 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/pr107190.c
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index fc2833bbd..fd0857fc9 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -4280,6 +4280,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
)
|
||||
#endif
|
||||
|
||||
+#if GIMPLE
|
||||
+(if (canonicalize_math_p ())
|
||||
+/* These patterns are mostly used by PHIOPT to move some operations outside of
|
||||
+ the if statements. They should be done late because it gives jump threading
|
||||
+ and few other passes to reduce what is going on. */
|
||||
+/* a ? x op C : x -> x op (a << log2(C)) when C is power of 2. */
|
||||
+ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate)
|
||||
+ (simplify
|
||||
+ (cond @0 (op:s @1 integer_pow2p@2) @1)
|
||||
+ /* powerof2cst */
|
||||
+ (if (INTEGRAL_TYPE_P (type))
|
||||
+ (with {
|
||||
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
|
||||
+ }
|
||||
+ (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; })))
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+)
|
||||
+#endif
|
||||
+
|
||||
/* Simplification moved from fold_cond_expr_with_comparison. It may also
|
||||
be extended. */
|
||||
/* This pattern implements two kinds simplification:
|
||||
diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
|
||||
new file mode 100644
|
||||
index 000000000..235b2761a
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/pr107190.c
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
|
||||
+
|
||||
+# define BN_BITS4 32
|
||||
+# define BN_MASK2 (0xffffffffffffffffL)
|
||||
+# define BN_MASK2l (0xffffffffL)
|
||||
+# define BN_MASK2h (0xffffffff00000000L)
|
||||
+# define BN_MASK2h1 (0xffffffff80000000L)
|
||||
+# define LBITS(a) ((a)&BN_MASK2l)
|
||||
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
|
||||
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
|
||||
+
|
||||
+unsigned int test_m(unsigned long in0, unsigned long in1) {
|
||||
+ unsigned long m, m1, lt, ht, bl, bh;
|
||||
+ lt = LBITS(in0);
|
||||
+ ht = HBITS(in0);
|
||||
+ bl = LBITS(in1);
|
||||
+ bh = HBITS(in1);
|
||||
+ m = bh * lt;
|
||||
+ m1 = bl * ht;
|
||||
+ ht = bh * ht;
|
||||
+ m = (m + m1) & BN_MASK2;
|
||||
+ if (m < m1) ht += L2HBITS((unsigned long)1);
|
||||
+ return ht + m;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "COND_EXPR in block 2 and PHI in block 4 converted to straightline code" "phiopt2" } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
130
0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch
Normal file
130
0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch
Normal file
@ -0,0 +1,130 @@
|
||||
From 547ab9b3e073ef389e5fd89d961bb1e3e6934ae9 Mon Sep 17 00:00:00 2001
|
||||
From: zhongyunde <zhongyunde@huawei.com>
|
||||
Date: Wed, 9 Nov 2022 17:04:13 +0800
|
||||
Subject: [PATCH 07/22] [MULL64 2/3] Fold series of instructions into mul
|
||||
|
||||
Merge the low part of series instructions into mul
|
||||
|
||||
gcc/
|
||||
* match.pd: Add simplifications for low part of mul
|
||||
* common.opt: Add new option fmerge-mull, enabled with -O2
|
||||
* opts.cc (default_options_table): Enable fmerge-mull at -O2 and above
|
||||
|
||||
gcc/testsuite/
|
||||
* g++.dg/tree-ssa/mull64.C: New test.
|
||||
---
|
||||
gcc/common.opt | 4 +++
|
||||
gcc/match.pd | 27 ++++++++++++++++++++
|
||||
gcc/opts.cc | 1 +
|
||||
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++
|
||||
4 files changed, 66 insertions(+)
|
||||
create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 8a0dafc52..e365a48bc 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -2126,6 +2126,10 @@ fmerge-debug-strings
|
||||
Common Var(flag_merge_debug_strings) Init(1)
|
||||
Attempt to merge identical debug strings across compilation units.
|
||||
|
||||
+fmerge-mull
|
||||
+Common Var(flag_merge_mull) Init(0) Optimization
|
||||
+Attempt to merge series instructions into mul.
|
||||
+
|
||||
fmessage-length=
|
||||
Common RejectNegative Joined UInteger
|
||||
-fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping.
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index fd0857fc9..2092e6959 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -4301,6 +4301,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
)
|
||||
#endif
|
||||
|
||||
+#if GIMPLE
|
||||
+/* These patterns are mostly used by FORWPROP1 to fold some operations into more
|
||||
+ simple IR. The following scenario should be matched:
|
||||
+ In0Lo = In0(D) & 4294967295;
|
||||
+ In0Hi = In0(D) >> 32;
|
||||
+ In1Lo = In1(D) & 4294967295;
|
||||
+ In1Hi = In1(D) >> 32;
|
||||
+ Addc = In0Lo * In1Hi + In0Hi * In1Lo;
|
||||
+ addc32 = Addc << 32;
|
||||
+ ResLo = In0Lo * In1Lo + addc32 */
|
||||
+(simplify
|
||||
+ (plus:c (mult @4 @5)
|
||||
+ (lshift
|
||||
+ (plus:c
|
||||
+ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3))
|
||||
+ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2)))
|
||||
+ INTEGER_CST@3
|
||||
+ )
|
||||
+ )
|
||||
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
|
||||
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
|
||||
+ && TYPE_PRECISION (type) == 64)
|
||||
+ (mult (convert:type @0) (convert:type @1))
|
||||
+ )
|
||||
+)
|
||||
+#endif
|
||||
+
|
||||
/* Simplification moved from fold_cond_expr_with_comparison. It may also
|
||||
be extended. */
|
||||
/* This pattern implements two kinds simplification:
|
||||
diff --git a/gcc/opts.cc b/gcc/opts.cc
|
||||
index a97630d1c..eae71ed20 100644
|
||||
--- a/gcc/opts.cc
|
||||
+++ b/gcc/opts.cc
|
||||
@@ -647,6 +647,7 @@ static const struct default_options default_options_table[] =
|
||||
VECT_COST_MODEL_VERY_CHEAP },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
|
||||
+ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
|
||||
|
||||
/* -O2 and above optimizations, but not -Os or -Og. */
|
||||
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
|
||||
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
new file mode 100644
|
||||
index 000000000..2a3b74604
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
@@ -0,0 +1,34 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
|
||||
+
|
||||
+# define BN_BITS4 32
|
||||
+# define BN_MASK2 (0xffffffffffffffffL)
|
||||
+# define BN_MASK2l (0xffffffffL)
|
||||
+# define BN_MASK2h (0xffffffff00000000L)
|
||||
+# define BN_MASK2h1 (0xffffffff80000000L)
|
||||
+# define LBITS(a) ((a)&BN_MASK2l)
|
||||
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
|
||||
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
|
||||
+
|
||||
+void mul64(unsigned long in0, unsigned long in1,
|
||||
+ unsigned long &retLo, unsigned long &retHi) {
|
||||
+ unsigned long m00, m01, m10, m11, al, ah, bl, bh;
|
||||
+ unsigned long Addc, addc32, low;
|
||||
+ al = LBITS(in0);
|
||||
+ ah = HBITS(in0);
|
||||
+ bl = LBITS(in1);
|
||||
+ bh = HBITS(in1);
|
||||
+ m10 = bh * al;
|
||||
+ m00 = bl * al;
|
||||
+ m01 = bl * ah;
|
||||
+ m11 = bh * ah;
|
||||
+ Addc = (m10 + m01) & BN_MASK2;
|
||||
+ if (Addc < m01) m11 += L2HBITS((unsigned long)1);
|
||||
+ m11 += HBITS(Addc);
|
||||
+ addc32 = L2HBITS(Addc);
|
||||
+ low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++;
|
||||
+ retLo = low;
|
||||
+ retHi = m11;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
105
0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch
Normal file
105
0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch
Normal file
@ -0,0 +1,105 @@
|
||||
From 4e536dbb4a08925cea259be13962969efcc0f3c1 Mon Sep 17 00:00:00 2001
|
||||
From: zhongyunde <zhongyunde@huawei.com>
|
||||
Date: Fri, 11 Nov 2022 11:30:37 +0800
|
||||
Subject: [PATCH 08/22] [MULL64 3/3] Fold series of instructions into umulh
|
||||
|
||||
Merge the high part of series instructions into umulh
|
||||
|
||||
gcc/
|
||||
* match.pd: Add simplifications for high part of umulh
|
||||
|
||||
gcc/testsuite/
|
||||
* g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4
|
||||
---
|
||||
gcc/match.pd | 56 ++++++++++++++++++++++++++
|
||||
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++-
|
||||
2 files changed, 59 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 2092e6959..b7e3588e8 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -4301,6 +4301,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
)
|
||||
#endif
|
||||
|
||||
+#if GIMPLE
|
||||
+/* These patterns are mostly used by FORWPROP4 to move some operations outside of
|
||||
+ the if statements. They should be done late because it gives jump threading
|
||||
+ and few other passes to reduce what is going on. */
|
||||
+/* Mul64 is defined as a multiplication algorithm which compute two 64-bit
|
||||
+ integers to one 128-bit integer. Try to match the high part of mul pattern
|
||||
+ after the low part of mul pattern is simplified. The following scenario
|
||||
+ should be matched:
|
||||
+ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
|
||||
+ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2
|
||||
+ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3
|
||||
+ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2
|
||||
+ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3
|
||||
+ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6
|
||||
+ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult @4 @7) @8
|
||||
+ AddH = (Addc >> 32) + In0Hi * In1Hi -- (plus@11 (rshift @9 @3) (mult @5 @7))
|
||||
+ addc32 = Addc << 32; -- lshift@10 @9 @3
|
||||
+ ResLo = In0(D) * In1(D); -- mult @0 @1
|
||||
+ ResHi = ((long unsigned int) (addc32 > ResLo)) +
|
||||
+ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH;
|
||||
+ } */
|
||||
+(simplify
|
||||
+ (plus:c
|
||||
+ (plus:c
|
||||
+ (convert
|
||||
+ (gt (lshift@10 @9 @3)
|
||||
+ (mult:c @0 @1)))
|
||||
+ (lshift
|
||||
+ (convert
|
||||
+ (gt @8 @9))
|
||||
+ @3))
|
||||
+ (plus:c@11
|
||||
+ (rshift
|
||||
+ (plus:c@9
|
||||
+ (mult:c (bit_and@4 SSA_NAME@0 @2) @7)
|
||||
+ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2)))
|
||||
+ @3)
|
||||
+ (mult:c (rshift@5 SSA_NAME@0 @3)
|
||||
+ (rshift@7 SSA_NAME@1 INTEGER_CST@3))
|
||||
+ )
|
||||
+ )
|
||||
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
|
||||
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
|
||||
+ && TYPE_PRECISION (type) == 64)
|
||||
+ (with {
|
||||
+ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
|
||||
+ tree shift = build_int_cst (integer_type_node, 64);
|
||||
+ }
|
||||
+ (convert:type (rshift
|
||||
+ (mult (convert:i128_type @0)
|
||||
+ (convert:i128_type @1))
|
||||
+ { shift; })))
|
||||
+ )
|
||||
+)
|
||||
+#endif
|
||||
+
|
||||
#if GIMPLE
|
||||
/* These patterns are mostly used by FORWPROP1 to fold some operations into more
|
||||
simple IR. The following scenario should be matched:
|
||||
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
index 2a3b74604..f61cf5e6f 100644
|
||||
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
|
||||
+/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
|
||||
|
||||
# define BN_BITS4 32
|
||||
# define BN_MASK2 (0xffffffffffffffffL)
|
||||
@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1,
|
||||
retHi = m11;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
|
||||
+/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
66
0009-MULL64-Disable-mull64-transformation-by-default.patch
Normal file
66
0009-MULL64-Disable-mull64-transformation-by-default.patch
Normal file
@ -0,0 +1,66 @@
|
||||
From 7c1f4425c680ea144d29bc55a1283d46444a2691 Mon Sep 17 00:00:00 2001
|
||||
From: eastb233 <xiezhiheng@huawei.com>
|
||||
Date: Wed, 7 Dec 2022 09:43:15 +0800
|
||||
Subject: [PATCH 09/22] [MULL64] Disable mull64 transformation by default
|
||||
|
||||
This commit disables mull64 transformation by default since
|
||||
it shows some runtime failure in workloads.
|
||||
|
||||
This is a workaround fix for https://gitee.com/src-openeuler/gcc/issues/I64UQH
|
||||
---
|
||||
gcc/match.pd | 2 +-
|
||||
gcc/opts.cc | 1 -
|
||||
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 2 +-
|
||||
gcc/testsuite/gcc.dg/pr107190.c | 2 +-
|
||||
4 files changed, 3 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index b7e3588e8..6f24d5079 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -4290,7 +4290,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
(simplify
|
||||
(cond @0 (op:s @1 integer_pow2p@2) @1)
|
||||
/* powerof2cst */
|
||||
- (if (INTEGRAL_TYPE_P (type))
|
||||
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type))
|
||||
(with {
|
||||
tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
|
||||
}
|
||||
diff --git a/gcc/opts.cc b/gcc/opts.cc
|
||||
index eae71ed20..a97630d1c 100644
|
||||
--- a/gcc/opts.cc
|
||||
+++ b/gcc/opts.cc
|
||||
@@ -647,7 +647,6 @@ static const struct default_options default_options_table[] =
|
||||
VECT_COST_MODEL_VERY_CHEAP },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
|
||||
- { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
|
||||
|
||||
/* -O2 and above optimizations, but not -Os or -Og. */
|
||||
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
|
||||
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
index f61cf5e6f..cad891e62 100644
|
||||
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
|
||||
+/* { dg-options "-O2 -fmerge-mull -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
|
||||
|
||||
# define BN_BITS4 32
|
||||
# define BN_MASK2 (0xffffffffffffffffL)
|
||||
diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
|
||||
index 235b2761a..d1e72e5df 100644
|
||||
--- a/gcc/testsuite/gcc.dg/pr107190.c
|
||||
+++ b/gcc/testsuite/gcc.dg/pr107190.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
|
||||
+/* { dg-options "-O2 -fmerge-mull -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
|
||||
|
||||
# define BN_BITS4 32
|
||||
# define BN_MASK2 (0xffffffffffffffffL)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
26
0010-Version-Clear-DATESTAMP_s.patch
Normal file
26
0010-Version-Clear-DATESTAMP_s.patch
Normal file
@ -0,0 +1,26 @@
|
||||
From 8e8f783b02df155e3aafa94af6cc1f66604e08eb Mon Sep 17 00:00:00 2001
|
||||
From: eastb233 <xiezhiheng@huawei.com>
|
||||
Date: Fri, 21 Jul 2023 14:45:27 +0800
|
||||
Subject: [PATCH 10/22] [Version] Clear DATESTAMP_s
|
||||
|
||||
---
|
||||
gcc/Makefile.in | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
|
||||
index 31ff95500..db2a0e1bd 100644
|
||||
--- a/gcc/Makefile.in
|
||||
+++ b/gcc/Makefile.in
|
||||
@@ -897,8 +897,7 @@ PATCHLEVEL_c := \
|
||||
# significant - do not remove it.
|
||||
BASEVER_s := "\"$(BASEVER_c)\""
|
||||
DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\""
|
||||
-DATESTAMP_s := \
|
||||
- "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\""
|
||||
+DATESTAMP_s := "\"\""
|
||||
PKGVERSION_s:= "\"@PKGVERSION@\""
|
||||
BUGURL_s := "\"@REPORT_BUGS_TO@\""
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
From 355eb8e20327242442d139fb052d3a3befde3dd7 Mon Sep 17 00:00:00 2001
|
||||
From: "Cui,Lili" <lili.cui@intel.com>
|
||||
Date: Tue, 1 Nov 2022 09:16:49 +0800
|
||||
Subject: [PATCH] Add attribute hot judgement for INLINE_HINT_known_hot
|
||||
Subject: [PATCH 11/22] Add attribute hot judgement for INLINE_HINT_known_hot
|
||||
hint.
|
||||
|
||||
We set up INLINE_HINT_known_hot hint only when we have profile feedback,
|
||||
@ -120,5 +120,5 @@ index 000000000..1f3be641c
|
||||
+/* { dg-final { scan-ipa-dump "known_hot" "inline" } } */
|
||||
+
|
||||
--
|
||||
2.31.1
|
||||
2.33.0
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
From 1070bc24f53e851cae55320e26715cc594efcd2f Mon Sep 17 00:00:00 2001
|
||||
From: Hongyu Wang <hongyu.wang@intel.com>
|
||||
Date: Thu, 8 Sep 2022 16:52:02 +0800
|
||||
Subject: [PATCH] Enable small loop unrolling for O2
|
||||
Subject: [PATCH 12/22] Enable small loop unrolling for O2
|
||||
|
||||
Modern processors has multiple way instruction decoders
|
||||
For x86, icelake/zen3 has 5 uops, so for small loop with <= 4
|
||||
@ -486,5 +486,5 @@ index 0248fcc00..f75a847f7 100644
|
||||
|
||||
volatile int sink;
|
||||
--
|
||||
2.31.1
|
||||
2.33.0
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
From 96898a9cd8c159625848247bd2f3a09e5c12fcfa Mon Sep 17 00:00:00 2001
|
||||
From: Hongyu Wang <hongyu.wang@intel.com>
|
||||
Date: Sat, 19 Nov 2022 09:38:00 +0800
|
||||
Subject: [PATCH] i386: Only enable small loop unrolling in backend [PR
|
||||
Subject: [PATCH 13/22] i386: Only enable small loop unrolling in backend [PR
|
||||
107692]
|
||||
|
||||
Followed by the discussion in pr107692, -munroll-only-small-loops
|
||||
@ -226,5 +226,5 @@ index f75a847f7..7e2d869e1 100644
|
||||
|
||||
volatile int sink;
|
||||
--
|
||||
2.31.1
|
||||
2.33.0
|
||||
|
||||
1981
0014-Array-widen-compare-Add-a-new-optimization-for-array.patch
Normal file
1981
0014-Array-widen-compare-Add-a-new-optimization-for-array.patch
Normal file
File diff suppressed because it is too large
Load Diff
6170
0015-Backport-Structure-reorganization-optimization.patch
Normal file
6170
0015-Backport-Structure-reorganization-optimization.patch
Normal file
File diff suppressed because it is too large
Load Diff
2056
0016-CompleteStructRelayout-Complete-Structure-Relayout.patch
Normal file
2056
0016-CompleteStructRelayout-Complete-Structure-Relayout.patch
Normal file
File diff suppressed because it is too large
Load Diff
489
0017-StructReorg-Some-bugfix-for-structure-reorganization.patch
Normal file
489
0017-StructReorg-Some-bugfix-for-structure-reorganization.patch
Normal file
@ -0,0 +1,489 @@
|
||||
From 2b4db34d3b21ff8597373e9e67858b3b60cc7dae Mon Sep 17 00:00:00 2001
|
||||
From: eastb233 <xiezhiheng@huawei.com>
|
||||
Date: Fri, 21 Jul 2023 11:20:51 +0800
|
||||
Subject: [PATCH 17/22] [StructReorg] Some bugfix for structure reorganization
|
||||
|
||||
Some bugfix for structure reorganization,
|
||||
1. disable type simplify in LTO within optimizations
|
||||
2. only enable optimizations in C language
|
||||
3. use new to initialize allocated memory in symbol-summary.h
|
||||
4. cover escape scenarios not considered
|
||||
---
|
||||
gcc/ipa-free-lang-data.cc | 11 ++
|
||||
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 101 +++++++++++--------
|
||||
gcc/symbol-summary.h | 13 ++-
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-5.c | 31 ++++++
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-6.c | 54 ++++++++++
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-7.c | 38 +++++++
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-8.c | 25 +++++
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-9.c | 54 ++++++++++
|
||||
8 files changed, 283 insertions(+), 44 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
|
||||
|
||||
diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc
|
||||
index a74215685..5450be9fe 100644
|
||||
--- a/gcc/ipa-free-lang-data.cc
|
||||
+++ b/gcc/ipa-free-lang-data.cc
|
||||
@@ -102,6 +102,12 @@ fld_worklist_push (tree t, class free_lang_data_d *fld)
|
||||
static tree
|
||||
fld_simplified_type_name (tree type)
|
||||
{
|
||||
+ /* Simplify type will cause that struct A and struct A within
|
||||
+ struct B are different type pointers, so skip it in structure
|
||||
+ optimizations. */
|
||||
+ if (flag_ipa_struct_reorg)
|
||||
+ return TYPE_NAME (type);
|
||||
+
|
||||
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
|
||||
return TYPE_NAME (type);
|
||||
/* Drop TYPE_DECLs in TYPE_NAME in favor of the identifier in the
|
||||
@@ -340,6 +346,11 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
|
||||
{
|
||||
if (!t)
|
||||
return t;
|
||||
+ /* Simplify type will cause that struct A and struct A within
|
||||
+ struct B are different type pointers, so skip it in structure
|
||||
+ optimizations. */
|
||||
+ if (flag_ipa_struct_reorg)
|
||||
+ return t;
|
||||
if (POINTER_TYPE_P (t))
|
||||
return fld_incomplete_type_of (t, fld);
|
||||
/* FIXME: This triggers verification error, see PR88140. */
|
||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
index c8b975a92..9f790b28b 100644
|
||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
@@ -105,6 +105,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "ipa-param-manipulation.h"
|
||||
#include "gimplify-me.h"
|
||||
#include "cfgloop.h"
|
||||
+#include "langhooks.h"
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -196,6 +197,39 @@ gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
|
||||
GSI_SAME_STMT);
|
||||
}
|
||||
|
||||
+/* Check whether in C language or LTO with only C language. */
|
||||
+
|
||||
+static bool
|
||||
+lang_c_p (void)
|
||||
+{
|
||||
+ const char *language_string = lang_hooks.name;
|
||||
+
|
||||
+ if (!language_string)
|
||||
+ return false;
|
||||
+
|
||||
+ if (strcmp (language_string, "GNU GIMPLE") == 0)
|
||||
+ {
|
||||
+ unsigned i = 0;
|
||||
+ tree t = NULL;
|
||||
+ const char *unit_string = NULL;
|
||||
+
|
||||
+ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
|
||||
+ {
|
||||
+ unit_string = TRANSLATION_UNIT_LANGUAGE (t);
|
||||
+ if (!unit_string
|
||||
+ || (strncmp (unit_string, "GNU C", 5) != 0)
|
||||
+ || (!ISDIGIT (unit_string[5])))
|
||||
+ return false;
|
||||
+ }
|
||||
+ return true;
|
||||
+ }
|
||||
+ else if (strncmp (language_string, "GNU C", 5) == 0
|
||||
+ && ISDIGIT (language_string[5]))
|
||||
+ return true;
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
enum srmode
|
||||
{
|
||||
NORMAL = 0,
|
||||
@@ -1018,7 +1052,6 @@ public:
|
||||
void analyze_types (void);
|
||||
void clear_visited (void);
|
||||
bool create_new_types (void);
|
||||
- void restore_field_type (void);
|
||||
void create_new_decls (void);
|
||||
srdecl *find_decl (tree);
|
||||
void create_new_functions (void);
|
||||
@@ -2107,7 +2140,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
||||
srtype *t = find_type (inner_type (TREE_TYPE (rhs)));
|
||||
srdecl *d = find_decl (lhs);
|
||||
if (!d && t)
|
||||
- current_function->record_decl (t, lhs, -1);
|
||||
+ {
|
||||
+ current_function->record_decl (t, lhs, -1);
|
||||
+ tree var = SSA_NAME_VAR (lhs);
|
||||
+ if (var && VOID_POINTER_P (TREE_TYPE (var)))
|
||||
+ current_function->record_decl (t, var, -1);
|
||||
+ }
|
||||
}
|
||||
if (TREE_CODE (rhs) == SSA_NAME
|
||||
&& VOID_POINTER_P (TREE_TYPE (rhs))
|
||||
@@ -2116,7 +2154,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
||||
srtype *t = find_type (inner_type (TREE_TYPE (lhs)));
|
||||
srdecl *d = find_decl (rhs);
|
||||
if (!d && t)
|
||||
- current_function->record_decl (t, rhs, -1);
|
||||
+ {
|
||||
+ current_function->record_decl (t, rhs, -1);
|
||||
+ tree var = SSA_NAME_VAR (rhs);
|
||||
+ if (var && VOID_POINTER_P (TREE_TYPE (var)))
|
||||
+ current_function->record_decl (t, var, -1);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -2796,8 +2839,14 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt)
|
||||
if (escapes != does_not_escape)
|
||||
{
|
||||
for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
|
||||
- mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
|
||||
- escapes);
|
||||
+ {
|
||||
+ mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
|
||||
+ escapes);
|
||||
+ srdecl *d = current_function->find_decl (
|
||||
+ gimple_call_arg (stmt, i));
|
||||
+ if (d)
|
||||
+ d->type->mark_escape (escapes, stmt);
|
||||
+ }
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -3731,42 +3780,6 @@ ipa_struct_reorg::analyze_types (void)
|
||||
}
|
||||
}
|
||||
|
||||
-/* When struct A has a struct B member, B's type info
|
||||
- is not stored in
|
||||
- TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA)))
|
||||
- Try to restore B's type information. */
|
||||
-
|
||||
-void
|
||||
-ipa_struct_reorg::restore_field_type (void)
|
||||
-{
|
||||
- for (unsigned i = 0; i < types.length (); i++)
|
||||
- {
|
||||
- for (unsigned j = 0; j < types[i]->fields.length (); j++)
|
||||
- {
|
||||
- srfield *field = types[i]->fields[j];
|
||||
- if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE)
|
||||
- {
|
||||
- /* If field type has TYPE_FIELDS information,
|
||||
- we do not need to do this. */
|
||||
- if (TYPE_FIELDS (field->type->type) != NULL)
|
||||
- continue;
|
||||
- for (unsigned k = 0; k < types.length (); k++)
|
||||
- {
|
||||
- if (i == k)
|
||||
- continue;
|
||||
- const char *type1 = get_type_name (field->type->type);
|
||||
- const char *type2 = get_type_name (types[k]->type);
|
||||
- if (type1 == NULL || type2 == NULL)
|
||||
- continue;
|
||||
- if (type1 == type2
|
||||
- && TYPE_FIELDS (types[k]->type))
|
||||
- field->type = types[k];
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
-
|
||||
/* Create all new types we want to create. */
|
||||
|
||||
bool
|
||||
@@ -4647,7 +4660,6 @@ ipa_struct_reorg::rewrite_functions (void)
|
||||
{
|
||||
unsigned retval = 0;
|
||||
|
||||
- restore_field_type ();
|
||||
/* Create new types, if we did not create any new types,
|
||||
then don't rewrite any accesses. */
|
||||
if (!create_new_types ())
|
||||
@@ -4866,7 +4878,10 @@ pass_ipa_struct_reorg::gate (function *)
|
||||
&& flag_ipa_struct_reorg
|
||||
/* Don't bother doing anything if the program has errors. */
|
||||
&& !seen_error ()
|
||||
- && flag_lto_partition == LTO_PARTITION_ONE);
|
||||
+ && flag_lto_partition == LTO_PARTITION_ONE
|
||||
+ /* Only enable struct optimizations in C since other
|
||||
+ languages' grammar forbid. */
|
||||
+ && lang_c_p ());
|
||||
}
|
||||
|
||||
} // anon namespace
|
||||
diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
|
||||
index c54d3084c..3fe64047c 100644
|
||||
--- a/gcc/symbol-summary.h
|
||||
+++ b/gcc/symbol-summary.h
|
||||
@@ -103,6 +103,12 @@ protected:
|
||||
/* Allocates new data that are stored within map. */
|
||||
T* allocate_new ()
|
||||
{
|
||||
+ /* In structure optimizations, we call new to ensure that
|
||||
+ the allocated memory is initialized to 0. */
|
||||
+ if (flag_ipa_struct_reorg)
|
||||
+ return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
|
||||
+ : new T ();
|
||||
+
|
||||
/* Call gcc_internal_because we do not want to call finalizer for
|
||||
a type T. We call dtor explicitly. */
|
||||
return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
|
||||
@@ -115,7 +121,12 @@ protected:
|
||||
if (is_ggc ())
|
||||
ggc_delete (item);
|
||||
else
|
||||
- m_allocator.remove (item);
|
||||
+ {
|
||||
+ if (flag_ipa_struct_reorg)
|
||||
+ delete item;
|
||||
+ else
|
||||
+ m_allocator.remove (item);
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Unregister all call-graph hooks. */
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
|
||||
new file mode 100644
|
||||
index 000000000..273baa9a3
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
|
||||
@@ -0,0 +1,31 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-additional-options "-flto -fno-use-linker-plugin" } */
|
||||
+
|
||||
+struct D
|
||||
+{
|
||||
+ int n;
|
||||
+ int c [8];
|
||||
+};
|
||||
+
|
||||
+struct A
|
||||
+{
|
||||
+ int i;
|
||||
+ char *p;
|
||||
+};
|
||||
+
|
||||
+struct B
|
||||
+{
|
||||
+ struct A *a;
|
||||
+ struct D *d;
|
||||
+};
|
||||
+
|
||||
+int dtInsert1 (struct B *b)
|
||||
+{
|
||||
+ struct A a = { 0, 0 };
|
||||
+ struct D *d;
|
||||
+ b->a = &a;
|
||||
+ d = b->d;
|
||||
+ &d->c [d->n];
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
|
||||
new file mode 100644
|
||||
index 000000000..455f9b501
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
|
||||
@@ -0,0 +1,54 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-additional-options "-flto -fno-use-linker-plugin" } */
|
||||
+
|
||||
+typedef struct basic_block_def *basic_block;
|
||||
+typedef struct gimple_seq_node_d *gimple_seq_node;
|
||||
+typedef struct gimple_seq_d *gimple_seq;
|
||||
+typedef struct
|
||||
+{
|
||||
+ gimple_seq_node ptr;
|
||||
+ gimple_seq seq;
|
||||
+ basic_block bb;
|
||||
+} gimple_stmt_iterator;
|
||||
+typedef void *gimple;
|
||||
+extern void exit(int);
|
||||
+struct gimple_seq_node_d
|
||||
+{
|
||||
+ gimple stmt;
|
||||
+ struct gimple_seq_node_d *next;
|
||||
+};
|
||||
+struct gimple_seq_d
|
||||
+{
|
||||
+};
|
||||
+static __inline__ gimple_stmt_iterator
|
||||
+gsi_start (gimple_seq seq)
|
||||
+{
|
||||
+ gimple_stmt_iterator i;
|
||||
+ i.seq = seq;
|
||||
+ return i;
|
||||
+}
|
||||
+static __inline__ unsigned char
|
||||
+gsi_end_p (gimple_stmt_iterator i)
|
||||
+{
|
||||
+ return i.ptr == ((void *)0);
|
||||
+}
|
||||
+static __inline__ void
|
||||
+gsi_next (gimple_stmt_iterator *i)
|
||||
+{
|
||||
+ i->ptr = i->ptr->next;
|
||||
+}
|
||||
+static __inline__ gimple
|
||||
+gsi_stmt (gimple_stmt_iterator i)
|
||||
+{
|
||||
+ return i.ptr->stmt;
|
||||
+}
|
||||
+void
|
||||
+c_warn_unused_result (gimple_seq seq)
|
||||
+{
|
||||
+ gimple_stmt_iterator i;
|
||||
+ for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
|
||||
+ {
|
||||
+ gimple g = gsi_stmt (i);
|
||||
+ if (!g) exit(0);
|
||||
+ }
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
|
||||
new file mode 100644
|
||||
index 000000000..afc0bd86c
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
|
||||
@@ -0,0 +1,38 @@
|
||||
+/* { dg-do run } */
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+
|
||||
+struct gki_elem {
|
||||
+ char *key;
|
||||
+ int idx;
|
||||
+};
|
||||
+
|
||||
+typedef struct {
|
||||
+ struct gki_elem *table;
|
||||
+
|
||||
+ int primelevel;
|
||||
+ int nhash;
|
||||
+ int nkeys;
|
||||
+} GKI;
|
||||
+
|
||||
+void *
|
||||
+sre_malloc(size_t size)
|
||||
+{
|
||||
+ void *ptr = malloc (size);
|
||||
+ return ptr;
|
||||
+}
|
||||
+
|
||||
+__attribute__((noinline)) int
|
||||
+GKIStoreKey(GKI *hash)
|
||||
+{
|
||||
+ hash->table = sre_malloc(sizeof(struct gki_elem));
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ GKI *hash = malloc (sizeof(GKI));
|
||||
+ GKIStoreKey(hash);
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
|
||||
new file mode 100644
|
||||
index 000000000..9bcfaf368
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
|
||||
@@ -0,0 +1,25 @@
|
||||
+/* { dg-do run } */
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <string.h>
|
||||
+
|
||||
+typedef struct {
|
||||
+ unsigned char blue;
|
||||
+ unsigned char green;
|
||||
+} Pixel;
|
||||
+
|
||||
+typedef struct {
|
||||
+ unsigned short colormaplength;
|
||||
+ Pixel *colormapdata;
|
||||
+} TargaImage;
|
||||
+
|
||||
+TargaImage *img;
|
||||
+
|
||||
+int main() {
|
||||
+ img = (TargaImage *) malloc( sizeof(TargaImage) );
|
||||
+ if (img->colormaplength > 0) {
|
||||
+ img->colormapdata = (Pixel *) malloc(sizeof(Pixel) * img->colormaplength);
|
||||
+ memset(img->colormapdata, 0, (sizeof(Pixel) * img->colormaplength) );
|
||||
+ }
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
|
||||
new file mode 100644
|
||||
index 000000000..052f4e3bd
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
|
||||
@@ -0,0 +1,54 @@
|
||||
+/* { dg-do run } */
|
||||
+
|
||||
+extern void abort(void);
|
||||
+
|
||||
+struct packed_ushort {
|
||||
+ unsigned short ucs;
|
||||
+} __attribute__((packed));
|
||||
+
|
||||
+struct source {
|
||||
+ int pos, length;
|
||||
+};
|
||||
+
|
||||
+static int flag;
|
||||
+
|
||||
+static void __attribute__((noinline)) fetch(struct source *p)
|
||||
+{
|
||||
+ p->length = 128;
|
||||
+}
|
||||
+
|
||||
+static struct packed_ushort __attribute__((noinline)) next(struct source *p)
|
||||
+{
|
||||
+ struct packed_ushort rv;
|
||||
+
|
||||
+ if (p->pos >= p->length) {
|
||||
+ if (flag) {
|
||||
+ flag = 0;
|
||||
+ fetch(p);
|
||||
+ return next(p);
|
||||
+ }
|
||||
+ flag = 1;
|
||||
+ rv.ucs = 0xffff;
|
||||
+ return rv;
|
||||
+ }
|
||||
+ rv.ucs = 0;
|
||||
+ return rv;
|
||||
+}
|
||||
+
|
||||
+int main(void)
|
||||
+{
|
||||
+ struct source s;
|
||||
+ int i;
|
||||
+
|
||||
+ s.pos = 0;
|
||||
+ s.length = 0;
|
||||
+ flag = 0;
|
||||
+
|
||||
+ for (i = 0; i < 16; i++) {
|
||||
+ struct packed_ushort rv = next(&s);
|
||||
+ if ((i == 0 && rv.ucs != 0xffff)
|
||||
+ || (i > 0 && rv.ucs != 0))
|
||||
+ abort();
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
342
0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
Normal file
342
0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
Normal file
@ -0,0 +1,342 @@
|
||||
From 19ded9dad06b22b9b7aa9e3902e3e7a38a2256ab Mon Sep 17 00:00:00 2001
|
||||
From: dingguangya <dingguangya1@huawei.com>
|
||||
Date: Sat, 29 Jul 2023 18:27:10 +0800
|
||||
Subject: [PATCH 18/22] [ccmp] Add another optimization opportunity for ccmp
|
||||
instruction
|
||||
|
||||
Add flag -fccmp2.
|
||||
Enables the use of the ccmp instruction by creating a new conflict
|
||||
relationship for instances where temporary expressions replacement
|
||||
cannot be effectively created.
|
||||
---
|
||||
gcc/ccmp.cc | 33 ++++
|
||||
gcc/ccmp.h | 1 +
|
||||
gcc/common.opt | 4 +
|
||||
gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 15 ++
|
||||
gcc/tree-ssa-coalesce.cc | 197 ++++++++++++++++++++++
|
||||
5 files changed, 250 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
|
||||
|
||||
diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
|
||||
index 3db0a264e..e34f3bcc6 100644
|
||||
--- a/gcc/ccmp.cc
|
||||
+++ b/gcc/ccmp.cc
|
||||
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "cfgexpand.h"
|
||||
#include "ccmp.h"
|
||||
#include "predict.h"
|
||||
+#include "gimple-iterator.h"
|
||||
|
||||
/* Check whether T is a simple boolean variable or a SSA name
|
||||
set by a comparison operator in the same basic block. */
|
||||
@@ -129,6 +130,38 @@ ccmp_candidate_p (gimple *g)
|
||||
return false;
|
||||
}
|
||||
|
||||
+/* Check whether bb is a potential conditional compare candidate. */
|
||||
+bool
|
||||
+check_ccmp_candidate (basic_block bb)
|
||||
+{
|
||||
+ gimple_stmt_iterator gsi;
|
||||
+ gimple *bb_last_stmt, *stmt;
|
||||
+ tree op0, op1;
|
||||
+
|
||||
+ gsi = gsi_last_bb (bb);
|
||||
+ bb_last_stmt = gsi_stmt (gsi);
|
||||
+
|
||||
+ if (bb_last_stmt && gimple_code (bb_last_stmt) == GIMPLE_COND)
|
||||
+ {
|
||||
+ op0 = gimple_cond_lhs (bb_last_stmt);
|
||||
+ op1 = gimple_cond_rhs (bb_last_stmt);
|
||||
+
|
||||
+ if (TREE_CODE (op0) == SSA_NAME
|
||||
+ && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
|
||||
+ && TREE_CODE (op1) == INTEGER_CST
|
||||
+ && ((gimple_cond_code (bb_last_stmt) == NE_EXPR)
|
||||
+ || (gimple_cond_code (bb_last_stmt) == EQ_EXPR)))
|
||||
+ {
|
||||
+ stmt = SSA_NAME_DEF_STMT (op0);
|
||||
+ if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN)
|
||||
+ {
|
||||
+ return ccmp_candidate_p (stmt);
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
/* Extract the comparison we want to do from the tree. */
|
||||
void
|
||||
get_compare_parts (tree t, int *up, rtx_code *rcode,
|
||||
diff --git a/gcc/ccmp.h b/gcc/ccmp.h
|
||||
index 1799d5fed..efe3a1c14 100644
|
||||
--- a/gcc/ccmp.h
|
||||
+++ b/gcc/ccmp.h
|
||||
@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define GCC_CCMP_H
|
||||
|
||||
extern rtx expand_ccmp_expr (gimple *, machine_mode);
|
||||
+extern bool check_ccmp_candidate (basic_block bb);
|
||||
|
||||
#endif /* GCC_CCMP_H */
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 4d91ce8cf..0aa516719 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -2017,6 +2017,10 @@ fira-verbose=
|
||||
Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5)
|
||||
-fira-verbose=<number> Control IRA's level of diagnostic messages.
|
||||
|
||||
+fccmp2
|
||||
+Common Var(flag_ccmp2) Init(0) Optimization
|
||||
+Optimize potential ccmp instruction in complex scenarios.
|
||||
+
|
||||
fivopts
|
||||
Common Var(flag_ivopts) Init(1) Optimization
|
||||
Optimize induction variables on trees.
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
|
||||
new file mode 100644
|
||||
index 000000000..b509ba810
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O -fdump-rtl-expand-details -fccmp2" } */
|
||||
+
|
||||
+int func (int a, int b, int c)
|
||||
+{
|
||||
+ while(1)
|
||||
+ {
|
||||
+ if(a-- == 0 || b >= c)
|
||||
+ {
|
||||
+ return 1;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times "\tccmp\t" 1} } */
|
||||
diff --git a/gcc/tree-ssa-coalesce.cc b/gcc/tree-ssa-coalesce.cc
|
||||
index dccf41ab8..195e06428 100644
|
||||
--- a/gcc/tree-ssa-coalesce.cc
|
||||
+++ b/gcc/tree-ssa-coalesce.cc
|
||||
@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "explow.h"
|
||||
#include "tree-dfa.h"
|
||||
#include "stor-layout.h"
|
||||
+#include "ccmp.h"
|
||||
+#include "target.h"
|
||||
+#include "tree-outof-ssa.h"
|
||||
|
||||
/* This set of routines implements a coalesce_list. This is an object which
|
||||
is used to track pairs of ssa_names which are desirable to coalesce
|
||||
@@ -854,6 +857,198 @@ live_track_clear_base_vars (live_track *ptr)
|
||||
bitmap_clear (&ptr->live_base_var);
|
||||
}
|
||||
|
||||
+/* Return true if gimple is a copy assignment. */
|
||||
+
|
||||
+static inline bool
|
||||
+gimple_is_assign_copy_p (gimple *gs)
|
||||
+{
|
||||
+ return (is_gimple_assign (gs) && gimple_assign_copy_p (gs)
|
||||
+ && TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME
|
||||
+ && TREE_CODE (gimple_assign_rhs1 (gs)) == SSA_NAME);
|
||||
+}
|
||||
+
|
||||
+#define MAX_CCMP_CONFLICT_NUM 5
|
||||
+
|
||||
+/* Clear high-cost conflict graphs. */
|
||||
+
|
||||
+static void
|
||||
+remove_high_cost_graph_for_ccmp (ssa_conflicts *conflict_graph)
|
||||
+{
|
||||
+ unsigned x = 0;
|
||||
+ int add_conflict_num = 0;
|
||||
+ bitmap b;
|
||||
+ FOR_EACH_VEC_ELT (conflict_graph->conflicts, x, b)
|
||||
+ {
|
||||
+ if (b)
|
||||
+ {
|
||||
+ add_conflict_num++;
|
||||
+ }
|
||||
+ }
|
||||
+ if (add_conflict_num >= MAX_CCMP_CONFLICT_NUM)
|
||||
+ {
|
||||
+ conflict_graph->conflicts.release ();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Adding a new conflict graph to the original graph. */
|
||||
+
|
||||
+static void
|
||||
+process_add_graph (live_track *live, basic_block bb,
|
||||
+ ssa_conflicts *conflict_graph)
|
||||
+{
|
||||
+ tree use, def;
|
||||
+ ssa_op_iter iter;
|
||||
+ gimple *first_visit_stmt = NULL;
|
||||
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
|
||||
+ gsi_next (&gsi))
|
||||
+ {
|
||||
+ if (gimple_visited_p (gsi_stmt (gsi)))
|
||||
+ {
|
||||
+ first_visit_stmt = gsi_stmt (gsi);
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ if (!first_visit_stmt)
|
||||
+ return;
|
||||
+
|
||||
+ for (gimple_stmt_iterator gsi = gsi_last_bb (bb);
|
||||
+ gsi_stmt (gsi) != first_visit_stmt; gsi_prev (&gsi))
|
||||
+ {
|
||||
+ gimple *stmt = gsi_stmt (gsi);
|
||||
+ if (gimple_visited_p (gsi_stmt (gsi)) && is_gimple_debug (stmt))
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+ if (gimple_is_assign_copy_p (stmt))
|
||||
+ {
|
||||
+ live_track_clear_var (live, gimple_assign_rhs1 (stmt));
|
||||
+ }
|
||||
+ FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
|
||||
+ {
|
||||
+ live_track_process_def (live, def, conflict_graph);
|
||||
+ }
|
||||
+ FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
|
||||
+ {
|
||||
+ live_track_process_use (live, use);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Build a conflict graph based on ccmp candidate. */
|
||||
+
|
||||
+static void
|
||||
+add_ccmp_conflict_graph (ssa_conflicts *conflict_graph,
|
||||
+ tree_live_info_p liveinfo, var_map map, basic_block bb)
|
||||
+{
|
||||
+ live_track *live;
|
||||
+ tree use, def;
|
||||
+ ssa_op_iter iter;
|
||||
+ live = new_live_track (map);
|
||||
+ live_track_init (live, live_on_exit (liveinfo, bb));
|
||||
+
|
||||
+ gimple *last_stmt = gsi_stmt (gsi_last_bb (bb));
|
||||
+ gcc_assert (gimple_cond_lhs (last_stmt));
|
||||
+
|
||||
+ auto_vec<tree> stack;
|
||||
+ stack.safe_push (gimple_cond_lhs (last_stmt));
|
||||
+ while (!stack.is_empty ())
|
||||
+ {
|
||||
+ tree op = stack.pop ();
|
||||
+ gimple *op_stmt = SSA_NAME_DEF_STMT (op);
|
||||
+ if (!op_stmt || gimple_bb (op_stmt) != bb
|
||||
+ || !is_gimple_assign (op_stmt)
|
||||
+ || !ssa_is_replaceable_p (op_stmt))
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+ if (gimple_is_assign_copy_p (op_stmt))
|
||||
+ {
|
||||
+ live_track_clear_var (live, gimple_assign_rhs1 (op_stmt));
|
||||
+ }
|
||||
+ gimple_set_visited (op_stmt, true);
|
||||
+ FOR_EACH_SSA_TREE_OPERAND (def, op_stmt, iter, SSA_OP_DEF)
|
||||
+ {
|
||||
+ live_track_process_def (live, def, conflict_graph);
|
||||
+ }
|
||||
+ FOR_EACH_SSA_TREE_OPERAND (use, op_stmt, iter, SSA_OP_USE)
|
||||
+ {
|
||||
+ stack.safe_push (use);
|
||||
+ live_track_process_use (live, use);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ process_add_graph (live, bb, conflict_graph);
|
||||
+ delete_live_track (live);
|
||||
+ remove_high_cost_graph_for_ccmp (conflict_graph);
|
||||
+}
|
||||
+
|
||||
+/* Determine whether the ccmp conflict graph can be added.
|
||||
+ e.g.,
|
||||
+
|
||||
+ ;; basic block 3, loop depth 1
|
||||
+ ;; pred: 2
|
||||
+ ;; 3
|
||||
+ # ivtmp.5_10 = PHI <ivtmp.5_12 (2), ivtmp.5_11 (3)>
|
||||
+ _7 = b_4 (D) >= c_5 (D);
|
||||
+ _8 = ivtmp.5_10 == 0;
|
||||
+ _9 = _7 | _8;
|
||||
+ ivtmp.5_11 = ivtmp.5_10 - 1;
|
||||
+ if (_9 != 0)
|
||||
+ goto <bb 4>; [10.70%]
|
||||
+ else
|
||||
+ goto <bb 3>; [89.30%]
|
||||
+
|
||||
+ In the above loop, the expression will be replaced:
|
||||
+
|
||||
+ _7 replaced by b_4 (D) >= c_5 (D)
|
||||
+ _8 replaced by ivtmp.5_10 == 0
|
||||
+
|
||||
+ If the current case wants to use the ccmp instruction, then
|
||||
+
|
||||
+ _9 can be replaced by _7 | _8
|
||||
+
|
||||
+ So this requires that ivtmp.5_11 and ivtmp.5_10 be divided into different
|
||||
+ partitions.
|
||||
+
|
||||
+ Now this function can achieve this ability. */
|
||||
+
|
||||
+static void
|
||||
+determine_add_ccmp_conflict_graph (basic_block bb, tree_live_info_p liveinfo,
|
||||
+ var_map map, ssa_conflicts *graph)
|
||||
+{
|
||||
+ if (!flag_ccmp2 || !targetm.gen_ccmp_first || !check_ccmp_candidate (bb))
|
||||
+ return;
|
||||
+ for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
|
||||
+ gsi_next (&bsi))
|
||||
+ {
|
||||
+ gimple_set_visited (gsi_stmt (bsi), false);
|
||||
+ }
|
||||
+ ssa_conflicts *ccmp_conflict_graph;
|
||||
+ ccmp_conflict_graph = ssa_conflicts_new (num_var_partitions (map));
|
||||
+ add_ccmp_conflict_graph (ccmp_conflict_graph, liveinfo, map, bb);
|
||||
+ unsigned x;
|
||||
+ bitmap b;
|
||||
+ if (ccmp_conflict_graph)
|
||||
+ {
|
||||
+ FOR_EACH_VEC_ELT (ccmp_conflict_graph->conflicts, x, b)
|
||||
+ {
|
||||
+ if (!b)
|
||||
+ continue;
|
||||
+ unsigned y = bitmap_first_set_bit (b);
|
||||
+ if (!graph->conflicts[x] || !bitmap_bit_p (graph->conflicts[x], y))
|
||||
+ {
|
||||
+ ssa_conflicts_add (graph, x, y);
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ {
|
||||
+ fprintf (dump_file, "potential ccmp: add additional "
|
||||
+ "conflict-ssa : bb[%d] %d:%d\n",
|
||||
+ bb->index, x, y);
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ ssa_conflicts_delete (ccmp_conflict_graph);
|
||||
+}
|
||||
|
||||
/* Build a conflict graph based on LIVEINFO. Any partitions which are in the
|
||||
partition view of the var_map liveinfo is based on get entries in the
|
||||
@@ -938,6 +1133,8 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo)
|
||||
live_track_process_use (live, var);
|
||||
}
|
||||
|
||||
+ determine_add_ccmp_conflict_graph (bb, liveinfo, map, graph);
|
||||
+
|
||||
/* If result of a PHI is unused, looping over the statements will not
|
||||
record any conflicts since the def was never live. Since the PHI node
|
||||
is going to be translated out of SSA form, it will insert a copy.
|
||||
--
|
||||
2.33.0
|
||||
|
||||
405
0019-fp-model-Enable-fp-model-on-kunpeng.patch
Normal file
405
0019-fp-model-Enable-fp-model-on-kunpeng.patch
Normal file
@ -0,0 +1,405 @@
|
||||
From 8cdb316a3fe205a3089b9c17aec0442f4d5f75be Mon Sep 17 00:00:00 2001
|
||||
From: bule <bule1@huawei.com>
|
||||
Date: Sun, 27 Aug 2023 16:49:04 +0800
|
||||
Subject: [PATCH 19/22] [fp-model] Enable fp-model on kunpeng
|
||||
|
||||
Enable fp-model options on kunpeng for precision control.
|
||||
---
|
||||
gcc/common.opt | 26 +++++
|
||||
gcc/config/aarch64/aarch64-linux.h | 3 +-
|
||||
gcc/flag-types.h | 9 ++
|
||||
gcc/fortran/options.cc | 8 ++
|
||||
gcc/opts-common.cc | 146 ++++++++++++++++++++++++++++-
|
||||
gcc/opts.cc | 68 ++++++++++++++
|
||||
6 files changed, 256 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 8a0dafc52..f5eef8a45 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -1642,6 +1642,32 @@ ffp-int-builtin-inexact
|
||||
Common Var(flag_fp_int_builtin_inexact) Init(1) Optimization
|
||||
Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions.
|
||||
|
||||
+fftz
|
||||
+Common Var(flag_ftz) Optimization
|
||||
+Control fpcr register for flush to zero.
|
||||
+
|
||||
+fp-model=
|
||||
+Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization
|
||||
+-fp-model=[normal|fast|precise|except|strict] Perform floating-point precision control.
|
||||
+
|
||||
+Enum
|
||||
+Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs)
|
||||
+
|
||||
+EnumValue
|
||||
+Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL)
|
||||
+
|
||||
+EnumValue
|
||||
+Enum(fp_model) String(fast) Value(FP_MODEL_FAST)
|
||||
+
|
||||
+EnumValue
|
||||
+Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE)
|
||||
+
|
||||
+EnumValue
|
||||
+Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT)
|
||||
+
|
||||
+EnumValue
|
||||
+Enum(fp_model) String(strict) Value(FP_MODEL_STRICT)
|
||||
+
|
||||
; Nonzero means don't put addresses of constant functions in registers.
|
||||
; Used for compiling the Unix kernel, where strange substitutions are
|
||||
; done on the assembly output.
|
||||
diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h
|
||||
index 5e4553d79..a5cba6391 100644
|
||||
--- a/gcc/config/aarch64/aarch64-linux.h
|
||||
+++ b/gcc/config/aarch64/aarch64-linux.h
|
||||
@@ -50,7 +50,8 @@
|
||||
#define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC
|
||||
|
||||
#define GNU_USER_TARGET_MATHFILE_SPEC \
|
||||
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
|
||||
+ "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\
|
||||
+ %{!fno-ftz:crtfastmath.o%s}}"
|
||||
|
||||
#undef ENDFILE_SPEC
|
||||
#define ENDFILE_SPEC \
|
||||
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
|
||||
index 2c8498169..64c64eb32 100644
|
||||
--- a/gcc/flag-types.h
|
||||
+++ b/gcc/flag-types.h
|
||||
@@ -260,6 +260,15 @@ enum fp_contract_mode {
|
||||
FP_CONTRACT_FAST = 2
|
||||
};
|
||||
|
||||
+/* Floating-point precision mode. */
|
||||
+enum fp_model {
|
||||
+ FP_MODEL_NORMAL = 0,
|
||||
+ FP_MODEL_FAST = 1,
|
||||
+ FP_MODEL_PRECISE = 2,
|
||||
+ FP_MODEL_EXCEPT = 3,
|
||||
+ FP_MODEL_STRICT = 4
|
||||
+};
|
||||
+
|
||||
/* Scalar storage order kind. */
|
||||
enum scalar_storage_order_kind {
|
||||
SSO_NATIVE = 0,
|
||||
diff --git a/gcc/fortran/options.cc b/gcc/fortran/options.cc
|
||||
index d0fa634f1..3eb99a84a 100644
|
||||
--- a/gcc/fortran/options.cc
|
||||
+++ b/gcc/fortran/options.cc
|
||||
@@ -243,6 +243,7 @@ form_from_filename (const char *filename)
|
||||
return f_form;
|
||||
}
|
||||
|
||||
+static void gfc_handle_fpe_option (const char *arg, bool trap);
|
||||
|
||||
/* Finalize commandline options. */
|
||||
|
||||
@@ -286,6 +287,13 @@ gfc_post_options (const char **pfilename)
|
||||
if (flag_protect_parens == -1)
|
||||
flag_protect_parens = !optimize_fast;
|
||||
|
||||
+ /* If fp-model=except/strict, turn on all ffpe-trap and ffpe-summary. */
|
||||
+ if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT)
|
||||
+ {
|
||||
+ gfc_handle_fpe_option ("all", false);
|
||||
+ gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true);
|
||||
+ }
|
||||
+
|
||||
/* -Ofast sets implies -fstack-arrays unless an explicit size is set for
|
||||
stack arrays. */
|
||||
if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2)
|
||||
diff --git a/gcc/opts-common.cc b/gcc/opts-common.cc
|
||||
index 7c07d5046..489a6e02a 100644
|
||||
--- a/gcc/opts-common.cc
|
||||
+++ b/gcc/opts-common.cc
|
||||
@@ -28,7 +28,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "spellcheck.h"
|
||||
#include "opts-jobserver.h"
|
||||
|
||||
-static void prune_options (struct cl_decoded_option **, unsigned int *);
|
||||
+static void prune_options (struct cl_decoded_option **, unsigned int *,
|
||||
+ unsigned int);
|
||||
|
||||
/* An option that is undocumented, that takes a joined argument, and
|
||||
that doesn't fit any of the classes of uses (language/common,
|
||||
@@ -1091,7 +1092,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv,
|
||||
|
||||
*decoded_options = opt_array;
|
||||
*decoded_options_count = num_decoded_options;
|
||||
- prune_options (decoded_options, decoded_options_count);
|
||||
+ prune_options (decoded_options, decoded_options_count, lang_mask);
|
||||
}
|
||||
|
||||
/* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the
|
||||
@@ -1112,11 +1113,109 @@ cancel_option (int opt_idx, int next_opt_idx, int orig_next_opt_idx)
|
||||
return false;
|
||||
}
|
||||
|
||||
+/* Check whether opt_idx exists in decoded_options array between index
|
||||
+ start and end. If found, return its index in decoded_options,
|
||||
+ else return end. */
|
||||
+static unsigned int
|
||||
+find_opt_idx (const struct cl_decoded_option *decoded_options,
|
||||
+ unsigned int decoded_options_count,
|
||||
+ unsigned int start, unsigned int end, unsigned int opt_idx)
|
||||
+{
|
||||
+ gcc_assert (end <= decoded_options_count);
|
||||
+ gcc_assert (opt_idx < cl_options_count);
|
||||
+ unsigned int k;
|
||||
+ for (k = start; k < end; k++)
|
||||
+ {
|
||||
+ if (decoded_options[k].opt_index == opt_idx)
|
||||
+ {
|
||||
+ return k;
|
||||
+ }
|
||||
+ }
|
||||
+ return k;
|
||||
+}
|
||||
+
|
||||
+/* Remove the opt_index element from decoded_options array. */
|
||||
+static unsigned int
|
||||
+remove_option (struct cl_decoded_option *decoded_options,
|
||||
+ unsigned int decoded_options_count,
|
||||
+ unsigned int opt_index)
|
||||
+{
|
||||
+ gcc_assert (opt_index < decoded_options_count);
|
||||
+ unsigned int i;
|
||||
+ for (i = opt_index; i < decoded_options_count - 1; i++)
|
||||
+ {
|
||||
+ decoded_options[i] = decoded_options[i + 1];
|
||||
+ }
|
||||
+ return decoded_options_count - 1;
|
||||
+}
|
||||
+
|
||||
+/* Handle the priority between fp-model, Ofast, and
|
||||
+ ffast-math. */
|
||||
+static unsigned int
|
||||
+handle_fp_model_driver (struct cl_decoded_option *decoded_options,
|
||||
+ unsigned int decoded_options_count,
|
||||
+ unsigned int fp_model_index,
|
||||
+ unsigned int lang_mask)
|
||||
+{
|
||||
+ struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index];
|
||||
+ enum fp_model model = (enum fp_model) fp_model_opt.value;
|
||||
+ if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT)
|
||||
+ {
|
||||
+ /* If found Ofast, override Ofast with O3. */
|
||||
+ unsigned int Ofast_index;
|
||||
+ Ofast_index = find_opt_idx (decoded_options, decoded_options_count,
|
||||
+ 0, decoded_options_count, OPT_Ofast);
|
||||
+ while (Ofast_index != decoded_options_count)
|
||||
+ {
|
||||
+ const char *tmp_argv = "-O3";
|
||||
+ decode_cmdline_option (&tmp_argv, lang_mask,
|
||||
+ &decoded_options[Ofast_index]);
|
||||
+ warning (0, "%<-Ofast%> is degraded to %<-O3%> due to %qs",
|
||||
+ fp_model_opt.orig_option_with_args_text);
|
||||
+ Ofast_index = find_opt_idx (decoded_options, decoded_options_count,
|
||||
+ 0, decoded_options_count, OPT_Ofast);
|
||||
+ }
|
||||
+ /* If found ffast-math before fp-model=precise/strict
|
||||
+ option, cancel it. */
|
||||
+ unsigned int ffast_math_index;
|
||||
+ ffast_math_index
|
||||
+ = find_opt_idx (decoded_options, decoded_options_count, 0,
|
||||
+ fp_model_index, OPT_ffast_math);
|
||||
+ if (ffast_math_index != fp_model_index)
|
||||
+ {
|
||||
+ decoded_options_count
|
||||
+ = remove_option (decoded_options, decoded_options_count,
|
||||
+ ffast_math_index);
|
||||
+ warning (0, "%<-ffast-math%> before %qs is canceled",
|
||||
+ fp_model_opt.orig_option_with_args_text);
|
||||
+ }
|
||||
+ }
|
||||
+ if (model == FP_MODEL_FAST)
|
||||
+ {
|
||||
+ /* If -fno-fast-math follows fp-model=fast, cancel the fp-model option. */
|
||||
+ unsigned int fno_fast_math_index;
|
||||
+ fno_fast_math_index
|
||||
+ = find_opt_idx (decoded_options, decoded_options_count, fp_model_index,
|
||||
+ decoded_options_count, OPT_ffast_math);
|
||||
+ if (fno_fast_math_index != decoded_options_count
|
||||
+ && decoded_options[fno_fast_math_index].value == 0)
|
||||
+ {
|
||||
+ decoded_options_count
|
||||
+ = remove_option (decoded_options, decoded_options_count,
|
||||
+ fp_model_index);
|
||||
+ warning (0,
|
||||
+ "%<-fp-model=fast%> before %<-fno-fast-math%> is canceled");
|
||||
+ }
|
||||
+ }
|
||||
+ return decoded_options_count;
|
||||
+}
|
||||
+
|
||||
/* Filter out options canceled by the ones after them. */
|
||||
|
||||
static void
|
||||
prune_options (struct cl_decoded_option **decoded_options,
|
||||
- unsigned int *decoded_options_count)
|
||||
+ unsigned int *decoded_options_count,
|
||||
+ unsigned int lang_mask)
|
||||
{
|
||||
unsigned int old_decoded_options_count = *decoded_options_count;
|
||||
struct cl_decoded_option *old_decoded_options = *decoded_options;
|
||||
@@ -1127,7 +1226,12 @@ prune_options (struct cl_decoded_option **decoded_options,
|
||||
const struct cl_option *option;
|
||||
unsigned int fdiagnostics_color_idx = 0;
|
||||
|
||||
+ if (!diagnostic_ready_p ())
|
||||
+ diagnostic_initialize (global_dc, 0);
|
||||
+
|
||||
/* Remove arguments which are negated by others after them. */
|
||||
+
|
||||
+ unsigned int fp_model_index = old_decoded_options_count;
|
||||
new_decoded_options_count = 0;
|
||||
for (i = 0; i < old_decoded_options_count; i++)
|
||||
{
|
||||
@@ -1151,6 +1255,34 @@ prune_options (struct cl_decoded_option **decoded_options,
|
||||
fdiagnostics_color_idx = i;
|
||||
continue;
|
||||
|
||||
+ case OPT_fp_model_:
|
||||
+ /* Only the last fp-model option will take effect. */
|
||||
+ unsigned int next_fp_model_idx;
|
||||
+ next_fp_model_idx = find_opt_idx (old_decoded_options,
|
||||
+ old_decoded_options_count,
|
||||
+ i + 1,
|
||||
+ old_decoded_options_count,
|
||||
+ OPT_fp_model_);
|
||||
+ if (next_fp_model_idx != old_decoded_options_count)
|
||||
+ {
|
||||
+ /* Found more than one fp-model, cancel this one. */
|
||||
+ if (old_decoded_options[i].value
|
||||
+ != old_decoded_options[next_fp_model_idx].value)
|
||||
+ {
|
||||
+ warning (0, "%qs is overrided by %qs",
|
||||
+ old_decoded_options[i].
|
||||
+ orig_option_with_args_text,
|
||||
+ old_decoded_options[next_fp_model_idx].
|
||||
+ orig_option_with_args_text);
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* Found the last fp-model option. */
|
||||
+ fp_model_index = new_decoded_options_count;
|
||||
+ }
|
||||
+ /* FALLTHRU. */
|
||||
default:
|
||||
gcc_assert (opt_idx < cl_options_count);
|
||||
option = &cl_options[opt_idx];
|
||||
@@ -1190,6 +1322,14 @@ keep:
|
||||
break;
|
||||
}
|
||||
}
|
||||
+ if (fp_model_index < new_decoded_options_count)
|
||||
+ {
|
||||
+ new_decoded_options_count
|
||||
+ = handle_fp_model_driver (new_decoded_options,
|
||||
+ new_decoded_options_count,
|
||||
+ fp_model_index,
|
||||
+ lang_mask);
|
||||
+ }
|
||||
|
||||
if (fdiagnostics_color_idx >= 1)
|
||||
{
|
||||
diff --git a/gcc/opts.cc b/gcc/opts.cc
|
||||
index a97630d1c..b522ed7e2 100644
|
||||
--- a/gcc/opts.cc
|
||||
+++ b/gcc/opts.cc
|
||||
@@ -328,6 +328,7 @@ static void set_debug_level (uint32_t dinfo, int extended,
|
||||
struct gcc_options *opts_set,
|
||||
location_t loc);
|
||||
static void set_fast_math_flags (struct gcc_options *opts, int set);
|
||||
+static void set_fp_model_flags (struct gcc_options *opts, int set);
|
||||
static void decode_d_option (const char *arg, struct gcc_options *opts,
|
||||
location_t loc, diagnostic_context *dc);
|
||||
static void set_unsafe_math_optimizations_flags (struct gcc_options *opts,
|
||||
@@ -2857,6 +2858,10 @@ common_handle_option (struct gcc_options *opts,
|
||||
set_fast_math_flags (opts, value);
|
||||
break;
|
||||
|
||||
+ case OPT_fp_model_:
|
||||
+ set_fp_model_flags (opts, value);
|
||||
+ break;
|
||||
+
|
||||
case OPT_funsafe_math_optimizations:
|
||||
set_unsafe_math_optimizations_flags (opts, value);
|
||||
break;
|
||||
@@ -3266,6 +3271,69 @@ set_fast_math_flags (struct gcc_options *opts, int set)
|
||||
}
|
||||
}
|
||||
|
||||
+/* Handle fp-model options. */
|
||||
+static void
|
||||
+set_fp_model_flags (struct gcc_options *opts, int set)
|
||||
+{
|
||||
+ enum fp_model model = (enum fp_model) set;
|
||||
+ switch (model)
|
||||
+ {
|
||||
+ case FP_MODEL_FAST:
|
||||
+ /* Equivalent to enabling -ffast-math. */
|
||||
+ set_fast_math_flags (opts, 1);
|
||||
+ break;
|
||||
+
|
||||
+ case FP_MODEL_PRECISE:
|
||||
+ /* Equivalent to disabling -ffast-math. */
|
||||
+ set_fast_math_flags (opts, 0);
|
||||
+ /* Turn on -frounding-math -fsignaling-nans. */
|
||||
+ if (!opts->frontend_set_flag_signaling_nans)
|
||||
+ opts->x_flag_signaling_nans = 1;
|
||||
+ if (!opts->frontend_set_flag_rounding_math)
|
||||
+ opts->x_flag_rounding_math = 1;
|
||||
+ opts->x_flag_expensive_optimizations = 0;
|
||||
+ opts->x_flag_code_hoisting = 0;
|
||||
+ opts->x_flag_predictive_commoning = 0;
|
||||
+ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF;
|
||||
+ break;
|
||||
+
|
||||
+ case FP_MODEL_EXCEPT:
|
||||
+ if (!opts->frontend_set_flag_signaling_nans)
|
||||
+ opts->x_flag_signaling_nans = 1;
|
||||
+ if (!opts->frontend_set_flag_errno_math)
|
||||
+ opts->x_flag_errno_math = 1;
|
||||
+ if (!opts->frontend_set_flag_trapping_math)
|
||||
+ opts->x_flag_trapping_math = 1;
|
||||
+ opts->x_flag_fp_int_builtin_inexact = 1;
|
||||
+ /* Also turn on ffpe-trap in fortran. */
|
||||
+ break;
|
||||
+
|
||||
+ case FP_MODEL_STRICT:
|
||||
+ /* Turn on both precise and except. */
|
||||
+ if (!opts->frontend_set_flag_signaling_nans)
|
||||
+ opts->x_flag_signaling_nans = 1;
|
||||
+ if (!opts->frontend_set_flag_rounding_math)
|
||||
+ opts->x_flag_rounding_math = 1;
|
||||
+ opts->x_flag_expensive_optimizations = 0;
|
||||
+ opts->x_flag_code_hoisting = 0;
|
||||
+ opts->x_flag_predictive_commoning = 0;
|
||||
+ if (!opts->frontend_set_flag_errno_math)
|
||||
+ opts->x_flag_errno_math = 1;
|
||||
+ if (!opts->frontend_set_flag_trapping_math)
|
||||
+ opts->x_flag_trapping_math = 1;
|
||||
+ opts->x_flag_fp_int_builtin_inexact = 1;
|
||||
+ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF;
|
||||
+ break;
|
||||
+
|
||||
+ case FP_MODEL_NORMAL:
|
||||
+ /* Do nothing. */
|
||||
+ break;
|
||||
+
|
||||
+ default:
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* When -funsafe-math-optimizations is set the following
|
||||
flags are set as well. */
|
||||
static void
|
||||
--
|
||||
2.33.0
|
||||
|
||||
317
0020-simdmath-Enable-simdmath-on-kunpeng.patch
Normal file
317
0020-simdmath-Enable-simdmath-on-kunpeng.patch
Normal file
@ -0,0 +1,317 @@
|
||||
From 49ad10199dbdda2c36850a2617f5c985977939c5 Mon Sep 17 00:00:00 2001
|
||||
From: bule <bule1@huawei.com>
|
||||
Date: Sun, 27 Aug 2023 16:49:42 +0800
|
||||
Subject: [PATCH 20/22] [simdmath] Enable simdmath on kunpeng
|
||||
|
||||
This enables the SIMD math functions supported by libmathlib for Fortran/C/C++.
|
||||
Use -fsimdmath to turn on the generation of simdmath functions. The
|
||||
supported functions can be found in simdmath.h. Add more simd declarations
|
||||
if you need more kinds of math functions. -msimdmath-64 is used to turn
|
||||
on 64-bit simd math functions, which are not supported by libmathlib.
|
||||
Therefore, this option is off by default.
|
||||
---
|
||||
gcc/c-family/c-opts.cc | 4 ++
|
||||
gcc/common.opt | 4 ++
|
||||
gcc/config/aarch64/aarch64.cc | 9 ++++-
|
||||
gcc/config/aarch64/aarch64.opt | 6 +++
|
||||
gcc/fortran/scanner.cc | 3 ++
|
||||
gcc/opts.cc | 17 ++++++++
|
||||
.../gcc.target/aarch64/simd_pcs_attribute-3.c | 2 +-
|
||||
libgomp/Makefile.am | 4 +-
|
||||
libgomp/Makefile.in | 10 +++--
|
||||
libgomp/configure | 4 +-
|
||||
libgomp/configure.ac | 2 +-
|
||||
libgomp/simdmath.h.in | 40 +++++++++++++++++++
|
||||
libgomp/simdmath_f.h.in | 11 +++++
|
||||
13 files changed, 106 insertions(+), 10 deletions(-)
|
||||
create mode 100644 libgomp/simdmath.h.in
|
||||
create mode 100644 libgomp/simdmath_f.h.in
|
||||
|
||||
diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
|
||||
index a341a0617..5134f6128 100644
|
||||
--- a/gcc/c-family/c-opts.cc
|
||||
+++ b/gcc/c-family/c-opts.cc
|
||||
@@ -801,6 +801,10 @@ c_common_post_options (const char **pfilename)
|
||||
if (cpp_opts->deps.style == DEPS_NONE)
|
||||
check_deps_environment_vars ();
|
||||
|
||||
+ if (flag_simdmath)
|
||||
+ {
|
||||
+ defer_opt (OPT_include, "simdmath.h");
|
||||
+ }
|
||||
handle_deferred_opts ();
|
||||
|
||||
sanitize_cpp_opts ();
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index f5eef8a45..e9d580957 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -2125,6 +2125,10 @@ fmath-errno
|
||||
Common Var(flag_errno_math) Init(1) Optimization SetByCombined
|
||||
Set errno after built-in math functions.
|
||||
|
||||
+fsimdmath
|
||||
+Common Var(flag_simdmath) Init(0) Optimization
|
||||
+Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd.
|
||||
+
|
||||
fmax-errors=
|
||||
Common Joined RejectNegative UInteger Var(flag_max_errors)
|
||||
-fmax-errors=<number> Maximum number of errors to report.
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 226dc9dff..a3da4ca30 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -26904,8 +26904,13 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
|
||||
elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
|
||||
if (known_eq (clonei->simdlen, 0U))
|
||||
{
|
||||
- count = 2;
|
||||
- vec_bits = (num == 0 ? 64 : 128);
|
||||
+ /* Currently neither mathlib nor sleef provides a function for the V2SF mode
|
||||
+ simdclone of single precision functions (e.g. _ZGVnN2v_expf).
|
||||
+ Therefore this mode is disabled by default to avoid link error.
|
||||
+ Use -msimdmath-64 option to enable this mode. */
|
||||
+ count = flag_simdmath_64 ? 2 : 1;
|
||||
+ vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128);
|
||||
+
|
||||
clonei->simdlen = exact_div (vec_bits, elt_bits);
|
||||
}
|
||||
else
|
||||
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
||||
index 92220b26e..a64b927e9 100644
|
||||
--- a/gcc/config/aarch64/aarch64.opt
|
||||
+++ b/gcc/config/aarch64/aarch64.opt
|
||||
@@ -190,6 +190,12 @@ precision of square root results to about 16 bits for
|
||||
single precision and to 32 bits for double precision.
|
||||
If enabled, it implies -mlow-precision-recip-sqrt.
|
||||
|
||||
+msimdmath-64
|
||||
+Target Var(flag_simdmath_64) Optimization
|
||||
+Allow compiler to generate V2SF 64 bits simdclone of math functions,
|
||||
+which is not currently supported in mathlib or sleef.
|
||||
+Therefore this option is disabled by default.
|
||||
+
|
||||
mlow-precision-div
|
||||
Target Var(flag_mlow_precision_div) Optimization
|
||||
Enable the division approximation. Enabling this reduces
|
||||
diff --git a/gcc/fortran/scanner.cc b/gcc/fortran/scanner.cc
|
||||
index 2dff25147..63e262f51 100644
|
||||
--- a/gcc/fortran/scanner.cc
|
||||
+++ b/gcc/fortran/scanner.cc
|
||||
@@ -2769,6 +2769,9 @@ gfc_new_file (void)
|
||||
if (flag_pre_include != NULL)
|
||||
load_file (flag_pre_include, NULL, false);
|
||||
|
||||
+ if (flag_simdmath)
|
||||
+ load_file ("simdmath_f.h", NULL, false);
|
||||
+
|
||||
if (gfc_cpp_enabled ())
|
||||
{
|
||||
gfc_cpp_preprocess (gfc_source_file);
|
||||
diff --git a/gcc/opts.cc b/gcc/opts.cc
|
||||
index b522ed7e2..c3cc2c169 100644
|
||||
--- a/gcc/opts.cc
|
||||
+++ b/gcc/opts.cc
|
||||
@@ -322,6 +322,7 @@ static const char undocumented_msg[] = N_("This option lacks documentation.");
|
||||
static const char use_diagnosed_msg[] = N_("Uses of this option are diagnosed.");
|
||||
|
||||
typedef char *char_p; /* For DEF_VEC_P. */
|
||||
+static void set_simdmath_flags (struct gcc_options *opts, int set);
|
||||
|
||||
static void set_debug_level (uint32_t dinfo, int extended,
|
||||
const char *arg, struct gcc_options *opts,
|
||||
@@ -2850,6 +2851,10 @@ common_handle_option (struct gcc_options *opts,
|
||||
dc->min_margin_width = value;
|
||||
break;
|
||||
|
||||
+ case OPT_fsimdmath:
|
||||
+ set_simdmath_flags (opts, value);
|
||||
+ break;
|
||||
+
|
||||
case OPT_fdump_:
|
||||
/* Deferred. */
|
||||
break;
|
||||
@@ -3227,6 +3232,18 @@ common_handle_option (struct gcc_options *opts,
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* The following routine is used to set -fno-math-errno and -fopenmp-simd
|
||||
+ to enable vector mathlib. */
|
||||
+static void
|
||||
+set_simdmath_flags (struct gcc_options *opts, int set)
|
||||
+{
|
||||
+ if (set)
|
||||
+ {
|
||||
+ opts->x_flag_errno_math = 0;
|
||||
+ opts->x_flag_openmp_simd = 1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* Used to set the level of strict aliasing warnings in OPTS,
|
||||
when no level is specified (i.e., when -Wstrict-aliasing, and not
|
||||
-Wstrict-aliasing=level was given).
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
|
||||
index 95f6a6803..e0e0efa9d 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-Ofast" } */
|
||||
+/* { dg-options "-Ofast -msimdmath-64" } */
|
||||
|
||||
__attribute__ ((__simd__))
|
||||
__attribute__ ((__nothrow__ , __leaf__ , __const__))
|
||||
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
|
||||
index f8b2a06d6..8dfa160d6 100644
|
||||
--- a/libgomp/Makefile.am
|
||||
+++ b/libgomp/Makefile.am
|
||||
@@ -75,10 +75,10 @@ libgomp_la_SOURCES += openacc.f90
|
||||
endif
|
||||
|
||||
nodist_noinst_HEADERS = libgomp_f.h
|
||||
-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h
|
||||
+nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h
|
||||
if USE_FORTRAN
|
||||
nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
|
||||
- openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
|
||||
+ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h
|
||||
endif
|
||||
|
||||
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
|
||||
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
|
||||
index 6f0cb7161..90fc326f0 100644
|
||||
--- a/libgomp/Makefile.in
|
||||
+++ b/libgomp/Makefile.in
|
||||
@@ -147,7 +147,7 @@ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
|
||||
configure.lineno config.status.lineno
|
||||
mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
|
||||
CONFIG_HEADER = config.h
|
||||
-CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \
|
||||
+CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \
|
||||
libgomp.spec
|
||||
CONFIG_CLEAN_VPATH_FILES =
|
||||
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
|
||||
@@ -583,9 +583,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
|
||||
@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBADD = libgomp.la $(PLUGIN_GCN_LIBS)
|
||||
@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBTOOLFLAGS = --tag=disable-static
|
||||
nodist_noinst_HEADERS = libgomp_f.h
|
||||
-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h
|
||||
+nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h
|
||||
@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
|
||||
-@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
|
||||
+@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h
|
||||
|
||||
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
|
||||
LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
|
||||
@@ -676,6 +676,10 @@ omp.h: $(top_builddir)/config.status $(srcdir)/omp.h.in
|
||||
cd $(top_builddir) && $(SHELL) ./config.status $@
|
||||
omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in
|
||||
cd $(top_builddir) && $(SHELL) ./config.status $@
|
||||
+simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in
|
||||
+ cd $(top_builddir) && $(SHELL) ./config.status $@
|
||||
+simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in
|
||||
+ cd $(top_builddir) && $(SHELL) ./config.status $@
|
||||
omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in
|
||||
cd $(top_builddir) && $(SHELL) ./config.status $@
|
||||
libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in
|
||||
diff --git a/libgomp/configure b/libgomp/configure
|
||||
index 85fdb4d3f..471c957b7 100755
|
||||
--- a/libgomp/configure
|
||||
+++ b/libgomp/configure
|
||||
@@ -17064,7 +17064,7 @@ fi
|
||||
|
||||
|
||||
|
||||
-ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h"
|
||||
+ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h"
|
||||
|
||||
ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec"
|
||||
|
||||
@@ -18215,6 +18215,8 @@ do
|
||||
"libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
|
||||
"omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;;
|
||||
"omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;;
|
||||
+ "simdmath.h") CONFIG_FILES="$CONFIG_FILES simdmath.h" ;;
|
||||
+ "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;;
|
||||
"omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;;
|
||||
"libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;;
|
||||
"Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
|
||||
diff --git a/libgomp/configure.ac b/libgomp/configure.ac
|
||||
index a9b1f3973..1f81a0d30 100644
|
||||
--- a/libgomp/configure.ac
|
||||
+++ b/libgomp/configure.ac
|
||||
@@ -472,7 +472,7 @@ CFLAGS="$save_CFLAGS"
|
||||
# Determine what GCC version number to use in filesystem paths.
|
||||
GCC_BASE_VER
|
||||
|
||||
-AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h)
|
||||
+AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h)
|
||||
AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec)
|
||||
AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in])
|
||||
AC_CONFIG_FILES([testsuite/libgomp-site-extra.exp])
|
||||
diff --git a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in
|
||||
new file mode 100644
|
||||
index 000000000..ab91a4ec3
|
||||
--- /dev/null
|
||||
+++ b/libgomp/simdmath.h.in
|
||||
@@ -0,0 +1,40 @@
|
||||
+#ifdef __cplusplus
|
||||
+extern "C" {
|
||||
+#endif
|
||||
+
|
||||
+#pragma omp declare simd simdlen(2) notinbranch
|
||||
+double cos (double x);
|
||||
+
|
||||
+#pragma omp declare simd simdlen(4) notinbranch
|
||||
+float cosf (float x);
|
||||
+
|
||||
+#pragma omp declare simd simdlen(2) notinbranch
|
||||
+double sin (double x);
|
||||
+
|
||||
+#pragma omp declare simd simdlen(4) notinbranch
|
||||
+float sinf (float x);
|
||||
+
|
||||
+#pragma omp declare simd simdlen(2) notinbranch
|
||||
+double exp (double x);
|
||||
+
|
||||
+#pragma omp declare simd simdlen(4) notinbranch
|
||||
+float expf (float x);
|
||||
+
|
||||
+#pragma omp declare simd simdlen(2) notinbranch
|
||||
+double log (double x);
|
||||
+
|
||||
+#pragma omp declare simd simdlen(4) notinbranch
|
||||
+float logf (float x);
|
||||
+
|
||||
+#pragma omp declare simd simdlen(2) notinbranch
|
||||
+double pow (double x, double y);
|
||||
+
|
||||
+#pragma omp declare simd simdlen(4) notinbranch
|
||||
+float powf (float x, float y);
|
||||
+
|
||||
+#pragma omp declare simd simdlen(4) notinbranch
|
||||
+float exp2f (float x);
|
||||
+
|
||||
+#ifdef __cplusplus
|
||||
+} // extern "C"
|
||||
+#endif
|
||||
diff --git a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in
|
||||
new file mode 100644
|
||||
index 000000000..550595015
|
||||
--- /dev/null
|
||||
+++ b/libgomp/simdmath_f.h.in
|
||||
@@ -0,0 +1,11 @@
|
||||
+!GCC$ builtin (cos) attributes simd (notinbranch)
|
||||
+!GCC$ builtin (cosf) attributes simd (notinbranch)
|
||||
+!GCC$ builtin (sin) attributes simd (notinbranch)
|
||||
+!GCC$ builtin (sinf) attributes simd (notinbranch)
|
||||
+!GCC$ builtin (exp) attributes simd (notinbranch)
|
||||
+!GCC$ builtin (expf) attributes simd (notinbranch)
|
||||
+!GCC$ builtin (exp2f) attributes simd (notinbranch)
|
||||
+!GCC$ builtin (log) attributes simd (notinbranch)
|
||||
+!GCC$ builtin (logf) attributes simd (notinbranch)
|
||||
+!GCC$ builtin (pow) attributes simd (notinbranch)
|
||||
+!GCC$ builtin (powf) attributes simd (notinbranch)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
5739
0021-StructReorderFields-Structure-reorder-fields.patch
Normal file
5739
0021-StructReorderFields-Structure-reorder-fields.patch
Normal file
File diff suppressed because it is too large
Load Diff
1753
0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch
Normal file
1753
0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch
Normal file
File diff suppressed because it is too large
Load Diff
52
gcc.spec
52
gcc.spec
@ -2,7 +2,7 @@
|
||||
%global gcc_major 12
|
||||
# Note, gcc_release must be integer, if you want to add suffixes to
|
||||
# %%{release}, append them after %%{gcc_release} on Release: line.
|
||||
%global gcc_release 7
|
||||
%global gcc_release 8
|
||||
|
||||
%global _unpackaged_files_terminate_build 0
|
||||
%global _performance_build 1
|
||||
@ -136,12 +136,27 @@ Provides: bundled(libbacktrace)
|
||||
Provides: bundled(libffi)
|
||||
Provides: gcc(major) = %{gcc_major}
|
||||
|
||||
Patch0: 0000-Version-Set-version-to-12.3.1.patch
|
||||
Patch1: 0001-CONFIG-Regenerate-configure-file.patch
|
||||
Patch2: 0002-libquadmath-Enable-libquadmath-on-kunpeng.patch
|
||||
Patch3: 0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch
|
||||
Patch4: 0004-Enable-small-loop-unrolling-for-O2.patch
|
||||
Patch5: 0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch
|
||||
Patch1: 0001-Version-Set-version-to-12.3.1.patch
|
||||
Patch2: 0002-RISCV-Backport-inline-subword-atomic-patches.patch
|
||||
Patch3: 0003-CONFIG-Regenerate-configure-file.patch
|
||||
Patch4: 0004-libquadmath-Enable-libquadmath-on-kunpeng.patch
|
||||
Patch6: 0006-MULL64-1-3-Add-A-B-op-CST-B-match-and-simplify-optim.patch
|
||||
Patch7: 0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch
|
||||
Patch8: 0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch
|
||||
Patch9: 0009-MULL64-Disable-mull64-transformation-by-default.patch
|
||||
Patch10: 0010-Version-Clear-DATESTAMP_s.patch
|
||||
Patch11: 0011-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch
|
||||
Patch12: 0012-Enable-small-loop-unrolling-for-O2.patch
|
||||
Patch13: 0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch
|
||||
Patch14: 0014-Array-widen-compare-Add-a-new-optimization-for-array.patch
|
||||
Patch15: 0015-Backport-Structure-reorganization-optimization.patch
|
||||
Patch16: 0016-CompleteStructRelayout-Complete-Structure-Relayout.patch
|
||||
Patch17: 0017-StructReorg-Some-bugfix-for-structure-reorganization.patch
|
||||
Patch18: 0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
|
||||
Patch19: 0019-fp-model-Enable-fp-model-on-kunpeng.patch
|
||||
Patch20: 0020-simdmath-Enable-simdmath-on-kunpeng.patch
|
||||
Patch21: 0021-StructReorderFields-Structure-reorder-fields.patch
|
||||
Patch22: 0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch
|
||||
|
||||
# On ARM EABI systems, we do want -gnueabi to be part of the
|
||||
# target triple.
|
||||
@ -609,12 +624,27 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
||||
%prep
|
||||
%setup -q -n gcc-12.3.0
|
||||
|
||||
%patch0 -p1
|
||||
%patch1 -p1
|
||||
%patch2 -p1
|
||||
%patch3 -p1
|
||||
%patch4 -p1
|
||||
%patch5 -p1
|
||||
%patch6 -p1
|
||||
%patch7 -p1
|
||||
%patch8 -p1
|
||||
%patch9 -p1
|
||||
%patch10 -p1
|
||||
%patch11 -p1
|
||||
%patch12 -p1
|
||||
%patch13 -p1
|
||||
%patch14 -p1
|
||||
%patch15 -p1
|
||||
%patch16 -p1
|
||||
%patch17 -p1
|
||||
%patch18 -p1
|
||||
%patch19 -p1
|
||||
%patch20 -p1
|
||||
%patch21 -p1
|
||||
%patch22 -p1
|
||||
|
||||
echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE
|
||||
|
||||
@ -2718,6 +2748,10 @@ end
|
||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||
|
||||
%changelog
|
||||
* Tue Aug 29 2023 huangxiaoquan <huangxiaoquan1@huawei.com> 12.3.1-8
|
||||
- Type: Sync
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
* Fri Aug 11 2023 Hongyu Wang <hongyu.wang@intel.com> 12.3.1-7
|
||||
- Type:Sync
|
||||
- i386: Only enable small loop unrolling in backend [PR 107692].
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user