!315 [Init] Init GCC 12.3.0 repository
From: @huang-xiaoquan Reviewed-by: @eastb233 Signed-off-by: @eastb233
This commit is contained in:
commit
577a463f57
19
0000-Version-Set-version-to-12.3.1.patch
Normal file
19
0000-Version-Set-version-to-12.3.1.patch
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
From 73ee6351353b036f466ba1aab9a9e7d7865bf972 Mon Sep 17 00:00:00 2001
|
||||||
|
From: eastb233 <xiezhiheng@huawei.com>
|
||||||
|
Date: Tue, 11 Jul 2023 16:07:51 +0800
|
||||||
|
Subject: [PATCH] [Version] Set version to 12.3.1
|
||||||
|
|
||||||
|
---
|
||||||
|
gcc/BASE-VER | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/BASE-VER b/gcc/BASE-VER
|
||||||
|
index 4d23cb8e0..9c028e25d 100644
|
||||||
|
--- a/gcc/BASE-VER
|
||||||
|
+++ b/gcc/BASE-VER
|
||||||
|
@@ -1 +1 @@
|
||||||
|
-12.3.0
|
||||||
|
+12.3.1
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
@ -1,473 +0,0 @@
|
|||||||
From 85740d3cc56fda699beae689b5d73233d16097af Mon Sep 17 00:00:00 2001
|
|
||||||
From: bule <bule1@huawei.com>
|
|
||||||
Date: Thu, 8 Jul 2021 11:52:47 +0800
|
|
||||||
Subject: [PATCH 01/13] [libquadmath] Enable libquadmath on kunpeng
|
|
||||||
|
|
||||||
This enables libquadmath on the Kunpeng platform for the convenience of
|
|
||||||
users migrating from the x86 platform. libquadmath uses "__float128"
|
|
||||||
as the quad-precision floating-point type and provides math functions with a "q"
|
|
||||||
suffix, like "cosq". Those who do not need compatibility with the x86 platform
|
|
||||||
can use "long double" as the quad-precision floating-point type and the math
|
|
||||||
functions with an "l" suffix, like "cosl", in libm for quad-precision math.
|
|
||||||
|
|
||||||
diff --git a/libquadmath/Makefile.in b/libquadmath/Makefile.in
|
|
||||||
index 8c011212258..66df9c922f8 100644
|
|
||||||
--- a/libquadmath/Makefile.in
|
|
||||||
+++ b/libquadmath/Makefile.in
|
|
||||||
@@ -90,7 +90,7 @@ POST_UNINSTALL = :
|
|
||||||
build_triplet = @build@
|
|
||||||
host_triplet = @host@
|
|
||||||
target_triplet = @target@
|
|
||||||
-@BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES =
|
|
||||||
+#libquadmath_la_DEPENDENCIES =
|
|
||||||
subdir = .
|
|
||||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
|
||||||
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
|
|
||||||
@@ -147,68 +147,68 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \
|
|
||||||
"$(DESTDIR)$(libsubincludedir)"
|
|
||||||
LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
|
|
||||||
am__dirstamp = $(am__leading_dot)dirstamp
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/acoshq.lo math/fmodq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/acosq.lo math/frexpq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.lo math/asinhq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/hypotq.lo math/remainderq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/asinq.lo math/rintq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/atan2q.lo math/isinfq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/roundq.lo math/atanhq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/isnanq.lo math/scalblnq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/atanq.lo math/j0q.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/scalbnq.lo math/cbrtq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/j1q.lo math/signbitq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/ceilq.lo math/jnq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/sincos_table.lo math/complex.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/ldexpq.lo math/sincosq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/copysignq.lo math/lgammaq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/sincosq_kernel.lo math/coshq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/llroundq.lo math/sinhq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/cosq.lo math/log10q.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/sinq.lo math/cosq_kernel.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/log1pq.lo math/sinq_kernel.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/erfq.lo math/logq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/sqrtq.lo math/expm1q.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/lroundq.lo math/tanhq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/expq.lo math/modfq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/tanq.lo math/fabsq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/nanq.lo math/tgammaq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/finiteq.lo math/nextafterq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/truncq.lo math/floorq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/powq.lo math/fmaq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/logbq.lo math/exp2q.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/issignalingq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_neg.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_product.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/tgammaq_product.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/casinhq_kernel.lo math/cacoshq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/cacosq.lo math/casinhq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/casinq.lo math/catanhq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/catanq.lo math/cimagq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/conjq.lo math/cprojq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/crealq.lo math/fdimq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/fmaxq.lo math/fminq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/ilogbq.lo math/llrintq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/log2q.lo math/lrintq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/nearbyintq.lo math/remquoq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.lo math/cexpq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/clog10q.lo math/clogq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/csinq.lo math/csinhq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/csqrtq.lo math/ctanq.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/ctanhq.lo printf/addmul_1.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/add_n.lo printf/cmp.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/divrem.lo printf/flt1282mpn.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/fpioconst.lo printf/lshift.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/mul_1.lo printf/mul_n.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/mul.lo printf/printf_fphex.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/printf_fp.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/rshift.lo printf/submul_1.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/sub_n.lo strtod/strtoflt128.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ strtod/mpn2flt128.lo \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ strtod/tens_in_limb.lo
|
|
||||||
+am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \
|
|
||||||
+ math/acoshq.lo math/fmodq.lo \
|
|
||||||
+ math/acosq.lo math/frexpq.lo \
|
|
||||||
+ math/rem_pio2q.lo math/asinhq.lo \
|
|
||||||
+ math/hypotq.lo math/remainderq.lo \
|
|
||||||
+ math/asinq.lo math/rintq.lo \
|
|
||||||
+ math/atan2q.lo math/isinfq.lo \
|
|
||||||
+ math/roundq.lo math/atanhq.lo \
|
|
||||||
+ math/isnanq.lo math/scalblnq.lo \
|
|
||||||
+ math/atanq.lo math/j0q.lo \
|
|
||||||
+ math/scalbnq.lo math/cbrtq.lo \
|
|
||||||
+ math/j1q.lo math/signbitq.lo \
|
|
||||||
+ math/ceilq.lo math/jnq.lo \
|
|
||||||
+ math/sincos_table.lo math/complex.lo \
|
|
||||||
+ math/ldexpq.lo math/sincosq.lo \
|
|
||||||
+ math/copysignq.lo math/lgammaq.lo \
|
|
||||||
+ math/sincosq_kernel.lo math/coshq.lo \
|
|
||||||
+ math/llroundq.lo math/sinhq.lo \
|
|
||||||
+ math/cosq.lo math/log10q.lo \
|
|
||||||
+ math/sinq.lo math/cosq_kernel.lo \
|
|
||||||
+ math/log1pq.lo math/sinq_kernel.lo \
|
|
||||||
+ math/erfq.lo math/logq.lo \
|
|
||||||
+ math/sqrtq.lo math/expm1q.lo \
|
|
||||||
+ math/lroundq.lo math/tanhq.lo \
|
|
||||||
+ math/expq.lo math/modfq.lo \
|
|
||||||
+ math/tanq.lo math/fabsq.lo \
|
|
||||||
+ math/nanq.lo math/tgammaq.lo \
|
|
||||||
+ math/finiteq.lo math/nextafterq.lo \
|
|
||||||
+ math/truncq.lo math/floorq.lo \
|
|
||||||
+ math/powq.lo math/fmaq.lo \
|
|
||||||
+ math/logbq.lo math/exp2q.lo \
|
|
||||||
+ math/issignalingq.lo \
|
|
||||||
+ math/lgammaq_neg.lo \
|
|
||||||
+ math/lgammaq_product.lo \
|
|
||||||
+ math/tanq_kernel.lo \
|
|
||||||
+ math/tgammaq_product.lo \
|
|
||||||
+ math/casinhq_kernel.lo math/cacoshq.lo \
|
|
||||||
+ math/cacosq.lo math/casinhq.lo \
|
|
||||||
+ math/casinq.lo math/catanhq.lo \
|
|
||||||
+ math/catanq.lo math/cimagq.lo \
|
|
||||||
+ math/conjq.lo math/cprojq.lo \
|
|
||||||
+ math/crealq.lo math/fdimq.lo \
|
|
||||||
+ math/fmaxq.lo math/fminq.lo \
|
|
||||||
+ math/ilogbq.lo math/llrintq.lo \
|
|
||||||
+ math/log2q.lo math/lrintq.lo \
|
|
||||||
+ math/nearbyintq.lo math/remquoq.lo \
|
|
||||||
+ math/ccoshq.lo math/cexpq.lo \
|
|
||||||
+ math/clog10q.lo math/clogq.lo \
|
|
||||||
+ math/csinq.lo math/csinhq.lo \
|
|
||||||
+ math/csqrtq.lo math/ctanq.lo \
|
|
||||||
+ math/ctanhq.lo printf/addmul_1.lo \
|
|
||||||
+ printf/add_n.lo printf/cmp.lo \
|
|
||||||
+ printf/divrem.lo printf/flt1282mpn.lo \
|
|
||||||
+ printf/fpioconst.lo printf/lshift.lo \
|
|
||||||
+ printf/mul_1.lo printf/mul_n.lo \
|
|
||||||
+ printf/mul.lo printf/printf_fphex.lo \
|
|
||||||
+ printf/printf_fp.lo \
|
|
||||||
+ printf/quadmath-printf.lo \
|
|
||||||
+ printf/rshift.lo printf/submul_1.lo \
|
|
||||||
+ printf/sub_n.lo strtod/strtoflt128.lo \
|
|
||||||
+ strtod/mpn2flt128.lo \
|
|
||||||
+ strtod/tens_in_limb.lo
|
|
||||||
libquadmath_la_OBJECTS = $(am_libquadmath_la_OBJECTS)
|
|
||||||
AM_V_lt = $(am__v_lt_@AM_V@)
|
|
||||||
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
|
|
||||||
@@ -218,8 +218,8 @@ libquadmath_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
|
|
||||||
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
|
|
||||||
$(AM_CFLAGS) $(CFLAGS) $(libquadmath_la_LDFLAGS) $(LDFLAGS) -o \
|
|
||||||
$@
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_rpath = -rpath \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ $(toolexeclibdir)
|
|
||||||
+am_libquadmath_la_rpath = -rpath \
|
|
||||||
+ $(toolexeclibdir)
|
|
||||||
AM_V_P = $(am__v_P_@AM_V@)
|
|
||||||
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
|
|
||||||
am__v_P_0 = false
|
|
||||||
@@ -337,7 +337,7 @@ CFLAGS = @CFLAGS@
|
|
||||||
CPP = @CPP@
|
|
||||||
CPPFLAGS = @CPPFLAGS@
|
|
||||||
CYGPATH_W = @CYGPATH_W@
|
|
||||||
-DEFS = @DEFS@
|
|
||||||
+DEFS = @DEFS@ -D__float128="long double"
|
|
||||||
DEPDIR = @DEPDIR@
|
|
||||||
DSYMUTIL = @DSYMUTIL@
|
|
||||||
DUMPBIN = @DUMPBIN@
|
|
||||||
@@ -409,7 +409,7 @@ datadir = @datadir@
|
|
||||||
datarootdir = @datarootdir@
|
|
||||||
docdir = @docdir@
|
|
||||||
dvidir = @dvidir@
|
|
||||||
-enable_shared = @enable_shared@
|
|
||||||
+enable_shared = yes
|
|
||||||
enable_static = @enable_static@
|
|
||||||
exec_prefix = @exec_prefix@
|
|
||||||
get_gcc_base_ver = @get_gcc_base_ver@
|
|
||||||
@@ -451,109 +451,109 @@ top_build_prefix = @top_build_prefix@
|
|
||||||
top_builddir = @top_builddir@
|
|
||||||
top_srcdir = @top_srcdir@
|
|
||||||
AUTOMAKE_OPTIONS = foreign info-in-builddir
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ACLOCAL_AMFLAGS = -I .. -I ../config
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@AM_CPPFLAGS = -I $(top_srcdir)/../include
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@AM_CFLAGS = $(XCFLAGS)
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_arg =
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_dep =
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@toolexeclib_LTLIBRARIES = libquadmath.la
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LIBADD =
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ $(version_arg) $(lt_host_flags) -lm
|
|
||||||
-
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD)
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_SOURCES = \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/x2y2m1q.c math/acoshq.c math/fmodq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/acosq.c math/frexpq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c
|
|
||||||
+ACLOCAL_AMFLAGS = -I .. -I ../config
|
|
||||||
+AM_CPPFLAGS = -I $(top_srcdir)/../include
|
|
||||||
+AM_CFLAGS = $(XCFLAGS)
|
|
||||||
+gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
|
|
||||||
+@LIBQUAD_USE_SYMVER_FALSE@version_arg =
|
|
||||||
+@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map
|
|
||||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun
|
|
||||||
+@LIBQUAD_USE_SYMVER_FALSE@version_dep =
|
|
||||||
+@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map
|
|
||||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun
|
|
||||||
+toolexeclib_LTLIBRARIES = libquadmath.la
|
|
||||||
+libquadmath_la_LIBADD =
|
|
||||||
+libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
|
|
||||||
+ $(version_arg) $(lt_host_flags) -lm
|
|
||||||
+
|
|
||||||
+libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD)
|
|
||||||
+nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h
|
|
||||||
+libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
|
|
||||||
+libquadmath_la_SOURCES = \
|
|
||||||
+ math/x2y2m1q.c math/acoshq.c math/fmodq.c \
|
|
||||||
+ math/acosq.c math/frexpq.c \
|
|
||||||
+ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \
|
|
||||||
+ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \
|
|
||||||
+ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \
|
|
||||||
+ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \
|
|
||||||
+ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \
|
|
||||||
+ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \
|
|
||||||
+ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \
|
|
||||||
+ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \
|
|
||||||
+ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \
|
|
||||||
+ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \
|
|
||||||
+ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \
|
|
||||||
+ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \
|
|
||||||
+ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \
|
|
||||||
+ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \
|
|
||||||
+ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \
|
|
||||||
+ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \
|
|
||||||
+ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \
|
|
||||||
+ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \
|
|
||||||
+ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \
|
|
||||||
+ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \
|
|
||||||
+ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \
|
|
||||||
+ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \
|
|
||||||
+ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \
|
|
||||||
+ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \
|
|
||||||
+ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c
|
|
||||||
|
|
||||||
|
|
||||||
# Work around what appears to be a GNU make bug handling MAKEFLAGS
|
|
||||||
# values defined in terms of make variables, as is the case for CC and
|
|
||||||
# friends when we are called from the top level Makefile.
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@AM_MAKEFLAGS = \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "AR_FLAGS=$(AR_FLAGS)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS=$(CFLAGS)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "CXXFLAGS=$(CXXFLAGS)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL=$(INSTALL)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_DATA=$(INSTALL_DATA)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "JC1FLAGS=$(JC1FLAGS)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "LDFLAGS=$(LDFLAGS)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "MAKE=$(MAKE)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "SHELL=$(SHELL)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "exec_prefix=$(exec_prefix)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "infodir=$(infodir)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "libdir=$(libdir)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "prefix=$(prefix)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "includedir=$(includedir)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "AR=$(AR)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "AS=$(AS)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "CC=$(CC)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "CXX=$(CXX)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "LD=$(LD)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "NM=$(NM)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "RANLIB=$(RANLIB)" \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ "DESTDIR=$(DESTDIR)"
|
|
||||||
+AM_MAKEFLAGS = \
|
|
||||||
+ "AR_FLAGS=$(AR_FLAGS)" \
|
|
||||||
+ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \
|
|
||||||
+ "CFLAGS=$(CFLAGS)" \
|
|
||||||
+ "CXXFLAGS=$(CXXFLAGS)" \
|
|
||||||
+ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
|
|
||||||
+ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \
|
|
||||||
+ "INSTALL=$(INSTALL)" \
|
|
||||||
+ "INSTALL_DATA=$(INSTALL_DATA)" \
|
|
||||||
+ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \
|
|
||||||
+ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \
|
|
||||||
+ "JC1FLAGS=$(JC1FLAGS)" \
|
|
||||||
+ "LDFLAGS=$(LDFLAGS)" \
|
|
||||||
+ "LIBCFLAGS=$(LIBCFLAGS)" \
|
|
||||||
+ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \
|
|
||||||
+ "MAKE=$(MAKE)" \
|
|
||||||
+ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \
|
|
||||||
+ "PICFLAG=$(PICFLAG)" \
|
|
||||||
+ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \
|
|
||||||
+ "SHELL=$(SHELL)" \
|
|
||||||
+ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \
|
|
||||||
+ "exec_prefix=$(exec_prefix)" \
|
|
||||||
+ "infodir=$(infodir)" \
|
|
||||||
+ "libdir=$(libdir)" \
|
|
||||||
+ "prefix=$(prefix)" \
|
|
||||||
+ "includedir=$(includedir)" \
|
|
||||||
+ "AR=$(AR)" \
|
|
||||||
+ "AS=$(AS)" \
|
|
||||||
+ "CC=$(CC)" \
|
|
||||||
+ "CXX=$(CXX)" \
|
|
||||||
+ "LD=$(LD)" \
|
|
||||||
+ "LIBCFLAGS=$(LIBCFLAGS)" \
|
|
||||||
+ "NM=$(NM)" \
|
|
||||||
+ "PICFLAG=$(PICFLAG)" \
|
|
||||||
+ "RANLIB=$(RANLIB)" \
|
|
||||||
+ "DESTDIR=$(DESTDIR)"
|
|
||||||
|
|
||||||
|
|
||||||
# Subdir rules rely on $(FLAGS_TO_PASS)
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@FLAGS_TO_PASS = $(AM_MAKEFLAGS)
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@MAKEOVERRIDES =
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_FALSE@STAMP_GENINSRC =
|
|
||||||
+FLAGS_TO_PASS = $(AM_MAKEFLAGS)
|
|
||||||
+MAKEOVERRIDES =
|
|
||||||
+@GENINSRC_FALSE@STAMP_GENINSRC =
|
|
||||||
|
|
||||||
# AM_CONDITIONAL on configure option --generated-files-in-srcdir
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ALL_LOCAL_DEPS = $(STAMP_GENINSRC)
|
|
||||||
-@BUILD_INFO_FALSE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO =
|
|
||||||
+@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc
|
|
||||||
+ALL_LOCAL_DEPS = $(STAMP_GENINSRC)
|
|
||||||
+@BUILD_INFO_FALSE@STAMP_BUILD_INFO =
|
|
||||||
|
|
||||||
# AM_CONDITIONAL on configure check ACX_CHECK_PROG_VER([MAKEINFO])
|
|
||||||
-@BUILD_INFO_TRUE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = stamp-build-info
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO)
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info
|
|
||||||
+@BUILD_INFO_TRUE@STAMP_BUILD_INFO = stamp-build-info
|
|
||||||
+CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO)
|
|
||||||
+MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info
|
|
||||||
|
|
||||||
# Automake Documentation:
|
|
||||||
# If your package has Texinfo files in many directories, you can use the
|
|
||||||
@@ -564,8 +564,8 @@ TEXINFO_TEX = ../gcc/doc/include/texinfo.tex
|
|
||||||
|
|
||||||
# Defines info, dvi, pdf and html targets
|
|
||||||
MAKEINFOFLAGS = -I $(srcdir)/../gcc/doc/include
|
|
||||||
-@BUILD_LIBQUADMATH_FALSE@info_TEXINFOS =
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@info_TEXINFOS = libquadmath.texi
|
|
||||||
+info_TEXINFOS =
|
|
||||||
+info_TEXINFOS = libquadmath.texi
|
|
||||||
libquadmath_TEXINFOS = libquadmath-vers.texi
|
|
||||||
MULTISRCTOP =
|
|
||||||
MULTIBUILDTOP =
|
|
||||||
@@ -1187,6 +1187,7 @@ distclean-tags:
|
|
||||||
-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
|
|
||||||
check-am: all-am
|
|
||||||
check: check-am
|
|
||||||
+#all-local
|
|
||||||
all-am: Makefile $(INFO_DEPS) $(LTLIBRARIES) $(HEADERS) config.h \
|
|
||||||
all-local
|
|
||||||
installdirs:
|
|
||||||
@@ -1425,22 +1426,22 @@ uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \
|
|
||||||
|
|
||||||
.PRECIOUS: Makefile
|
|
||||||
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD)
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1)
|
|
||||||
-
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@stamp-geninsrc: libquadmath.info
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ @touch $@
|
|
||||||
-
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS)
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi
|
|
||||||
-@BUILD_LIBQUADMATH_TRUE@ @touch $@
|
|
||||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \
|
|
||||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \
|
|
||||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD)
|
|
||||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \
|
|
||||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \
|
|
||||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \
|
|
||||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \
|
|
||||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1)
|
|
||||||
+
|
|
||||||
+stamp-geninsrc: libquadmath.info
|
|
||||||
+ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info
|
|
||||||
+ @touch $@
|
|
||||||
+
|
|
||||||
+stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS)
|
|
||||||
+ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi
|
|
||||||
+ @touch $@
|
|
||||||
|
|
||||||
all-local: $(ALL_LOCAL_DEPS)
|
|
||||||
|
|
||||||
diff --git a/libquadmath/quadmath.h b/libquadmath/quadmath.h
|
|
||||||
index 81eb957d2fa..faa5977cbc9 100644
|
|
||||||
--- a/libquadmath/quadmath.h
|
|
||||||
+++ b/libquadmath/quadmath.h
|
|
||||||
@@ -27,6 +27,9 @@ Boston, MA 02110-1301, USA. */
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#ifdef AARCH64_QUADMATH
|
|
||||||
+typedef long double __float128;
|
|
||||||
+#endif
|
|
||||||
/* Define the complex type corresponding to __float128
|
|
||||||
("_Complex __float128" is not allowed) */
|
|
||||||
#if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
|
|
||||||
@@ -160,10 +163,9 @@ extern int quadmath_snprintf (char *str, size_t size,
|
|
||||||
#define FLT128_MAX_10_EXP 4932
|
|
||||||
|
|
||||||
|
|
||||||
-#define HUGE_VALQ __builtin_huge_valq()
|
|
||||||
/* The following alternative is valid, but brings the warning:
|
|
||||||
(floating constant exceeds range of ‘__float128’) */
|
|
||||||
-/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */
|
|
||||||
+ #define HUGE_VALQ (__extension__ 0x1.0p32767Q)
|
|
||||||
|
|
||||||
#define M_Eq 2.718281828459045235360287471352662498Q /* e */
|
|
||||||
#define M_LOG2Eq 1.442695040888963407359924681001892137Q /* log_2 e */
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,318 +0,0 @@
|
|||||||
From d1e1ec0cd539f96be5a86b369b8c20b36ce9567f Mon Sep 17 00:00:00 2001
|
|
||||||
From: yangyang <yangyang305@huawei.com>
|
|
||||||
Date: Thu, 8 Jul 2021 14:38:39 +0800
|
|
||||||
Subject: [PATCH 02/13] [Backport] cselim: Extend to check non-trapping for
|
|
||||||
more references
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=54ecfb182bc32140722022c1d9818dee4bdc0e45
|
|
||||||
|
|
||||||
If there is a dominating store, a store to the same reference cannot
|
|
||||||
trap. Previously, this check was only supported for MEM_REFs, so
|
|
||||||
this patch extends it to support ARRAY_REFs and COMPONENT_REFs as well.
|
|
||||||
|
|
||||||
This patch also supports a special case: if there is a dominating load of a
|
|
||||||
local variable whose address does not escape, a store cannot trap, as the local
|
|
||||||
stack is always writable. Other loads are ignored for simplicity, as they
|
|
||||||
don't help to check whether a store can trap (the memory may be read-only).
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
|
|
||||||
index ce242ba569b..8ee1850ac63 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
|
|
||||||
@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
|
|
||||||
return a[0]+a[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
|
|
||||||
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
|
|
||||||
index 90ae36bfce2..9b96875ac7a 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
|
|
||||||
@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
|
|
||||||
return a[0]+a[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
|
|
||||||
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
|
|
||||||
index c633cbe947d..b2d04119381 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
|
|
||||||
@@ -13,4 +13,4 @@ int test(int b, int k) {
|
|
||||||
return a.data[0] + a.data[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
|
|
||||||
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
|
|
||||||
index 7cad563128d..8d3c4f7cc6a 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
|
|
||||||
@@ -16,4 +16,4 @@ int test(int b, int k) {
|
|
||||||
return a.data[0].x + a.data[1].x;
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
|
|
||||||
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..c35a2afc70b
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
|
|
||||||
@@ -0,0 +1,17 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
|
|
||||||
+
|
|
||||||
+typedef union {
|
|
||||||
+ int i;
|
|
||||||
+ float f;
|
|
||||||
+} U;
|
|
||||||
+
|
|
||||||
+int foo(U *u, int b, int i)
|
|
||||||
+{
|
|
||||||
+ u->i = 0;
|
|
||||||
+ if (b)
|
|
||||||
+ u->i = i;
|
|
||||||
+ return u->i;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..f9e66aefb13
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
|
|
||||||
@@ -0,0 +1,15 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
|
|
||||||
+
|
|
||||||
+int *t;
|
|
||||||
+
|
|
||||||
+int f1 (int tt)
|
|
||||||
+{
|
|
||||||
+ int *t1 = t;
|
|
||||||
+ *t1 = -5;
|
|
||||||
+ if (*t1 < tt)
|
|
||||||
+ *((unsigned *) t1) = 5;
|
|
||||||
+ return *t1;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
|
|
||||||
index 09313716598..a06f339f0bb 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O2 -fdump-tree-pre-stats" } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */
|
|
||||||
|
|
||||||
typedef union {
|
|
||||||
int i;
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index b1e0dce93d8..3b5b6907679 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -1986,26 +1986,33 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
|
|
||||||
??? We currently are very conservative and assume that a load might
|
|
||||||
trap even if a store doesn't (write-only memory). This probably is
|
|
||||||
- overly conservative. */
|
|
||||||
+ overly conservative.
|
|
||||||
|
|
||||||
-/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
|
|
||||||
- through it was seen, which would constitute a no-trap region for
|
|
||||||
- same accesses. */
|
|
||||||
-struct name_to_bb
|
|
||||||
+ We currently support a special case that for !TREE_ADDRESSABLE automatic
|
|
||||||
+ variables, it could ignore whether something is a load or store because the
|
|
||||||
+ local stack should be always writable. */
|
|
||||||
+
|
|
||||||
+/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
|
|
||||||
+ basic block an *_REF through it was seen, which would constitute a
|
|
||||||
+ no-trap region for same accesses.
|
|
||||||
+
|
|
||||||
+ Size is needed to support 2 MEM_REFs of different types, like
|
|
||||||
+ MEM<double>(s_1) and MEM<long>(s_1), which would compare equal with
|
|
||||||
+ OEP_ADDRESS_OF. */
|
|
||||||
+struct ref_to_bb
|
|
||||||
{
|
|
||||||
- unsigned int ssa_name_ver;
|
|
||||||
+ tree exp;
|
|
||||||
+ HOST_WIDE_INT size;
|
|
||||||
unsigned int phase;
|
|
||||||
- bool store;
|
|
||||||
- HOST_WIDE_INT offset, size;
|
|
||||||
basic_block bb;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Hashtable helpers. */
|
|
||||||
|
|
||||||
-struct ssa_names_hasher : free_ptr_hash <name_to_bb>
|
|
||||||
+struct refs_hasher : free_ptr_hash<ref_to_bb>
|
|
||||||
{
|
|
||||||
- static inline hashval_t hash (const name_to_bb *);
|
|
||||||
- static inline bool equal (const name_to_bb *, const name_to_bb *);
|
|
||||||
+ static inline hashval_t hash (const ref_to_bb *);
|
|
||||||
+ static inline bool equal (const ref_to_bb *, const ref_to_bb *);
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Used for quick clearing of the hash-table when we see calls.
|
|
||||||
@@ -2015,28 +2022,29 @@ static unsigned int nt_call_phase;
|
|
||||||
/* The hash function. */
|
|
||||||
|
|
||||||
inline hashval_t
|
|
||||||
-ssa_names_hasher::hash (const name_to_bb *n)
|
|
||||||
+refs_hasher::hash (const ref_to_bb *n)
|
|
||||||
{
|
|
||||||
- return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
|
|
||||||
- ^ (n->offset << 6) ^ (n->size << 3);
|
|
||||||
+ inchash::hash hstate;
|
|
||||||
+ inchash::add_expr (n->exp, hstate, OEP_ADDRESS_OF);
|
|
||||||
+ hstate.add_hwi (n->size);
|
|
||||||
+ return hstate.end ();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The equality function of *P1 and *P2. */
|
|
||||||
|
|
||||||
inline bool
|
|
||||||
-ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
|
|
||||||
+refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
|
|
||||||
{
|
|
||||||
- return n1->ssa_name_ver == n2->ssa_name_ver
|
|
||||||
- && n1->store == n2->store
|
|
||||||
- && n1->offset == n2->offset
|
|
||||||
- && n1->size == n2->size;
|
|
||||||
+ return operand_equal_p (n1->exp, n2->exp, OEP_ADDRESS_OF)
|
|
||||||
+ && n1->size == n2->size;
|
|
||||||
}
|
|
||||||
|
|
||||||
class nontrapping_dom_walker : public dom_walker
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
|
|
||||||
- : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
|
|
||||||
+ : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)
|
|
||||||
+ {}
|
|
||||||
|
|
||||||
virtual edge before_dom_children (basic_block);
|
|
||||||
virtual void after_dom_children (basic_block);
|
|
||||||
@@ -2053,7 +2061,7 @@ private:
|
|
||||||
hash_set<tree> *m_nontrapping;
|
|
||||||
|
|
||||||
/* The hash table for remembering what we've seen. */
|
|
||||||
- hash_table<ssa_names_hasher> m_seen_ssa_names;
|
|
||||||
+ hash_table<refs_hasher> m_seen_refs;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Called by walk_dominator_tree, when entering the block BB. */
|
|
||||||
@@ -2102,65 +2110,68 @@ nontrapping_dom_walker::after_dom_children (basic_block bb)
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We see the expression EXP in basic block BB. If it's an interesting
|
|
||||||
- expression (an MEM_REF through an SSA_NAME) possibly insert the
|
|
||||||
- expression into the set NONTRAP or the hash table of seen expressions.
|
|
||||||
- STORE is true if this expression is on the LHS, otherwise it's on
|
|
||||||
- the RHS. */
|
|
||||||
+ expression of:
|
|
||||||
+ 1) MEM_REF
|
|
||||||
+ 2) ARRAY_REF
|
|
||||||
+ 3) COMPONENT_REF
|
|
||||||
+ possibly insert the expression into the set NONTRAP or the hash table
|
|
||||||
+ of seen expressions. STORE is true if this expression is on the LHS,
|
|
||||||
+ otherwise it's on the RHS. */
|
|
||||||
void
|
|
||||||
nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
|
|
||||||
{
|
|
||||||
HOST_WIDE_INT size;
|
|
||||||
|
|
||||||
- if (TREE_CODE (exp) == MEM_REF
|
|
||||||
- && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
|
|
||||||
- && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
|
|
||||||
+ if ((TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
|
|
||||||
+ || TREE_CODE (exp) == COMPONENT_REF)
|
|
||||||
&& (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
|
|
||||||
{
|
|
||||||
- tree name = TREE_OPERAND (exp, 0);
|
|
||||||
- struct name_to_bb map;
|
|
||||||
- name_to_bb **slot;
|
|
||||||
- struct name_to_bb *n2bb;
|
|
||||||
+ struct ref_to_bb map;
|
|
||||||
+ ref_to_bb **slot;
|
|
||||||
+ struct ref_to_bb *r2bb;
|
|
||||||
basic_block found_bb = 0;
|
|
||||||
|
|
||||||
- /* Try to find the last seen MEM_REF through the same
|
|
||||||
- SSA_NAME, which can trap. */
|
|
||||||
- map.ssa_name_ver = SSA_NAME_VERSION (name);
|
|
||||||
- map.phase = 0;
|
|
||||||
- map.bb = 0;
|
|
||||||
- map.store = store;
|
|
||||||
- map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
|
|
||||||
- map.size = size;
|
|
||||||
+ if (!store)
|
|
||||||
+ {
|
|
||||||
+ tree base = get_base_address (exp);
|
|
||||||
+ /* Only record a LOAD of a local variable without address-taken, as
|
|
||||||
+ the local stack is always writable. This allows cselim on a STORE
|
|
||||||
+ with a dominating LOAD. */
|
|
||||||
+ if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
- slot = m_seen_ssa_names.find_slot (&map, INSERT);
|
|
||||||
- n2bb = *slot;
|
|
||||||
- if (n2bb && n2bb->phase >= nt_call_phase)
|
|
||||||
- found_bb = n2bb->bb;
|
|
||||||
+ /* Try to find the last seen *_REF, which can trap. */
|
|
||||||
+ map.exp = exp;
|
|
||||||
+ map.size = size;
|
|
||||||
+ slot = m_seen_refs.find_slot (&map, INSERT);
|
|
||||||
+ r2bb = *slot;
|
|
||||||
+ if (r2bb && r2bb->phase >= nt_call_phase)
|
|
||||||
+ found_bb = r2bb->bb;
|
|
||||||
|
|
||||||
- /* If we've found a trapping MEM_REF, _and_ it dominates EXP
|
|
||||||
- (it's in a basic block on the path from us to the dominator root)
|
|
||||||
+ /* If we've found a trapping *_REF, _and_ it dominates EXP
|
|
||||||
+ (it's in a basic block on the path from us to the dominator root)
|
|
||||||
then we can't trap. */
|
|
||||||
if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
|
|
||||||
{
|
|
||||||
m_nontrapping->add (exp);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
- {
|
|
||||||
+ {
|
|
||||||
/* EXP might trap, so insert it into the hash table. */
|
|
||||||
- if (n2bb)
|
|
||||||
+ if (r2bb)
|
|
||||||
{
|
|
||||||
- n2bb->phase = nt_call_phase;
|
|
||||||
- n2bb->bb = bb;
|
|
||||||
+ r2bb->phase = nt_call_phase;
|
|
||||||
+ r2bb->bb = bb;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
- n2bb = XNEW (struct name_to_bb);
|
|
||||||
- n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
|
|
||||||
- n2bb->phase = nt_call_phase;
|
|
||||||
- n2bb->bb = bb;
|
|
||||||
- n2bb->store = store;
|
|
||||||
- n2bb->offset = map.offset;
|
|
||||||
- n2bb->size = size;
|
|
||||||
- *slot = n2bb;
|
|
||||||
+ r2bb = XNEW (struct ref_to_bb);
|
|
||||||
+ r2bb->phase = nt_call_phase;
|
|
||||||
+ r2bb->bb = bb;
|
|
||||||
+ r2bb->exp = exp;
|
|
||||||
+ r2bb->size = size;
|
|
||||||
+ *slot = r2bb;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,31 +0,0 @@
|
|||||||
From 309f459021a3681d728e5cf644a288ecf2b95175 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhanghaijian <z.zhanghaijian@huawei.com>
|
|
||||||
Date: Mon, 12 Jul 2021 09:42:11 +0800
|
|
||||||
Subject: [PATCH 03/13] [version] Set version to 10.3.1
|
|
||||||
|
|
||||||
Set version to 10.3.1 and clear DATESTAMP_s.
|
|
||||||
|
|
||||||
diff --git a/gcc/BASE-VER b/gcc/BASE-VER
|
|
||||||
index 0719d810258..a9368325816 100644
|
|
||||||
--- a/gcc/BASE-VER
|
|
||||||
+++ b/gcc/BASE-VER
|
|
||||||
@@ -1 +1 @@
|
|
||||||
-10.3.0
|
|
||||||
+10.3.1
|
|
||||||
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
|
|
||||||
index 646db219460..fdc2857d44a 100644
|
|
||||||
--- a/gcc/Makefile.in
|
|
||||||
+++ b/gcc/Makefile.in
|
|
||||||
@@ -885,8 +885,7 @@ PATCHLEVEL_c := \
|
|
||||||
# significant - do not remove it.
|
|
||||||
BASEVER_s := "\"$(BASEVER_c)\""
|
|
||||||
DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\""
|
|
||||||
-DATESTAMP_s := \
|
|
||||||
- "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\""
|
|
||||||
+DATESTAMP_s := "\"\""
|
|
||||||
PKGVERSION_s:= "\"@PKGVERSION@\""
|
|
||||||
BUGURL_s := "\"@REPORT_BUGS_TO@\""
|
|
||||||
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,138 +0,0 @@
|
|||||||
From bdb0f40cea4aa1a92ead381b645363ae0571c065 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhanghaijian <z.zhanghaijian@huawei.com>
|
|
||||||
Date: Mon, 12 Jul 2021 10:36:15 +0800
|
|
||||||
Subject: [PATCH 04/13] [Backport]tree-optimization: Avoid issueing loads in SM
|
|
||||||
when possible
|
|
||||||
|
|
||||||
Reference:https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9e1ea10e657af9fb02fafecf1a600740fd34409
|
|
||||||
|
|
||||||
Currently store-motion emits a load of the value in the loop
|
|
||||||
preheader even when the original loop does not contain any read
|
|
||||||
of the reference. This avoids doing this. In the conditional
|
|
||||||
store-motion case we need to mark the sunk stores with no-warning
|
|
||||||
since the control dependence is too tricky to figure out for
|
|
||||||
the uninit warning.
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c
|
|
||||||
new file mode 100755
|
|
||||||
index 00000000000..884f905148f
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c
|
|
||||||
@@ -0,0 +1,21 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-lim2-details -Wuninitialized" } */
|
|
||||||
+
|
|
||||||
+void foo(int *);
|
|
||||||
+void f2(int dst[3], int R)
|
|
||||||
+{
|
|
||||||
+ int i, inter[2];
|
|
||||||
+
|
|
||||||
+ for (i = 1; i < R; i++) {
|
|
||||||
+ if (i & 8)
|
|
||||||
+ {
|
|
||||||
+ inter[0] = 1;
|
|
||||||
+ inter[1] = 1;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ foo(inter);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump-times "Executing store motion" 2 "lim2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-not " = inter\\\[\[0-1\]\\\];" "lim2" } } */
|
|
||||||
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
|
|
||||||
index abd5f702b91..b3fd1647fbd 100644
|
|
||||||
--- a/gcc/tree-ssa-loop-im.c
|
|
||||||
+++ b/gcc/tree-ssa-loop-im.c
|
|
||||||
@@ -127,6 +127,8 @@ public:
|
|
||||||
|
|
||||||
bitmap stored; /* The set of loops in that this memory location
|
|
||||||
is stored to. */
|
|
||||||
+ bitmap loaded; /* The set of loops in that this memory location
|
|
||||||
+ is loaded from. */
|
|
||||||
vec<mem_ref_loc> accesses_in_loop;
|
|
||||||
/* The locations of the accesses. Vector
|
|
||||||
indexed by the loop number. */
|
|
||||||
@@ -1395,6 +1397,7 @@ mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
|
|
||||||
ref->ref_decomposed = false;
|
|
||||||
ref->hash = hash;
|
|
||||||
ref->stored = NULL;
|
|
||||||
+ ref->loaded = NULL;
|
|
||||||
bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
|
|
||||||
bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
|
|
||||||
ref->accesses_in_loop.create (1);
|
|
||||||
@@ -1435,6 +1438,27 @@ mark_ref_stored (im_mem_ref *ref, class loop *loop)
|
|
||||||
loop = loop_outer (loop);
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Set the LOOP bit in REF loaded bitmap and allocate that if
|
|
||||||
+ necessary. Return whether a bit was changed. */
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
|
|
||||||
+{
|
|
||||||
+ if (!ref->loaded)
|
|
||||||
+ ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
|
|
||||||
+ return bitmap_set_bit (ref->loaded, loop->num);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Marks reference REF as loaded in LOOP. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+mark_ref_loaded (im_mem_ref *ref, class loop *loop)
|
|
||||||
+{
|
|
||||||
+ while (loop != current_loops->tree_root
|
|
||||||
+ && set_ref_loaded_in_loop (ref, loop))
|
|
||||||
+ loop = loop_outer (loop);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Gathers memory references in statement STMT in LOOP, storing the
|
|
||||||
information about them in the memory_accesses structure. Marks
|
|
||||||
the vops accessed through unrecognized statements there as
|
|
||||||
@@ -1571,6 +1595,8 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
|
|
||||||
bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
|
|
||||||
mark_ref_stored (ref, loop);
|
|
||||||
}
|
|
||||||
+ else
|
|
||||||
+ mark_ref_loaded (ref, loop);
|
|
||||||
init_lim_data (stmt)->ref = ref->id;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
@@ -1968,6 +1994,8 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
|
|
||||||
gsi = gsi_start_bb (then_bb);
|
|
||||||
/* Insert actual store. */
|
|
||||||
stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
|
|
||||||
+ /* Make sure to not warn about maybe-uninit uses of tmp_var here. */
|
|
||||||
+ gimple_set_no_warning (stmt, true);
|
|
||||||
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
|
|
||||||
|
|
||||||
edge e1 = single_succ_edge (new_bb);
|
|
||||||
@@ -2115,14 +2143,17 @@ execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
|
|
||||||
by move_computations after all dependencies. */
|
|
||||||
gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
|
|
||||||
|
|
||||||
- /* FIXME/TODO: For the multi-threaded variant, we could avoid this
|
|
||||||
- load altogether, since the store is predicated by a flag. We
|
|
||||||
- could, do the load only if it was originally in the loop. */
|
|
||||||
- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
|
|
||||||
- lim_data = init_lim_data (load);
|
|
||||||
- lim_data->max_loop = loop;
|
|
||||||
- lim_data->tgt_loop = loop;
|
|
||||||
- gsi_insert_before (&gsi, load, GSI_SAME_STMT);
|
|
||||||
+ /* Avoid doing a load if there was no load of the ref in the loop.
|
|
||||||
+ Esp. when the ref is not always stored we cannot optimize it
|
|
||||||
+ away later. */
|
|
||||||
+ if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))
|
|
||||||
+ {
|
|
||||||
+ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
|
|
||||||
+ lim_data = init_lim_data (load);
|
|
||||||
+ lim_data->max_loop = loop;
|
|
||||||
+ lim_data->tgt_loop = loop;
|
|
||||||
+ gsi_insert_before (&gsi, load, GSI_SAME_STMT);
|
|
||||||
+ }
|
|
||||||
|
|
||||||
if (multi_threaded_model_p)
|
|
||||||
{
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,66 +0,0 @@
|
|||||||
From dc238e97a75835231939e77e8568ccd9bc5187d5 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhanghaijian <z.zhanghaijian@huawei.com>
|
|
||||||
Date: Mon, 12 Jul 2021 10:46:16 +0800
|
|
||||||
Subject: [PATCH 05/13] [Backport]tree-optimization: Fix load eliding in SM
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0424a5ece5307cc22bbc0fe97edf4707d7a798ed
|
|
||||||
|
|
||||||
This fixes the case of not using the multithreaded model when
|
|
||||||
only conditionally storing to the destination. We cannot elide
|
|
||||||
the load in this case.
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/torture/pr94949.c b/gcc/testsuite/gcc.dg/torture/pr94949.c
|
|
||||||
new file mode 100755
|
|
||||||
index 00000000000..6182d77b3cd
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/torture/pr94949.c
|
|
||||||
@@ -0,0 +1,17 @@
|
|
||||||
+/* { dg-do run } */
|
|
||||||
+/* { dg-additional-options "-fallow-store-data-races" } */
|
|
||||||
+
|
|
||||||
+static int x = 1;
|
|
||||||
+static volatile int y = -1;
|
|
||||||
+int
|
|
||||||
+main()
|
|
||||||
+{
|
|
||||||
+ for (int i = 0; i < 128; ++i)
|
|
||||||
+ {
|
|
||||||
+ if (i == y)
|
|
||||||
+ x = i;
|
|
||||||
+ }
|
|
||||||
+ if (x != 1)
|
|
||||||
+ __builtin_abort ();
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
|
|
||||||
index b3fd1647fbd..8c33735b1fa 100644
|
|
||||||
--- a/gcc/tree-ssa-loop-im.c
|
|
||||||
+++ b/gcc/tree-ssa-loop-im.c
|
|
||||||
@@ -2128,9 +2128,9 @@ execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
|
|
||||||
fmt_data.orig_loop = loop;
|
|
||||||
for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
|
|
||||||
|
|
||||||
+ bool always_stored = ref_always_accessed_p (loop, ref, true);
|
|
||||||
if (bb_in_transaction (loop_preheader_edge (loop)->src)
|
|
||||||
- || (! flag_store_data_races
|
|
||||||
- && ! ref_always_accessed_p (loop, ref, true)))
|
|
||||||
+ || (! flag_store_data_races && ! always_stored))
|
|
||||||
multi_threaded_model_p = true;
|
|
||||||
|
|
||||||
if (multi_threaded_model_p)
|
|
||||||
@@ -2145,8 +2145,10 @@ execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
|
|
||||||
|
|
||||||
/* Avoid doing a load if there was no load of the ref in the loop.
|
|
||||||
Esp. when the ref is not always stored we cannot optimize it
|
|
||||||
- away later. */
|
|
||||||
- if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))
|
|
||||||
+ away later. But when it is not always stored we must use a conditional
|
|
||||||
+ store then. */
|
|
||||||
+ if ((!always_stored && !multi_threaded_model_p)
|
|
||||||
+ || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
|
|
||||||
{
|
|
||||||
load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
|
|
||||||
lim_data = init_lim_data (load);
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,289 +0,0 @@
|
|||||||
From cfd6920125f7968f0c1f5cb225f9fbd5bc8988b9 Mon Sep 17 00:00:00 2001
|
|
||||||
From: bule <bule1@huawei.com>
|
|
||||||
Date: Tue, 13 Jul 2021 15:26:54 +0800
|
|
||||||
Subject: [PATCH 06/13] [simdmath] Enable simdmath on kunpeng
|
|
||||||
|
|
||||||
This enables the simd math functions supported by libmathlib for fortran/c/c++.
|
|
||||||
Use -fsimdmath to turn on the generation of simdmath function. The
|
|
||||||
supported functions can be found in simdmath.h. Add more simd declaration
|
|
||||||
if you need more kinds of math functions. -msimdmath-64 is used to turn
|
|
||||||
on 64-bit simd math functions, which are not supported by libmathlib.
|
|
||||||
Therefore, this option is off by default.
|
|
||||||
|
|
||||||
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
|
|
||||||
index c51d6d34726..dc1a8984871 100644
|
|
||||||
--- a/gcc/c-family/c-opts.c
|
|
||||||
+++ b/gcc/c-family/c-opts.c
|
|
||||||
@@ -780,6 +780,10 @@ c_common_post_options (const char **pfilename)
|
|
||||||
if (cpp_opts->deps.style == DEPS_NONE)
|
|
||||||
check_deps_environment_vars ();
|
|
||||||
|
|
||||||
+ if (flag_simdmath)
|
|
||||||
+ {
|
|
||||||
+ defer_opt (OPT_include, "simdmath.h");
|
|
||||||
+ }
|
|
||||||
handle_deferred_opts ();
|
|
||||||
|
|
||||||
sanitize_cpp_opts ();
|
|
||||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
||||||
index ec5235c3a41..8eb05570418 100644
|
|
||||||
--- a/gcc/common.opt
|
|
||||||
+++ b/gcc/common.opt
|
|
||||||
@@ -1977,6 +1977,10 @@ fmath-errno
|
|
||||||
Common Report Var(flag_errno_math) Init(1) Optimization SetByCombined
|
|
||||||
Set errno after built-in math functions.
|
|
||||||
|
|
||||||
+fsimdmath
|
|
||||||
+Common Report Var(flag_simdmath) Init(0) Optimization
|
|
||||||
+Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd.
|
|
||||||
+
|
|
||||||
fmax-errors=
|
|
||||||
Common Joined RejectNegative UInteger Var(flag_max_errors)
|
|
||||||
-fmax-errors=<number> Maximum number of errors to report.
|
|
||||||
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
||||||
index 9b400c49ac6..79dc8f186f4 100644
|
|
||||||
--- a/gcc/config/aarch64/aarch64.c
|
|
||||||
+++ b/gcc/config/aarch64/aarch64.c
|
|
||||||
@@ -23077,8 +23077,12 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
|
|
||||||
elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
|
|
||||||
if (clonei->simdlen == 0)
|
|
||||||
{
|
|
||||||
- count = 2;
|
|
||||||
- vec_bits = (num == 0 ? 64 : 128);
|
|
||||||
+ /* Currently mathlib or sleef hasn't provide function for V2SF mode
|
|
||||||
+ simdclone of single precision functions. (e.g._ZCVnN2v_expf)
|
|
||||||
+ Therefore this mode is disabled by default to avoid link error.
|
|
||||||
+ Use -msimdmath-64 option to enable this mode. */
|
|
||||||
+ count = flag_simdmath_64 ? 2 : 1;
|
|
||||||
+ vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128);
|
|
||||||
clonei->simdlen = vec_bits / elt_bits;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
|
||||||
index 1b3d942e0f5..4539156d6f4 100644
|
|
||||||
--- a/gcc/config/aarch64/aarch64.opt
|
|
||||||
+++ b/gcc/config/aarch64/aarch64.opt
|
|
||||||
@@ -190,6 +190,12 @@ precision of square root results to about 16 bits for
|
|
||||||
single precision and to 32 bits for double precision.
|
|
||||||
If enabled, it implies -mlow-precision-recip-sqrt.
|
|
||||||
|
|
||||||
+msimdmath-64
|
|
||||||
+Target Var(flag_simdmath_64) Optimization
|
|
||||||
+Allow compiler to generate V2SF 64 bits simdclone of math functions,
|
|
||||||
+which is not currently supported in mathlib or sleef.
|
|
||||||
+Therefore this option is disabled by default.
|
|
||||||
+
|
|
||||||
mlow-precision-div
|
|
||||||
Target Var(flag_mlow_precision_div) Optimization
|
|
||||||
Enable the division approximation. Enabling this reduces
|
|
||||||
diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c
|
|
||||||
index 6f93508f934..42fd5a8be1e 100644
|
|
||||||
--- a/gcc/fortran/scanner.c
|
|
||||||
+++ b/gcc/fortran/scanner.c
|
|
||||||
@@ -2737,6 +2737,10 @@ gfc_new_file (void)
|
|
||||||
&& !load_file (flag_pre_include, NULL, false))
|
|
||||||
exit (FATAL_EXIT_CODE);
|
|
||||||
|
|
||||||
+ if (flag_simdmath
|
|
||||||
+ && !load_file ("simdmath_f.h", NULL, false))
|
|
||||||
+ exit (FATAL_EXIT_CODE);
|
|
||||||
+
|
|
||||||
if (gfc_cpp_enabled ())
|
|
||||||
{
|
|
||||||
result = gfc_cpp_preprocess (gfc_source_file);
|
|
||||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
|
||||||
index 73162528938..e31aa560564 100644
|
|
||||||
--- a/gcc/opts.c
|
|
||||||
+++ b/gcc/opts.c
|
|
||||||
@@ -189,6 +189,7 @@ static const char use_diagnosed_msg[] = N_("Uses of this option are diagnosed.")
|
|
||||||
|
|
||||||
typedef char *char_p; /* For DEF_VEC_P. */
|
|
||||||
|
|
||||||
+static void set_simdmath_flags (struct gcc_options *opts, int set);
|
|
||||||
static void set_debug_level (enum debug_info_type type, int extended,
|
|
||||||
const char *arg, struct gcc_options *opts,
|
|
||||||
struct gcc_options *opts_set,
|
|
||||||
@@ -2469,6 +2470,10 @@ common_handle_option (struct gcc_options *opts,
|
|
||||||
dc->min_margin_width = value;
|
|
||||||
break;
|
|
||||||
|
|
||||||
+ case OPT_fsimdmath:
|
|
||||||
+ set_simdmath_flags (opts, value);
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
case OPT_fdump_:
|
|
||||||
/* Deferred. */
|
|
||||||
break;
|
|
||||||
@@ -2847,6 +2852,18 @@ common_handle_option (struct gcc_options *opts,
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* The following routines are used to set -fno-math-errno and -fopenmp-simd
|
|
||||||
+ to enable vector mathlib. */
|
|
||||||
+static void
|
|
||||||
+set_simdmath_flags (struct gcc_options *opts, int set)
|
|
||||||
+{
|
|
||||||
+ if (set)
|
|
||||||
+ {
|
|
||||||
+ opts->x_flag_errno_math = 0;
|
|
||||||
+ opts->x_flag_openmp_simd = 1;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Used to set the level of strict aliasing warnings in OPTS,
|
|
||||||
when no level is specified (i.e., when -Wstrict-aliasing, and not
|
|
||||||
-Wstrict-aliasing=level was given).
|
|
||||||
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
|
|
||||||
index 669b9e4defd..0d9cc96481c 100644
|
|
||||||
--- a/libgomp/Makefile.am
|
|
||||||
+++ b/libgomp/Makefile.am
|
|
||||||
@@ -74,10 +74,10 @@ libgomp_la_SOURCES += openacc.f90
|
|
||||||
endif
|
|
||||||
|
|
||||||
nodist_noinst_HEADERS = libgomp_f.h
|
|
||||||
-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h
|
|
||||||
+nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h
|
|
||||||
if USE_FORTRAN
|
|
||||||
nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
|
|
||||||
- openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
|
|
||||||
+ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h
|
|
||||||
endif
|
|
||||||
|
|
||||||
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
|
|
||||||
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
|
|
||||||
index ae5d9d54705..dd4b334895e 100644
|
|
||||||
--- a/libgomp/Makefile.in
|
|
||||||
+++ b/libgomp/Makefile.in
|
|
||||||
@@ -148,7 +148,7 @@ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
|
|
||||||
configure.lineno config.status.lineno
|
|
||||||
mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
|
|
||||||
CONFIG_HEADER = config.h
|
|
||||||
-CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \
|
|
||||||
+CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \
|
|
||||||
libgomp.spec
|
|
||||||
CONFIG_CLEAN_VPATH_FILES =
|
|
||||||
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
|
|
||||||
@@ -609,9 +609,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
|
|
||||||
@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBADD = libgomp.la $(PLUGIN_GCN_LIBS)
|
|
||||||
@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBTOOLFLAGS = --tag=disable-static
|
|
||||||
nodist_noinst_HEADERS = libgomp_f.h
|
|
||||||
-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h
|
|
||||||
+nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h
|
|
||||||
@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
|
|
||||||
-@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
|
|
||||||
+@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h
|
|
||||||
|
|
||||||
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
|
|
||||||
LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
|
|
||||||
@@ -702,6 +702,10 @@ omp.h: $(top_builddir)/config.status $(srcdir)/omp.h.in
|
|
||||||
cd $(top_builddir) && $(SHELL) ./config.status $@
|
|
||||||
omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in
|
|
||||||
cd $(top_builddir) && $(SHELL) ./config.status $@
|
|
||||||
+simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in
|
|
||||||
+ cd $(top_builddir) && $(SHELL) ./config.status $@
|
|
||||||
+simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in
|
|
||||||
+ cd $(top_builddir) && $(SHELL) ./config.status $@
|
|
||||||
omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in
|
|
||||||
cd $(top_builddir) && $(SHELL) ./config.status $@
|
|
||||||
libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in
|
|
||||||
diff --git a/libgomp/configure b/libgomp/configure
|
|
||||||
index 5240f7e9d39..b03036c2738 100644
|
|
||||||
--- a/libgomp/configure
|
|
||||||
+++ b/libgomp/configure
|
|
||||||
@@ -17050,7 +17050,7 @@ fi
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h"
|
|
||||||
+ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h"
|
|
||||||
|
|
||||||
ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec"
|
|
||||||
|
|
||||||
@@ -18205,6 +18205,8 @@ do
|
|
||||||
"libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
|
|
||||||
"omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;;
|
|
||||||
"omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;;
|
|
||||||
+ "simdmath.h") CONFIG_FILES="$CONFIG_FILES simdmath.h" ;;
|
|
||||||
+ "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;;
|
|
||||||
"omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;;
|
|
||||||
"libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;;
|
|
||||||
"Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
|
|
||||||
diff --git a/libgomp/configure.ac b/libgomp/configure.ac
|
|
||||||
index ef5d293c31e..569c2065a66 100644
|
|
||||||
--- a/libgomp/configure.ac
|
|
||||||
+++ b/libgomp/configure.ac
|
|
||||||
@@ -433,7 +433,7 @@ CFLAGS="$save_CFLAGS"
|
|
||||||
# Determine what GCC version number to use in filesystem paths.
|
|
||||||
GCC_BASE_VER
|
|
||||||
|
|
||||||
-AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h)
|
|
||||||
+AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h)
|
|
||||||
AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec)
|
|
||||||
AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in])
|
|
||||||
AC_CONFIG_FILES([testsuite/libgomp-site-extra.exp])
|
|
||||||
diff --git a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..ab91a4ec317
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/libgomp/simdmath.h.in
|
|
||||||
@@ -0,0 +1,40 @@
|
|
||||||
+#ifdef __cplusplus
|
|
||||||
+extern "C" {
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(2) notinbranch
|
|
||||||
+double cos (double x);
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(4) notinbranch
|
|
||||||
+float cosf (float x);
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(2) notinbranch
|
|
||||||
+double sin (double x);
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(4) notinbranch
|
|
||||||
+float sinf (float x);
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(2) notinbranch
|
|
||||||
+double exp (double x);
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(4) notinbranch
|
|
||||||
+float expf (float x);
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(2) notinbranch
|
|
||||||
+double log (double x);
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(4) notinbranch
|
|
||||||
+float logf (float x);
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(2) notinbranch
|
|
||||||
+double pow (double x, double y);
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(4) notinbranch
|
|
||||||
+float powf (float x, float y);
|
|
||||||
+
|
|
||||||
+#pragma omp declare simd simdlen(4) notinbranch
|
|
||||||
+float exp2f (float x);
|
|
||||||
+
|
|
||||||
+#ifdef __cplusplus
|
|
||||||
+} // extern "C"
|
|
||||||
+#endif
|
|
||||||
diff --git a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..550595015db
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/libgomp/simdmath_f.h.in
|
|
||||||
@@ -0,0 +1,11 @@
|
|
||||||
+!GCC$ builtin (cos) attributes simd (notinbranch)
|
|
||||||
+!GCC$ builtin (cosf) attributes simd (notinbranch)
|
|
||||||
+!GCC$ builtin (sin) attributes simd (notinbranch)
|
|
||||||
+!GCC$ builtin (sinf) attributes simd (notinbranch)
|
|
||||||
+!GCC$ builtin (exp) attributes simd (notinbranch)
|
|
||||||
+!GCC$ builtin (expf) attributes simd (notinbranch)
|
|
||||||
+!GCC$ builtin (exp2f) attributes simd (notinbranch)
|
|
||||||
+!GCC$ builtin (log) attributes simd (notinbranch)
|
|
||||||
+!GCC$ builtin (logf) attributes simd (notinbranch)
|
|
||||||
+!GCC$ builtin (pow) attributes simd (notinbranch)
|
|
||||||
+!GCC$ builtin (powf) attributes simd (notinbranch)
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,68 +0,0 @@
|
|||||||
From 07033bcc5b9e4c03846cd84b4587cd493fcf7d53 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhoukaipeng <zhoukaipeng3@huawei.com>
|
|
||||||
Date: Wed, 14 Jul 2021 11:24:06 +0800
|
|
||||||
Subject: [PATCH 07/13] [Vect] Enable skipping vectorization on reduction
|
|
||||||
chains
|
|
||||||
|
|
||||||
Sometimes either vectorization on reduction chains or reductions is
|
|
||||||
possible. But the latter is better. The option "-ftree-vect-analyze
|
|
||||||
-slp-group" skips the former.
|
|
||||||
|
|
||||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
||||||
index 8eb05570418..55d4eb5a351 100644
|
|
||||||
--- a/gcc/common.opt
|
|
||||||
+++ b/gcc/common.opt
|
|
||||||
@@ -2968,6 +2968,10 @@ ftree-slp-vectorize
|
|
||||||
Common Report Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize)
|
|
||||||
Enable basic block vectorization (SLP) on trees.
|
|
||||||
|
|
||||||
+ftree-vect-analyze-slp-group
|
|
||||||
+Common Report Var(flag_tree_slp_group) Init(0)
|
|
||||||
+Disable SLP vectorization for reduction chain on tree.
|
|
||||||
+
|
|
||||||
fvect-cost-model=
|
|
||||||
Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
|
|
||||||
-fvect-cost-model=[unlimited|dynamic|cheap] Specifies the cost model for vectorization.
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..913f1ef28df
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c
|
|
||||||
@@ -0,0 +1,20 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -funsafe-math-optimizations -fno-tree-reassoc -ftree-vect-analyze-slp-group" } */
|
|
||||||
+void f(double *a, double *res, double m) {
|
|
||||||
+ double res1, res0;
|
|
||||||
+ res1 = 0;
|
|
||||||
+ res0 = 0;
|
|
||||||
+ for (int i = 0; i < 1000; i+=8) {
|
|
||||||
+ res0 += a[i] * m;
|
|
||||||
+ res1 += a[i+1] * m;
|
|
||||||
+ res0 += a[i+2] * m;
|
|
||||||
+ res1 += a[i+3] * m;
|
|
||||||
+ res0 += a[i+4] * m;
|
|
||||||
+ res1 += a[i+5] * m;
|
|
||||||
+ res0 += a[i+6] * m;
|
|
||||||
+ res1 += a[i+7] * m;
|
|
||||||
+ }
|
|
||||||
+ res[0] += res0;
|
|
||||||
+ res[1] += res1;
|
|
||||||
+}
|
|
||||||
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
|
|
||||||
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
|
|
||||||
index adc579ff544..476b3237054 100644
|
|
||||||
--- a/gcc/tree-vect-slp.c
|
|
||||||
+++ b/gcc/tree-vect-slp.c
|
|
||||||
@@ -2480,7 +2480,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
|
||||||
{
|
|
||||||
/* Find SLP sequences starting from reduction chains. */
|
|
||||||
FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
|
|
||||||
- if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
|
|
||||||
+ if (flag_tree_slp_group
|
|
||||||
+ || ! vect_analyze_slp_instance (vinfo, bst_map, first_element,
|
|
||||||
max_tree_size))
|
|
||||||
{
|
|
||||||
/* Dissolve reduction chain group. */
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,97 +0,0 @@
|
|||||||
From 79d1ed2d7f166a498662f6111a4defc55f0061c7 Mon Sep 17 00:00:00 2001
|
|
||||||
From: yangyang <yangyang305@huawei.com>
|
|
||||||
Date: Thu, 15 Jul 2021 09:27:27 +0800
|
|
||||||
Subject: [PATCH 08/13] [Backport]tree-optimization: Add checks to avoid
|
|
||||||
spoiling if-conversion
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=33d114f570b4a3583421c700396fd5945acebc28
|
|
||||||
|
|
||||||
Add some checks in pass_split_paths, so that pass_split_paths can recognize
|
|
||||||
the missed if-conversion opportunity and does not duplicate the corresponding
|
|
||||||
block.
|
|
||||||
|
|
||||||
diff --git a/gcc/gimple-ssa-split-paths.c b/gcc/gimple-ssa-split-paths.c
|
|
||||||
index b3efd43c7ef..9c32da76369 100644
|
|
||||||
--- a/gcc/gimple-ssa-split-paths.c
|
|
||||||
+++ b/gcc/gimple-ssa-split-paths.c
|
|
||||||
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "gimple-ssa.h"
|
|
||||||
#include "tree-phinodes.h"
|
|
||||||
#include "ssa-iterators.h"
|
|
||||||
+#include "fold-const.h"
|
|
||||||
|
|
||||||
/* Given LATCH, the latch block in a loop, see if the shape of the
|
|
||||||
path reaching LATCH is suitable for being split by duplication.
|
|
||||||
@@ -254,6 +255,44 @@ is_feasible_trace (basic_block bb)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+ /* Canonicalize the form. */
|
|
||||||
+ if (single_pred_p (pred1) && single_pred (pred1) == pred2
|
|
||||||
+ && num_stmts_in_pred1 == 0)
|
|
||||||
+ std::swap (pred1, pred2);
|
|
||||||
+
|
|
||||||
+ /* This is meant to catch another kind of cases that are likely opportunities
|
|
||||||
+ for if-conversion. After canonicalizing, PRED2 must be an empty block and
|
|
||||||
+ PRED1 must be the only predecessor of PRED2. Moreover, PRED1 is supposed
|
|
||||||
+ to end with a cond_stmt which has the same args with the PHI in BB. */
|
|
||||||
+ if (single_pred_p (pred2) && single_pred (pred2) == pred1
|
|
||||||
+ && num_stmts_in_pred2 == 0)
|
|
||||||
+ {
|
|
||||||
+ gimple *cond_stmt = last_stmt (pred1);
|
|
||||||
+ if (cond_stmt && gimple_code (cond_stmt) == GIMPLE_COND)
|
|
||||||
+ {
|
|
||||||
+ tree lhs = gimple_cond_lhs (cond_stmt);
|
|
||||||
+ tree rhs = gimple_cond_rhs (cond_stmt);
|
|
||||||
+
|
|
||||||
+ gimple_stmt_iterator gsi;
|
|
||||||
+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
||||||
+ {
|
|
||||||
+ gimple *phi = gsi_stmt (gsi);
|
|
||||||
+ if ((operand_equal_p (gimple_phi_arg_def (phi, 0), lhs)
|
|
||||||
+ && operand_equal_p (gimple_phi_arg_def (phi, 1), rhs))
|
|
||||||
+ || (operand_equal_p (gimple_phi_arg_def (phi, 0), rhs)
|
|
||||||
+ && (operand_equal_p (gimple_phi_arg_def (phi, 1), lhs))))
|
|
||||||
+ {
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ fprintf (dump_file,
|
|
||||||
+ "Block %d appears to be optimized to a join "
|
|
||||||
+ "point for if-convertable half-diamond.\n",
|
|
||||||
+ bb->index);
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
/* If the joiner has no PHIs with useful uses there is zero chance
|
|
||||||
of CSE/DCE/jump-threading possibilities exposed by duplicating it. */
|
|
||||||
bool found_useful_phi = false;
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..19a130d9bf1
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c
|
|
||||||
@@ -0,0 +1,19 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fsplit-paths -fdump-tree-split-paths-details " } */
|
|
||||||
+
|
|
||||||
+double
|
|
||||||
+foo(double *d1, double *d2, double *d3, int num, double *ip)
|
|
||||||
+{
|
|
||||||
+ double dmax[3];
|
|
||||||
+
|
|
||||||
+ for (int i = 0; i < num; i++) {
|
|
||||||
+ dmax[0] = d1[i] < dmax[0] ? dmax[0] : d1[i];
|
|
||||||
+ dmax[1] = d2[i] < dmax[1] ? dmax[1] : d2[i];
|
|
||||||
+ dmax[2] = d3[i] < dmax[2] ? dmax[2] : d3[i];
|
|
||||||
+ ip[i] = dmax[2];
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return dmax[0] + dmax[1] + dmax[2];
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump "appears to be optimized to a join point for if-convertable half-diamond" "split-paths" } } */
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,141 +0,0 @@
|
|||||||
From 7bc78d0ab13c37e2b11adb385d9916181ec4cc20 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhanghaijian <z.zhanghaijian@huawei.com>
|
|
||||||
Date: Thu, 15 Jul 2021 09:04:55 +0800
|
|
||||||
Subject: [PATCH 09/13] [Backport]expand: Simplify removing subregs when
|
|
||||||
expanding a copy [PR95254]
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9a182ef9ee011935d827ab5c6c9a7cd8e22257d8
|
|
||||||
|
|
||||||
In rtl expand, if we have a copy that matches one of the following patterns:
|
|
||||||
(set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
|
|
||||||
(set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
|
|
||||||
(set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
|
|
||||||
(set (subreg:M1 (reg:M2 ...)) (constant C))
|
|
||||||
where mode M1 is equal in size to M2, try to detect whether the mode change
|
|
||||||
involves an implicit round trip through memory. If so, see if we can avoid
|
|
||||||
that by removing the subregs and doing the move in mode M2 instead.
|
|
||||||
|
|
||||||
diff --git a/gcc/expr.c b/gcc/expr.c
|
|
||||||
index 991b26f3341..d66fdd4e93d 100644
|
|
||||||
--- a/gcc/expr.c
|
|
||||||
+++ b/gcc/expr.c
|
|
||||||
@@ -3814,6 +3814,78 @@ emit_move_insn (rtx x, rtx y)
|
|
||||||
gcc_assert (mode != BLKmode
|
|
||||||
&& (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
|
|
||||||
|
|
||||||
+ /* If we have a copy that looks like one of the following patterns:
|
|
||||||
+ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
|
|
||||||
+ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
|
|
||||||
+ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
|
|
||||||
+ (set (subreg:M1 (reg:M2 ...)) (constant C))
|
|
||||||
+ where mode M1 is equal in size to M2, try to detect whether the
|
|
||||||
+ mode change involves an implicit round trip through memory.
|
|
||||||
+ If so, see if we can avoid that by removing the subregs and
|
|
||||||
+ doing the move in mode M2 instead. */
|
|
||||||
+
|
|
||||||
+ rtx x_inner = NULL_RTX;
|
|
||||||
+ rtx y_inner = NULL_RTX;
|
|
||||||
+
|
|
||||||
+#define CANDIDATE_SUBREG_P(subreg) \
|
|
||||||
+ (REG_P (SUBREG_REG (subreg)) \
|
|
||||||
+ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \
|
|
||||||
+ GET_MODE_SIZE (GET_MODE (subreg))) \
|
|
||||||
+ && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \
|
|
||||||
+ != CODE_FOR_nothing)
|
|
||||||
+
|
|
||||||
+#define CANDIDATE_MEM_P(innermode, mem) \
|
|
||||||
+ (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \
|
|
||||||
+ && !push_operand ((mem), GET_MODE (mem)) \
|
|
||||||
+ /* Not a candidate if innermode requires too much alignment. */ \
|
|
||||||
+ && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) \
|
|
||||||
+ || targetm.slow_unaligned_access (GET_MODE (mem), \
|
|
||||||
+ MEM_ALIGN (mem)) \
|
|
||||||
+ || !targetm.slow_unaligned_access ((innermode), \
|
|
||||||
+ MEM_ALIGN (mem))))
|
|
||||||
+
|
|
||||||
+ if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x))
|
|
||||||
+ x_inner = SUBREG_REG (x);
|
|
||||||
+
|
|
||||||
+ if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y))
|
|
||||||
+ y_inner = SUBREG_REG (y);
|
|
||||||
+
|
|
||||||
+ if (x_inner != NULL_RTX
|
|
||||||
+ && y_inner != NULL_RTX
|
|
||||||
+ && GET_MODE (x_inner) == GET_MODE (y_inner)
|
|
||||||
+ && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS))
|
|
||||||
+ {
|
|
||||||
+ x = x_inner;
|
|
||||||
+ y = y_inner;
|
|
||||||
+ mode = GET_MODE (x_inner);
|
|
||||||
+ }
|
|
||||||
+ else if (x_inner != NULL_RTX
|
|
||||||
+ && MEM_P (y)
|
|
||||||
+ && CANDIDATE_MEM_P (GET_MODE (x_inner), y))
|
|
||||||
+ {
|
|
||||||
+ x = x_inner;
|
|
||||||
+ y = adjust_address (y, GET_MODE (x_inner), 0);
|
|
||||||
+ mode = GET_MODE (x_inner);
|
|
||||||
+ }
|
|
||||||
+ else if (y_inner != NULL_RTX
|
|
||||||
+ && MEM_P (x)
|
|
||||||
+ && CANDIDATE_MEM_P (GET_MODE (y_inner), x))
|
|
||||||
+ {
|
|
||||||
+ x = adjust_address (x, GET_MODE (y_inner), 0);
|
|
||||||
+ y = y_inner;
|
|
||||||
+ mode = GET_MODE (y_inner);
|
|
||||||
+ }
|
|
||||||
+ else if (x_inner != NULL_RTX
|
|
||||||
+ && CONSTANT_P (y)
|
|
||||||
+ && !targetm.can_change_mode_class (GET_MODE (x_inner),
|
|
||||||
+ mode, ALL_REGS)
|
|
||||||
+ && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0)))
|
|
||||||
+ {
|
|
||||||
+ x = x_inner;
|
|
||||||
+ y = y_inner;
|
|
||||||
+ mode = GET_MODE (x_inner);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
if (CONSTANT_P (y))
|
|
||||||
{
|
|
||||||
if (optimize
|
|
||||||
diff --git a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..10bfc868197
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c
|
|
||||||
@@ -0,0 +1,19 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */
|
|
||||||
+
|
|
||||||
+typedef short __attribute__((vector_size (8))) v4hi;
|
|
||||||
+
|
|
||||||
+typedef union U4HI { v4hi v; short a[4]; } u4hi;
|
|
||||||
+
|
|
||||||
+short b[4];
|
|
||||||
+
|
|
||||||
+void pass_v4hi (v4hi v)
|
|
||||||
+{
|
|
||||||
+ int i;
|
|
||||||
+ u4hi u;
|
|
||||||
+ u.v = v;
|
|
||||||
+ for (i = 0; i < 4; i++)
|
|
||||||
+ b[i] = u.a[i];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-assembler-not "ptrue" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c
|
|
||||||
index 518071bdd86..398cdba5d5f 100644
|
|
||||||
--- a/gcc/testsuite/gcc.target/i386/pr67609.c
|
|
||||||
+++ b/gcc/testsuite/gcc.target/i386/pr67609.c
|
|
||||||
@@ -1,7 +1,7 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
/* { dg-options "-O2 -msse2" } */
|
|
||||||
/* { dg-require-effective-target lp64 } */
|
|
||||||
-/* { dg-final { scan-assembler "movdqa" } } */
|
|
||||||
+/* { dg-final { scan-assembler "movq\t%xmm0" } } */
|
|
||||||
|
|
||||||
#include <emmintrin.h>
|
|
||||||
__m128d reg;
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,98 +0,0 @@
|
|||||||
From b8b3e29e4cceae2bab6e0774b1af994dbe713d97 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhanghaijian <z.zhanghaijian@huawei.com>
|
|
||||||
Date: Thu, 15 Jul 2021 09:13:11 +0800
|
|
||||||
Subject: [PATCH 10/13] [Backport]tree-optimization/94963 - avoid bogus uninit
|
|
||||||
warning with store-motion
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=371905d12259c180efb9b1f1b5716e969feb60f9
|
|
||||||
|
|
||||||
Eliding the load for store-motion causes an uninitialized variable
|
|
||||||
flowing into the loop, conditionally initialized and used. The
|
|
||||||
uninit warning cannot relate the flag used to guard the initialization
|
|
||||||
and use with the actual initialization so the following robustifies
|
|
||||||
the previous approach of marking the conditional store as not to
|
|
||||||
be warned on by instead initializing the variable on loop entry
|
|
||||||
from an uninitialized variable we mark as not to be warned for.
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/pr94963.c b/gcc/testsuite/gcc.dg/pr94963.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..09c0524fb3a
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/pr94963.c
|
|
||||||
@@ -0,0 +1,35 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -Wall" } */
|
|
||||||
+
|
|
||||||
+typedef struct
|
|
||||||
+{
|
|
||||||
+ int p1;
|
|
||||||
+ int p2;
|
|
||||||
+ int p3;
|
|
||||||
+} P;
|
|
||||||
+struct S
|
|
||||||
+{
|
|
||||||
+ int field;
|
|
||||||
+};
|
|
||||||
+extern int v2;
|
|
||||||
+extern void foo (struct S *map);
|
|
||||||
+static struct S var;
|
|
||||||
+const P *pv;
|
|
||||||
+int ps;
|
|
||||||
+void
|
|
||||||
+f (void)
|
|
||||||
+{
|
|
||||||
+ if (pv != 0)
|
|
||||||
+ for (const P *ph = pv; ph < &pv[ps]; ++ph)
|
|
||||||
+ switch (ph->p1)
|
|
||||||
+ {
|
|
||||||
+ case 1:
|
|
||||||
+ v2 = ph->p2;
|
|
||||||
+ break;
|
|
||||||
+ case 2:
|
|
||||||
+ var.field = ph->p3;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ if (var.field != 0) /* { dg-bogus "uninitialized" } */
|
|
||||||
+ foo (&var);
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
|
|
||||||
index 8c33735b1fa..d74a46ef352 100644
|
|
||||||
--- a/gcc/tree-ssa-loop-im.c
|
|
||||||
+++ b/gcc/tree-ssa-loop-im.c
|
|
||||||
@@ -1994,8 +1994,6 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
|
|
||||||
gsi = gsi_start_bb (then_bb);
|
|
||||||
/* Insert actual store. */
|
|
||||||
stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
|
|
||||||
- /* Make sure to not warn about maybe-uninit uses of tmp_var here. */
|
|
||||||
- gimple_set_no_warning (stmt, true);
|
|
||||||
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
|
|
||||||
|
|
||||||
edge e1 = single_succ_edge (new_bb);
|
|
||||||
@@ -2149,13 +2147,19 @@ execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
|
|
||||||
store then. */
|
|
||||||
if ((!always_stored && !multi_threaded_model_p)
|
|
||||||
|| (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
|
|
||||||
+ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
|
|
||||||
+ else
|
|
||||||
{
|
|
||||||
- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
|
|
||||||
- lim_data = init_lim_data (load);
|
|
||||||
- lim_data->max_loop = loop;
|
|
||||||
- lim_data->tgt_loop = loop;
|
|
||||||
- gsi_insert_before (&gsi, load, GSI_SAME_STMT);
|
|
||||||
+ /* If not emitting a load mark the uninitialized state on the
|
|
||||||
+ loop entry as not to be warned for. */
|
|
||||||
+ tree uninit = create_tmp_reg (TREE_TYPE (tmp_var));
|
|
||||||
+ TREE_NO_WARNING (uninit) = 1;
|
|
||||||
+ load = gimple_build_assign (tmp_var, uninit);
|
|
||||||
}
|
|
||||||
+ lim_data = init_lim_data (load);
|
|
||||||
+ lim_data->max_loop = loop;
|
|
||||||
+ lim_data->tgt_loop = loop;
|
|
||||||
+ gsi_insert_before (&gsi, load, GSI_SAME_STMT);
|
|
||||||
|
|
||||||
if (multi_threaded_model_p)
|
|
||||||
{
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,23 +0,0 @@
|
|||||||
From 78cf3b95d7b895cfe8d6f1c2a48ebc08a662eef0 Mon Sep 17 00:00:00 2001
|
|
||||||
From: bule <bule1@huawei.com>
|
|
||||||
Date: Sat, 17 Jul 2021 16:38:10 +0800
|
|
||||||
Subject: [PATCH 11/13] [simdmath] Enable 64-bits simd when test
|
|
||||||
simd_pcs_attribute-3
|
|
||||||
|
|
||||||
Enable 64-bit SIMD when testing simd_pcs_attribute-3. 64-bit SIMD is
|
|
||||||
off by default unless -msimdmath-64 is specified.
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
|
|
||||||
index 95f6a6803e8..e0e0efa9d7e 100644
|
|
||||||
--- a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
|
|
||||||
+++ b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-Ofast" } */
|
|
||||||
+/* { dg-options "-Ofast -msimdmath-64" } */
|
|
||||||
|
|
||||||
__attribute__ ((__simd__))
|
|
||||||
__attribute__ ((__nothrow__ , __leaf__ , __const__))
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,397 +0,0 @@
|
|||||||
From 26ea42402eede6a441c9d74ec6b6086e5bf0bf79 Mon Sep 17 00:00:00 2001
|
|
||||||
From: bule <bule1@huawei.com>
|
|
||||||
Date: Mon, 19 Jul 2021 12:04:08 +0800
|
|
||||||
Subject: [PATCH 12/13] [fp-model] Enable fp-model on kunpeng
|
|
||||||
|
|
||||||
Enable fp-model options on kunpeng for precision control.
|
|
||||||
|
|
||||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
||||||
index 55d4eb5a351..79c9ef6615b 100644
|
|
||||||
--- a/gcc/common.opt
|
|
||||||
+++ b/gcc/common.opt
|
|
||||||
@@ -1545,6 +1545,32 @@ ffp-int-builtin-inexact
|
|
||||||
Common Report Var(flag_fp_int_builtin_inexact) Init(1) Optimization
|
|
||||||
Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions.
|
|
||||||
|
|
||||||
+fftz
|
|
||||||
+Common Report Var(flag_ftz) Optimization
|
|
||||||
+Control fpcr register for flush to zero.
|
|
||||||
+
|
|
||||||
+fp-model=
|
|
||||||
+Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization
|
|
||||||
+-fp-model=[normal|fast|precise|except|strict] Perform floating-point precision control.
|
|
||||||
+
|
|
||||||
+Enum
|
|
||||||
+Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs)
|
|
||||||
+
|
|
||||||
+EnumValue
|
|
||||||
+Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL)
|
|
||||||
+
|
|
||||||
+EnumValue
|
|
||||||
+Enum(fp_model) String(fast) Value(FP_MODEL_FAST)
|
|
||||||
+
|
|
||||||
+EnumValue
|
|
||||||
+Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE)
|
|
||||||
+
|
|
||||||
+EnumValue
|
|
||||||
+Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT)
|
|
||||||
+
|
|
||||||
+EnumValue
|
|
||||||
+Enum(fp_model) String(strict) Value(FP_MODEL_STRICT)
|
|
||||||
+
|
|
||||||
; Nonzero means don't put addresses of constant functions in registers.
|
|
||||||
; Used for compiling the Unix kernel, where strange substitutions are
|
|
||||||
; done on the assembly output.
|
|
||||||
diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h
|
|
||||||
index e587e2e9ad6..331b12c8702 100644
|
|
||||||
--- a/gcc/config/aarch64/aarch64-linux.h
|
|
||||||
+++ b/gcc/config/aarch64/aarch64-linux.h
|
|
||||||
@@ -50,7 +50,8 @@
|
|
||||||
#define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC
|
|
||||||
|
|
||||||
#define GNU_USER_TARGET_MATHFILE_SPEC \
|
|
||||||
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
|
|
||||||
+ "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\
|
|
||||||
+ %{!fno-ftz:crtfastmath.o%s}}"
|
|
||||||
|
|
||||||
#undef ENDFILE_SPEC
|
|
||||||
#define ENDFILE_SPEC \
|
|
||||||
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
|
|
||||||
index 852ea76eaa2..5832298251e 100644
|
|
||||||
--- a/gcc/flag-types.h
|
|
||||||
+++ b/gcc/flag-types.h
|
|
||||||
@@ -223,6 +223,15 @@ enum fp_contract_mode {
|
|
||||||
FP_CONTRACT_FAST = 2
|
|
||||||
};
|
|
||||||
|
|
||||||
+/* Floating-point precision mode. */
|
|
||||||
+enum fp_model {
|
|
||||||
+ FP_MODEL_NORMAL = 0,
|
|
||||||
+ FP_MODEL_FAST = 1,
|
|
||||||
+ FP_MODEL_PRECISE = 2,
|
|
||||||
+ FP_MODEL_EXCEPT = 3,
|
|
||||||
+ FP_MODEL_STRICT = 4
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
/* Scalar storage order kind. */
|
|
||||||
enum scalar_storage_order_kind {
|
|
||||||
SSO_NATIVE = 0,
|
|
||||||
diff --git a/gcc/fortran/options.c b/gcc/fortran/options.c
|
|
||||||
index 4cc8a908417..c59dcf63781 100644
|
|
||||||
--- a/gcc/fortran/options.c
|
|
||||||
+++ b/gcc/fortran/options.c
|
|
||||||
@@ -250,6 +250,7 @@ form_from_filename (const char *filename)
|
|
||||||
return f_form;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static void gfc_handle_fpe_option (const char *arg, bool trap);
|
|
||||||
|
|
||||||
/* Finalize commandline options. */
|
|
||||||
|
|
||||||
@@ -277,6 +278,13 @@ gfc_post_options (const char **pfilename)
|
|
||||||
if (flag_protect_parens == -1)
|
|
||||||
flag_protect_parens = !optimize_fast;
|
|
||||||
|
|
||||||
+ /* If fp-model=except/strict, turn on ffpe-trap and all of ffpe-summary. */
|
|
||||||
+ if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT)
|
|
||||||
+ {
|
|
||||||
+ gfc_handle_fpe_option ("all", false);
|
|
||||||
+ gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
/* -Ofast sets implies -fstack-arrays unless an explicit size is set for
|
|
||||||
stack arrays. */
|
|
||||||
if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2)
|
|
||||||
diff --git a/gcc/opts-common.c b/gcc/opts-common.c
|
|
||||||
index de9510abd64..bf82b05c8a2 100644
|
|
||||||
--- a/gcc/opts-common.c
|
|
||||||
+++ b/gcc/opts-common.c
|
|
||||||
@@ -26,7 +26,8 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "diagnostic.h"
|
|
||||||
#include "spellcheck.h"
|
|
||||||
|
|
||||||
-static void prune_options (struct cl_decoded_option **, unsigned int *);
|
|
||||||
+static void prune_options (struct cl_decoded_option **, unsigned int *,
|
|
||||||
+ unsigned int);
|
|
||||||
|
|
||||||
/* An option that is undocumented, that takes a joined argument, and
|
|
||||||
that doesn't fit any of the classes of uses (language/common,
|
|
||||||
@@ -988,7 +989,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv,
|
|
||||||
|
|
||||||
*decoded_options = opt_array;
|
|
||||||
*decoded_options_count = num_decoded_options;
|
|
||||||
- prune_options (decoded_options, decoded_options_count);
|
|
||||||
+ prune_options (decoded_options, decoded_options_count, lang_mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the
|
|
||||||
@@ -1009,11 +1010,109 @@ cancel_option (int opt_idx, int next_opt_idx, int orig_next_opt_idx)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Check whether opt_idx exists in decoded_options array between index
|
|
||||||
+ start and end. If found, return its index in decoded_options,
|
|
||||||
+ else return end. */
|
|
||||||
+static unsigned int
|
|
||||||
+find_opt_idx (const struct cl_decoded_option *decoded_options,
|
|
||||||
+ unsigned int decoded_options_count,
|
|
||||||
+ unsigned int start, unsigned int end, unsigned int opt_idx)
|
|
||||||
+{
|
|
||||||
+ gcc_assert (end <= decoded_options_count);
|
|
||||||
+ gcc_assert (opt_idx < cl_options_count);
|
|
||||||
+ unsigned int k;
|
|
||||||
+ for (k = start; k < end; k++)
|
|
||||||
+ {
|
|
||||||
+ if (decoded_options[k].opt_index == opt_idx)
|
|
||||||
+ {
|
|
||||||
+ return k;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return k;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Remove the element at position opt_index from decoded_options array. */
|
|
||||||
+static unsigned int
|
|
||||||
+remove_option (struct cl_decoded_option *decoded_options,
|
|
||||||
+ unsigned int decoded_options_count,
|
|
||||||
+ unsigned int opt_index)
|
|
||||||
+{
|
|
||||||
+ gcc_assert (opt_index < decoded_options_count);
|
|
||||||
+ unsigned int i;
|
|
||||||
+ for (i = opt_index; i < decoded_options_count - 1; i++)
|
|
||||||
+ {
|
|
||||||
+ decoded_options[i] = decoded_options[i + 1];
|
|
||||||
+ }
|
|
||||||
+ return decoded_options_count - 1;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Handle the priority between fp-model, Ofast, and
|
|
||||||
+ ffast-math. */
|
|
||||||
+static unsigned int
|
|
||||||
+handle_fp_model_driver (struct cl_decoded_option *decoded_options,
|
|
||||||
+ unsigned int decoded_options_count,
|
|
||||||
+ unsigned int fp_model_index,
|
|
||||||
+ unsigned int lang_mask)
|
|
||||||
+{
|
|
||||||
+ struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index];
|
|
||||||
+ enum fp_model model = (enum fp_model) fp_model_opt.value;
|
|
||||||
+ if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT)
|
|
||||||
+ {
|
|
||||||
+ /* If found Ofast, override Ofast with O3. */
|
|
||||||
+ unsigned int Ofast_index;
|
|
||||||
+ Ofast_index = find_opt_idx (decoded_options, decoded_options_count,
|
|
||||||
+ 0, decoded_options_count, OPT_Ofast);
|
|
||||||
+ while (Ofast_index != decoded_options_count)
|
|
||||||
+ {
|
|
||||||
+ const char *tmp_argv = "-O3";
|
|
||||||
+ decode_cmdline_option (&tmp_argv, lang_mask,
|
|
||||||
+ &decoded_options[Ofast_index]);
|
|
||||||
+ warning (0, "%<-Ofast%> is degraded to %<-O3%> due to %qs",
|
|
||||||
+ fp_model_opt.orig_option_with_args_text);
|
|
||||||
+ Ofast_index = find_opt_idx (decoded_options, decoded_options_count,
|
|
||||||
+ 0, decoded_options_count, OPT_Ofast);
|
|
||||||
+ }
|
|
||||||
+ /* If -ffast-math is found before fp-model=precise/strict,
|
|
||||||
+ cancel it. */
|
|
||||||
+ unsigned int ffast_math_index;
|
|
||||||
+ ffast_math_index
|
|
||||||
+ = find_opt_idx (decoded_options, decoded_options_count, 0,
|
|
||||||
+ fp_model_index, OPT_ffast_math);
|
|
||||||
+ if (ffast_math_index != fp_model_index)
|
|
||||||
+ {
|
|
||||||
+ decoded_options_count
|
|
||||||
+ = remove_option (decoded_options, decoded_options_count,
|
|
||||||
+ ffast_math_index);
|
|
||||||
+ warning (0, "%<-ffast-math%> before %qs is canceled",
|
|
||||||
+ fp_model_opt.orig_option_with_args_text);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ if (model == FP_MODEL_FAST)
|
|
||||||
+ {
|
|
||||||
+ /* If found -fno-fast-math after fp-model=fast, cancel fp-model=fast. */
|
|
||||||
+ unsigned int fno_fast_math_index;
|
|
||||||
+ fno_fast_math_index
|
|
||||||
+ = find_opt_idx (decoded_options, decoded_options_count, fp_model_index,
|
|
||||||
+ decoded_options_count, OPT_ffast_math);
|
|
||||||
+ if (fno_fast_math_index != decoded_options_count
|
|
||||||
+ && decoded_options[fno_fast_math_index].value == 0)
|
|
||||||
+ {
|
|
||||||
+ decoded_options_count
|
|
||||||
+ = remove_option (decoded_options, decoded_options_count,
|
|
||||||
+ fp_model_index);
|
|
||||||
+ warning (0,
|
|
||||||
+ "%<-fp-model=fast%> before %<-fno-fast-math%> is canceled");
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return decoded_options_count;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Filter out options canceled by the ones after them. */
|
|
||||||
|
|
||||||
static void
|
|
||||||
prune_options (struct cl_decoded_option **decoded_options,
|
|
||||||
- unsigned int *decoded_options_count)
|
|
||||||
+ unsigned int *decoded_options_count,
|
|
||||||
+ unsigned int lang_mask)
|
|
||||||
{
|
|
||||||
unsigned int old_decoded_options_count = *decoded_options_count;
|
|
||||||
struct cl_decoded_option *old_decoded_options = *decoded_options;
|
|
||||||
@@ -1024,7 +1123,12 @@ prune_options (struct cl_decoded_option **decoded_options,
|
|
||||||
const struct cl_option *option;
|
|
||||||
unsigned int fdiagnostics_color_idx = 0;
|
|
||||||
|
|
||||||
+ if (!diagnostic_ready_p ())
|
|
||||||
+ diagnostic_initialize (global_dc, 0);
|
|
||||||
+
|
|
||||||
/* Remove arguments which are negated by others after them. */
|
|
||||||
+
|
|
||||||
+ unsigned int fp_model_index = old_decoded_options_count;
|
|
||||||
new_decoded_options_count = 0;
|
|
||||||
for (i = 0; i < old_decoded_options_count; i++)
|
|
||||||
{
|
|
||||||
@@ -1048,6 +1152,34 @@ prune_options (struct cl_decoded_option **decoded_options,
|
|
||||||
fdiagnostics_color_idx = i;
|
|
||||||
continue;
|
|
||||||
|
|
||||||
+ case OPT_fp_model_:
|
|
||||||
+ /* Only the last fp-model option will take effect. */
|
|
||||||
+ unsigned int next_fp_model_idx;
|
|
||||||
+ next_fp_model_idx = find_opt_idx (old_decoded_options,
|
|
||||||
+ old_decoded_options_count,
|
|
||||||
+ i + 1,
|
|
||||||
+ old_decoded_options_count,
|
|
||||||
+ OPT_fp_model_);
|
|
||||||
+ if (next_fp_model_idx != old_decoded_options_count)
|
|
||||||
+ {
|
|
||||||
+ /* Found more than one fp-model, cancel this one. */
|
|
||||||
+ if (old_decoded_options[i].value
|
|
||||||
+ != old_decoded_options[next_fp_model_idx].value)
|
|
||||||
+ {
|
|
||||||
+ warning (0, "%qs is overrided by %qs",
|
|
||||||
+ old_decoded_options[i].
|
|
||||||
+ orig_option_with_args_text,
|
|
||||||
+ old_decoded_options[next_fp_model_idx].
|
|
||||||
+ orig_option_with_args_text);
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ /* Found the last fp-model option. */
|
|
||||||
+ fp_model_index = new_decoded_options_count;
|
|
||||||
+ }
|
|
||||||
+ /* FALLTHRU. */
|
|
||||||
default:
|
|
||||||
gcc_assert (opt_idx < cl_options_count);
|
|
||||||
option = &cl_options[opt_idx];
|
|
||||||
@@ -1087,6 +1219,14 @@ keep:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
+ if (fp_model_index < new_decoded_options_count)
|
|
||||||
+ {
|
|
||||||
+ new_decoded_options_count
|
|
||||||
+ = handle_fp_model_driver (new_decoded_options,
|
|
||||||
+ new_decoded_options_count,
|
|
||||||
+ fp_model_index,
|
|
||||||
+ lang_mask);
|
|
||||||
+ }
|
|
||||||
|
|
||||||
if (fdiagnostics_color_idx >= 1)
|
|
||||||
{
|
|
||||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
|
||||||
index e31aa560564..6924a973a5b 100644
|
|
||||||
--- a/gcc/opts.c
|
|
||||||
+++ b/gcc/opts.c
|
|
||||||
@@ -195,6 +195,7 @@ static void set_debug_level (enum debug_info_type type, int extended,
|
|
||||||
struct gcc_options *opts_set,
|
|
||||||
location_t loc);
|
|
||||||
static void set_fast_math_flags (struct gcc_options *opts, int set);
|
|
||||||
+static void set_fp_model_flags (struct gcc_options *opts, int set);
|
|
||||||
static void decode_d_option (const char *arg, struct gcc_options *opts,
|
|
||||||
location_t loc, diagnostic_context *dc);
|
|
||||||
static void set_unsafe_math_optimizations_flags (struct gcc_options *opts,
|
|
||||||
@@ -2482,6 +2483,10 @@ common_handle_option (struct gcc_options *opts,
|
|
||||||
set_fast_math_flags (opts, value);
|
|
||||||
break;
|
|
||||||
|
|
||||||
+ case OPT_fp_model_:
|
|
||||||
+ set_fp_model_flags (opts, value);
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
case OPT_funsafe_math_optimizations:
|
|
||||||
set_unsafe_math_optimizations_flags (opts, value);
|
|
||||||
break;
|
|
||||||
@@ -2908,6 +2913,69 @@ set_fast_math_flags (struct gcc_options *opts, int set)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Handle fp-model options. */
|
|
||||||
+static void
|
|
||||||
+set_fp_model_flags (struct gcc_options *opts, int set)
|
|
||||||
+{
|
|
||||||
+ enum fp_model model = (enum fp_model) set;
|
|
||||||
+ switch (model)
|
|
||||||
+ {
|
|
||||||
+ case FP_MODEL_FAST:
|
|
||||||
+ /* Equivalent to enabling -ffast-math. */
|
|
||||||
+ set_fast_math_flags (opts, 1);
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case FP_MODEL_PRECISE:
|
|
||||||
+ /* Equivalent to disabling -ffast-math. */
|
|
||||||
+ set_fast_math_flags (opts, 0);
|
|
||||||
+ /* Turn on -frounding-math -fsignaling-nans. */
|
|
||||||
+ if (!opts->frontend_set_flag_signaling_nans)
|
|
||||||
+ opts->x_flag_signaling_nans = 1;
|
|
||||||
+ if (!opts->frontend_set_flag_rounding_math)
|
|
||||||
+ opts->x_flag_rounding_math = 1;
|
|
||||||
+ opts->x_flag_expensive_optimizations = 0;
|
|
||||||
+ opts->x_flag_code_hoisting = 0;
|
|
||||||
+ opts->x_flag_predictive_commoning = 0;
|
|
||||||
+ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF;
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case FP_MODEL_EXCEPT:
|
|
||||||
+ if (!opts->frontend_set_flag_signaling_nans)
|
|
||||||
+ opts->x_flag_signaling_nans = 1;
|
|
||||||
+ if (!opts->frontend_set_flag_errno_math)
|
|
||||||
+ opts->x_flag_errno_math = 1;
|
|
||||||
+ if (!opts->frontend_set_flag_trapping_math)
|
|
||||||
+ opts->x_flag_trapping_math = 1;
|
|
||||||
+ opts->x_flag_fp_int_builtin_inexact = 1;
|
|
||||||
+ /* Also turn on ffpe-trap in fortran. */
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case FP_MODEL_STRICT:
|
|
||||||
+ /* Turn on both precise and except. */
|
|
||||||
+ if (!opts->frontend_set_flag_signaling_nans)
|
|
||||||
+ opts->x_flag_signaling_nans = 1;
|
|
||||||
+ if (!opts->frontend_set_flag_rounding_math)
|
|
||||||
+ opts->x_flag_rounding_math = 1;
|
|
||||||
+ opts->x_flag_expensive_optimizations = 0;
|
|
||||||
+ opts->x_flag_code_hoisting = 0;
|
|
||||||
+ opts->x_flag_predictive_commoning = 0;
|
|
||||||
+ if (!opts->frontend_set_flag_errno_math)
|
|
||||||
+ opts->x_flag_errno_math = 1;
|
|
||||||
+ if (!opts->frontend_set_flag_trapping_math)
|
|
||||||
+ opts->x_flag_trapping_math = 1;
|
|
||||||
+ opts->x_flag_fp_int_builtin_inexact = 1;
|
|
||||||
+ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF;
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case FP_MODEL_NORMAL:
|
|
||||||
+ /* Do nothing. */
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ default:
|
|
||||||
+ gcc_unreachable ();
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* When -funsafe-math-optimizations is set the following
|
|
||||||
flags are set as well. */
|
|
||||||
static void
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,499 +0,0 @@
|
|||||||
From 0d14a2b7a3defc82ed16c99a18c2bc2e6be9f5b1 Mon Sep 17 00:00:00 2001
|
|
||||||
From: xiezhiheng <xiezhiheng@huawei.com>
|
|
||||||
Date: Fri, 16 Jul 2021 23:21:38 -0400
|
|
||||||
Subject: [PATCH 13/13] [LoopElim] Redundant loop elimination optimization
|
|
||||||
|
|
||||||
Introduce redundant loop elimination optimization controlled
|
|
||||||
by -floop-elim. It is often used together with -ffinite-loops.
|
|
||||||
|
|
||||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
||||||
index 79c9ef6615b..b2b0aac7fdf 100644
|
|
||||||
--- a/gcc/common.opt
|
|
||||||
+++ b/gcc/common.opt
|
|
||||||
@@ -1169,6 +1169,10 @@ fcompare-elim
|
|
||||||
Common Report Var(flag_compare_elim_after_reload) Optimization
|
|
||||||
Perform comparison elimination after register allocation has finished.
|
|
||||||
|
|
||||||
+floop-elim
|
|
||||||
+Common Report Var(flag_loop_elim) Init(0) Optimization
|
|
||||||
+Perform redundant loop elimination.
|
|
||||||
+
|
|
||||||
fconserve-stack
|
|
||||||
Common Var(flag_conserve_stack) Optimization
|
|
||||||
Do not perform optimizations increasing noticeably stack usage.
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 3b5b6907679..591b6435f78 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -69,6 +69,7 @@ static hash_set<tree> * get_non_trapping ();
|
|
||||||
static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
|
|
||||||
static void hoist_adjacent_loads (basic_block, basic_block,
|
|
||||||
basic_block, basic_block);
|
|
||||||
+static bool do_phiopt_pattern (basic_block, basic_block, basic_block);
|
|
||||||
static bool gate_hoist_loads (void);
|
|
||||||
|
|
||||||
/* This pass tries to transform conditional stores into unconditional
|
|
||||||
@@ -257,6 +258,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
hoist_adjacent_loads (bb, bb1, bb2, bb3);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
+ else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2))
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
else
|
|
||||||
continue;
|
|
||||||
|
|
||||||
@@ -2819,6 +2824,449 @@ hoist_adjacent_loads (basic_block bb0, basic_block bb1,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+static bool check_uses (tree, hash_set<tree> *);
|
|
||||||
+
|
|
||||||
+/* Check SSA_NAME is used in
|
|
||||||
+ if (SSA_NAME == 0)
|
|
||||||
+ ...
|
|
||||||
+ or
|
|
||||||
+ if (SSA_NAME != 0)
|
|
||||||
+ ...
|
|
||||||
+*/
|
|
||||||
+static bool
|
|
||||||
+check_uses_cond (const_tree ssa_name, gimple *stmt,
|
|
||||||
+ hash_set<tree> *hset ATTRIBUTE_UNUSED)
|
|
||||||
+{
|
|
||||||
+ tree_code code = gimple_cond_code (stmt);
|
|
||||||
+ if (code != EQ_EXPR && code != NE_EXPR)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ tree lhs = gimple_cond_lhs (stmt);
|
|
||||||
+ tree rhs = gimple_cond_rhs (stmt);
|
|
||||||
+ if ((lhs == ssa_name && integer_zerop (rhs))
|
|
||||||
+ || (rhs == ssa_name && integer_zerop (lhs)))
|
|
||||||
+ {
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Check SSA_NAME is used in
|
|
||||||
+ _tmp = SSA_NAME == 0;
|
|
||||||
+ or
|
|
||||||
+ _tmp = SSA_NAME != 0;
|
|
||||||
+ or
|
|
||||||
+ _tmp = SSA_NAME | _tmp2;
|
|
||||||
+*/
|
|
||||||
+static bool
|
|
||||||
+check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set<tree> *hset)
|
|
||||||
+{
|
|
||||||
+ tree_code code = gimple_assign_rhs_code (stmt);
|
|
||||||
+ tree lhs, rhs1, rhs2;
|
|
||||||
+
|
|
||||||
+ switch (code)
|
|
||||||
+ {
|
|
||||||
+ case EQ_EXPR:
|
|
||||||
+ case NE_EXPR:
|
|
||||||
+ rhs1 = gimple_assign_rhs1 (stmt);
|
|
||||||
+ rhs2 = gimple_assign_rhs2 (stmt);
|
|
||||||
+ if ((rhs1 == ssa_name && integer_zerop (rhs2))
|
|
||||||
+ || (rhs2 == ssa_name && integer_zerop (rhs1)))
|
|
||||||
+ {
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case BIT_IOR_EXPR:
|
|
||||||
+ lhs = gimple_assign_lhs (stmt);
|
|
||||||
+ if (hset->contains (lhs))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ /* We should check the use of _tmp further. */
|
|
||||||
+ return check_uses (lhs, hset);
|
|
||||||
+
|
|
||||||
+ default:
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Check SSA_NAME is used in
|
|
||||||
+ # result = PHI <SSA_NAME (bb1), 0 (bb2), 0 (bb3)>
|
|
||||||
+*/
|
|
||||||
+static bool
|
|
||||||
+check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set<tree> *hset)
|
|
||||||
+{
|
|
||||||
+ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++)
|
|
||||||
+ {
|
|
||||||
+ tree arg = gimple_phi_arg_def (stmt, i);
|
|
||||||
+ if (!integer_zerop (arg) && arg != ssa_name)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ tree result = gimple_phi_result (stmt);
|
|
||||||
+
|
|
||||||
+ /* It is used to avoid infinite recursion,
|
|
||||||
+ <bb 1>
|
|
||||||
+ if (cond)
|
|
||||||
+ goto <bb 2>
|
|
||||||
+ else
|
|
||||||
+ goto <bb 3>
|
|
||||||
+
|
|
||||||
+ <bb 2>
|
|
||||||
+ # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)>
|
|
||||||
+ {BODY}
|
|
||||||
+ if (cond)
|
|
||||||
+ goto <bb 3>
|
|
||||||
+ else
|
|
||||||
+ goto <bb 4>
|
|
||||||
+
|
|
||||||
+ <bb 3>
|
|
||||||
+ # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)>
|
|
||||||
+ {BODY}
|
|
||||||
+ if (cond)
|
|
||||||
+ goto <bb 2>
|
|
||||||
+ else
|
|
||||||
+ goto <bb 4>
|
|
||||||
+
|
|
||||||
+ <bb 4>
|
|
||||||
+ ...
|
|
||||||
+ */
|
|
||||||
+ if (hset->contains (result))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return check_uses (result, hset);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Check the use of SSA_NAME, it should only be used in comparison
|
|
||||||
+ operation and PHI node. HSET is used to record the ssa_names
|
|
||||||
+ that have been already checked. */
|
|
||||||
+static bool
|
|
||||||
+check_uses (tree ssa_name, hash_set<tree> *hset)
|
|
||||||
+{
|
|
||||||
+ imm_use_iterator imm_iter;
|
|
||||||
+ use_operand_p use_p;
|
|
||||||
+
|
|
||||||
+ if (TREE_CODE (ssa_name) != SSA_NAME)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (SSA_NAME_VAR (ssa_name)
|
|
||||||
+ && is_global_var (SSA_NAME_VAR (ssa_name)))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ hset->add (ssa_name);
|
|
||||||
+
|
|
||||||
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name)
|
|
||||||
+ {
|
|
||||||
+ gimple *stmt = USE_STMT (use_p);
|
|
||||||
+
|
|
||||||
+ /* Ignore debug gimple statements. */
|
|
||||||
+ if (is_gimple_debug (stmt))
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ switch (gimple_code (stmt))
|
|
||||||
+ {
|
|
||||||
+ case GIMPLE_COND:
|
|
||||||
+ if (!check_uses_cond (ssa_name, stmt, hset))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case GIMPLE_ASSIGN:
|
|
||||||
+ if (!check_uses_assign (ssa_name, stmt, hset))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case GIMPLE_PHI:
|
|
||||||
+ if (!check_uses_phi (ssa_name, stmt, hset))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ default:
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+check_def_gimple (gimple *def1, gimple *def2, const_tree result)
|
|
||||||
+{
|
|
||||||
+ /* def1 and def2 should be POINTER_PLUS_EXPR. */
|
|
||||||
+ if (!is_gimple_assign (def1) || !is_gimple_assign (def2)
|
|
||||||
+ || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR
|
|
||||||
+ || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ tree rhs12 = gimple_assign_rhs2 (def1);
|
|
||||||
+
|
|
||||||
+ tree rhs21 = gimple_assign_rhs1 (def2);
|
|
||||||
+ tree rhs22 = gimple_assign_rhs2 (def2);
|
|
||||||
+
|
|
||||||
+ if (rhs21 != result)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* We should have a positive pointer-plus constant to ensure
|
|
||||||
+ that the pointer value is continuously increasing. */
|
|
||||||
+ if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST
|
|
||||||
+ || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+check_loop_body (basic_block bb0, basic_block bb2, const_tree result)
|
|
||||||
+{
|
|
||||||
+ gimple *g01 = first_stmt (bb0);
|
|
||||||
+ if (!g01 || !is_gimple_assign (g01)
|
|
||||||
+ || gimple_assign_rhs_code (g01) != MEM_REF
|
|
||||||
+ || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ gimple *g02 = g01->next;
|
|
||||||
+ /* GIMPLE_COND would be the last gimple in a basic block,
|
|
||||||
+ and have no other side effects on RESULT. */
|
|
||||||
+ if (!g02 || gimple_code (g02) != GIMPLE_COND)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (first_stmt (bb2) != last_stmt (bb2))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Pattern is like
|
|
||||||
+ <pre bb>
|
|
||||||
+ arg1 = base (rhs11) + cst (rhs12); [def1]
|
|
||||||
+ goto <bb 0>
|
|
||||||
+
|
|
||||||
+ <bb 2>
|
|
||||||
+ arg2 = result (rhs21) + cst (rhs22); [def2]
|
|
||||||
+
|
|
||||||
+ <bb 0>
|
|
||||||
+ # result = PHI <arg1 (pre bb), arg2 (bb 2)>
|
|
||||||
+ _v = *result; [g01]
|
|
||||||
+ if (_v == 0) [g02]
|
|
||||||
+ goto <bb 1>
|
|
||||||
+ else
|
|
||||||
+ goto <bb 2>
|
|
||||||
+
|
|
||||||
+ <bb 1>
|
|
||||||
+ _1 = result - base; [g1]
|
|
||||||
+ _2 = _1 /[ex] cst; [g2]
|
|
||||||
+ _3 = (unsigned int) _2; [g3]
|
|
||||||
+ if (_3 == 0)
|
|
||||||
+ ...
|
|
||||||
+*/
|
|
||||||
+static bool
|
|
||||||
+check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
|
|
||||||
+ gphi *phi_stmt, gimple *&output)
|
|
||||||
+{
|
|
||||||
+ /* Start check from PHI node in BB0. */
|
|
||||||
+ if (gimple_phi_num_args (phi_stmt) != 2
|
|
||||||
+ || virtual_operand_p (gimple_phi_result (phi_stmt)))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ tree result = gimple_phi_result (phi_stmt);
|
|
||||||
+ tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
|
|
||||||
+ tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
|
|
||||||
+
|
|
||||||
+ if (TREE_CODE (arg1) != SSA_NAME
|
|
||||||
+ || TREE_CODE (arg2) != SSA_NAME
|
|
||||||
+ || SSA_NAME_IS_DEFAULT_DEF (arg1)
|
|
||||||
+ || SSA_NAME_IS_DEFAULT_DEF (arg2))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ gimple *def1 = SSA_NAME_DEF_STMT (arg1);
|
|
||||||
+ gimple *def2 = SSA_NAME_DEF_STMT (arg2);
|
|
||||||
+
|
|
||||||
+ /* Swap bb1 and bb2 if pattern is like
|
|
||||||
+ if (_v != 0)
|
|
||||||
+ goto <bb 2>
|
|
||||||
+ else
|
|
||||||
+ goto <bb 1>
|
|
||||||
+ */
|
|
||||||
+ if (gimple_bb (def2) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
|
|
||||||
+ {
|
|
||||||
+ std::swap (bb1, bb2);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* prebb[def1] --> bb0 <-- bb2[def2] */
|
|
||||||
+ if (!gimple_bb (def1)
|
|
||||||
+ || EDGE_SUCC (gimple_bb (def1), 0)->dest != bb0
|
|
||||||
+ || gimple_bb (def2) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Check whether the defining statements meet the pattern requirements. */
|
|
||||||
+ if (!check_def_gimple (def1, def2, result))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (!check_loop_body (bb0, bb2, result))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ output = def1;
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Check pattern
|
|
||||||
+ <bb 1>
|
|
||||||
+ _1 = result - base; [g1]
|
|
||||||
+ _2 = _1 /[ex] cst; [g2]
|
|
||||||
+ _3 = (unsigned int) _2; [g3]
|
|
||||||
+ if (_3 == 0)
|
|
||||||
+ ...
|
|
||||||
+*/
|
|
||||||
+static bool
|
|
||||||
+check_gimple_order (basic_block bb1, const_tree base, const_tree cst,
|
|
||||||
+ const_tree result, gimple *&output)
|
|
||||||
+{
|
|
||||||
+ gimple *g1 = first_stmt (bb1);
|
|
||||||
+ if (!g1 || !is_gimple_assign (g1)
|
|
||||||
+ || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
|
|
||||||
+ || gimple_assign_rhs1 (g1) != result
|
|
||||||
+ || gimple_assign_rhs2 (g1) != base)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ gimple *g2 = g1->next;
|
|
||||||
+ if (!g2 || !is_gimple_assign (g2)
|
|
||||||
+ || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
|
|
||||||
+ || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
|
|
||||||
+ || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* INTEGER_CST cst in gimple def1. */
|
|
||||||
+ HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
|
|
||||||
+ /* INTEGER_CST cst in gimple g2. */
|
|
||||||
+ HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
|
|
||||||
+ /* _2 must be at least a positive number. */
|
|
||||||
+ if (num2 == 0 || num1 / num2 <= 0)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ gimple *g3 = g2->next;
|
|
||||||
+ if (!g3 || !is_gimple_assign (g3)
|
|
||||||
+ || gimple_assign_rhs_code (g3) != NOP_EXPR
|
|
||||||
+ || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
|
|
||||||
+ || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* _3 should only be used in comparison operation or PHI node. */
|
|
||||||
+ hash_set<tree> *hset = new hash_set<tree>;
|
|
||||||
+ if (!check_uses (gimple_assign_lhs (g3), hset))
|
|
||||||
+ {
|
|
||||||
+ delete hset;
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ delete hset;
|
|
||||||
+
|
|
||||||
+ output = g3;
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
|
|
||||||
+{
|
|
||||||
+ gphi_iterator gsi;
|
|
||||||
+
|
|
||||||
+ for (gsi = gsi_start_phis (bb0); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
||||||
+ {
|
|
||||||
+ gphi *phi_stmt = gsi.phi ();
|
|
||||||
+ gimple *def1 = NULL;
|
|
||||||
+ tree base, cst, result;
|
|
||||||
+
|
|
||||||
+ if (!check_bb_order (bb0, bb1, bb2, phi_stmt, def1))
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ base = gimple_assign_rhs1 (def1);
|
|
||||||
+ cst = gimple_assign_rhs2 (def1);
|
|
||||||
+ result = gimple_phi_result (phi_stmt);
|
|
||||||
+
|
|
||||||
+ gimple *stmt = NULL;
|
|
||||||
+ if (!check_gimple_order (bb1, base, cst, result, stmt))
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ gcc_assert (stmt);
|
|
||||||
+
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
|
|
||||||
+ print_gimple_stmt (dump_file, stmt, 0);
|
|
||||||
+ fprintf (dump_file, "to\n");
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Rewrite statement
|
|
||||||
+ _3 = (unsigned int) _2;
|
|
||||||
+ to
|
|
||||||
+ _3 = (unsigned int) 1;
|
|
||||||
+ */
|
|
||||||
+ tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
|
|
||||||
+ gimple_assign_set_rhs1 (stmt, build_int_cst (type, 1));
|
|
||||||
+ update_stmt (stmt);
|
|
||||||
+
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ print_gimple_stmt (dump_file, stmt, 0);
|
|
||||||
+ fprintf (dump_file, "\n");
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Determine whether we should attempt to hoist adjacent loads out of
|
|
||||||
diamond patterns in pass_phiopt. Always hoist loads if
|
|
||||||
-fhoist-adjacent-loads is specified and the target machine has
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,486 +0,0 @@
|
|||||||
From 2194d59a20be1ab627089d2f0c082b5a0a217f52 Mon Sep 17 00:00:00 2001
|
|
||||||
From: xiezhiheng <xiezhiheng@huawei.com>
|
|
||||||
Date: Tue, 3 Aug 2021 03:49:52 -0400
|
|
||||||
Subject: [PATCH 16/22] [StructReorg] Bugfix in certain scenarios
|
|
||||||
|
|
||||||
Bugfixes for certain scenarios:
|
|
||||||
1. disable type simplify in LTO within optimizations
|
|
||||||
2. only enable optimizations in C language
|
|
||||||
3. use new to initialize allocated memory in symbol-summary.h
|
|
||||||
4. cover escape scenarios not considered
|
|
||||||
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
index 5a19ea0bb40..1cb544ec3b0 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
@@ -97,6 +97,7 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "tree-eh.h"
|
|
||||||
#include "bitmap.h"
|
|
||||||
#include "cfgloop.h"
|
|
||||||
+#include "langhooks.h"
|
|
||||||
#include "ipa-param-manipulation.h"
|
|
||||||
#include "tree-ssa-live.h" /* For remove_unused_locals. */
|
|
||||||
|
|
||||||
@@ -161,6 +162,44 @@ handled_type (tree type)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Check whether in C language or LTO with only C language. */
|
|
||||||
+bool
|
|
||||||
+lang_c_p (void)
|
|
||||||
+{
|
|
||||||
+ const char *language_string = lang_hooks.name;
|
|
||||||
+
|
|
||||||
+ if (!language_string)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (strcmp (language_string, "GNU GIMPLE") == 0)
|
|
||||||
+ {
|
|
||||||
+ unsigned i = 0;
|
|
||||||
+ tree t = NULL;
|
|
||||||
+ const char *unit_string = NULL;
|
|
||||||
+
|
|
||||||
+ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
|
|
||||||
+ {
|
|
||||||
+ unit_string = TRANSLATION_UNIT_LANGUAGE (t);
|
|
||||||
+ if (!unit_string
|
|
||||||
+ || (strncmp (unit_string, "GNU C", 5) != 0)
|
|
||||||
+ || (!ISDIGIT (unit_string[5])))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+ else if (strncmp (language_string, "GNU C", 5) == 0
|
|
||||||
+ && ISDIGIT (language_string[5]))
|
|
||||||
+ {
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
enum srmode
|
|
||||||
{
|
|
||||||
NORMAL = 0,
|
|
||||||
@@ -999,7 +1038,6 @@ public:
|
|
||||||
void analyze_types (void);
|
|
||||||
void clear_visited (void);
|
|
||||||
bool create_new_types (void);
|
|
||||||
- void restore_field_type (void);
|
|
||||||
void create_new_decls (void);
|
|
||||||
srdecl *find_decl (tree);
|
|
||||||
void create_new_functions (void);
|
|
||||||
@@ -2127,7 +2165,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
|
||||||
srtype *t = find_type (inner_type (TREE_TYPE (rhs)));
|
|
||||||
srdecl *d = find_decl (lhs);
|
|
||||||
if (!d && t)
|
|
||||||
- current_function->record_decl (t, lhs, -1);
|
|
||||||
+ {
|
|
||||||
+ current_function->record_decl (t, lhs, -1);
|
|
||||||
+ tree var = SSA_NAME_VAR (lhs);
|
|
||||||
+ if (var && VOID_POINTER_P (TREE_TYPE (var)))
|
|
||||||
+ current_function->record_decl (t, var, -1);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
if (TREE_CODE (rhs) == SSA_NAME
|
|
||||||
&& VOID_POINTER_P (TREE_TYPE (rhs))
|
|
||||||
@@ -2136,7 +2179,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
|
||||||
srtype *t = find_type (inner_type (TREE_TYPE (lhs)));
|
|
||||||
srdecl *d = find_decl (rhs);
|
|
||||||
if (!d && t)
|
|
||||||
- current_function->record_decl (t, rhs, -1);
|
|
||||||
+ {
|
|
||||||
+ current_function->record_decl (t, rhs, -1);
|
|
||||||
+ tree var = SSA_NAME_VAR (rhs);
|
|
||||||
+ if (var && VOID_POINTER_P (TREE_TYPE (var)))
|
|
||||||
+ current_function->record_decl (t, var, -1);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
@@ -2816,8 +2864,14 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt)
|
|
||||||
if (escapes != does_not_escape)
|
|
||||||
{
|
|
||||||
for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
|
|
||||||
- mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
|
|
||||||
- escapes);
|
|
||||||
+ {
|
|
||||||
+ mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
|
|
||||||
+ escapes);
|
|
||||||
+ srdecl *d = current_function->find_decl (
|
|
||||||
+ gimple_call_arg (stmt, i));
|
|
||||||
+ if (d)
|
|
||||||
+ d->type->mark_escape (escapes, stmt);
|
|
||||||
+ }
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -3753,49 +3807,6 @@ ipa_struct_reorg::analyze_types (void)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* When struct A has a struct B member, B's type info
|
|
||||||
- is not stored in
|
|
||||||
- TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA)))
|
|
||||||
- Try to restore B's type information. */
|
|
||||||
-void
|
|
||||||
-ipa_struct_reorg::restore_field_type (void)
|
|
||||||
-{
|
|
||||||
- for (unsigned i = 0; i < types.length (); i++)
|
|
||||||
- {
|
|
||||||
- for (unsigned j = 0; j < types[i]->fields.length (); j++)
|
|
||||||
- {
|
|
||||||
- srfield *field = types[i]->fields[j];
|
|
||||||
- if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE)
|
|
||||||
- {
|
|
||||||
- /* If field type has TYPE_FIELDS information,
|
|
||||||
- we do not need to do this. */
|
|
||||||
- if (TYPE_FIELDS (field->type->type) != NULL)
|
|
||||||
- {
|
|
||||||
- continue;
|
|
||||||
- }
|
|
||||||
- for (unsigned k = 0; k < types.length (); k++)
|
|
||||||
- {
|
|
||||||
- if (i == k)
|
|
||||||
- {
|
|
||||||
- continue;
|
|
||||||
- }
|
|
||||||
- const char *type1 = get_type_name (field->type->type);
|
|
||||||
- const char *type2 = get_type_name (types[k]->type);
|
|
||||||
- if (type1 == NULL || type2 == NULL)
|
|
||||||
- {
|
|
||||||
- continue;
|
|
||||||
- }
|
|
||||||
- if (type1 == type2
|
|
||||||
- && TYPE_FIELDS (types[k]->type))
|
|
||||||
- {
|
|
||||||
- field->type = types[k];
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
/* Create all new types we want to create. */
|
|
||||||
|
|
||||||
bool
|
|
||||||
@@ -4652,7 +4663,6 @@ ipa_struct_reorg::rewrite_functions (void)
|
|
||||||
{
|
|
||||||
unsigned retval = 0;
|
|
||||||
|
|
||||||
- restore_field_type ();
|
|
||||||
/* Create new types, if we did not create any new types,
|
|
||||||
then don't rewrite any accesses. */
|
|
||||||
if (!create_new_types ())
|
|
||||||
@@ -4887,7 +4897,10 @@ pass_ipa_struct_reorg::gate (function *)
|
|
||||||
&& flag_ipa_struct_reorg
|
|
||||||
/* Don't bother doing anything if the program has errors. */
|
|
||||||
&& !seen_error ()
|
|
||||||
- && flag_lto_partition == LTO_PARTITION_ONE);
|
|
||||||
+ && flag_lto_partition == LTO_PARTITION_ONE
|
|
||||||
+ /* Only enable struct optimizations in C since other
|
|
||||||
+ languages' grammar forbids it. */
|
|
||||||
+ && lang_c_p ());
|
|
||||||
}
|
|
||||||
|
|
||||||
} // anon namespace
|
|
||||||
diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
|
|
||||||
index fa1df5c8015..a223b4dadea 100644
|
|
||||||
--- a/gcc/symbol-summary.h
|
|
||||||
+++ b/gcc/symbol-summary.h
|
|
||||||
@@ -59,6 +59,12 @@ protected:
|
|
||||||
/* Allocates new data that are stored within map. */
|
|
||||||
T* allocate_new ()
|
|
||||||
{
|
|
||||||
+ /* In structure optimizations, we call new to ensure that
|
|
||||||
+ the allocated memory is initialized to 0. */
|
|
||||||
+ if (flag_ipa_struct_reorg)
|
|
||||||
+ return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
|
|
||||||
+ : new T ();
|
|
||||||
+
|
|
||||||
/* Call gcc_internal_because we do not want to call finalizer for
|
|
||||||
a type T. We call dtor explicitly. */
|
|
||||||
return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
|
|
||||||
@@ -71,7 +77,12 @@ protected:
|
|
||||||
if (is_ggc ())
|
|
||||||
ggc_delete (item);
|
|
||||||
else
|
|
||||||
- m_allocator.remove (item);
|
|
||||||
+ {
|
|
||||||
+ if (flag_ipa_struct_reorg)
|
|
||||||
+ delete item;
|
|
||||||
+ else
|
|
||||||
+ m_allocator.remove (item);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Unregister all call-graph hooks. */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..273baa9a368
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
|
|
||||||
@@ -0,0 +1,31 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-additional-options "-flto -fno-use-linker-plugin" } */
|
|
||||||
+
|
|
||||||
+struct D
|
|
||||||
+{
|
|
||||||
+ int n;
|
|
||||||
+ int c [8];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct A
|
|
||||||
+{
|
|
||||||
+ int i;
|
|
||||||
+ char *p;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct B
|
|
||||||
+{
|
|
||||||
+ struct A *a;
|
|
||||||
+ struct D *d;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+int dtInsert1 (struct B *b)
|
|
||||||
+{
|
|
||||||
+ struct A a = { 0, 0 };
|
|
||||||
+ struct D *d;
|
|
||||||
+ b->a = &a;
|
|
||||||
+ d = b->d;
|
|
||||||
+ &d->c [d->n];
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..455f9b501d6
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
|
|
||||||
@@ -0,0 +1,54 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-additional-options "-flto -fno-use-linker-plugin" } */
|
|
||||||
+
|
|
||||||
+typedef struct basic_block_def *basic_block;
|
|
||||||
+typedef struct gimple_seq_node_d *gimple_seq_node;
|
|
||||||
+typedef struct gimple_seq_d *gimple_seq;
|
|
||||||
+typedef struct
|
|
||||||
+{
|
|
||||||
+ gimple_seq_node ptr;
|
|
||||||
+ gimple_seq seq;
|
|
||||||
+ basic_block bb;
|
|
||||||
+} gimple_stmt_iterator;
|
|
||||||
+typedef void *gimple;
|
|
||||||
+extern void exit(int);
|
|
||||||
+struct gimple_seq_node_d
|
|
||||||
+{
|
|
||||||
+ gimple stmt;
|
|
||||||
+ struct gimple_seq_node_d *next;
|
|
||||||
+};
|
|
||||||
+struct gimple_seq_d
|
|
||||||
+{
|
|
||||||
+};
|
|
||||||
+static __inline__ gimple_stmt_iterator
|
|
||||||
+gsi_start (gimple_seq seq)
|
|
||||||
+{
|
|
||||||
+ gimple_stmt_iterator i;
|
|
||||||
+ i.seq = seq;
|
|
||||||
+ return i;
|
|
||||||
+}
|
|
||||||
+static __inline__ unsigned char
|
|
||||||
+gsi_end_p (gimple_stmt_iterator i)
|
|
||||||
+{
|
|
||||||
+ return i.ptr == ((void *)0);
|
|
||||||
+}
|
|
||||||
+static __inline__ void
|
|
||||||
+gsi_next (gimple_stmt_iterator *i)
|
|
||||||
+{
|
|
||||||
+ i->ptr = i->ptr->next;
|
|
||||||
+}
|
|
||||||
+static __inline__ gimple
|
|
||||||
+gsi_stmt (gimple_stmt_iterator i)
|
|
||||||
+{
|
|
||||||
+ return i.ptr->stmt;
|
|
||||||
+}
|
|
||||||
+void
|
|
||||||
+c_warn_unused_result (gimple_seq seq)
|
|
||||||
+{
|
|
||||||
+ gimple_stmt_iterator i;
|
|
||||||
+ for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
|
|
||||||
+ {
|
|
||||||
+ gimple g = gsi_stmt (i);
|
|
||||||
+ if (!g) exit(0);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..afc0bd86ca5
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
|
|
||||||
@@ -0,0 +1,38 @@
|
|
||||||
+/* { dg-do run } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+struct gki_elem {
|
|
||||||
+ char *key;
|
|
||||||
+ int idx;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+typedef struct {
|
|
||||||
+ struct gki_elem *table;
|
|
||||||
+
|
|
||||||
+ int primelevel;
|
|
||||||
+ int nhash;
|
|
||||||
+ int nkeys;
|
|
||||||
+} GKI;
|
|
||||||
+
|
|
||||||
+void *
|
|
||||||
+sre_malloc(size_t size)
|
|
||||||
+{
|
|
||||||
+ void *ptr = malloc (size);
|
|
||||||
+ return ptr;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+__attribute__((noinline)) int
|
|
||||||
+GKIStoreKey(GKI *hash)
|
|
||||||
+{
|
|
||||||
+ hash->table = sre_malloc(sizeof(struct gki_elem));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+main ()
|
|
||||||
+{
|
|
||||||
+ GKI *hash = malloc (sizeof(GKI));
|
|
||||||
+ GKIStoreKey(hash);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..9bcfaf3681b
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
|
|
||||||
@@ -0,0 +1,25 @@
|
|
||||||
+/* { dg-do run } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+#include <string.h>
|
|
||||||
+
|
|
||||||
+typedef struct {
|
|
||||||
+ unsigned char blue;
|
|
||||||
+ unsigned char green;
|
|
||||||
+} Pixel;
|
|
||||||
+
|
|
||||||
+typedef struct {
|
|
||||||
+ unsigned short colormaplength;
|
|
||||||
+ Pixel *colormapdata;
|
|
||||||
+} TargaImage;
|
|
||||||
+
|
|
||||||
+TargaImage *img;
|
|
||||||
+
|
|
||||||
+int main() {
|
|
||||||
+ img = (TargaImage *) malloc( sizeof(TargaImage) );
|
|
||||||
+ if (img->colormaplength > 0) {
|
|
||||||
+ img->colormapdata = (Pixel *) malloc(sizeof(Pixel) * img->colormaplength);
|
|
||||||
+ memset(img->colormapdata, 0, (sizeof(Pixel) * img->colormaplength) );
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..052f4e3bdc1
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
|
|
||||||
@@ -0,0 +1,54 @@
|
|
||||||
+/* { dg-do run } */
|
|
||||||
+
|
|
||||||
+extern void abort(void);
|
|
||||||
+
|
|
||||||
+struct packed_ushort {
|
|
||||||
+ unsigned short ucs;
|
|
||||||
+} __attribute__((packed));
|
|
||||||
+
|
|
||||||
+struct source {
|
|
||||||
+ int pos, length;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static int flag;
|
|
||||||
+
|
|
||||||
+static void __attribute__((noinline)) fetch(struct source *p)
|
|
||||||
+{
|
|
||||||
+ p->length = 128;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static struct packed_ushort __attribute__((noinline)) next(struct source *p)
|
|
||||||
+{
|
|
||||||
+ struct packed_ushort rv;
|
|
||||||
+
|
|
||||||
+ if (p->pos >= p->length) {
|
|
||||||
+ if (flag) {
|
|
||||||
+ flag = 0;
|
|
||||||
+ fetch(p);
|
|
||||||
+ return next(p);
|
|
||||||
+ }
|
|
||||||
+ flag = 1;
|
|
||||||
+ rv.ucs = 0xffff;
|
|
||||||
+ return rv;
|
|
||||||
+ }
|
|
||||||
+ rv.ucs = 0;
|
|
||||||
+ return rv;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int main(void)
|
|
||||||
+{
|
|
||||||
+ struct source s;
|
|
||||||
+ int i;
|
|
||||||
+
|
|
||||||
+ s.pos = 0;
|
|
||||||
+ s.length = 0;
|
|
||||||
+ flag = 0;
|
|
||||||
+
|
|
||||||
+ for (i = 0; i < 16; i++) {
|
|
||||||
+ struct packed_ushort rv = next(&s);
|
|
||||||
+ if ((i == 0 && rv.ucs != 0xffff)
|
|
||||||
+ || (i > 0 && rv.ucs != 0))
|
|
||||||
+ abort();
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/tree.c b/gcc/tree.c
|
|
||||||
index 3c17694c703..5c1374d6fb1 100644
|
|
||||||
--- a/gcc/tree.c
|
|
||||||
+++ b/gcc/tree.c
|
|
||||||
@@ -5216,6 +5216,12 @@ fld_worklist_push (tree t, class free_lang_data_d *fld)
|
|
||||||
static tree
|
|
||||||
fld_simplified_type_name (tree type)
|
|
||||||
{
|
|
||||||
+ /* Simplify type will cause that struct A and struct A within
|
|
||||||
+ struct B are different type pointers, so skip it in structure
|
|
||||||
+ optimizations. */
|
|
||||||
+ if (flag_ipa_struct_reorg)
|
|
||||||
+ return TYPE_NAME (type);
|
|
||||||
+
|
|
||||||
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
|
|
||||||
return TYPE_NAME (type);
|
|
||||||
/* Drop TYPE_DECLs in TYPE_NAME in favor of the identifier in the
|
|
||||||
@@ -5454,6 +5460,11 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
|
|
||||||
{
|
|
||||||
if (!t)
|
|
||||||
return t;
|
|
||||||
+ /* Simplify type will cause that struct A and struct A within
|
|
||||||
+ struct B are different type pointers, so skip it in structure
|
|
||||||
+ optimizations. */
|
|
||||||
+ if (flag_ipa_struct_reorg)
|
|
||||||
+ return t;
|
|
||||||
if (POINTER_TYPE_P (t))
|
|
||||||
return fld_incomplete_type_of (t, fld);
|
|
||||||
/* FIXME: This triggers verification error, see PR88140. */
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,622 +0,0 @@
|
|||||||
From 4d76b521d9bb539556011304b8a76dea1e2657a1 Mon Sep 17 00:00:00 2001
|
|
||||||
From: bule <bule1@huawei.com>
|
|
||||||
Date: Fri, 6 Aug 2021 10:20:54 +0800
|
|
||||||
Subject: [PATCH 17/22] [mcmodel] Enable mcmodel=medium on kunpeng
|
|
||||||
|
|
||||||
Enable mcmodel=medium on kunpeng
|
|
||||||
|
|
||||||
diff --git a/gcc/combine.c b/gcc/combine.c
|
|
||||||
index 35505cc5311..497e53289ca 100644
|
|
||||||
--- a/gcc/combine.c
|
|
||||||
+++ b/gcc/combine.c
|
|
||||||
@@ -1923,6 +1923,12 @@ can_combine_p (rtx_insn *insn, rtx_insn *i3, rtx_insn *pred ATTRIBUTE_UNUSED,
|
|
||||||
break;
|
|
||||||
|
|
||||||
case SET:
|
|
||||||
+ /* If the set is a symbol loaded by medium code model unspec
|
|
||||||
+ escape this combine. */
|
|
||||||
+ if (GET_CODE (SET_SRC (elt)) == UNSPEC
|
|
||||||
+ && XVECLEN (SET_SRC (elt), 0) != 0
|
|
||||||
+ && targetm.medium_symbol_p (SET_SRC (elt)))
|
|
||||||
+ return 0;
|
|
||||||
/* Ignore SETs whose result isn't used but not those that
|
|
||||||
have side-effects. */
|
|
||||||
if (find_reg_note (insn, REG_UNUSED, SET_DEST (elt))
|
|
||||||
diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h
|
|
||||||
index ee7bed34924..21828803480 100644
|
|
||||||
--- a/gcc/config/aarch64/aarch64-opts.h
|
|
||||||
+++ b/gcc/config/aarch64/aarch64-opts.h
|
|
||||||
@@ -66,6 +66,10 @@ enum aarch64_code_model {
|
|
||||||
/* -fpic for small memory model.
|
|
||||||
GOT size to 28KiB (4K*8-4K) or 3580 entries. */
|
|
||||||
AARCH64_CMODEL_SMALL_SPIC,
|
|
||||||
+ /* Using movk insn sequence to do 64bit PC relative relocation. */
|
|
||||||
+ AARCH64_CMODEL_MEDIUM,
|
|
||||||
+ /* Using movk insn sequence to do 64bit PC relative got relocation. */
|
|
||||||
+ AARCH64_CMODEL_MEDIUM_PIC,
|
|
||||||
/* No assumptions about addresses of code and data.
|
|
||||||
The PIC variant is not yet implemented. */
|
|
||||||
AARCH64_CMODEL_LARGE
|
|
||||||
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
|
||||||
index bebd1b36228..226f3a8ff01 100644
|
|
||||||
--- a/gcc/config/aarch64/aarch64-protos.h
|
|
||||||
+++ b/gcc/config/aarch64/aarch64-protos.h
|
|
||||||
@@ -95,9 +95,11 @@
|
|
||||||
*/
|
|
||||||
enum aarch64_symbol_type
|
|
||||||
{
|
|
||||||
+ SYMBOL_MEDIUM_ABSOLUTE,
|
|
||||||
SYMBOL_SMALL_ABSOLUTE,
|
|
||||||
SYMBOL_SMALL_GOT_28K,
|
|
||||||
SYMBOL_SMALL_GOT_4G,
|
|
||||||
+ SYMBOL_MEDIUM_GOT_4G,
|
|
||||||
SYMBOL_SMALL_TLSGD,
|
|
||||||
SYMBOL_SMALL_TLSDESC,
|
|
||||||
SYMBOL_SMALL_TLSIE,
|
|
||||||
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
||||||
index 79dc8f186f4..f78942b04c6 100644
|
|
||||||
--- a/gcc/config/aarch64/aarch64.c
|
|
||||||
+++ b/gcc/config/aarch64/aarch64.c
|
|
||||||
@@ -3127,6 +3127,29 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
|
|
||||||
emit_insn (gen_add_losym (dest, tmp_reg, imm));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
+ case SYMBOL_MEDIUM_ABSOLUTE:
|
|
||||||
+ {
|
|
||||||
+ rtx tmp_reg = dest;
|
|
||||||
+ machine_mode mode = GET_MODE (dest);
|
|
||||||
+
|
|
||||||
+ gcc_assert (mode == Pmode || mode == ptr_mode);
|
|
||||||
+ if (can_create_pseudo_p ())
|
|
||||||
+ tmp_reg = gen_reg_rtx (mode);
|
|
||||||
+
|
|
||||||
+ if (mode == DImode)
|
|
||||||
+ {
|
|
||||||
+ emit_insn (gen_load_symbol_medium_di (dest, tmp_reg, imm));
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ emit_insn (gen_load_symbol_medium_si (dest, tmp_reg, imm));
|
|
||||||
+ }
|
|
||||||
+ if (REG_P (dest))
|
|
||||||
+ {
|
|
||||||
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (imm));
|
|
||||||
+ }
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
case SYMBOL_TINY_ABSOLUTE:
|
|
||||||
emit_insn (gen_rtx_SET (dest, imm));
|
|
||||||
@@ -3249,6 +3272,60 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ case SYMBOL_MEDIUM_GOT_4G:
|
|
||||||
+ {
|
|
||||||
+ rtx tmp_reg = dest;
|
|
||||||
+ machine_mode mode = GET_MODE (dest);
|
|
||||||
+ if (can_create_pseudo_p ())
|
|
||||||
+ {
|
|
||||||
+ tmp_reg = gen_reg_rtx (mode);
|
|
||||||
+ }
|
|
||||||
+ rtx insn;
|
|
||||||
+ rtx mem;
|
|
||||||
+ rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
|
|
||||||
+
|
|
||||||
+ if (mode == DImode)
|
|
||||||
+ {
|
|
||||||
+ emit_insn (gen_load_symbol_medium_di (tmp_reg, dest, s));
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ emit_insn (gen_load_symbol_medium_si (tmp_reg, dest, s));
|
|
||||||
+ }
|
|
||||||
+ if (REG_P (dest))
|
|
||||||
+ {
|
|
||||||
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (s));
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (mode == ptr_mode)
|
|
||||||
+ {
|
|
||||||
+ if (mode == DImode)
|
|
||||||
+ {
|
|
||||||
+ emit_insn (gen_get_gotoff_di (dest, imm));
|
|
||||||
+ insn = gen_ldr_got_medium_di (dest, tmp_reg, dest);
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ emit_insn (gen_get_gotoff_si (dest, imm));
|
|
||||||
+ insn = gen_ldr_got_medium_si (dest, tmp_reg, dest);
|
|
||||||
+ }
|
|
||||||
+ mem = XVECEXP (SET_SRC (insn), 0, 0);
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ gcc_assert (mode == Pmode);
|
|
||||||
+ emit_insn (gen_get_gotoff_di (dest, imm));
|
|
||||||
+ insn = gen_ldr_got_medium_sidi (dest, tmp_reg, dest);
|
|
||||||
+ mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ gcc_assert (GET_CODE (mem) == MEM);
|
|
||||||
+ MEM_READONLY_P (mem) = 1;
|
|
||||||
+ MEM_NOTRAP_P (mem) = 1;
|
|
||||||
+ emit_insn (insn);
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
case SYMBOL_SMALL_TLSGD:
|
|
||||||
{
|
|
||||||
rtx_insn *insns;
|
|
||||||
@@ -5256,11 +5333,12 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
|
|
||||||
|
|
||||||
return;
|
|
||||||
|
|
||||||
- case SYMBOL_SMALL_TLSGD:
|
|
||||||
- case SYMBOL_SMALL_TLSDESC:
|
|
||||||
+ case SYMBOL_SMALL_TLSGD:
|
|
||||||
+ case SYMBOL_SMALL_TLSDESC:
|
|
||||||
case SYMBOL_SMALL_TLSIE:
|
|
||||||
case SYMBOL_SMALL_GOT_28K:
|
|
||||||
case SYMBOL_SMALL_GOT_4G:
|
|
||||||
+ case SYMBOL_MEDIUM_GOT_4G:
|
|
||||||
case SYMBOL_TINY_GOT:
|
|
||||||
case SYMBOL_TINY_TLSIE:
|
|
||||||
if (const_offset != 0)
|
|
||||||
@@ -5279,6 +5357,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
|
|
||||||
case SYMBOL_TLSLE24:
|
|
||||||
case SYMBOL_TLSLE32:
|
|
||||||
case SYMBOL_TLSLE48:
|
|
||||||
+ case SYMBOL_MEDIUM_ABSOLUTE:
|
|
||||||
aarch64_load_symref_appropriately (dest, imm, sty);
|
|
||||||
return;
|
|
||||||
|
|
||||||
@@ -9389,7 +9468,14 @@ aarch64_classify_address (struct aarch64_address_info *info,
|
|
||||||
if (GET_CODE (sym) == SYMBOL_REF
|
|
||||||
&& offset.is_constant (&const_offset)
|
|
||||||
&& (aarch64_classify_symbol (sym, const_offset)
|
|
||||||
- == SYMBOL_SMALL_ABSOLUTE))
|
|
||||||
+ == SYMBOL_SMALL_ABSOLUTE
|
|
||||||
+ /* Fix fail on dbl_mov_immediate_1.c. If end up here with
|
|
||||||
+ MEDIUM_ABSOLUTE, the symbol is a constant number that is
|
|
||||||
+ forced to memory in reload pass, which is ok to go on with
|
|
||||||
+ the original design that substitutes the mov with
|
|
||||||
+ 'adrp and ldr :losum'. */
|
|
||||||
+ || aarch64_classify_symbol (sym, const_offset)
|
|
||||||
+ == SYMBOL_MEDIUM_ABSOLUTE))
|
|
||||||
{
|
|
||||||
/* The symbol and offset must be aligned to the access size. */
|
|
||||||
unsigned int align;
|
|
||||||
@@ -11346,7 +11432,13 @@ static inline bool
|
|
||||||
aarch64_can_use_per_function_literal_pools_p (void)
|
|
||||||
{
|
|
||||||
return (aarch64_pcrelative_literal_loads
|
|
||||||
- || aarch64_cmodel == AARCH64_CMODEL_LARGE);
|
|
||||||
+ || aarch64_cmodel == AARCH64_CMODEL_LARGE
|
|
||||||
+ /* Fix const9.C so that constants go to function_literal_pools.
|
|
||||||
+ According to the original design of aarch64 mcmodel=medium, we
|
|
||||||
+ don't care where this symbol is put. For the benefit of code size
|
|
||||||
+ and behaviour consistent with other mcmodel, put it into
|
|
||||||
+ function_literal_pools. */
|
|
||||||
+ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
|
||||||
@@ -13003,6 +13095,13 @@ cost_plus:
|
|
||||||
if (speed)
|
|
||||||
*cost += extra_cost->alu.arith;
|
|
||||||
}
|
|
||||||
+ else if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM
|
|
||||||
+ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC)
|
|
||||||
+ {
|
|
||||||
+ /* 4 movs adr sub add 2movs ldr. */
|
|
||||||
+ if (speed)
|
|
||||||
+ *cost += 7*extra_cost->alu.arith;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
if (flag_pic)
|
|
||||||
{
|
|
||||||
@@ -13010,6 +13109,8 @@ cost_plus:
|
|
||||||
*cost += COSTS_N_INSNS (1);
|
|
||||||
if (speed)
|
|
||||||
*cost += extra_cost->ldst.load;
|
|
||||||
+ if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC)
|
|
||||||
+ *cost += 2*extra_cost->alu.arith;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
|
|
||||||
@@ -14373,6 +14474,7 @@ initialize_aarch64_tls_size (struct gcc_options *opts)
|
|
||||||
if (aarch64_tls_size > 32)
|
|
||||||
aarch64_tls_size = 32;
|
|
||||||
break;
|
|
||||||
+ case AARCH64_CMODEL_MEDIUM:
|
|
||||||
case AARCH64_CMODEL_LARGE:
|
|
||||||
/* The maximum TLS size allowed under large is 16E.
|
|
||||||
FIXME: 16E should be 64bit, we only support 48bit offset now. */
|
|
||||||
@@ -15266,6 +15368,12 @@ initialize_aarch64_code_model (struct gcc_options *opts)
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
+ case AARCH64_CMODEL_MEDIUM:
|
|
||||||
+ if (opts->x_flag_pic)
|
|
||||||
+ {
|
|
||||||
+ aarch64_cmodel = AARCH64_CMODEL_MEDIUM_PIC;
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
case AARCH64_CMODEL_LARGE:
|
|
||||||
if (opts->x_flag_pic)
|
|
||||||
sorry ("code model %qs with %<-f%s%>", "large",
|
|
||||||
@@ -15276,6 +15384,7 @@ initialize_aarch64_code_model (struct gcc_options *opts)
|
|
||||||
case AARCH64_CMODEL_TINY_PIC:
|
|
||||||
case AARCH64_CMODEL_SMALL_PIC:
|
|
||||||
case AARCH64_CMODEL_SMALL_SPIC:
|
|
||||||
+ case AARCH64_CMODEL_MEDIUM_PIC:
|
|
||||||
gcc_unreachable ();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -15286,6 +15395,7 @@ static void
|
|
||||||
aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
|
|
||||||
{
|
|
||||||
ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
|
|
||||||
+ ptr->x_aarch64_data_threshold = opts->x_aarch64_data_threshold;
|
|
||||||
ptr->x_aarch64_branch_protection_string
|
|
||||||
= opts->x_aarch64_branch_protection_string;
|
|
||||||
}
|
|
||||||
@@ -15301,6 +15411,7 @@ aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
|
|
||||||
opts->x_explicit_arch = ptr->x_explicit_arch;
|
|
||||||
selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
|
|
||||||
opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
|
|
||||||
+ opts->x_aarch64_data_threshold = ptr->x_aarch64_data_threshold;
|
|
||||||
opts->x_aarch64_branch_protection_string
|
|
||||||
= ptr->x_aarch64_branch_protection_string;
|
|
||||||
if (opts->x_aarch64_branch_protection_string)
|
|
||||||
@@ -16169,6 +16280,8 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
|
|
||||||
|
|
||||||
case AARCH64_CMODEL_SMALL_SPIC:
|
|
||||||
case AARCH64_CMODEL_SMALL_PIC:
|
|
||||||
+ case AARCH64_CMODEL_MEDIUM_PIC:
|
|
||||||
+ case AARCH64_CMODEL_MEDIUM:
|
|
||||||
case AARCH64_CMODEL_SMALL:
|
|
||||||
return SYMBOL_SMALL_ABSOLUTE;
|
|
||||||
|
|
||||||
@@ -16205,6 +16318,7 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
|
|
||||||
return SYMBOL_TINY_ABSOLUTE;
|
|
||||||
|
|
||||||
case AARCH64_CMODEL_SMALL:
|
|
||||||
+ AARCH64_SMALL_ROUTINE:
|
|
||||||
/* Same reasoning as the tiny code model, but the offset cap here is
|
|
||||||
1MB, allowing +/-3.9GB for the offset to the symbol. */
|
|
||||||
|
|
||||||
@@ -16228,7 +16342,50 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
|
|
||||||
? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
|
|
||||||
return SYMBOL_SMALL_ABSOLUTE;
|
|
||||||
|
|
||||||
+ case AARCH64_CMODEL_MEDIUM:
|
|
||||||
+ {
|
|
||||||
+ tree decl_local = SYMBOL_REF_DECL (x);
|
|
||||||
+ if (decl_local != NULL
|
|
||||||
+ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local)))
|
|
||||||
+ {
|
|
||||||
+ HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local));
|
|
||||||
+ /* If the data is smaller than the threshold, goto
|
|
||||||
+ the small code model. Else goto the large code
|
|
||||||
+ model. */
|
|
||||||
+ if (size >= HOST_WIDE_INT (aarch64_data_threshold))
|
|
||||||
+ goto AARCH64_LARGE_ROUTINE;
|
|
||||||
+ }
|
|
||||||
+ goto AARCH64_SMALL_ROUTINE;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ case AARCH64_CMODEL_MEDIUM_PIC:
|
|
||||||
+ {
|
|
||||||
+ tree decl_local = SYMBOL_REF_DECL (x);
|
|
||||||
+ if (decl_local != NULL
|
|
||||||
+ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local)))
|
|
||||||
+ {
|
|
||||||
+ HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local));
|
|
||||||
+ if (size < HOST_WIDE_INT (aarch64_data_threshold))
|
|
||||||
+ {
|
|
||||||
+ if (!aarch64_symbol_binds_local_p (x))
|
|
||||||
+ {
|
|
||||||
+ /* flag_pic is 2 only when -fPIC is on, when we should
|
|
||||||
+ use 4G GOT. */
|
|
||||||
+ return flag_pic == 2 ? SYMBOL_SMALL_GOT_4G
|
|
||||||
+ : SYMBOL_SMALL_GOT_28K ;
|
|
||||||
+ }
|
|
||||||
+ return SYMBOL_SMALL_ABSOLUTE;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ if (!aarch64_symbol_binds_local_p (x))
|
|
||||||
+ {
|
|
||||||
+ return SYMBOL_MEDIUM_GOT_4G;
|
|
||||||
+ }
|
|
||||||
+ return SYMBOL_MEDIUM_ABSOLUTE;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
case AARCH64_CMODEL_LARGE:
|
|
||||||
+ AARCH64_LARGE_ROUTINE:
|
|
||||||
/* This is alright even in PIC code as the constant
|
|
||||||
pool reference is always PC relative and within
|
|
||||||
the same translation unit. */
|
|
||||||
@@ -19352,6 +19509,8 @@ aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
|
|
||||||
case AARCH64_CMODEL_SMALL:
|
|
||||||
case AARCH64_CMODEL_SMALL_PIC:
|
|
||||||
case AARCH64_CMODEL_SMALL_SPIC:
|
|
||||||
+ case AARCH64_CMODEL_MEDIUM:
|
|
||||||
+ case AARCH64_CMODEL_MEDIUM_PIC:
|
|
||||||
/* text+got+data < 4Gb. 4-byte signed relocs are sufficient
|
|
||||||
for everything. */
|
|
||||||
type = DW_EH_PE_sdata4;
|
|
||||||
@@ -22605,7 +22764,14 @@ aarch64_empty_mask_is_expensive (unsigned)
|
|
||||||
bool
|
|
||||||
aarch64_use_pseudo_pic_reg (void)
|
|
||||||
{
|
|
||||||
- return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
|
|
||||||
+ /* flag_pic is 2 when -fPIC is on, where we do not need the pseudo
|
|
||||||
+ pic reg. In medium code model, when combined with -fpie/-fpic, there is a
|
|
||||||
+ possibility that some symbol size smaller than the -mlarge-data-threshold
|
|
||||||
+ will still use SMALL_SPIC relocation, which needs the pseudo pic reg.
|
|
||||||
+ Fix spill_1.c fail. */
|
|
||||||
+ return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
|
|
||||||
+ || (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC
|
|
||||||
+ && flag_pic != 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
|
|
||||||
@@ -22615,6 +22781,7 @@ aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
|
|
||||||
{
|
|
||||||
switch (XINT (x, 1))
|
|
||||||
{
|
|
||||||
+ case UNSPEC_GOTMEDIUMPIC4G:
|
|
||||||
case UNSPEC_GOTSMALLPIC:
|
|
||||||
case UNSPEC_GOTSMALLPIC28K:
|
|
||||||
case UNSPEC_GOTTINYPIC:
|
|
||||||
@@ -22976,6 +23143,18 @@ aarch64_estimated_poly_value (poly_int64 val)
|
|
||||||
return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Implement TARGET_MEDIUM_SYMBOL_P.
|
|
||||||
+ Return true if x is a symbol loaded by UNSPEC_LOAD_SYMBOL_MEDIUM. */
|
|
||||||
+bool
|
|
||||||
+aarch64_medium_symbol_p (rtx x)
|
|
||||||
+{
|
|
||||||
+ if (GET_CODE (x) != UNSPEC)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ return XINT (x, 1) == UNSPEC_LOAD_SYMBOL_MEDIUM;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
|
|
||||||
/* Return true for types that could be supported as SIMD return or
|
|
||||||
argument types. */
|
|
||||||
@@ -24015,6 +24194,9 @@ aarch64_libgcc_floating_mode_supported_p
|
|
||||||
#undef TARGET_ESTIMATED_POLY_VALUE
|
|
||||||
#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
|
|
||||||
|
|
||||||
+#undef TARGET_MEDIUM_SYMBOL_P
|
|
||||||
+#define TARGET_MEDIUM_SYMBOL_P aarch64_medium_symbol_p
|
|
||||||
+
|
|
||||||
#undef TARGET_ATTRIBUTE_TABLE
|
|
||||||
#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
|
|
||||||
|
|
||||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
|
||||||
index 51148846345..8fc92d13dcb 100644
|
|
||||||
--- a/gcc/config/aarch64/aarch64.h
|
|
||||||
+++ b/gcc/config/aarch64/aarch64.h
|
|
||||||
@@ -33,6 +33,10 @@
|
|
||||||
|
|
||||||
#define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas ()
|
|
||||||
|
|
||||||
+/* Default threshold 64-bit relocation data
|
|
||||||
+ with aarch64 medium memory model. */
|
|
||||||
+#define AARCH64_DEFAULT_LARGE_DATA_THRESHOLD 65536
|
|
||||||
+
|
|
||||||
/* Target machine storage layout. */
|
|
||||||
|
|
||||||
#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
|
|
||||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
|
||||||
index 58445dea941..ee80261f1ac 100644
|
|
||||||
--- a/gcc/config/aarch64/aarch64.md
|
|
||||||
+++ b/gcc/config/aarch64/aarch64.md
|
|
||||||
@@ -224,6 +224,9 @@
|
|
||||||
UNSPEC_RSQRTS
|
|
||||||
UNSPEC_NZCV
|
|
||||||
UNSPEC_XPACLRI
|
|
||||||
+ UNSPEC_GOTMEDIUMPIC4G
|
|
||||||
+ UNSPEC_GET_GOTOFF
|
|
||||||
+ UNSPEC_LOAD_SYMBOL_MEDIUM
|
|
||||||
UNSPEC_LD1_SVE
|
|
||||||
UNSPEC_ST1_SVE
|
|
||||||
UNSPEC_LDNT1_SVE
|
|
||||||
@@ -6792,6 +6795,39 @@
|
|
||||||
[(set_attr "type" "load_4")]
|
|
||||||
)
|
|
||||||
|
|
||||||
+(define_insn "get_gotoff_<mode>"
|
|
||||||
+ [(set (match_operand:GPI 0 "register_operand" "=r")
|
|
||||||
+ (unspec:GPI [(match_operand 1 "aarch64_valid_symref" "S")]
|
|
||||||
+ UNSPEC_GET_GOTOFF))]
|
|
||||||
+ ""
|
|
||||||
+ "movz\\t%x0, :gotoff_g1:%A1\;movk\\t%x0, :gotoff_g0_nc:%A1"
|
|
||||||
+ [(set_attr "type" "multiple")
|
|
||||||
+ (set_attr "length" "8")]
|
|
||||||
+)
|
|
||||||
+
|
|
||||||
+(define_insn "ldr_got_medium_<mode>"
|
|
||||||
+ [(set (match_operand:PTR 0 "register_operand" "=r")
|
|
||||||
+ (unspec:PTR [(mem:PTR (lo_sum:PTR
|
|
||||||
+ (match_operand:PTR 1 "register_operand" "r")
|
|
||||||
+ (match_operand:PTR 2 "register_operand" "r")))]
|
|
||||||
+ UNSPEC_GOTMEDIUMPIC4G))]
|
|
||||||
+ ""
|
|
||||||
+ "ldr\\t%0, [%1, %2]"
|
|
||||||
+ [(set_attr "type" "load_4")]
|
|
||||||
+)
|
|
||||||
+
|
|
||||||
+(define_insn "ldr_got_medium_sidi"
|
|
||||||
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
|
||||||
+ (zero_extend:DI
|
|
||||||
+ (unspec:SI [(mem:SI (lo_sum:DI
|
|
||||||
+ (match_operand:DI 1 "register_operand" "r")
|
|
||||||
+ (match_operand:DI 2 "register_operand" "r")))]
|
|
||||||
+ UNSPEC_GOTMEDIUMPIC4G)))]
|
|
||||||
+ "TARGET_ILP32"
|
|
||||||
+ "ldr\\t%0, [%1, %2]"
|
|
||||||
+ [(set_attr "type" "load_4")]
|
|
||||||
+)
|
|
||||||
+
|
|
||||||
(define_insn "ldr_got_small_28k_<mode>"
|
|
||||||
[(set (match_operand:PTR 0 "register_operand" "=r")
|
|
||||||
(unspec:PTR [(mem:PTR (lo_sum:PTR
|
|
||||||
@@ -6955,6 +6991,23 @@
|
|
||||||
(set_attr "length" "12")]
|
|
||||||
)
|
|
||||||
|
|
||||||
+(define_insn "load_symbol_medium_<mode>"
|
|
||||||
+ [(set (match_operand:GPI 0 "register_operand" "=r")
|
|
||||||
+ (unspec:GPI [(match_operand 2 "aarch64_valid_symref" "S")]
|
|
||||||
+ UNSPEC_LOAD_SYMBOL_MEDIUM))
|
|
||||||
+ (clobber (match_operand:GPI 1 "register_operand" "=r"))]
|
|
||||||
+ ""
|
|
||||||
+ "movz\\t%x0, :prel_g3:%A2\;\\
|
|
||||||
+movk\\t%x0, :prel_g2_nc:%A2\;\\
|
|
||||||
+movk\\t%x0, :prel_g1_nc:%A2\;\\
|
|
||||||
+movk\\t%x0, :prel_g0_nc:%A2\;\\
|
|
||||||
+adr\\t%x1, .\;\\
|
|
||||||
+sub\\t%x1, %x1, 0x4\;\\
|
|
||||||
+add\\t%x0, %x0, %x1"
|
|
||||||
+ [(set_attr "type" "multiple")
|
|
||||||
+ (set_attr "length" "28")]
|
|
||||||
+)
|
|
||||||
+
|
|
||||||
(define_expand "tlsdesc_small_<mode>"
|
|
||||||
[(unspec:PTR [(match_operand 0 "aarch64_valid_symref")] UNSPEC_TLSDESC)]
|
|
||||||
"TARGET_TLS_DESC"
|
|
||||||
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
|
||||||
index 4539156d6f4..bb888461ab0 100644
|
|
||||||
--- a/gcc/config/aarch64/aarch64.opt
|
|
||||||
+++ b/gcc/config/aarch64/aarch64.opt
|
|
||||||
@@ -27,6 +27,10 @@ enum aarch64_processor explicit_tune_core = aarch64_none
|
|
||||||
TargetVariable
|
|
||||||
enum aarch64_arch explicit_arch = aarch64_no_arch
|
|
||||||
|
|
||||||
+;; -mlarge-data-threshold=
|
|
||||||
+TargetSave
|
|
||||||
+int x_aarch64_data_threshold
|
|
||||||
+
|
|
||||||
TargetSave
|
|
||||||
const char *x_aarch64_override_tune_string
|
|
||||||
|
|
||||||
@@ -60,9 +64,16 @@ Enum(cmodel) String(tiny) Value(AARCH64_CMODEL_TINY)
|
|
||||||
EnumValue
|
|
||||||
Enum(cmodel) String(small) Value(AARCH64_CMODEL_SMALL)
|
|
||||||
|
|
||||||
+EnumValue
|
|
||||||
+Enum(cmodel) String(medium) Value(AARCH64_CMODEL_MEDIUM)
|
|
||||||
+
|
|
||||||
EnumValue
|
|
||||||
Enum(cmodel) String(large) Value(AARCH64_CMODEL_LARGE)
|
|
||||||
|
|
||||||
+mlarge-data-threshold=
|
|
||||||
+Target RejectNegative Joined UInteger Var(aarch64_data_threshold) Init(AARCH64_DEFAULT_LARGE_DATA_THRESHOLD)
|
|
||||||
+-mlarge-data-threshold=<number> Data greater than given threshold will be assume that it should be relocated using 64-bit relocation.
|
|
||||||
+
|
|
||||||
mbig-endian
|
|
||||||
Target Report RejectNegative Mask(BIG_END)
|
|
||||||
Assume target CPU is configured as big endian.
|
|
||||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
|
||||||
index fcb7245e95c..0508fce57a7 100644
|
|
||||||
--- a/gcc/doc/tm.texi
|
|
||||||
+++ b/gcc/doc/tm.texi
|
|
||||||
@@ -6983,6 +6983,11 @@ things like cost calculations or profiling frequencies. The default
|
|
||||||
implementation returns the lowest possible value of @var{val}.
|
|
||||||
@end deftypefn
|
|
||||||
|
|
||||||
+@deftypefn {Target Hook} bool TARGET_MEDIUM_SYMBOL_P (rtx @var{x})
|
|
||||||
+Return true if the input rtx is a symbol loaded by kunpeng medium code
|
|
||||||
+model.
|
|
||||||
+@end deftypefn
|
|
||||||
+
|
|
||||||
@node Scheduling
|
|
||||||
@section Adjusting the Instruction Scheduler
|
|
||||||
|
|
||||||
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
|
||||||
index c17209daa51..3b70ea4841a 100644
|
|
||||||
--- a/gcc/doc/tm.texi.in
|
|
||||||
+++ b/gcc/doc/tm.texi.in
|
|
||||||
@@ -4701,6 +4701,8 @@ Define this macro if a non-short-circuit operation produced by
|
|
||||||
|
|
||||||
@hook TARGET_ESTIMATED_POLY_VALUE
|
|
||||||
|
|
||||||
+@hook TARGET_MEDIUM_SYMBOL_P
|
|
||||||
+
|
|
||||||
@node Scheduling
|
|
||||||
@section Adjusting the Instruction Scheduler
|
|
||||||
|
|
||||||
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
|
|
||||||
index d61cadb5208..bad8208cd22 100644
|
|
||||||
--- a/gcc/dwarf2out.c
|
|
||||||
+++ b/gcc/dwarf2out.c
|
|
||||||
@@ -14501,14 +14501,17 @@ const_ok_for_output_1 (rtx rtl)
|
|
||||||
/* If delegitimize_address couldn't do anything with the UNSPEC, and
|
|
||||||
the target hook doesn't explicitly allow it in debug info, assume
|
|
||||||
we can't express it in the debug info. */
|
|
||||||
- /* Don't complain about TLS UNSPECs, those are just too hard to
|
|
||||||
- delegitimize. Note this could be a non-decl SYMBOL_REF such as
|
|
||||||
- one in a constant pool entry, so testing SYMBOL_REF_TLS_MODEL
|
|
||||||
- rather than DECL_THREAD_LOCAL_P is not just an optimization. */
|
|
||||||
+ /* Don't complain about TLS UNSPECs and aarch64 medium code model
|
|
||||||
+ related UNSPECs, those are just too hard to delegitimize. Note
|
|
||||||
+ this could be a non-decl SYMBOL_REF such as one in a constant
|
|
||||||
+ pool entry, so testing SYMBOL_REF_TLS_MODEL rather than
|
|
||||||
+ DECL_THREAD_LOCAL_P is not just an optimization. */
|
|
||||||
if (flag_checking
|
|
||||||
&& (XVECLEN (rtl, 0) == 0
|
|
||||||
|| GET_CODE (XVECEXP (rtl, 0, 0)) != SYMBOL_REF
|
|
||||||
- || SYMBOL_REF_TLS_MODEL (XVECEXP (rtl, 0, 0)) == TLS_MODEL_NONE))
|
|
||||||
+ || (!targetm.medium_symbol_p (rtl)
|
|
||||||
+ && SYMBOL_REF_TLS_MODEL (XVECEXP (rtl, 0, 0))
|
|
||||||
+ == TLS_MODEL_NONE)))
|
|
||||||
inform (current_function_decl
|
|
||||||
? DECL_SOURCE_LOCATION (current_function_decl)
|
|
||||||
: UNKNOWN_LOCATION,
|
|
||||||
diff --git a/gcc/target.def b/gcc/target.def
|
|
||||||
index f5a6d507e91..2020564118b 100644
|
|
||||||
--- a/gcc/target.def
|
|
||||||
+++ b/gcc/target.def
|
|
||||||
@@ -3869,6 +3869,13 @@ implementation returns the lowest possible value of @var{val}.",
|
|
||||||
HOST_WIDE_INT, (poly_int64 val),
|
|
||||||
default_estimated_poly_value)
|
|
||||||
|
|
||||||
+DEFHOOK
|
|
||||||
+(medium_symbol_p,
|
|
||||||
+ "Return true if the input rtx is a symbol loaded by kunpeng medium code\n\
|
|
||||||
+model.",
|
|
||||||
+ bool, (rtx x),
|
|
||||||
+ default_medium_symbol_p)
|
|
||||||
+
|
|
||||||
/* Permit speculative instructions in delay slots during delayed-branch
|
|
||||||
scheduling. */
|
|
||||||
DEFHOOK
|
|
||||||
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
|
|
||||||
index 7cb04f30bdb..43a9f0cdf5b 100644
|
|
||||||
--- a/gcc/targhooks.c
|
|
||||||
+++ b/gcc/targhooks.c
|
|
||||||
@@ -1708,6 +1708,13 @@ default_estimated_poly_value (poly_int64 x)
|
|
||||||
return x.coeffs[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* The default implementation of TARGET_MEDIUM_SYMBOL_P. */
|
|
||||||
+bool
|
|
||||||
+default_medium_symbol_p (rtx x ATTRIBUTE_UNUSED)
|
|
||||||
+{
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* For hooks which use the MOVE_RATIO macro, this gives the legacy default
|
|
||||||
behavior. SPEED_P is true if we are compiling for speed. */
|
|
||||||
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,296 +0,0 @@
|
|||||||
From 5392e41dcb7d58a80f2864b3c3f600c538fba799 Mon Sep 17 00:00:00 2001
|
|
||||||
From: huangxiaoquan <huangxiaoquan1@huawei.com>
|
|
||||||
Date: Wed, 4 Aug 2021 14:21:08 +0800
|
|
||||||
Subject: [PATCH 19/22] [StructReorderFields] Fix bugs and improve mechanism
|
|
||||||
|
|
||||||
Fix bugs and improve mechanism:
|
|
||||||
|
|
||||||
1. Fixed a bug in multi-layer pointer recording.
|
|
||||||
2. Use new to initialize allocated memory in symbol-summary.h.
|
|
||||||
3. Only enable optimizations in C language.
|
|
||||||
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
index 384aa81583c..fe364f742d8 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
@@ -173,31 +173,30 @@ lang_c_p (void)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
- if (strcmp (language_string, "GNU GIMPLE") == 0)
|
|
||||||
+ if (lang_GNU_C ())
|
|
||||||
+ {
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check
|
|
||||||
{
|
|
||||||
unsigned i = 0;
|
|
||||||
- tree t = NULL;
|
|
||||||
- const char *unit_string = NULL;
|
|
||||||
+ tree t = NULL_TREE;
|
|
||||||
|
|
||||||
FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
|
|
||||||
{
|
|
||||||
- unit_string = TRANSLATION_UNIT_LANGUAGE (t);
|
|
||||||
- if (!unit_string
|
|
||||||
- || (strncmp (unit_string, "GNU C", 5) != 0)
|
|
||||||
- || (!ISDIGIT (unit_string[5])))
|
|
||||||
+ language_string = TRANSLATION_UNIT_LANGUAGE (t);
|
|
||||||
+ if (language_string == NULL
|
|
||||||
+ || strncmp (language_string, "GNU C", 5)
|
|
||||||
+ || (language_string[5] != '\0'
|
|
||||||
+ && !(ISDIGIT (language_string[5]))))
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
- else if (strncmp (language_string, "GNU C", 5) == 0
|
|
||||||
- && ISDIGIT (language_string[5]))
|
|
||||||
- {
|
|
||||||
- return true;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
return false;
|
|
||||||
+}
|
|
||||||
|
|
||||||
/* Get the number of pointer layers. */
|
|
||||||
|
|
||||||
@@ -1262,7 +1261,7 @@ public:
|
|
||||||
void check_uses (srdecl *decl, vec<srdecl*>&);
|
|
||||||
void check_use (srdecl *decl, gimple *stmt, vec<srdecl*>&);
|
|
||||||
void check_type_and_push (tree newdecl, srdecl *decl,
|
|
||||||
- vec<srdecl*> &worklist, gimple *stmt);
|
|
||||||
+ vec<srdecl*> &worklist, gimple *stmt);
|
|
||||||
void check_other_side (srdecl *decl, tree other, gimple *stmt, vec<srdecl*> &worklist);
|
|
||||||
void check_ptr_layers (tree a_expr, tree b_expr, gimple* stmt);
|
|
||||||
|
|
||||||
@@ -3010,11 +3009,9 @@ ipa_struct_reorg::find_var (tree expr, gimple *stmt)
|
|
||||||
{
|
|
||||||
tree r = TREE_OPERAND (expr, 0);
|
|
||||||
tree orig_type = TREE_TYPE (expr);
|
|
||||||
- if (handled_component_p (r)
|
|
||||||
- || TREE_CODE (r) == MEM_REF)
|
|
||||||
+ if (handled_component_p (r) || TREE_CODE (r) == MEM_REF)
|
|
||||||
{
|
|
||||||
- while (handled_component_p (r)
|
|
||||||
- || TREE_CODE (r) == MEM_REF)
|
|
||||||
+ while (handled_component_p (r) || TREE_CODE (r) == MEM_REF)
|
|
||||||
{
|
|
||||||
if (TREE_CODE (r) == VIEW_CONVERT_EXPR)
|
|
||||||
{
|
|
||||||
@@ -3092,10 +3089,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
|
||||||
srdecl *d = find_decl (lhs);
|
|
||||||
if (!d && t)
|
|
||||||
{
|
|
||||||
- current_function->record_decl (t, lhs, -1);
|
|
||||||
+ current_function->record_decl (t, lhs, -1,
|
|
||||||
+ isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL);
|
|
||||||
tree var = SSA_NAME_VAR (lhs);
|
|
||||||
if (var && VOID_POINTER_P (TREE_TYPE (var)))
|
|
||||||
- current_function->record_decl (t, var, -1);
|
|
||||||
+ current_function->record_decl (t, var, -1,
|
|
||||||
+ isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* void * _1; struct arc * _2;
|
|
||||||
@@ -3108,10 +3107,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
|
||||||
srdecl *d = find_decl (rhs);
|
|
||||||
if (!d && t)
|
|
||||||
{
|
|
||||||
- current_function->record_decl (t, rhs, -1);
|
|
||||||
+ current_function->record_decl (t, rhs, -1,
|
|
||||||
+ isptrptr (TREE_TYPE (lhs)) ? TREE_TYPE (lhs) : NULL);
|
|
||||||
tree var = SSA_NAME_VAR (rhs);
|
|
||||||
if (var && VOID_POINTER_P (TREE_TYPE (var)))
|
|
||||||
- current_function->record_decl (t, var, -1);
|
|
||||||
+ current_function->record_decl (t, var, -1,
|
|
||||||
+ isptrptr (TREE_TYPE (lhs)) ? TREE_TYPE (lhs) : NULL);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -3529,7 +3530,7 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple
|
|
||||||
{
|
|
||||||
/* The type is other, the declaration is side. */
|
|
||||||
current_function->record_decl (type, side, -1,
|
|
||||||
- find_decl (other) ? find_decl (other)->orig_type : NULL);
|
|
||||||
+ isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
@@ -5111,31 +5112,23 @@ ipa_struct_reorg::propagate_escape_via_original (void)
|
|
||||||
{
|
|
||||||
for (unsigned i = 0; i < types.length (); i++)
|
|
||||||
{
|
|
||||||
- for (unsigned j = 0; j < types[i]->fields.length (); j++)
|
|
||||||
- {
|
|
||||||
- srfield *field = types[i]->fields[j];
|
|
||||||
- if (handled_type (field->fieldtype) && field->type)
|
|
||||||
- {
|
|
||||||
- for (unsigned k = 0; k < types.length (); k++)
|
|
||||||
- {
|
|
||||||
- const char *type1 = get_type_name (field->type->type);
|
|
||||||
- const char *type2 = get_type_name (types[k]->type);
|
|
||||||
- if (type1 == NULL || type2 == NULL)
|
|
||||||
- {
|
|
||||||
- continue;
|
|
||||||
- }
|
|
||||||
- if (type1 == type2 && types[k]->has_escaped ())
|
|
||||||
- {
|
|
||||||
- if (!field->type->has_escaped ())
|
|
||||||
- {
|
|
||||||
- field->type->mark_escape (
|
|
||||||
- escape_via_orig_escape, NULL);
|
|
||||||
- }
|
|
||||||
- break;
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
+ for (unsigned j = 0; j < types.length (); j++)
|
|
||||||
+ {
|
|
||||||
+ const char *type1 = get_type_name (types[i]->type);
|
|
||||||
+ const char *type2 = get_type_name (types[j]->type);
|
|
||||||
+ if (type1 == NULL || type2 == NULL)
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ if (type1 == type2 && types[j]->has_escaped ())
|
|
||||||
+ {
|
|
||||||
+ if (!types[i]->has_escaped ())
|
|
||||||
+ {
|
|
||||||
+ types[i]->mark_escape (escape_via_orig_escape, NULL);
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -6683,7 +6676,10 @@ pass_ipa_reorder_fields::gate (function *)
|
|
||||||
&& flag_ipa_reorder_fields
|
|
||||||
/* Don't bother doing anything if the program has errors. */
|
|
||||||
&& !seen_error ()
|
|
||||||
- && flag_lto_partition == LTO_PARTITION_ONE);
|
|
||||||
+ && flag_lto_partition == LTO_PARTITION_ONE
|
|
||||||
+ /* Only enable struct optimizations in C since other
|
|
||||||
+ languages' grammar forbid. */
|
|
||||||
+ && lang_c_p ());
|
|
||||||
}
|
|
||||||
|
|
||||||
} // anon namespace
|
|
||||||
diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
|
|
||||||
index a223b4dadea..ddf5e35776e 100644
|
|
||||||
--- a/gcc/symbol-summary.h
|
|
||||||
+++ b/gcc/symbol-summary.h
|
|
||||||
@@ -61,10 +61,9 @@ protected:
|
|
||||||
{
|
|
||||||
/* In structure optimizatons, we call new to ensure that
|
|
||||||
the allocated memory is initialized to 0. */
|
|
||||||
- if (flag_ipa_struct_reorg)
|
|
||||||
+ if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
|
|
||||||
return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
|
|
||||||
: new T ();
|
|
||||||
-
|
|
||||||
/* Call gcc_internal_because we do not want to call finalizer for
|
|
||||||
a type T. We call dtor explicitly. */
|
|
||||||
return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
|
|
||||||
@@ -78,7 +77,7 @@ protected:
|
|
||||||
ggc_delete (item);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
- if (flag_ipa_struct_reorg)
|
|
||||||
+ if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
|
|
||||||
delete item;
|
|
||||||
else
|
|
||||||
m_allocator.remove (item);
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..23765fc5615
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
|
|
||||||
@@ -0,0 +1,30 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+typedef struct T_HASH_ENTRY
|
|
||||||
+{
|
|
||||||
+ unsigned int hash;
|
|
||||||
+ unsigned int klen;
|
|
||||||
+ char *key;
|
|
||||||
+} iHashEntry;
|
|
||||||
+
|
|
||||||
+typedef struct T_HASH
|
|
||||||
+{
|
|
||||||
+ unsigned int size;
|
|
||||||
+ unsigned int fill;
|
|
||||||
+ unsigned int keys;
|
|
||||||
+
|
|
||||||
+ iHashEntry **array;
|
|
||||||
+} uHash;
|
|
||||||
+
|
|
||||||
+uHash *retval;
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+main() {
|
|
||||||
+ retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
|
|
||||||
index 8d687c58b30..54e737ee856 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
|
|
||||||
@@ -1,6 +1,6 @@
|
|
||||||
-// 针对
|
|
||||||
+// For testing:
|
|
||||||
/*
|
|
||||||
-Compile options: /home/hxq/hcc_gcc9.3.0_org_debug/bin/gcc -O3 -g
|
|
||||||
+Compile options: gcc -O3 -g
|
|
||||||
-flto -flto-partition=one -fipa-reorder-fields -fipa-struct-reorg
|
|
||||||
-v -save-temps -fdump-ipa-all-details test.c -o test
|
|
||||||
|
|
||||||
@@ -94,12 +94,11 @@ switch_arcs(arc_t** deleted_arcs, arc_t* arcnew)
|
|
||||||
copy = *test_arc;
|
|
||||||
count++;
|
|
||||||
*test_arc = arcnew[0];
|
|
||||||
- replace_weaker_arc(arcnew, copy.tail, copy.head);
|
|
||||||
+ replace_weaker_arc(arcnew, NULL, NULL);
|
|
||||||
}
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
-
|
|
||||||
int
|
|
||||||
main ()
|
|
||||||
{
|
|
||||||
diff --git a/gcc/tree.c b/gcc/tree.c
|
|
||||||
index 5c1374d6fb1..89fa469c359 100644
|
|
||||||
--- a/gcc/tree.c
|
|
||||||
+++ b/gcc/tree.c
|
|
||||||
@@ -5219,7 +5219,7 @@ fld_simplified_type_name (tree type)
|
|
||||||
/* Simplify type will cause that struct A and struct A within
|
|
||||||
struct B are different type pointers, so skip it in structure
|
|
||||||
optimizations. */
|
|
||||||
- if (flag_ipa_struct_reorg)
|
|
||||||
+ if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
|
|
||||||
return TYPE_NAME (type);
|
|
||||||
|
|
||||||
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
|
|
||||||
@@ -5463,7 +5463,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
|
|
||||||
/* Simplify type will cause that struct A and struct A within
|
|
||||||
struct B are different type pointers, so skip it in structure
|
|
||||||
optimizations. */
|
|
||||||
- if (flag_ipa_struct_reorg)
|
|
||||||
+ if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
|
|
||||||
return t;
|
|
||||||
if (POINTER_TYPE_P (t))
|
|
||||||
return fld_incomplete_type_of (t, fld);
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,128 +0,0 @@
|
|||||||
From 633dd654347b6146d6e94d6434e7028617019134 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhanghaijian <z.zhanghaijian@huawei.com>
|
|
||||||
Date: Mon, 9 Aug 2021 20:18:26 +0800
|
|
||||||
Subject: [PATCH 20/22] [Backport]vect: Fix an ICE in
|
|
||||||
vect_recog_mask_conversion_pattern
|
|
||||||
|
|
||||||
Reference:https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=91d80cf4bd2827dd9c40fe6a7c719c909d79083d
|
|
||||||
|
|
||||||
When processing the cond expression, vect_recog_mask_conversion_pattern
|
|
||||||
doesn't consider the situation that two operands of rhs1 are different
|
|
||||||
vectypes, leading to a vect ICE. This patch adds the identification and
|
|
||||||
handling of the situation to fix the problem.
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.target/aarch64/pr96757.c b/gcc/testsuite/gcc.target/aarch64/pr96757.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..122e39dca0e
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.target/aarch64/pr96757.c
|
|
||||||
@@ -0,0 +1,23 @@
|
|
||||||
+/* PR target/96757 */
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O3" } */
|
|
||||||
+
|
|
||||||
+short
|
|
||||||
+fun1(short i, short j)
|
|
||||||
+{
|
|
||||||
+ return i * j;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+fun(int a, int b, int c)
|
|
||||||
+{
|
|
||||||
+ int *v, z, k, m;
|
|
||||||
+ short f, d;
|
|
||||||
+ for (int i=0; i<c; i++)
|
|
||||||
+ {
|
|
||||||
+ f= 4 <= d;
|
|
||||||
+ k= a > m;
|
|
||||||
+ z = f > k;
|
|
||||||
+ *v += fun1(z,b);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
|
|
||||||
index 310165084a3..84d7ddb170f 100644
|
|
||||||
--- a/gcc/tree-vect-patterns.c
|
|
||||||
+++ b/gcc/tree-vect-patterns.c
|
|
||||||
@@ -4237,6 +4237,8 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
||||||
tree vectype1, vectype2;
|
|
||||||
stmt_vec_info pattern_stmt_info;
|
|
||||||
vec_info *vinfo = stmt_vinfo->vinfo;
|
|
||||||
+ tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
|
|
||||||
+ tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
|
|
||||||
|
|
||||||
/* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
|
|
||||||
if (is_gimple_call (last_stmt)
|
|
||||||
@@ -4336,9 +4338,37 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
||||||
|
|
||||||
it is better for b1 and b2 to use the mask type associated
|
|
||||||
with int elements rather bool (byte) elements. */
|
|
||||||
- rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
|
|
||||||
- if (!rhs1_type)
|
|
||||||
- rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
|
|
||||||
+ rhs1_op0 = TREE_OPERAND (rhs1, 0);
|
|
||||||
+ rhs1_op1 = TREE_OPERAND (rhs1, 1);
|
|
||||||
+ if (!rhs1_op0 || !rhs1_op1)
|
|
||||||
+ return NULL;
|
|
||||||
+ rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
|
|
||||||
+ rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
|
|
||||||
+
|
|
||||||
+ if (!rhs1_op0_type)
|
|
||||||
+ rhs1_type = TREE_TYPE (rhs1_op0);
|
|
||||||
+ else if (!rhs1_op1_type)
|
|
||||||
+ rhs1_type = TREE_TYPE (rhs1_op1);
|
|
||||||
+ else if (TYPE_PRECISION (rhs1_op0_type)
|
|
||||||
+ != TYPE_PRECISION (rhs1_op1_type))
|
|
||||||
+ {
|
|
||||||
+ int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
|
|
||||||
+ - (int) TYPE_PRECISION (TREE_TYPE (lhs));
|
|
||||||
+ int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
|
|
||||||
+ - (int) TYPE_PRECISION (TREE_TYPE (lhs));
|
|
||||||
+ if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
|
|
||||||
+ {
|
|
||||||
+ if (abs (tmp0) > abs (tmp1))
|
|
||||||
+ rhs1_type = rhs1_op1_type;
|
|
||||||
+ else
|
|
||||||
+ rhs1_type = rhs1_op0_type;
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ rhs1_type = build_nonstandard_integer_type
|
|
||||||
+ (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ rhs1_type = rhs1_op0_type;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return NULL;
|
|
||||||
@@ -4356,8 +4386,8 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
||||||
name from the outset. */
|
|
||||||
if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
|
|
||||||
TYPE_VECTOR_SUBPARTS (vectype2))
|
|
||||||
- && (TREE_CODE (rhs1) == SSA_NAME
|
|
||||||
- || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
|
|
||||||
+ && !rhs1_op0_type
|
|
||||||
+ && !rhs1_op1_type)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
/* If rhs1 is invariant and we can promote it leave the COND_EXPR
|
|
||||||
@@ -4390,7 +4420,16 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
||||||
if (TREE_CODE (rhs1) != SSA_NAME)
|
|
||||||
{
|
|
||||||
tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
|
|
||||||
- pattern_stmt = gimple_build_assign (tmp, rhs1);
|
|
||||||
+ if (rhs1_op0_type
|
|
||||||
+ && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
|
|
||||||
+ rhs1_op0 = build_mask_conversion (rhs1_op0,
|
|
||||||
+ vectype2, stmt_vinfo);
|
|
||||||
+ if (rhs1_op1_type
|
|
||||||
+ && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
|
|
||||||
+ rhs1_op1 = build_mask_conversion (rhs1_op1,
|
|
||||||
+ vectype2, stmt_vinfo);
|
|
||||||
+ pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
|
|
||||||
+ rhs1_op0, rhs1_op1);
|
|
||||||
rhs1 = tmp;
|
|
||||||
append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2,
|
|
||||||
rhs1_type);
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,23 +0,0 @@
|
|||||||
From 023c92ac45b727768599a95f7da748158a270753 Mon Sep 17 00:00:00 2001
|
|
||||||
From: bule <bule1@huawei.com>
|
|
||||||
Date: Mon, 16 Aug 2021 11:20:35 +0800
|
|
||||||
Subject: [PATCH 21/22] [mcmodel] Bugfix for mcmodel=medium on x86
|
|
||||||
|
|
||||||
Declare default_medium_symbol_p in targhooks.h, whose absence would otherwise
|
|
||||||
cause the build failure on x86 platform.
|
|
||||||
|
|
||||||
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
|
|
||||||
index 72f3064e8f8..95c136edc79 100644
|
|
||||||
--- a/gcc/targhooks.h
|
|
||||||
+++ b/gcc/targhooks.h
|
|
||||||
@@ -218,6 +218,7 @@ extern int default_register_move_cost (machine_mode, reg_class_t,
|
|
||||||
reg_class_t);
|
|
||||||
extern bool default_slow_unaligned_access (machine_mode, unsigned int);
|
|
||||||
extern HOST_WIDE_INT default_estimated_poly_value (poly_int64);
|
|
||||||
+extern bool default_medium_symbol_p (rtx);
|
|
||||||
|
|
||||||
extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
|
|
||||||
unsigned int,
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,167 +0,0 @@
|
|||||||
From 1c69390a01d3bf7226fce2a670a0f71731744b04 Mon Sep 17 00:00:00 2001
|
|
||||||
From: huangxiaoquan <huangxiaoquan1@huawei.com>
|
|
||||||
Date: Tue, 17 Aug 2021 15:50:31 +0800
|
|
||||||
Subject: [PATCH 22/22] [StructReorderFields] Fix pointer layer check bug
|
|
||||||
|
|
||||||
In the pointer layer check, the NULL pointer check is added
|
|
||||||
for the escape type mark.
|
|
||||||
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
index fe364f742d8..85986ce5803 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
@@ -2235,9 +2235,9 @@ check_record_ptr_usage (gimple *use_stmt, tree ¤t_node,
|
|
||||||
}
|
|
||||||
|
|
||||||
bool res = true;
|
|
||||||
- /* MEM[(long int *)a_1] = _57; (record).
|
|
||||||
+ /* MEM[(long int *)a_1] = _1; (record).
|
|
||||||
If lhs is ssa_name, lhs cannot be the current node.
|
|
||||||
- _283 = _282->flow; (No record). */
|
|
||||||
+ _2 = _1->flow; (No record). */
|
|
||||||
if (TREE_CODE (rhs1) == SSA_NAME)
|
|
||||||
{
|
|
||||||
tree tmp = (rhs1 != current_node) ? rhs1 : lhs;
|
|
||||||
@@ -2285,13 +2285,13 @@ check_record_single_node (gimple *use_stmt, tree ¤t_node,
|
|
||||||
bool res = true;
|
|
||||||
if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == MEM_REF)
|
|
||||||
{
|
|
||||||
- /* _257 = MEM[(struct arc_t * *)_17]. */
|
|
||||||
+ /* add such as: _2 = MEM[(struct arc_t * *)_1]. */
|
|
||||||
res = add_node (lhs, *ptr_layers.get (current_node) - 1,
|
|
||||||
ptr_layers, ssa_name_stack);
|
|
||||||
}
|
|
||||||
else if (TREE_CODE (lhs) == MEM_REF && TREE_CODE (rhs1) == SSA_NAME)
|
|
||||||
{
|
|
||||||
- /* MEM[(long int *)a_1] = _57. */
|
|
||||||
+ /* add such as: MEM[(long int *)a_1] = _1. */
|
|
||||||
if (rhs1 == current_node)
|
|
||||||
{
|
|
||||||
res = add_node (TREE_OPERAND (lhs, 0),
|
|
||||||
@@ -3097,7 +3097,8 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
|
||||||
isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
- /* void * _1; struct arc * _2;
|
|
||||||
+ /* find void ssa_name such as:
|
|
||||||
+ void * _1; struct arc * _2;
|
|
||||||
_2 = _1 + _3; _1 = calloc (100, 40). */
|
|
||||||
if (TREE_CODE (rhs) == SSA_NAME
|
|
||||||
&& VOID_POINTER_P (TREE_TYPE (rhs))
|
|
||||||
@@ -3126,7 +3127,7 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
|
||||||
find_var (gimple_assign_rhs1 (stmt), stmt);
|
|
||||||
find_var (gimple_assign_rhs2 (stmt), stmt);
|
|
||||||
}
|
|
||||||
- /* _23 = _21 - old_arcs_12. */
|
|
||||||
+ /* find void ssa_name from stmt such as: _2 = _1 - old_arcs_1. */
|
|
||||||
else if ((current_mode == STRUCT_REORDER_FIELDS)
|
|
||||||
&& gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR
|
|
||||||
&& types_compatible_p (
|
|
||||||
@@ -3310,7 +3311,7 @@ trace_calculate_negate (gimple *size_def_stmt, tree *num, tree struct_size)
|
|
||||||
{
|
|
||||||
gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NEGATE_EXPR);
|
|
||||||
|
|
||||||
- /* _480 = -_479; _479 = _478 * 72. */
|
|
||||||
+ /* support NEGATE_EXPR trace: _3 = -_2; _2 = _1 * 72. */
|
|
||||||
tree num1 = NULL_TREE;
|
|
||||||
tree arg0 = gimple_assign_rhs1 (size_def_stmt);
|
|
||||||
if (!is_result_of_mult (arg0, &num1, struct_size) || num1 == NULL_TREE)
|
|
||||||
@@ -3329,7 +3330,8 @@ trace_calculate_diff (gimple *size_def_stmt, tree *num)
|
|
||||||
{
|
|
||||||
gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NOP_EXPR);
|
|
||||||
|
|
||||||
- /* _25 = (long unsigned int) _23; _23 = _21 - old_arcs_12. */
|
|
||||||
+ /* support POINTER_DIFF_EXPR trace:
|
|
||||||
+ _3 = (long unsigned int) _2; _2 = _1 - old_arcs_1. */
|
|
||||||
tree arg = gimple_assign_rhs1 (size_def_stmt);
|
|
||||||
size_def_stmt = SSA_NAME_DEF_STMT (arg);
|
|
||||||
if (size_def_stmt && is_gimple_assign (size_def_stmt)
|
|
||||||
@@ -3811,8 +3813,8 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect,
|
|
||||||
release INTEGER_TYPE cast to struct pointer.
|
|
||||||
(If t has escpaed above, then directly returns
|
|
||||||
and doesn't mark escape follow.). */
|
|
||||||
- /* _607 = MEM[(struct arc_t * *)pl_100].
|
|
||||||
- then base pl_100:ssa_name - pointer_type - integer_type. */
|
|
||||||
+ /* _1 = MEM[(struct arc_t * *)a_1].
|
|
||||||
+ then base a_1: ssa_name - pointer_type - integer_type. */
|
|
||||||
if (current_mode == STRUCT_REORDER_FIELDS)
|
|
||||||
{
|
|
||||||
bool is_int_ptr = POINTER_TYPE_P (TREE_TYPE (base))
|
|
||||||
@@ -4520,8 +4522,15 @@ ipa_struct_reorg::check_ptr_layers (tree a_expr, tree b_expr, gimple* stmt)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
- a->type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
- b->type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
+
|
|
||||||
+ if (a)
|
|
||||||
+ {
|
|
||||||
+ a->type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
+ }
|
|
||||||
+ if (b)
|
|
||||||
+ {
|
|
||||||
+ b->type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
@@ -5649,9 +5658,9 @@ ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_
|
|
||||||
if (current_mode == STRUCT_REORDER_FIELDS)
|
|
||||||
{
|
|
||||||
/* Supports the MEM_REF offset.
|
|
||||||
- _1 = MEM[(struct arc *)ap_4 + 72B].flow;
|
|
||||||
- Old rewrite:_1 = ap.reorder.0_8->flow;
|
|
||||||
- New rewrite:_1
|
|
||||||
+ _1 = MEM[(struct arc *)ap_1 + 72B].flow;
|
|
||||||
+ Old rewrite: _1 = ap.reorder.0_8->flow;
|
|
||||||
+ New rewrite: _1
|
|
||||||
= MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow;
|
|
||||||
*/
|
|
||||||
HOST_WIDE_INT offset_tmp = 0;
|
|
||||||
@@ -6150,10 +6159,10 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
- /* Old rewrite:if (x_1 != 0B)
|
|
||||||
+ /* Old rewrite: if (x_1 != 0B)
|
|
||||||
-> _1 = x.reorder.0_1 != 0B; if (_1 != 1)
|
|
||||||
The logic is incorrect.
|
|
||||||
- New rewrite:if (x_1 != 0B)
|
|
||||||
+ New rewrite: if (x_1 != 0B)
|
|
||||||
-> if (x.reorder.0_1 != 0B);*/
|
|
||||||
for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++)
|
|
||||||
{
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..a5477dcc9be
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c
|
|
||||||
@@ -0,0 +1,24 @@
|
|
||||||
+/* check_ptr_layers bugfix.*/
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+struct {
|
|
||||||
+ char a;
|
|
||||||
+} **b = 0, *e = 0;
|
|
||||||
+long c;
|
|
||||||
+char d = 9;
|
|
||||||
+int f;
|
|
||||||
+
|
|
||||||
+void g()
|
|
||||||
+{
|
|
||||||
+ for (; f;)
|
|
||||||
+ if (c)
|
|
||||||
+ (*e).a++;
|
|
||||||
+ if (!d)
|
|
||||||
+ for (;;)
|
|
||||||
+ b &&c;
|
|
||||||
+}
|
|
||||||
+int
|
|
||||||
+main()
|
|
||||||
+{
|
|
||||||
+ g();
|
|
||||||
+}
|
|
||||||
+/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */
|
|
||||||
\ No newline at end of file
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,87 +0,0 @@
|
|||||||
From 83a35da4910fc7d8f29ced3e0ff8adddeb537731 Mon Sep 17 00:00:00 2001
|
|
||||||
From: huangxiaoquan <huangxiaoquan1@huawei.com>
|
|
||||||
Date: Fri, 27 Aug 2021 14:53:18 +0800
|
|
||||||
Subject: [PATCH 23/24] [StructReorderFields] Add pointer offset check
|
|
||||||
|
|
||||||
The pointer offset check is added for the expr that is dereferenced
|
|
||||||
in the memory, and escapes struct pointer offset operations involving
|
|
||||||
field order.
|
|
||||||
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
index 85986ce5803..b0d4fe80797 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
@@ -3876,6 +3876,17 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect,
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ /* Escape the operation of fetching field with pointer offset such as:
|
|
||||||
+ *(&(t->right)) = malloc (0); -> MEM[(struct node * *)_1 + 8B] = malloc (0);
|
|
||||||
+ */
|
|
||||||
+ if (current_mode != NORMAL
|
|
||||||
+ && (TREE_CODE (expr) == MEM_REF) && (offset != 0))
|
|
||||||
+ {
|
|
||||||
+ gcc_assert (can_escape);
|
|
||||||
+ t->mark_escape (escape_non_multiply_size, NULL);
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
if (wholeaccess (expr, base, accesstype, t))
|
|
||||||
{
|
|
||||||
field = NULL;
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
|
|
||||||
index 190b9418275..2ae46fb3112 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
|
|
||||||
@@ -84,4 +84,4 @@ main ()
|
|
||||||
return cnt;
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */
|
|
||||||
\ No newline at end of file
|
|
||||||
+/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..317aafa5f72
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
|
|
||||||
@@ -0,0 +1,34 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+struct node
|
|
||||||
+{
|
|
||||||
+ struct node *left, *right;
|
|
||||||
+ double a, b, c, d, e, f;
|
|
||||||
+}
|
|
||||||
+*a;
|
|
||||||
+int b, c;
|
|
||||||
+void
|
|
||||||
+CreateNode (struct node **p1)
|
|
||||||
+{
|
|
||||||
+ *p1 = calloc (10, sizeof (struct node));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+main ()
|
|
||||||
+{
|
|
||||||
+ a->left = 0;
|
|
||||||
+ struct node *t = a;
|
|
||||||
+ CreateNode (&t->right);
|
|
||||||
+
|
|
||||||
+ struct node p = *a;
|
|
||||||
+ b = 1;
|
|
||||||
+ if (p.left)
|
|
||||||
+ b = 0;
|
|
||||||
+ if (b)
|
|
||||||
+ printf (" Tree.\n");
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */
|
|
||||||
\ No newline at end of file
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,90 +0,0 @@
|
|||||||
From 0ee0f0ebeb098787cb9698887c237606b6ab10c6 Mon Sep 17 00:00:00 2001
|
|
||||||
From: huangxiaoquan <huangxiaoquan1@huawei.com>
|
|
||||||
Date: Wed, 1 Sep 2021 17:07:22 +0800
|
|
||||||
Subject: [PATCH 24/24] [StructReorderFields] Add lto and whole-program gate
|
|
||||||
|
|
||||||
Only enable struct reorder fields optimizations in lto or whole-program.
|
|
||||||
This prevents some .c files from being struct reorder fields optimized
|
|
||||||
while some of them are not optimized during project compilation.
|
|
||||||
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
index b0d4fe80797..2bf41e0d83b 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
@@ -6655,7 +6655,9 @@ pass_ipa_struct_reorg::gate (function *)
|
|
||||||
&& flag_lto_partition == LTO_PARTITION_ONE
|
|
||||||
/* Only enable struct optimizations in C since other
|
|
||||||
languages' grammar forbid. */
|
|
||||||
- && lang_c_p ());
|
|
||||||
+ && lang_c_p ()
|
|
||||||
+ /* Only enable struct optimizations in lto or whole_program. */
|
|
||||||
+ && (in_lto_p || flag_whole_program));
|
|
||||||
}
|
|
||||||
|
|
||||||
const pass_data pass_data_ipa_reorder_fields =
|
|
||||||
@@ -6699,7 +6701,9 @@ pass_ipa_reorder_fields::gate (function *)
|
|
||||||
&& flag_lto_partition == LTO_PARTITION_ONE
|
|
||||||
/* Only enable struct optimizations in C since other
|
|
||||||
languages' grammar forbid. */
|
|
||||||
- && lang_c_p ());
|
|
||||||
+ && lang_c_p ()
|
|
||||||
+ /* Only enable struct optimizations in lto or whole_program. */
|
|
||||||
+ && (in_lto_p || flag_whole_program));
|
|
||||||
}
|
|
||||||
|
|
||||||
} // anon namespace
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c
|
|
||||||
index 6565fe8dd63..23444fe8b0d 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
// { dg-do compile }
|
|
||||||
-// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" }
|
|
||||||
+// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all -fwhole-program" }
|
|
||||||
|
|
||||||
struct a
|
|
||||||
{
|
|
||||||
@@ -21,4 +21,10 @@ int g(void)
|
|
||||||
return b->t;
|
|
||||||
}
|
|
||||||
|
|
||||||
+int main()
|
|
||||||
+{
|
|
||||||
+ f ();
|
|
||||||
+ return g ();
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c
|
|
||||||
index 5864ad46fd3..2d1f95c9935 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
// { dg-do compile }
|
|
||||||
-// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" }
|
|
||||||
+// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all -fwhole-program" }
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
typedef struct {
|
|
||||||
@@ -10,7 +10,7 @@ typedef struct {
|
|
||||||
compile_stack_elt_t *stack;
|
|
||||||
unsigned size;
|
|
||||||
} compile_stack_type;
|
|
||||||
-void f (const char *p, const char *pend, int c)
|
|
||||||
+__attribute__((noinline)) void f (const char *p, const char *pend, int c)
|
|
||||||
{
|
|
||||||
compile_stack_type compile_stack;
|
|
||||||
while (p != pend)
|
|
||||||
@@ -20,4 +20,9 @@ void f (const char *p, const char *pend, int c)
|
|
||||||
* sizeof (compile_stack_elt_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
+int main()
|
|
||||||
+{
|
|
||||||
+ f (NULL, NULL, 1);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,669 +0,0 @@
|
|||||||
From 26e4ba63112f55c27b7dd3d5f8c4497ef9a2f459 Mon Sep 17 00:00:00 2001
|
|
||||||
From: benniaobufeijiushiji <linda7@huawei.com>
|
|
||||||
Date: Thu, 6 Jan 2022 15:33:29 +0800
|
|
||||||
Subject: [PATCH 25/28] [AutoPrefetch] Support cache misses profile
|
|
||||||
|
|
||||||
Add pass ex-afdo after pass afdo in auto-profile.c.
|
|
||||||
Add flag -fcache-misses-profile.
|
|
||||||
Read profile of different types of perf events and build maps for
|
|
||||||
function and gimple location to its count of each perf event.
|
|
||||||
Currently, instruction execution and cache misses are supported.
|
|
||||||
---
|
|
||||||
gcc/auto-profile.c | 415 +++++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
gcc/auto-profile.h | 28 +++
|
|
||||||
gcc/common.opt | 14 ++
|
|
||||||
gcc/opts.c | 26 +++
|
|
||||||
gcc/passes.def | 1 +
|
|
||||||
gcc/timevar.def | 1 +
|
|
||||||
gcc/toplev.c | 6 +
|
|
||||||
gcc/tree-pass.h | 2 +
|
|
||||||
8 files changed, 493 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
|
|
||||||
index 7d09887c9..aced8fca5 100644
|
|
||||||
--- a/gcc/auto-profile.c
|
|
||||||
+++ b/gcc/auto-profile.c
|
|
||||||
@@ -49,6 +49,9 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "auto-profile.h"
|
|
||||||
#include "tree-pretty-print.h"
|
|
||||||
#include "gimple-pretty-print.h"
|
|
||||||
+#include <map>
|
|
||||||
+#include <vector>
|
|
||||||
+#include <algorithm>
|
|
||||||
|
|
||||||
/* The following routines implements AutoFDO optimization.
|
|
||||||
|
|
||||||
@@ -95,6 +98,7 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
|
|
||||||
+#define DEFAULT_CACHE_MISSES_PROFILE_FILE "cmsdata.gcov"
|
|
||||||
#define AUTO_PROFILE_VERSION 1
|
|
||||||
|
|
||||||
namespace autofdo
|
|
||||||
@@ -117,6 +121,14 @@ private:
|
|
||||||
bool annotated_;
|
|
||||||
};
|
|
||||||
|
|
||||||
+/* Compare pair <func_decl_uid, count> so that larger counts sort first. */
|
|
||||||
+static bool
|
|
||||||
+event_count_cmp (std::pair<unsigned, gcov_type> &a,
|
|
||||||
+ std::pair<unsigned, gcov_type> &b)
|
|
||||||
+{
|
|
||||||
+ return a.second > b.second;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Represent a source location: (function_decl, lineno). */
|
|
||||||
typedef std::pair<tree, unsigned> decl_lineno;
|
|
||||||
|
|
||||||
@@ -338,6 +350,206 @@ static autofdo_source_profile *afdo_source_profile;
|
|
||||||
/* gcov_summary structure to store the profile_info. */
|
|
||||||
static gcov_summary *afdo_profile_info;
|
|
||||||
|
|
||||||
+/* Check opts->x_flags and put file name into EVENT_FILES. */
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+get_all_profile_names (const char **event_files)
|
|
||||||
+{
|
|
||||||
+ if (!(flag_auto_profile || flag_cache_misses_profile))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ event_files[INST_EXEC] = auto_profile_file;
|
|
||||||
+
|
|
||||||
+ if (cache_misses_profile_file == NULL)
|
|
||||||
+ {
|
|
||||||
+ cache_misses_profile_file = DEFAULT_CACHE_MISSES_PROFILE_FILE;
|
|
||||||
+ }
|
|
||||||
+ event_files[CACHE_MISSES] = cache_misses_profile_file;
|
|
||||||
+
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void read_profile (void);
|
|
||||||
+
|
|
||||||
+/* Maintain multiple profile data of different events with event_loc_count_map
|
|
||||||
+ and event_func_count_map. */
|
|
||||||
+
|
|
||||||
+class extend_auto_profile
|
|
||||||
+{
|
|
||||||
+public:
|
|
||||||
+ bool auto_profile_exist (enum event_type type);
|
|
||||||
+ gcov_type get_loc_count (location_t, event_type);
|
|
||||||
+ gcov_type get_func_count (unsigned, event_type);
|
|
||||||
+ struct rank_info get_func_rank (unsigned, enum event_type);
|
|
||||||
+ /* There should be only one instance of class EXTEND_AUTO_PROFILE. */
|
|
||||||
+ static extend_auto_profile *create ()
|
|
||||||
+ {
|
|
||||||
+ extend_auto_profile *map = new extend_auto_profile ();
|
|
||||||
+ if (map->read ())
|
|
||||||
+ {
|
|
||||||
+ return map;
|
|
||||||
+ }
|
|
||||||
+ delete map;
|
|
||||||
+ return NULL;
|
|
||||||
+ }
|
|
||||||
+private:
|
|
||||||
+ /* Basic maps of extend_auto_profile. */
|
|
||||||
+ typedef std::map<location_t, gcov_type> loc_count_map;
|
|
||||||
+ typedef std::map<unsigned, gcov_type> func_count_map;
|
|
||||||
+
|
|
||||||
+ /* Map of function_uid to its descending order rank of counts. */
|
|
||||||
+ typedef std::map<unsigned, unsigned> rank_map;
|
|
||||||
+
|
|
||||||
+ /* Mapping hardware events to corresponding basic maps. */
|
|
||||||
+ typedef std::map<event_type, loc_count_map> event_loc_count_map;
|
|
||||||
+ typedef std::map<event_type, func_count_map> event_func_count_map;
|
|
||||||
+ typedef std::map<event_type, rank_map> event_rank_map;
|
|
||||||
+
|
|
||||||
+ extend_auto_profile () {}
|
|
||||||
+ bool read ();
|
|
||||||
+ void set_loc_count ();
|
|
||||||
+ void process_extend_source_profile ();
|
|
||||||
+ void read_extend_afdo_file (const char*, event_type);
|
|
||||||
+ void rank_all_func ();
|
|
||||||
+ void dump_event ();
|
|
||||||
+ event_loc_count_map event_loc_map;
|
|
||||||
+ event_func_count_map event_func_map;
|
|
||||||
+ event_rank_map func_rank;
|
|
||||||
+ event_type profile_type;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+/* Member functions for extend_auto_profile. */
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+extend_auto_profile::auto_profile_exist (enum event_type type)
|
|
||||||
+{
|
|
||||||
+ switch (type)
|
|
||||||
+ {
|
|
||||||
+ case INST_EXEC:
|
|
||||||
+ return event_func_map.count (INST_EXEC) != 0
|
|
||||||
+ || event_loc_map.count (INST_EXEC) != 0;
|
|
||||||
+ case CACHE_MISSES:
|
|
||||||
+ return event_func_map.count (CACHE_MISSES) != 0
|
|
||||||
+ || event_loc_map.count (CACHE_MISSES) != 0;
|
|
||||||
+ default:
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void
|
|
||||||
+extend_auto_profile::dump_event ()
|
|
||||||
+{
|
|
||||||
+ if (dump_file)
|
|
||||||
+ {
|
|
||||||
+ switch (profile_type)
|
|
||||||
+ {
|
|
||||||
+ case INST_EXEC:
|
|
||||||
+ fprintf (dump_file, "Processing event instruction execution.\n");
|
|
||||||
+ break;
|
|
||||||
+ case CACHE_MISSES:
|
|
||||||
+ fprintf (dump_file, "Processing event cache misses.\n");
|
|
||||||
+ break;
|
|
||||||
+ default:
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Return false if no profile option is enabled, true otherwise. */
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+extend_auto_profile::read ()
|
|
||||||
+{
|
|
||||||
+ const char *event_files[EVENT_NUMBER] = {NULL};
|
|
||||||
+ if (!get_all_profile_names (event_files))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Backup AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE since we will create
|
|
||||||
+ new ones for each event_type. */
|
|
||||||
+ autofdo::string_table *string_table_afdo = afdo_string_table;
|
|
||||||
+ autofdo::autofdo_source_profile *source_profile_afdo = afdo_source_profile;
|
|
||||||
+
|
|
||||||
+ for (unsigned i = 0; i < EVENT_NUMBER; i++)
|
|
||||||
+ {
|
|
||||||
+ if (event_files[i] == NULL)
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ profile_type = (enum event_type) i;
|
|
||||||
+ dump_event ();
|
|
||||||
+ gcov_close ();
|
|
||||||
+ auto_profile_file = event_files[i];
|
|
||||||
+ read_profile ();
|
|
||||||
+ gcov_close ();
|
|
||||||
+
|
|
||||||
+ process_extend_source_profile ();
|
|
||||||
+
|
|
||||||
+ delete afdo_source_profile;
|
|
||||||
+ delete afdo_string_table;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Restore AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE. Function
|
|
||||||
+ END_AUTO_PROFILE will free them at the end of compilation. */
|
|
||||||
+ afdo_string_table = string_table_afdo;
|
|
||||||
+ afdo_source_profile = source_profile_afdo;
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Helper functions. */
|
|
||||||
+
|
|
||||||
+gcov_type
|
|
||||||
+extend_auto_profile::get_loc_count (location_t loc, event_type type)
|
|
||||||
+{
|
|
||||||
+ event_loc_count_map::iterator event_iter = event_loc_map.find (type);
|
|
||||||
+ if (event_iter != event_loc_map.end ())
|
|
||||||
+ {
|
|
||||||
+ loc_count_map::iterator loc_iter = event_iter->second.find (loc);
|
|
||||||
+ if (loc_iter != event_iter->second.end ())
|
|
||||||
+ {
|
|
||||||
+ return loc_iter->second;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+struct rank_info
|
|
||||||
+extend_auto_profile::get_func_rank (unsigned decl_uid, enum event_type type)
|
|
||||||
+{
|
|
||||||
+ struct rank_info info = {0, 0};
|
|
||||||
+ event_rank_map::iterator event_iter = func_rank.find (type);
|
|
||||||
+ if (event_iter != func_rank.end ())
|
|
||||||
+ {
|
|
||||||
+ rank_map::iterator func_iter = event_iter->second.find (decl_uid);
|
|
||||||
+ if (func_iter != event_iter->second.end ())
|
|
||||||
+ {
|
|
||||||
+ info.rank = func_iter->second;
|
|
||||||
+ info.total = event_iter->second.size ();
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return info;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+gcov_type
|
|
||||||
+extend_auto_profile::get_func_count (unsigned decl_uid, event_type type)
|
|
||||||
+{
|
|
||||||
+ event_func_count_map::iterator event_iter = event_func_map.find (type);
|
|
||||||
+ if (event_iter != event_func_map.end ())
|
|
||||||
+ {
|
|
||||||
+ func_count_map::iterator func_iter = event_iter->second.find (decl_uid);
|
|
||||||
+ if (func_iter != event_iter->second.end ())
|
|
||||||
+ {
|
|
||||||
+ return func_iter->second;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static extend_auto_profile *extend_profile;
|
|
||||||
+
|
|
||||||
/* Helper functions. */
|
|
||||||
|
|
||||||
/* Return the original name of NAME: strip the suffix that starts
|
|
||||||
@@ -1654,6 +1866,131 @@ auto_profile (void)
|
|
||||||
|
|
||||||
return TODO_rebuild_cgraph_edges;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+void
|
|
||||||
+extend_auto_profile::rank_all_func ()
|
|
||||||
+{
|
|
||||||
+ std::vector<std::pair<unsigned, gcov_type> > func_sorted;
|
|
||||||
+ event_func_count_map::iterator event_iter
|
|
||||||
+ = event_func_map.find (profile_type);
|
|
||||||
+ if (event_iter != event_func_map.end ())
|
|
||||||
+ {
|
|
||||||
+ func_count_map::iterator func_iter;
|
|
||||||
+ for (func_iter = event_iter->second.begin ();
|
|
||||||
+ func_iter != event_iter->second.end (); func_iter++)
|
|
||||||
+ {
|
|
||||||
+ func_sorted.push_back (std::make_pair (func_iter->first,
|
|
||||||
+ func_iter->second));
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ std::sort (func_sorted.begin (), func_sorted.end (), event_count_cmp);
|
|
||||||
+
|
|
||||||
+ for (unsigned i = 0; i < func_sorted.size (); ++i)
|
|
||||||
+ {
|
|
||||||
+ func_rank[profile_type][func_sorted[i].first] = i + 1;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Iterate stmts in cfun and maintain its count to EVENT_LOC_MAP. */
|
|
||||||
+
|
|
||||||
+void
|
|
||||||
+extend_auto_profile::set_loc_count ()
|
|
||||||
+{
|
|
||||||
+ basic_block bb;
|
|
||||||
+ FOR_EACH_BB_FN (bb, cfun)
|
|
||||||
+ {
|
|
||||||
+ gimple_stmt_iterator gsi;
|
|
||||||
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
||||||
+ {
|
|
||||||
+ count_info info;
|
|
||||||
+ gimple *stmt = gsi_stmt (gsi);
|
|
||||||
+ if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ if (afdo_source_profile->get_count_info (stmt, &info))
|
|
||||||
+ {
|
|
||||||
+ location_t loc = gimple_location (stmt);
|
|
||||||
+ event_loc_map[profile_type][loc] += info.count;
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "stmt ");
|
|
||||||
+ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
|
|
||||||
+ fprintf (dump_file, "counts %ld\n",
|
|
||||||
+ event_loc_map[profile_type][loc]);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Process data in extend_auto_source_profile, save them into two maps.
|
|
||||||
+ 1. gimple_location to count.
|
|
||||||
+ 2. function_index to count. */
|
|
||||||
+void
|
|
||||||
+extend_auto_profile::process_extend_source_profile ()
|
|
||||||
+{
|
|
||||||
+ struct cgraph_node *node;
|
|
||||||
+ if (symtab->state == FINISHED)
|
|
||||||
+ {
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ FOR_EACH_FUNCTION (node)
|
|
||||||
+ {
|
|
||||||
+ if (!gimple_has_body_p (node->decl) || node->inlined_to)
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Don't profile functions produced for builtin stuff. */
|
|
||||||
+ if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ function *fn = DECL_STRUCT_FUNCTION (node->decl);
|
|
||||||
+ push_cfun (fn);
|
|
||||||
+
|
|
||||||
+ const function_instance *s
|
|
||||||
+ = afdo_source_profile->get_function_instance_by_decl (
|
|
||||||
+ current_function_decl);
|
|
||||||
+
|
|
||||||
+ if (s == NULL)
|
|
||||||
+ {
|
|
||||||
+ pop_cfun ();
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ unsigned int decl_uid = DECL_UID (current_function_decl);
|
|
||||||
+ gcov_type count = s->total_count ();
|
|
||||||
+ if (dump_file)
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "Extend auto-profile for function %s.\n",
|
|
||||||
+ node->dump_name ());
|
|
||||||
+ }
|
|
||||||
+ event_func_map[profile_type][decl_uid] += count;
|
|
||||||
+ set_loc_count ();
|
|
||||||
+ pop_cfun ();
|
|
||||||
+ }
|
|
||||||
+ rank_all_func ();
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Main entry of extend_auto_profile. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+extend_source_profile ()
|
|
||||||
+{
|
|
||||||
+ extend_profile = autofdo::extend_auto_profile::create ();
|
|
||||||
+ if (dump_file)
|
|
||||||
+ {
|
|
||||||
+ if (extend_profile == NULL)
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "No profile file is found.\n");
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ fprintf (dump_file, "Extend profile info generated.\n");
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
} /* namespace autofdo. */
|
|
||||||
|
|
||||||
/* Read the profile from the profile data file. */
|
|
||||||
@@ -1682,6 +2019,42 @@ end_auto_profile (void)
|
|
||||||
profile_info = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Extern function to get profile info in other passes. */
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+profile_exist (enum event_type type)
|
|
||||||
+{
|
|
||||||
+ return autofdo::extend_profile != NULL
|
|
||||||
+ && autofdo::extend_profile->auto_profile_exist (type);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+gcov_type
|
|
||||||
+event_get_loc_count (location_t loc, event_type type)
|
|
||||||
+{
|
|
||||||
+ return autofdo::extend_profile->get_loc_count (loc, type);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+gcov_type
|
|
||||||
+event_get_func_count (unsigned decl_uid, event_type type)
|
|
||||||
+{
|
|
||||||
+ return autofdo::extend_profile->get_func_count (decl_uid, type);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+struct rank_info
|
|
||||||
+event_get_func_rank (unsigned decl_uid, enum event_type type)
|
|
||||||
+{
|
|
||||||
+ return autofdo::extend_profile->get_func_rank (decl_uid, type);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void
|
|
||||||
+free_extend_profile_info ()
|
|
||||||
+{
|
|
||||||
+ if (autofdo::extend_profile != NULL)
|
|
||||||
+ {
|
|
||||||
+ delete autofdo::extend_profile;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Returns TRUE if EDGE is hot enough to be inlined early. */
|
|
||||||
|
|
||||||
bool
|
|
||||||
@@ -1743,8 +2116,50 @@ public:
|
|
||||||
|
|
||||||
} // anon namespace
|
|
||||||
|
|
||||||
+namespace
|
|
||||||
+{
|
|
||||||
+const pass_data pass_data_ipa_extend_auto_profile =
|
|
||||||
+{
|
|
||||||
+ SIMPLE_IPA_PASS, /* type */
|
|
||||||
+ "ex-afdo", /* name */
|
|
||||||
+ OPTGROUP_NONE, /* optinfo_flags */
|
|
||||||
+ TV_IPA_EXTEND_AUTO_PROFILE, /* tv_id */
|
|
||||||
+ 0, /* properties_required */
|
|
||||||
+ 0, /* properties_provided */
|
|
||||||
+ 0, /* properties_destroyed */
|
|
||||||
+ 0, /* todo_flags_start */
|
|
||||||
+ 0, /* todo_flags_finish */
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+class pass_ipa_extend_auto_profile : public simple_ipa_opt_pass
|
|
||||||
+{
|
|
||||||
+public:
|
|
||||||
+ pass_ipa_extend_auto_profile (gcc::context *ctxt)
|
|
||||||
+ : simple_ipa_opt_pass (pass_data_ipa_extend_auto_profile, ctxt)
|
|
||||||
+ {}
|
|
||||||
+
|
|
||||||
+ /* opt_pass methods: */
|
|
||||||
+ virtual bool gate (function *) {return (flag_ipa_extend_auto_profile > 0);}
|
|
||||||
+ virtual unsigned int execute (function *);
|
|
||||||
+
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+unsigned int
|
|
||||||
+pass_ipa_extend_auto_profile::execute (function *fun)
|
|
||||||
+{
|
|
||||||
+ autofdo::extend_source_profile ();
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+} // anon namespace
|
|
||||||
+
|
|
||||||
simple_ipa_opt_pass *
|
|
||||||
make_pass_ipa_auto_profile (gcc::context *ctxt)
|
|
||||||
{
|
|
||||||
return new pass_ipa_auto_profile (ctxt);
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+simple_ipa_opt_pass *
|
|
||||||
+make_pass_ipa_extend_auto_profile (gcc::context *ctxt)
|
|
||||||
+{
|
|
||||||
+ return new pass_ipa_extend_auto_profile (ctxt);
|
|
||||||
+}
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h
|
|
||||||
index f5cff091d..230d7e68a 100644
|
|
||||||
--- a/gcc/auto-profile.h
|
|
||||||
+++ b/gcc/auto-profile.h
|
|
||||||
@@ -21,6 +21,13 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#ifndef AUTO_PROFILE_H
|
|
||||||
#define AUTO_PROFILE_H
|
|
||||||
|
|
||||||
+enum event_type
|
|
||||||
+{
|
|
||||||
+ INST_EXEC = 0,
|
|
||||||
+ CACHE_MISSES,
|
|
||||||
+ EVENT_NUMBER
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
/* Read, process, finalize AutoFDO data structures. */
|
|
||||||
extern void read_autofdo_file (void);
|
|
||||||
extern void end_auto_profile (void);
|
|
||||||
@@ -28,4 +35,25 @@ extern void end_auto_profile (void);
|
|
||||||
/* Returns TRUE if EDGE is hot enough to be inlined early. */
|
|
||||||
extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
|
|
||||||
|
|
||||||
+/* Check if profile exists before using this profile. */
|
|
||||||
+extern bool profile_exist (enum event_type);
|
|
||||||
+
|
|
||||||
+/* Given func decl_uid or gimple location and event_type, return count.
|
|
||||||
+ Count is 0 if function or gimple is not sampled. */
|
|
||||||
+extern gcov_type event_get_func_count (unsigned, enum event_type);
|
|
||||||
+extern gcov_type event_get_loc_count (location_t, enum event_type);
|
|
||||||
+
|
|
||||||
+struct rank_info
|
|
||||||
+{
|
|
||||||
+ unsigned total;
|
|
||||||
+ unsigned rank;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+/* Given function decl_uid and event type, return rank_info. Rank_info
|
|
||||||
+ is {0, 0} if function was not sampled. */
|
|
||||||
+extern struct rank_info event_get_func_rank (unsigned, enum event_type);
|
|
||||||
+
|
|
||||||
+/* Free memory allocated by autofdo::extend_profile. */
|
|
||||||
+extern void free_extend_profile_info ();
|
|
||||||
+
|
|
||||||
#endif /* AUTO_PROFILE_H */
|
|
||||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
||||||
index 73c24f28d..37cbbd8c0 100644
|
|
||||||
--- a/gcc/common.opt
|
|
||||||
+++ b/gcc/common.opt
|
|
||||||
@@ -1074,6 +1074,16 @@ Common Joined RejectNegative Var(auto_profile_file)
|
|
||||||
Use sample profile information for call graph node weights. The profile
|
|
||||||
file is specified in the argument.
|
|
||||||
|
|
||||||
+fcache-misses-profile
|
|
||||||
+Common Report Var(flag_cache_misses_profile)
|
|
||||||
+Use sample profile information for source code cache miss count. The default
|
|
||||||
+profile file is cmsdata.gcov in `pwd`.
|
|
||||||
+
|
|
||||||
+fcache-misses-profile=
|
|
||||||
+Common Joined RejectNegative Var(cache_misses_profile_file)
|
|
||||||
+Use sample profile information for source code cache miss count. The profile
|
|
||||||
+file is specified in the argument.
|
|
||||||
+
|
|
||||||
; -fcheck-bounds causes gcc to generate array bounds checks.
|
|
||||||
; For C, C++ and ObjC: defaults off.
|
|
||||||
; For Java: defaults to on.
|
|
||||||
@@ -1873,6 +1883,10 @@ fipa-struct-reorg
|
|
||||||
Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization
|
|
||||||
Perform structure layout optimizations.
|
|
||||||
|
|
||||||
+fipa-extend-auto-profile
|
|
||||||
+Common Report Var(flag_ipa_extend_auto_profile)
|
|
||||||
+Use sample profile information for source code.
|
|
||||||
+
|
|
||||||
fipa-vrp
|
|
||||||
Common Report Var(flag_ipa_vrp) Optimization
|
|
||||||
Perform IPA Value Range Propagation.
|
|
||||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
|
||||||
index 6924a973a..642327296 100644
|
|
||||||
--- a/gcc/opts.c
|
|
||||||
+++ b/gcc/opts.c
|
|
||||||
@@ -1742,6 +1742,13 @@ enable_fdo_optimizations (struct gcc_options *opts,
|
|
||||||
SET_OPTION_IF_UNSET (opts, opts_set, flag_tree_loop_distribution, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
+static void
|
|
||||||
+set_cache_misses_profile_params (struct gcc_options *opts,
|
|
||||||
+ struct gcc_options *opts_set)
|
|
||||||
+{
|
|
||||||
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_prefetch_loop_arrays, 1);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* -f{,no-}sanitize{,-recover}= suboptions. */
|
|
||||||
const struct sanitizer_opts_s sanitizer_opts[] =
|
|
||||||
{
|
|
||||||
@@ -2604,6 +2611,25 @@ common_handle_option (struct gcc_options *opts,
|
|
||||||
param_early_inliner_max_iterations, 10);
|
|
||||||
break;
|
|
||||||
|
|
||||||
+ case OPT_fipa_extend_auto_profile:
|
|
||||||
+ opts->x_flag_ipa_extend_auto_profile = opts->x_flag_cache_misses_profile
|
|
||||||
+ ? true : value;
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case OPT_fcache_misses_profile_:
|
|
||||||
+ opts->x_cache_misses_profile_file = xstrdup (arg);
|
|
||||||
+ opts->x_flag_cache_misses_profile = true;
|
|
||||||
+ value = true;
|
|
||||||
+ /* No break here - do -fcache-misses-profile processing. */
|
|
||||||
+ /* FALLTHRU */
|
|
||||||
+ case OPT_fcache_misses_profile:
|
|
||||||
+ opts->x_flag_ipa_extend_auto_profile = value;
|
|
||||||
+ if (value)
|
|
||||||
+ {
|
|
||||||
+ set_cache_misses_profile_params (opts, opts_set);
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
case OPT_fprofile_generate_:
|
|
||||||
opts->x_profile_data_prefix = xstrdup (arg);
|
|
||||||
value = true;
|
|
||||||
diff --git a/gcc/passes.def b/gcc/passes.def
|
|
||||||
index 63303ab65..e9c91d26e 100644
|
|
||||||
--- a/gcc/passes.def
|
|
||||||
+++ b/gcc/passes.def
|
|
||||||
@@ -133,6 +133,7 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
|
|
||||||
NEXT_PASS (pass_target_clone);
|
|
||||||
NEXT_PASS (pass_ipa_auto_profile);
|
|
||||||
+ NEXT_PASS (pass_ipa_extend_auto_profile);
|
|
||||||
NEXT_PASS (pass_ipa_tree_profile);
|
|
||||||
PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile)
|
|
||||||
NEXT_PASS (pass_feedback_split_functions);
|
|
||||||
diff --git a/gcc/timevar.def b/gcc/timevar.def
|
|
||||||
index ee25eccbb..e873747a8 100644
|
|
||||||
--- a/gcc/timevar.def
|
|
||||||
+++ b/gcc/timevar.def
|
|
||||||
@@ -82,6 +82,7 @@ DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting")
|
|
||||||
DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats")
|
|
||||||
DEFTIMEVAR (TV_IPA_REORDER_FIELDS , "ipa struct reorder fields optimization")
|
|
||||||
DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization")
|
|
||||||
+DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile")
|
|
||||||
DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations")
|
|
||||||
DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression")
|
|
||||||
DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression")
|
|
||||||
diff --git a/gcc/toplev.c b/gcc/toplev.c
|
|
||||||
index eaed6f6c7..51e6bd400 100644
|
|
||||||
--- a/gcc/toplev.c
|
|
||||||
+++ b/gcc/toplev.c
|
|
||||||
@@ -577,6 +577,12 @@ compile_file (void)
|
|
||||||
targetm.asm_out.output_ident (ident_str);
|
|
||||||
}
|
|
||||||
|
|
||||||
+ /* Extend auto profile finalization. */
|
|
||||||
+ if (flag_ipa_extend_auto_profile)
|
|
||||||
+ {
|
|
||||||
+ free_extend_profile_info ();
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
/* Auto profile finalization. */
|
|
||||||
if (flag_auto_profile)
|
|
||||||
end_auto_profile ();
|
|
||||||
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
|
|
||||||
index eb32c5d44..be6387768 100644
|
|
||||||
--- a/gcc/tree-pass.h
|
|
||||||
+++ b/gcc/tree-pass.h
|
|
||||||
@@ -511,6 +511,8 @@ extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt);
|
|
||||||
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
|
|
||||||
extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt);
|
|
||||||
extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
|
|
||||||
+extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context
|
|
||||||
+ *ctxt);
|
|
||||||
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
|
|
||||||
extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
|
|
||||||
extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt);
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,353 +0,0 @@
|
|||||||
From eb58d920a95696d8d5a7db9a6d640d4494fb023f Mon Sep 17 00:00:00 2001
|
|
||||||
From: liyancheng <412998149@qq.com>
|
|
||||||
Date: Tue, 25 Jan 2022 16:57:28 +0800
|
|
||||||
Subject: [PATCH 26/28] [AutoFDO] Enable discriminator and MCF algorithm on
|
|
||||||
AutoFDO
|
|
||||||
|
|
||||||
1. Support discriminators to distinguish among several
|
|
||||||
basic blocks that share a common locus, allowing for
|
|
||||||
more accurate autofdo.
|
|
||||||
|
|
||||||
2. Use option -fprofile-correction to call the MCF algorithm
|
|
||||||
to smooth non conservative BB counts.
|
|
||||||
---
|
|
||||||
gcc/auto-profile.c | 172 ++++++++++++++++++++++++++++++++++++++++++++-
|
|
||||||
gcc/cfghooks.c | 7 ++
|
|
||||||
gcc/ipa-cp.c | 21 ++++++
|
|
||||||
gcc/opts.c | 5 +-
|
|
||||||
gcc/tree-inline.c | 14 ++++
|
|
||||||
5 files changed, 215 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
|
|
||||||
index aced8fca5..e6164b91b 100644
|
|
||||||
--- a/gcc/auto-profile.c
|
|
||||||
+++ b/gcc/auto-profile.c
|
|
||||||
@@ -678,6 +678,17 @@ string_table::get_index (const char *name) const
|
|
||||||
if (name == NULL)
|
|
||||||
return -1;
|
|
||||||
string_index_map::const_iterator iter = map_.find (name);
|
|
||||||
+ /* Function names may be duplicated. Try to distinguish them by the
|
|
||||||
+ #file_name#function_name defined by the autofdo tool chain. */
|
|
||||||
+ if (iter == map_.end ())
|
|
||||||
+ {
|
|
||||||
+ char* file_name = get_original_name (lbasename (dump_base_name));
|
|
||||||
+ char* file_func_name
|
|
||||||
+ = concat ("#", file_name, "#", name, NULL);
|
|
||||||
+ iter = map_.find (file_func_name);
|
|
||||||
+ free (file_name);
|
|
||||||
+ free (file_func_name);
|
|
||||||
+ }
|
|
||||||
if (iter == map_.end ())
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
@@ -866,7 +877,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < num_pos_counts; i++)
|
|
||||||
{
|
|
||||||
- unsigned offset = gcov_read_unsigned () & 0xffff0000;
|
|
||||||
+ unsigned offset = gcov_read_unsigned ();
|
|
||||||
unsigned num_targets = gcov_read_unsigned ();
|
|
||||||
gcov_type count = gcov_read_counter ();
|
|
||||||
s->pos_counts[offset].count = count;
|
|
||||||
@@ -945,6 +956,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
|
|
||||||
function_instance *s = get_function_instance_by_inline_stack (stack);
|
|
||||||
if (s == NULL)
|
|
||||||
return false;
|
|
||||||
+ if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
|
|
||||||
+ {
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
return s->get_count_info (stack[0].second, info);
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -1583,6 +1598,68 @@ afdo_propagate (bb_set *annotated_bb)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Process the following scene when the branch probability
|
|
||||||
+ inversion when do function afdo_propagate (). E.g.
|
|
||||||
+ BB_NUM (sample count)
|
|
||||||
+ BB1 (1000)
|
|
||||||
+ / \
|
|
||||||
+ BB2 (10) BB3 (0)
|
|
||||||
+ \ /
|
|
||||||
+ BB4
|
|
||||||
+ In afdo_propagate(), count of BB3 is calculated by
|
|
||||||
+ COUNT (BB3) = 990 (990 = COUNT (BB1) - COUNT (BB2) = 1000 - 10)
|
|
||||||
+
|
|
||||||
+ In fact, BB3 may be colder than BB2 by sample count.
|
|
||||||
+
|
|
||||||
+ This function allocates source BB count to each succ BB by sample
|
|
||||||
+ rate, E.g.
|
|
||||||
+ BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT)) */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+afdo_preprocess_bb_count ()
|
|
||||||
+{
|
|
||||||
+ basic_block bb;
|
|
||||||
+ FOR_ALL_BB_FN (bb, cfun)
|
|
||||||
+ {
|
|
||||||
+ if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
|
|
||||||
+ && bb->count > profile_count::zero ().afdo ())
|
|
||||||
+ {
|
|
||||||
+ basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
|
|
||||||
+ basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
|
|
||||||
+ if (single_succ_p (bb1) && single_succ_p (bb2)
|
|
||||||
+ && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
|
|
||||||
+ {
|
|
||||||
+ gcov_type max_count = 0;
|
|
||||||
+ gcov_type total_count = 0;
|
|
||||||
+ edge e;
|
|
||||||
+ edge_iterator ei;
|
|
||||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
|
||||||
+ {
|
|
||||||
+ if (!e->dest->count.ipa_p ())
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ max_count = MAX(max_count, e->dest->count.to_gcov_type ());
|
|
||||||
+ total_count += e->dest->count.to_gcov_type ();
|
|
||||||
+ }
|
|
||||||
+ /* Only bb_count > max_count * 2, branch probability will
|
|
||||||
+ inversion. */
|
|
||||||
+ if (max_count > 0
|
|
||||||
+ && bb->count.to_gcov_type () > max_count * 2)
|
|
||||||
+ {
|
|
||||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
|
||||||
+ {
|
|
||||||
+ gcov_type target_count = bb->count.to_gcov_type ()
|
|
||||||
+ * e->dest->count.to_gcov_type () / total_count;
|
|
||||||
+ e->dest->count
|
|
||||||
+ = profile_count::from_gcov_type (target_count).afdo ();
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Propagate counts on control flow graph and calculate branch
|
|
||||||
probabilities. */
|
|
||||||
|
|
||||||
@@ -1608,6 +1685,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
|
|
||||||
}
|
|
||||||
|
|
||||||
afdo_find_equiv_class (annotated_bb);
|
|
||||||
+ afdo_preprocess_bb_count ();
|
|
||||||
afdo_propagate (annotated_bb);
|
|
||||||
|
|
||||||
FOR_EACH_BB_FN (bb, cfun)
|
|
||||||
@@ -1711,6 +1789,82 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Preparation before executing MCF algorithm. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+afdo_init_mcf ()
|
|
||||||
+{
|
|
||||||
+ basic_block bb;
|
|
||||||
+ edge e;
|
|
||||||
+ edge_iterator ei;
|
|
||||||
+
|
|
||||||
+ if (dump_file)
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Step1: when using MCF, BB ids must be continuous,
|
|
||||||
+ so we need compact_blocks (). */
|
|
||||||
+ compact_blocks ();
|
|
||||||
+
|
|
||||||
+ /* Step2: allocate memory for MCF input data. */
|
|
||||||
+ bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
|
|
||||||
+ edge_gcov_counts = new hash_map<edge, gcov_type>;
|
|
||||||
+
|
|
||||||
+ /* Step3: init MCF input data from cfg. */
|
|
||||||
+ FOR_ALL_BB_FN (bb, cfun)
|
|
||||||
+ {
|
|
||||||
+ /* Init BB count for MCF. */
|
|
||||||
+ bb_gcov_count (bb) = bb->count.to_gcov_type ();
|
|
||||||
+
|
|
||||||
+ gcov_type total_count = 0;
|
|
||||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
|
||||||
+ {
|
|
||||||
+ total_count += e->dest->count.to_gcov_type ();
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* If there is no sample in each successor blocks, source
|
|
||||||
+ BB samples are allocated to each edge by branch static prob. */
|
|
||||||
+
|
|
||||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
|
||||||
+ {
|
|
||||||
+ if (total_count == 0)
|
|
||||||
+ {
|
|
||||||
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
|
|
||||||
+ * e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
|
|
||||||
+ * e->dest->count.to_gcov_type () / total_count;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Free the resources used by MCF and reset BB count from MCF result,
|
|
||||||
+ branch probability has been updated in mcf_smooth_cfg (). */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+afdo_process_after_mcf ()
|
|
||||||
+{
|
|
||||||
+ basic_block bb;
|
|
||||||
+ /* Reset BB count from MCF result. */
|
|
||||||
+ FOR_EACH_BB_FN (bb, cfun)
|
|
||||||
+ {
|
|
||||||
+ if (bb_gcov_count (bb))
|
|
||||||
+ {
|
|
||||||
+ bb->count
|
|
||||||
+ = profile_count::from_gcov_type (bb_gcov_count (bb)).afdo ();
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Clean up MCF resource. */
|
|
||||||
+ bb_gcov_counts.release ();
|
|
||||||
+ delete edge_gcov_counts;
|
|
||||||
+ edge_gcov_counts = NULL;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Annotate auto profile to the control flow graph. Do not annotate value
|
|
||||||
profile for stmts in PROMOTED_STMTS. */
|
|
||||||
|
|
||||||
@@ -1762,8 +1916,20 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
|
|
||||||
afdo_source_profile->mark_annotated (cfun->function_end_locus);
|
|
||||||
if (max_count > profile_count::zero ())
|
|
||||||
{
|
|
||||||
- /* Calculate, propagate count and probability information on CFG. */
|
|
||||||
- afdo_calculate_branch_prob (&annotated_bb);
|
|
||||||
+ /* 1 means -fprofile-correction is enabled manually, and MCF
|
|
||||||
+ algorithm will be used to calculate count and probability.
|
|
||||||
+ Otherwise, use the default calculate algorithm. */
|
|
||||||
+ if (flag_profile_correction == 1)
|
|
||||||
+ {
|
|
||||||
+ afdo_init_mcf ();
|
|
||||||
+ mcf_smooth_cfg ();
|
|
||||||
+ afdo_process_after_mcf ();
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ /* Calculate, propagate count and probability information on CFG. */
|
|
||||||
+ afdo_calculate_branch_prob (&annotated_bb);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
update_max_bb_count ();
|
|
||||||
profile_status_for_fn (cfun) = PROFILE_READ;
|
|
||||||
diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
|
|
||||||
index ea558b469..4ea490a8a 100644
|
|
||||||
--- a/gcc/cfghooks.c
|
|
||||||
+++ b/gcc/cfghooks.c
|
|
||||||
@@ -526,6 +526,9 @@ split_block_1 (basic_block bb, void *i)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
new_bb->count = bb->count;
|
|
||||||
+ /* Copy discriminator from original bb to distinguish among
|
|
||||||
+ several basic blocks that share a common locus, allowing for
|
|
||||||
+ more accurate autofdo. */
|
|
||||||
new_bb->discriminator = bb->discriminator;
|
|
||||||
|
|
||||||
if (dom_info_available_p (CDI_DOMINATORS))
|
|
||||||
@@ -1091,6 +1094,10 @@ duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id)
|
|
||||||
move_block_after (new_bb, after);
|
|
||||||
|
|
||||||
new_bb->flags = (bb->flags & ~BB_DUPLICATED);
|
|
||||||
+ /* Copy discriminator from original bb to distinguish among
|
|
||||||
+ several basic blocks that share a common locus, allowing for
|
|
||||||
+ more accurate autofdo. */
|
|
||||||
+ new_bb->discriminator = bb->discriminator;
|
|
||||||
FOR_EACH_EDGE (s, ei, bb->succs)
|
|
||||||
{
|
|
||||||
/* Since we are creating edges from a new block to successors
|
|
||||||
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
|
|
||||||
index b1f0881bd..c208070c9 100644
|
|
||||||
--- a/gcc/ipa-cp.c
|
|
||||||
+++ b/gcc/ipa-cp.c
|
|
||||||
@@ -4365,6 +4365,27 @@ update_profiling_info (struct cgraph_node *orig_node,
|
|
||||||
orig_node_count.dump (dump_file);
|
|
||||||
fprintf (dump_file, "\n");
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+ /* When autofdo uses PMU as the sampling unit, the count of
|
|
||||||
+ cgraph_node->count cannot be obtained directly and will
|
|
||||||
+ be zero. Using it in apply_scale would leave the node
|
|
||||||
+ count incorrectly overestimated. So set orig_new_node_count
|
|
||||||
+ equal to orig_node_count, which is same as known error
|
|
||||||
+ handling. */
|
|
||||||
+ if (orig_node->count == profile_count::zero ().afdo ()
|
|
||||||
+ && new_node->count == profile_count::zero ().global0adjusted ())
|
|
||||||
+ {
|
|
||||||
+ orig_new_node_count = (orig_sum + new_sum).apply_scale (12, 10);
|
|
||||||
+
|
|
||||||
+ if (dump_file)
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, " node %s with zero count from afdo ",
|
|
||||||
+ new_node->dump_name ());
|
|
||||||
+ fprintf (dump_file, " proceeding by pretending it was ");
|
|
||||||
+ orig_new_node_count.dump (dump_file);
|
|
||||||
+ fprintf (dump_file, "\n");
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
|
|
||||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
|
||||||
index 642327296..7a39f618b 100644
|
|
||||||
--- a/gcc/opts.c
|
|
||||||
+++ b/gcc/opts.c
|
|
||||||
@@ -2606,7 +2606,10 @@ common_handle_option (struct gcc_options *opts,
|
|
||||||
/* FALLTHRU */
|
|
||||||
case OPT_fauto_profile:
|
|
||||||
enable_fdo_optimizations (opts, opts_set, value);
|
|
||||||
- SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
|
|
||||||
+ /* 2 is special and means flag_profile_correction was turned on by
|
|
||||||
+ -fauto-profile. */
|
|
||||||
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction,
|
|
||||||
+ (value ? 2 : 0));
|
|
||||||
SET_OPTION_IF_UNSET (opts, opts_set,
|
|
||||||
param_early_inliner_max_iterations, 10);
|
|
||||||
break;
|
|
||||||
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
|
|
||||||
index efde5d158..8405a959c 100644
|
|
||||||
--- a/gcc/tree-inline.c
|
|
||||||
+++ b/gcc/tree-inline.c
|
|
||||||
@@ -2015,6 +2015,10 @@ copy_bb (copy_body_data *id, basic_block bb,
|
|
||||||
basic_block_info automatically. */
|
|
||||||
copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux);
|
|
||||||
copy_basic_block->count = bb->count.apply_scale (num, den);
|
|
||||||
+ /* Copy discriminator from original bb to distinguish among
|
|
||||||
+ several basic blocks that share a common locus, allowing for
|
|
||||||
+ more accurate autofdo. */
|
|
||||||
+ copy_basic_block->discriminator = bb->discriminator;
|
|
||||||
|
|
||||||
copy_gsi = gsi_start_bb (copy_basic_block);
|
|
||||||
|
|
||||||
@@ -3028,6 +3032,16 @@ copy_cfg_body (copy_body_data * id,
|
|
||||||
den += e->count ();
|
|
||||||
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den;
|
|
||||||
}
|
|
||||||
+ /* When autofdo uses PMU as the sampling unit, the number of
|
|
||||||
+ ENTRY_BLOCK_PTR_FOR_FN cannot be obtained directly and will
|
|
||||||
+ be zero. Using it in adjust_for_ipa_scaling would leave the
|
|
||||||
+ inlined BB count incorrectly overestimated. So set den equal
|
|
||||||
+ to num, which is the source inline BB count to avoid
|
|
||||||
+ overestimation. */
|
|
||||||
+ if (den == profile_count::zero ().afdo ())
|
|
||||||
+ {
|
|
||||||
+ den = num;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
profile_count::adjust_for_ipa_scaling (&num, &den);
|
|
||||||
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,151 +0,0 @@
|
|||||||
From 3d20b13bc2e5af8d52e221a33881423e38c3dfdd Mon Sep 17 00:00:00 2001
|
|
||||||
From: dingguangya <dingguangya1@huawei.com>
|
|
||||||
Date: Thu, 17 Feb 2022 21:53:31 +0800
|
|
||||||
Subject: [PATCH 28/28] [AutoPrefetch] Handle the case that the basic block
|
|
||||||
branch probability is invalid
|
|
||||||
|
|
||||||
When the node branch probability value is not initialized,
|
|
||||||
the branch probability must be set to 0 to ensure that
|
|
||||||
the calculation of the basic block execution probability
|
|
||||||
must be less than or equal to 100%.
|
|
||||||
---
|
|
||||||
.../gcc.dg/autoprefetch/autoprefetch.exp | 27 +++++++++++++++++++
|
|
||||||
.../autoprefetch/branch-weighted-prefetch.c | 22 +++++++++++++++
|
|
||||||
.../autoprefetch/get-edge-prob-non-init.c | 24 +++++++++++++++++
|
|
||||||
gcc/tree-ssa-loop-prefetch.c | 17 +++++++++++-
|
|
||||||
4 files changed, 89 insertions(+), 1 deletion(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..a7408e338
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
|
||||||
@@ -0,0 +1,27 @@
|
|
||||||
+# Copyright (C) 1997-2022 Free Software Foundation, Inc.
|
|
||||||
+
|
|
||||||
+# This program is free software; you can redistribute it and/or modify
|
|
||||||
+# it under the terms of the GNU General Public License as published by
|
|
||||||
+# the Free Software Foundation; either version 3 of the License, or
|
|
||||||
+# (at your option) any later version.
|
|
||||||
+#
|
|
||||||
+# This program is distributed in the hope that it will be useful,
|
|
||||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
+# GNU General Public License for more details.
|
|
||||||
+#
|
|
||||||
+# You should have received a copy of the GNU General Public License
|
|
||||||
+# along with GCC; see the file COPYING3. If not see
|
|
||||||
+# <http://www.gnu.org/licenses/>.
|
|
||||||
+
|
|
||||||
+load_lib gcc-dg.exp
|
|
||||||
+load_lib target-supports.exp
|
|
||||||
+
|
|
||||||
+# Initialize `dg'.
|
|
||||||
+dg-init
|
|
||||||
+
|
|
||||||
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
|
|
||||||
+ "" "-fprefetch-loop-arrays"
|
|
||||||
+
|
|
||||||
+# All done.
|
|
||||||
+dg-finish
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..c63c5e5cb
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
|
||||||
@@ -0,0 +1,22 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fprefetch-loop-arrays=2 --param min-insn-to-prefetch-ratio=5 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
|
|
||||||
+#define N 10000000
|
|
||||||
+
|
|
||||||
+long long a[N];
|
|
||||||
+
|
|
||||||
+long long func ()
|
|
||||||
+{
|
|
||||||
+ long long i;
|
|
||||||
+ long long sum = 0;
|
|
||||||
+
|
|
||||||
+ for (i = 0; i < N; i+=1) {
|
|
||||||
+ if (i < 100000)
|
|
||||||
+ sum += a[i];
|
|
||||||
+ else
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return sum;
|
|
||||||
+}
|
|
||||||
+/* { dg-final { scan-tree-dump-times "Ahead 40" 1 "aprefetch" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times "builtin_prefetch" 1 "optimized" } } */
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c b/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..f55481008
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
|
|
||||||
@@ -0,0 +1,24 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-Ofast -fprefetch-loop-arrays=2 -fdump-tree-aprefetch-details" } */
|
|
||||||
+
|
|
||||||
+int a, c, f;
|
|
||||||
+static int *b = &a;
|
|
||||||
+int *d;
|
|
||||||
+int e[0];
|
|
||||||
+void g() {
|
|
||||||
+ int h;
|
|
||||||
+ for (;;) {
|
|
||||||
+ h = 1;
|
|
||||||
+ for (; h >= 0; h--) {
|
|
||||||
+ c = 2;
|
|
||||||
+ for (; c; c--)
|
|
||||||
+ if (e[0])
|
|
||||||
+ if (e[c])
|
|
||||||
+ *b = 0;
|
|
||||||
+ f || (*d = 0);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+int main() {}
|
|
||||||
+
|
|
||||||
+/* { dg-final } */
|
|
||||||
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
|
|
||||||
index 3a5aef0fc..673f453a4 100644
|
|
||||||
--- a/gcc/tree-ssa-loop-prefetch.c
|
|
||||||
+++ b/gcc/tree-ssa-loop-prefetch.c
|
|
||||||
@@ -2132,7 +2132,7 @@ get_edge_prob (edge e)
|
|
||||||
{
|
|
||||||
/* Limit the minimum probability value. */
|
|
||||||
const float MINNUM_PROB = 0.00001f;
|
|
||||||
- float fvalue = 1;
|
|
||||||
+ float fvalue = 0;
|
|
||||||
|
|
||||||
profile_probability probability = e->probability;
|
|
||||||
if (probability.initialized_p ())
|
|
||||||
@@ -2143,6 +2143,21 @@ get_edge_prob (edge e)
|
|
||||||
fvalue = MINNUM_PROB;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ /* When the node branch probability value is not initialized, the branch
|
|
||||||
+ probability must be set to 0 to ensure that the calculation of the
|
|
||||||
+ basic block execution probability must be less than or equal to 100%.
|
|
||||||
+ i.e,
|
|
||||||
+ ...
|
|
||||||
+ <bb 3> [local count: 20000]
|
|
||||||
+ if (f_2 != 0)
|
|
||||||
+ goto <bb 6>; [INV]
|
|
||||||
+ else
|
|
||||||
+ goto <bb 7>; [100.00%]
|
|
||||||
+ ... */
|
|
||||||
+ fvalue = 0;
|
|
||||||
+ }
|
|
||||||
return fvalue;
|
|
||||||
}
|
|
||||||
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,548 +0,0 @@
|
|||||||
From c34a02199b1dfd362e81e78cb90fbd11e02eb93e Mon Sep 17 00:00:00 2001
|
|
||||||
From: liyancheng <412998149@qq.com>
|
|
||||||
Date: Mon, 14 Feb 2022 14:34:41 +0800
|
|
||||||
Subject: [PATCH 29/32] [AutoBOLT] Support saving feedback count info to ELF
|
|
||||||
segment 1/3
|
|
||||||
|
|
||||||
Add flag -fauto-bolt to save the feedback count info from PGO or
|
|
||||||
AutoFDO to segment .text.fdo. The bolt plugin will read and parse
|
|
||||||
it into the profile of llvm-bolt.
|
|
||||||
---
|
|
||||||
gcc/common.opt | 8 +
|
|
||||||
gcc/final.c | 400 +++++++++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
gcc/opts.c | 61 ++++++++
|
|
||||||
3 files changed, 469 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
||||||
index 9488bd90f..5eaa667b3 100644
|
|
||||||
--- a/gcc/common.opt
|
|
||||||
+++ b/gcc/common.opt
|
|
||||||
@@ -2403,6 +2403,14 @@ freorder-functions
|
|
||||||
Common Report Var(flag_reorder_functions) Optimization
|
|
||||||
Reorder functions to improve code placement.
|
|
||||||
|
|
||||||
+fauto-bolt
|
|
||||||
+Common Report Var(flag_auto_bolt)
|
|
||||||
+Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
|
|
||||||
+
|
|
||||||
+fauto-bolt=
|
|
||||||
+Common Joined RejectNegative
|
|
||||||
+Specify the feedback data directory required by BOLT-plugin. The default is the current directory.
|
|
||||||
+
|
|
||||||
frerun-cse-after-loop
|
|
||||||
Common Report Var(flag_rerun_cse_after_loop) Optimization
|
|
||||||
Add a common subexpression elimination pass after loop optimizations.
|
|
||||||
diff --git a/gcc/final.c b/gcc/final.c
|
|
||||||
index a3601964a..b9affd3a7 100644
|
|
||||||
--- a/gcc/final.c
|
|
||||||
+++ b/gcc/final.c
|
|
||||||
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "rtl-iter.h"
|
|
||||||
#include "print-rtl.h"
|
|
||||||
#include "function-abi.h"
|
|
||||||
+#include "insn-codes.h"
|
|
||||||
|
|
||||||
#ifdef XCOFF_DEBUGGING_INFO
|
|
||||||
#include "xcoffout.h" /* Needed for external data declarations. */
|
|
||||||
@@ -4640,6 +4641,399 @@ leaf_renumber_regs_insn (rtx in_rtx)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+
|
|
||||||
+#define ASM_FDO_SECTION_PREFIX ".text.fdo."
|
|
||||||
+
|
|
||||||
+#define ASM_FDO_CALLER_FLAG ".fdo.caller "
|
|
||||||
+#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
|
|
||||||
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind "
|
|
||||||
+
|
|
||||||
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee "
|
|
||||||
+
|
|
||||||
+/* Return the relative offset address of the start instruction of BB,
|
|
||||||
+ return -1 if it is empty instruction. */
|
|
||||||
+
|
|
||||||
+static int
|
|
||||||
+get_bb_start_addr (basic_block bb)
|
|
||||||
+{
|
|
||||||
+ rtx_insn *insn;
|
|
||||||
+ FOR_BB_INSNS (bb, insn)
|
|
||||||
+ {
|
|
||||||
+ if (!INSN_P (insn))
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ int insn_code = recog_memoized (insn);
|
|
||||||
+
|
|
||||||
+ /* The instruction NOP in llvm-bolt belongs to the previous
|
|
||||||
+ BB, so it needs to be skipped. */
|
|
||||||
+ if (insn_code != CODE_FOR_nop)
|
|
||||||
+ {
|
|
||||||
+ return INSN_ADDRESSES (INSN_UID (insn));
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return -1;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Return the relative offset address of the end instruction of BB,
|
|
||||||
+ return -1 if it is empty or call instruction. */
|
|
||||||
+
|
|
||||||
+static int
|
|
||||||
+get_bb_end_addr (basic_block bb)
|
|
||||||
+{
|
|
||||||
+ rtx_insn *insn;
|
|
||||||
+ int num_succs = EDGE_COUNT (bb->succs);
|
|
||||||
+ FOR_BB_INSNS_REVERSE (bb, insn)
|
|
||||||
+ {
|
|
||||||
+ if (!INSN_P (insn))
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ /* The jump target of call is not in this function, so
|
|
||||||
+ it should be excluded. */
|
|
||||||
+ if (CALL_P (insn))
|
|
||||||
+ {
|
|
||||||
+ return -1;
|
|
||||||
+ }
|
|
||||||
+ if ((num_succs == 1)
|
|
||||||
+ || ((num_succs == 2) && any_condjump_p (insn)))
|
|
||||||
+ {
|
|
||||||
+ return INSN_ADDRESSES (INSN_UID (insn));
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ return -1;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return -1;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Return the end address of cfun. */
|
|
||||||
+
|
|
||||||
+static int
|
|
||||||
+get_function_end_addr ()
|
|
||||||
+{
|
|
||||||
+ rtx_insn *insn = get_last_insn ();
|
|
||||||
+ for (; insn != get_insns (); insn = PREV_INSN (insn))
|
|
||||||
+ {
|
|
||||||
+ if (!INSN_P (insn))
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ return INSN_ADDRESSES (INSN_UID (insn));
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return -1;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Return the function profile status string. */
|
|
||||||
+
|
|
||||||
+static const char *
|
|
||||||
+get_function_profile_status ()
|
|
||||||
+{
|
|
||||||
+ const char *profile_status[] = {
|
|
||||||
+ "PROFILE_ABSENT",
|
|
||||||
+ "PROFILE_GUESSED",
|
|
||||||
+ "PROFILE_READ",
|
|
||||||
+ "PROFILE_LAST" /* Last value, used by profile streaming. */
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return profile_status[profile_status_for_fn (cfun)];
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Return the count from the feedback data, such as PGO or AFDO. */
|
|
||||||
+
|
|
||||||
+inline static gcov_type
|
|
||||||
+get_fdo_count (profile_count count)
|
|
||||||
+{
|
|
||||||
+ return count.quality () >= GUESSED
|
|
||||||
+ ? count.to_gcov_type () : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Return the profile quality string. */
|
|
||||||
+
|
|
||||||
+static const char *
|
|
||||||
+get_fdo_count_quality (profile_count count)
|
|
||||||
+{
|
|
||||||
+ const char *profile_quality[] = {
|
|
||||||
+ "UNINITIALIZED_PROFILE",
|
|
||||||
+ "GUESSED_LOCAL",
|
|
||||||
+ "GUESSED_GLOBAL0",
|
|
||||||
+ "GUESSED_GLOBAL0_ADJUSTED",
|
|
||||||
+ "GUESSED",
|
|
||||||
+ "AFDO",
|
|
||||||
+ "ADJUSTED",
|
|
||||||
+ "PRECISE"
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return profile_quality[count.quality ()];
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static const char *
|
|
||||||
+alias_local_functions (const char *fnname)
|
|
||||||
+{
|
|
||||||
+ if (TREE_PUBLIC (cfun->decl))
|
|
||||||
+ {
|
|
||||||
+ return fnname;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return concat (fnname, "/", lbasename (dump_base_name), NULL);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Return function bind type string. */
|
|
||||||
+
|
|
||||||
+static const char *
|
|
||||||
+simple_get_function_bind ()
|
|
||||||
+{
|
|
||||||
+ const char *function_bind[] = {
|
|
||||||
+ "GLOBAL",
|
|
||||||
+ "WEAK",
|
|
||||||
+ "LOCAL",
|
|
||||||
+ "UNKNOWN"
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ if (TREE_PUBLIC (cfun->decl))
|
|
||||||
+ {
|
|
||||||
+ if (!(DECL_WEAK (cfun->decl)))
|
|
||||||
+ {
|
|
||||||
+ return function_bind[0];
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ return function_bind[1];
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ return function_bind[2];
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return function_bind[3];
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Dump the callee functions insn in bb by CALL_P (insn). */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
|
|
||||||
+{
|
|
||||||
+ rtx_insn *insn;
|
|
||||||
+ FOR_BB_INSNS (bb, insn)
|
|
||||||
+ {
|
|
||||||
+ if (insn && CALL_P (insn))
|
|
||||||
+ {
|
|
||||||
+ tree callee = get_call_fndecl (insn);
|
|
||||||
+
|
|
||||||
+ if (callee)
|
|
||||||
+ {
|
|
||||||
+ fprintf (asm_out_file, "\t.string \"%x\"\n",
|
|
||||||
+ INSN_ADDRESSES (INSN_UID (insn)));
|
|
||||||
+
|
|
||||||
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
|
||||||
+ ASM_FDO_CALLEE_FLAG,
|
|
||||||
+ alias_local_functions (get_fnname_from_decl (callee)));
|
|
||||||
+
|
|
||||||
+ fprintf (asm_out_file,
|
|
||||||
+ "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
|
|
||||||
+ call_count);
|
|
||||||
+
|
|
||||||
+ if (dump_file)
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "call: %x --> %s\n",
|
|
||||||
+ INSN_ADDRESSES (INSN_UID (insn)),
|
|
||||||
+ alias_local_functions
|
|
||||||
+ (get_fnname_from_decl (callee)));
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Dump the edge info into asm. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
|
|
||||||
+{
|
|
||||||
+ edge e;
|
|
||||||
+ edge_iterator ei;
|
|
||||||
+ gcov_type edge_total_count = 0;
|
|
||||||
+
|
|
||||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
|
||||||
+ {
|
|
||||||
+ gcov_type edge_count = get_fdo_count (e->count ());
|
|
||||||
+ edge_total_count += edge_count;
|
|
||||||
+
|
|
||||||
+ int edge_start_addr = get_bb_end_addr (e->src);
|
|
||||||
+ int edge_end_addr = get_bb_start_addr (e->dest);
|
|
||||||
+
|
|
||||||
+ if (edge_start_addr == -1 || edge_end_addr == -1)
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* This is a reserved assert for the original design. If this
|
|
||||||
+ assert is found, use the address of the previous instruction
|
|
||||||
+ as edge_start_addr. */
|
|
||||||
+ gcc_assert (edge_start_addr != edge_end_addr);
|
|
||||||
+
|
|
||||||
+ if (dump_file)
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "edge: %x --> %x = (%ld)\n",
|
|
||||||
+ edge_start_addr, edge_end_addr, edge_count);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (edge_count > 0)
|
|
||||||
+ {
|
|
||||||
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
|
|
||||||
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
|
|
||||||
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
|
|
||||||
+ edge_count);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ gcov_type call_count = MAX (edge_total_count, bb_count);
|
|
||||||
+ if (call_count > 0)
|
|
||||||
+ {
|
|
||||||
+ dump_direct_callee_info_to_asm (bb, call_count);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Dump the bb info into asm. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
|
|
||||||
+{
|
|
||||||
+ int bb_start_addr = get_bb_start_addr (bb);
|
|
||||||
+ if (bb_start_addr != -1)
|
|
||||||
+ {
|
|
||||||
+ fprintf (asm_out_file, "\t.string \"%x\"\n", bb_start_addr);
|
|
||||||
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
|
|
||||||
+ bb_count);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Dump the function info into asm. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+dump_function_info_to_asm (const char *fnname)
|
|
||||||
+{
|
|
||||||
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
|
||||||
+ ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
|
|
||||||
+ fprintf (asm_out_file, "\t.string \"%s%d\"\n",
|
|
||||||
+ ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
|
|
||||||
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
|
||||||
+ ASM_FDO_CALLER_BIND_FLAG, simple_get_function_bind ());
|
|
||||||
+
|
|
||||||
+ if (dump_file)
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "\n FUNC_NAME: %s\n",
|
|
||||||
+ alias_local_functions (fnname));
|
|
||||||
+ fprintf (dump_file, " file: %s\n",
|
|
||||||
+ dump_base_name);
|
|
||||||
+ fprintf (dump_file, " profile_status: %s\n",
|
|
||||||
+ get_function_profile_status ());
|
|
||||||
+ fprintf (dump_file, " size: %x\n",
|
|
||||||
+ get_function_end_addr ());
|
|
||||||
+ fprintf (dump_file, " function_bind: %s\n",
|
|
||||||
+ simple_get_function_bind ());
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Dump function profile info from AutoFDO or PGO to asm. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+dump_fdo_info_to_asm (const char *fnname)
|
|
||||||
+{
|
|
||||||
+ basic_block bb;
|
|
||||||
+
|
|
||||||
+ dump_function_info_to_asm (fnname);
|
|
||||||
+
|
|
||||||
+ FOR_EACH_BB_FN (bb, cfun)
|
|
||||||
+ {
|
|
||||||
+ gcov_type bb_count = get_fdo_count (bb->count);
|
|
||||||
+ if (bb_count == 0)
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (dump_file)
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "BB: %x --> %x = (%ld) [%s]\n",
|
|
||||||
+ get_bb_start_addr (bb), get_bb_end_addr (bb),
|
|
||||||
+ bb_count, get_fdo_count_quality (bb->count));
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (flag_profile_use)
|
|
||||||
+ {
|
|
||||||
+ dump_edge_jump_info_to_asm (bb, bb_count);
|
|
||||||
+ }
|
|
||||||
+ else if (flag_auto_profile)
|
|
||||||
+ {
|
|
||||||
+ dump_bb_info_to_asm (bb, bb_count);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* When -fauto-bolt option is turned on, the .text.fdo. section
|
|
||||||
+ will be generated in the *.s file if there is feedback information
|
|
||||||
+ from PGO or AutoFDO. This section will be parsed by the BOLT-plugin. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+dump_profile_to_elf_sections ()
|
|
||||||
+{
|
|
||||||
+ if (!flag_function_sections)
|
|
||||||
+ {
|
|
||||||
+ error ("-fauto-bolt should work with -ffunction-sections");
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ if (!flag_ipa_ra)
|
|
||||||
+ {
|
|
||||||
+ error ("-fauto-bolt should work with -fipa-ra");
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ if (flag_align_jumps)
|
|
||||||
+ {
|
|
||||||
+ error ("-fauto-bolt is not supported with -falign-jumps");
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ if (flag_align_labels)
|
|
||||||
+ {
|
|
||||||
+ error ("-fauto-bolt is not supported with -falign-labels");
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ if (flag_align_loops)
|
|
||||||
+ {
|
|
||||||
+ error ("-fauto-bolt is not supported with -falign-loops");
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Return if no feedback data. */
|
|
||||||
+ if (!flag_profile_use && !flag_auto_profile)
|
|
||||||
+ {
|
|
||||||
+ error ("-fauto-bolt should use with -fprofile-use or -fauto-profile");
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Avoid empty functions. */
|
|
||||||
+ if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
|
|
||||||
+ {
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ int flags = SECTION_DEBUG | SECTION_EXCLUDE;
|
|
||||||
+ const char *fnname = get_fnname_from_decl (current_function_decl);
|
|
||||||
+ char *profile_fnname = NULL;
|
|
||||||
+
|
|
||||||
+ asprintf (&profile_fnname,"%s%s", ASM_FDO_SECTION_PREFIX, fnname);
|
|
||||||
+ switch_to_section (get_section (profile_fnname, flags , NULL));
|
|
||||||
+ dump_fdo_info_to_asm (fnname);
|
|
||||||
+
|
|
||||||
+ if (profile_fnname)
|
|
||||||
+ {
|
|
||||||
+ free (profile_fnname);
|
|
||||||
+ profile_fnname = NULL;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Turn the RTL into assembly. */
|
|
||||||
static unsigned int
|
|
||||||
rest_of_handle_final (void)
|
|
||||||
@@ -4707,6 +5101,12 @@ rest_of_handle_final (void)
|
|
||||||
targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0),
|
|
||||||
decl_fini_priority_lookup
|
|
||||||
(current_function_decl));
|
|
||||||
+
|
|
||||||
+ if (flag_auto_bolt)
|
|
||||||
+ {
|
|
||||||
+ dump_profile_to_elf_sections ();
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
|
||||||
index f49f5ee58..0b389ae1d 100644
|
|
||||||
--- a/gcc/opts.c
|
|
||||||
+++ b/gcc/opts.c
|
|
||||||
@@ -1166,6 +1166,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
|
|
||||||
if (opts->x_flag_vtable_verify && opts->x_flag_lto)
|
|
||||||
sorry ("vtable verification is not supported with LTO");
|
|
||||||
|
|
||||||
+ /* Currently -fauto-bolt is not supported for LTO. */
|
|
||||||
+ if (opts->x_flag_auto_bolt && opts->x_flag_lto)
|
|
||||||
+ sorry ("%<-fauto-bolt%> is not supported with LTO");
|
|
||||||
+
|
|
||||||
/* Control IPA optimizations based on different -flive-patching level. */
|
|
||||||
if (opts->x_flag_live_patching)
|
|
||||||
control_options_for_live_patching (opts, opts_set,
|
|
||||||
@@ -1183,6 +1187,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
|
|
||||||
= (opts->x_flag_unroll_loops
|
|
||||||
|| opts->x_flag_peel_loops
|
|
||||||
|| opts->x_optimize >= 3);
|
|
||||||
+
|
|
||||||
+ if (opts->x_flag_auto_bolt)
|
|
||||||
+ {
|
|
||||||
+ /* Record the function section to facilitate the feedback
|
|
||||||
+ data storage. */
|
|
||||||
+ if (!opts->x_flag_function_sections)
|
|
||||||
+ {
|
|
||||||
+ inform (loc,
|
|
||||||
+ "%<-fauto-bolt%> should work with %<-ffunction-sections%>,"
|
|
||||||
+ " enabling %<-ffunction-sections%>");
|
|
||||||
+ opts->x_flag_function_sections = true;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Cancel the internal alignment of the function. The binary
|
|
||||||
+ optimizer bolt will cancel the internal alignment optimization
|
|
||||||
+ of the function, so the alignment is meaningless at this time,
|
|
||||||
+ and if not, it will bring trouble to the calculation of the
|
|
||||||
+ offset address of the instruction. */
|
|
||||||
+ if (opts->x_flag_align_jumps)
|
|
||||||
+ {
|
|
||||||
+ inform (loc,
|
|
||||||
+ "%<-fauto-bolt%> should not work with %<-falign-jumps%>,"
|
|
||||||
+ " disabling %<-falign-jumps%>");
|
|
||||||
+ opts->x_flag_align_jumps = false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (opts->x_flag_align_labels)
|
|
||||||
+ {
|
|
||||||
+ inform (loc,
|
|
||||||
+ "%<-fauto-bolt%> should not work with %<-falign-labels%>,"
|
|
||||||
+ " disabling %<-falign-labels%>");
|
|
||||||
+ opts->x_flag_align_labels = false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (opts->x_flag_align_loops)
|
|
||||||
+ {
|
|
||||||
+ inform (loc,
|
|
||||||
+ "%<-fauto-bolt%> should not work with %<-falign-loops%>,"
|
|
||||||
+ " disabling %<-falign-loops%>");
|
|
||||||
+ opts->x_flag_align_loops = false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* When parsing instructions in RTL phase, we need to know
|
|
||||||
+ the call information of instructions to avoid being optimized. */
|
|
||||||
+ if (!opts->x_flag_ipa_ra)
|
|
||||||
+ {
|
|
||||||
+ inform (loc,
|
|
||||||
+ "%<-fauto-bolt%> should work with %<-fipa-ra%>,"
|
|
||||||
+ " enabling %<-fipa-ra%>");
|
|
||||||
+ opts->x_flag_ipa_ra = true;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
#define LEFT_COLUMN 27
|
|
||||||
@@ -2881,6 +2937,11 @@ common_handle_option (struct gcc_options *opts,
|
|
||||||
check_alignment_argument (loc, arg, "functions");
|
|
||||||
break;
|
|
||||||
|
|
||||||
+ case OPT_fauto_bolt_:
|
|
||||||
+ case OPT_fauto_bolt:
|
|
||||||
+ /* Deferred. */
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
default:
|
|
||||||
/* If the flag was handled in a standard way, assume the lack of
|
|
||||||
processing here is intentional. */
|
|
||||||
--
|
|
||||||
2.27.0
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,82 +0,0 @@
|
|||||||
From 071d19832d788422034a3b052ff7ce91e1010344 Mon Sep 17 00:00:00 2001
|
|
||||||
From: dingguangya <dingguangya1@huawei.com>
|
|
||||||
Date: Mon, 28 Feb 2022 16:52:58 +0800
|
|
||||||
Subject: [PATCH 32/32] [Autoprefetch] Prune invalid loops containing edges whose
|
|
||||||
probability exceeds 1
|
|
||||||
|
|
||||||
Skip auto prefetch analysis if the loop contains the bb in which the sum
|
|
||||||
of its outgoing edge probabilities is greater than 1.
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp | 2 +-
|
|
||||||
.../gcc.dg/autoprefetch/branch-weighted-prefetch.c | 8 ++++----
|
|
||||||
gcc/tree-ssa-loop-prefetch.c | 12 ++++++++++++
|
|
||||||
3 files changed, 17 insertions(+), 5 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
|
||||||
index a7408e338..7cae630a2 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
|
||||||
@@ -20,7 +20,7 @@ load_lib target-supports.exp
|
|
||||||
# Initialize `dg'.
|
|
||||||
dg-init
|
|
||||||
|
|
||||||
-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
|
|
||||||
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
|
|
||||||
"" "-fprefetch-loop-arrays"
|
|
||||||
|
|
||||||
# All done.
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
|
||||||
index c63c5e5cb..ab537cb29 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
-/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O2 -fprefetch-loop-arrays=2 --param min-insn-to-prefetch-ratio=5 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
||||||
+/* { dg-options "-O2 -fprefetch-loop-arrays=2 --param min-insn-to-prefetch-ratio=5 --param simultaneous-prefetches=100 --param l1-cache-size=64 --param l1-cache-line-size=32 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
|
|
||||||
#define N 10000000
|
|
||||||
|
|
||||||
long long a[N];
|
|
||||||
@@ -18,5 +18,5 @@ long long func ()
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
-/* { dg-final { scan-tree-dump-times "Ahead 40" 1 "aprefetch" } } */
|
|
||||||
-/* { dg-final { scan-tree-dump-times "builtin_prefetch" 1 "optimized" } } */
|
|
||||||
\ No newline at end of file
|
|
||||||
+/* { dg-final { scan-tree-dump "Calculating prefetch distance using bb branch weighting method" "aprefetch" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump "builtin_prefetch" "optimized" } } */
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
|
|
||||||
index 673f453a4..0d992d8f6 100644
|
|
||||||
--- a/gcc/tree-ssa-loop-prefetch.c
|
|
||||||
+++ b/gcc/tree-ssa-loop-prefetch.c
|
|
||||||
@@ -2267,6 +2267,15 @@ traverse_prune_bb_branch (hash_map <basic_block, bb_bp> &bb_branch_prob,
|
|
||||||
&& bb_bp_node->false_edge_bb == NULL))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
+ /* Do not process the loop with a bb branch probability of an abnormal
|
|
||||||
+ value. */
|
|
||||||
+ if (bb_bp_node->true_edge_prob + bb_bp_node->false_edge_prob > 1)
|
|
||||||
+ {
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ fprintf (dump_file, "bb branch probability is abnormal\n");
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
if (current_bb == latch_bb)
|
|
||||||
{
|
|
||||||
max_path--;
|
|
||||||
@@ -2409,6 +2418,9 @@ estimate_num_loop_insns (struct loop *loop, eni_weights *weights)
|
|
||||||
dump_loop_bb (loop);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ fprintf (dump_file, "Calculating prefetch distance using bb branch "
|
|
||||||
+ "weighting method\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < loop->num_nodes; i++)
|
|
||||||
--
|
|
||||||
2.27.0
|
|
||||||
|
|
||||||
@ -1,130 +0,0 @@
|
|||||||
From adfcca263996bf174f7108b477e81e7ec58f19c4 Mon Sep 17 00:00:00 2001
|
|
||||||
From: dingguangya <dingguangya1@huawei.com>
|
|
||||||
Date: Mon, 14 Mar 2022 10:42:07 +0800
|
|
||||||
Subject: [PATCH] [AutoFdo] Fix memory leaks in autofdo and autoprefetch
|
|
||||||
|
|
||||||
Fix memory leaks in autofdo and autoprefetch.
|
|
||||||
---
|
|
||||||
gcc/final.c | 23 +++++++++++++++--------
|
|
||||||
gcc/tree-ssa-loop-prefetch.c | 4 ++++
|
|
||||||
2 files changed, 19 insertions(+), 8 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/final.c b/gcc/final.c
|
|
||||||
index b9affd3a7..da8d20958 100644
|
|
||||||
--- a/gcc/final.c
|
|
||||||
+++ b/gcc/final.c
|
|
||||||
@@ -4770,12 +4770,16 @@ get_fdo_count_quality (profile_count count)
|
|
||||||
return profile_quality[count.quality ()];
|
|
||||||
}
|
|
||||||
|
|
||||||
-static const char *
|
|
||||||
+/* If the function is not public, return the function_name/file_name for
|
|
||||||
+ disambiguation of local symbols since there could be identical function
|
|
||||||
+ names coming from identical file names. The caller needs to free memory. */
|
|
||||||
+
|
|
||||||
+static char *
|
|
||||||
alias_local_functions (const char *fnname)
|
|
||||||
{
|
|
||||||
if (TREE_PUBLIC (cfun->decl))
|
|
||||||
{
|
|
||||||
- return fnname;
|
|
||||||
+ return concat (fnname, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return concat (fnname, "/", lbasename (dump_base_name), NULL);
|
|
||||||
@@ -4826,12 +4830,14 @@ dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
|
|
||||||
|
|
||||||
if (callee)
|
|
||||||
{
|
|
||||||
+ char *func_name =
|
|
||||||
+ alias_local_functions (get_fnname_from_decl (callee));
|
|
||||||
fprintf (asm_out_file, "\t.string \"%x\"\n",
|
|
||||||
INSN_ADDRESSES (INSN_UID (insn)));
|
|
||||||
|
|
||||||
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
|
||||||
ASM_FDO_CALLEE_FLAG,
|
|
||||||
- alias_local_functions (get_fnname_from_decl (callee)));
|
|
||||||
+ func_name);
|
|
||||||
|
|
||||||
fprintf (asm_out_file,
|
|
||||||
"\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
|
|
||||||
@@ -4841,9 +4847,9 @@ dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
|
|
||||||
{
|
|
||||||
fprintf (dump_file, "call: %x --> %s\n",
|
|
||||||
INSN_ADDRESSES (INSN_UID (insn)),
|
|
||||||
- alias_local_functions
|
|
||||||
- (get_fnname_from_decl (callee)));
|
|
||||||
+ func_name);
|
|
||||||
}
|
|
||||||
+ free (func_name);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -4917,8 +4923,9 @@ dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
|
|
||||||
static void
|
|
||||||
dump_function_info_to_asm (const char *fnname)
|
|
||||||
{
|
|
||||||
+ char *func_name = alias_local_functions (fnname);
|
|
||||||
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
|
||||||
- ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
|
|
||||||
+ ASM_FDO_CALLER_FLAG, func_name);
|
|
||||||
fprintf (asm_out_file, "\t.string \"%s%d\"\n",
|
|
||||||
ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
|
|
||||||
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
|
||||||
@@ -4926,8 +4933,7 @@ dump_function_info_to_asm (const char *fnname)
|
|
||||||
|
|
||||||
if (dump_file)
|
|
||||||
{
|
|
||||||
- fprintf (dump_file, "\n FUNC_NAME: %s\n",
|
|
||||||
- alias_local_functions (fnname));
|
|
||||||
+ fprintf (dump_file, "\n FUNC_NAME: %s\n", func_name);
|
|
||||||
fprintf (dump_file, " file: %s\n",
|
|
||||||
dump_base_name);
|
|
||||||
fprintf (dump_file, " profile_status: %s\n",
|
|
||||||
@@ -4937,6 +4943,7 @@ dump_function_info_to_asm (const char *fnname)
|
|
||||||
fprintf (dump_file, " function_bind: %s\n",
|
|
||||||
simple_get_function_bind ());
|
|
||||||
}
|
|
||||||
+ free (func_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Dump function profile info form AutoFDO or PGO to asm. */
|
|
||||||
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
|
|
||||||
index 0d992d8f6..781831c39 100644
|
|
||||||
--- a/gcc/tree-ssa-loop-prefetch.c
|
|
||||||
+++ b/gcc/tree-ssa-loop-prefetch.c
|
|
||||||
@@ -2248,6 +2248,7 @@ get_bb_branch_prob (hash_map <basic_block, bb_bp> &bb_branch_prob,
|
|
||||||
branch_prob.true_edge_prob = get_edge_prob (e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
+ free (body);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Traverse each bb in the loop and prune fake loops. */
|
|
||||||
@@ -2416,6 +2417,7 @@ estimate_num_loop_insns (struct loop *loop, eni_weights *weights)
|
|
||||||
if (get_bb_prob (bb_branch_prob, loop) == false)
|
|
||||||
{
|
|
||||||
dump_loop_bb (loop);
|
|
||||||
+ free (body);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
@@ -2596,6 +2598,7 @@ is_high_exec_rate_loop (struct loop *loop)
|
|
||||||
|
|
||||||
if (loop_exec_rate < (float) LOOP_EXECUTION_RATE / 100.0)
|
|
||||||
{
|
|
||||||
+ exit_edges.release ();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -2606,6 +2609,7 @@ is_high_exec_rate_loop (struct loop *loop)
|
|
||||||
loop_exec_rate, (float) LOOP_EXECUTION_RATE / 100.0);
|
|
||||||
dump_loop_bb (loop);
|
|
||||||
}
|
|
||||||
+ exit_edges.release ();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,72 +0,0 @@
|
|||||||
From 2969f5190561e26a8ce42d5dcda43ef59e0b6d32 Mon Sep 17 00:00:00 2001
|
|
||||||
From: liyancheng <412998149@qq.com>
|
|
||||||
Date: Tue, 26 Apr 2022 19:59:09 +0800
|
|
||||||
Subject: [PATCH] [Backport] sanitizer: Fix asan against glibc 2.34 [PR100114]
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d9f462fb372fb02da032cefd6b091d7582c425ae
|
|
||||||
|
|
||||||
sanitizer: Fix asan against glibc 2.34 [PR100114]
|
|
||||||
|
|
||||||
As mentioned in the PR, SIGSTKSZ is no longer a compile time constant in
|
|
||||||
glibc 2.34 and later, so
|
|
||||||
static const uptr kAltStackSize = SIGSTKSZ * 4;
|
|
||||||
needs dynamic initialization, but is used by a function called indirectly
|
|
||||||
from .preinit_array and therefore before the variable is constructed.
|
|
||||||
This results in using 0 size instead and all asan instrumented programs
|
|
||||||
die with:
|
|
||||||
==91==ERROR: AddressSanitizer failed to allocate 0x0 (0) bytes of SetAlternateSignalStack (error code: 22)
|
|
||||||
|
|
||||||
Here is a cherry-pick from upstream to fix this.
|
|
||||||
|
|
||||||
2021-04-17 Jakub Jelinek <jakub@redhat.com>
|
|
||||||
|
|
||||||
PR sanitizer/100114
|
|
||||||
* sanitizer_common/sanitizer_posix_libcdep.cpp: Cherry-pick
|
|
||||||
llvm-project revisions 82150606fb11d28813ae6da1101f5bda638165fe
|
|
||||||
and b93629dd335ffee2fc4b9b619bf86c3f9e6b0023.
|
|
||||||
---
|
|
||||||
.../sanitizer_common/sanitizer_posix_libcdep.cpp | 13 ++++++++-----
|
|
||||||
1 file changed, 8 insertions(+), 5 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/libsanitizer/sanitizer_common/sanitizer_posix_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_posix_libcdep.cpp
|
|
||||||
index 304b3a01a..ac88fbe07 100644
|
|
||||||
--- a/libsanitizer/sanitizer_common/sanitizer_posix_libcdep.cpp
|
|
||||||
+++ b/libsanitizer/sanitizer_common/sanitizer_posix_libcdep.cpp
|
|
||||||
@@ -169,7 +169,11 @@ bool SupportsColoredOutput(fd_t fd) {
|
|
||||||
|
|
||||||
#if !SANITIZER_GO
|
|
||||||
// TODO(glider): different tools may require different altstack size.
|
|
||||||
-static const uptr kAltStackSize = SIGSTKSZ * 4; // SIGSTKSZ is not enough.
|
|
||||||
+static uptr GetAltStackSize() {
|
|
||||||
+ // SIGSTKSZ is not enough.
|
|
||||||
+ static const uptr kAltStackSize = SIGSTKSZ * 4;
|
|
||||||
+ return kAltStackSize;
|
|
||||||
+}
|
|
||||||
|
|
||||||
void SetAlternateSignalStack() {
|
|
||||||
stack_t altstack, oldstack;
|
|
||||||
@@ -180,10 +184,9 @@ void SetAlternateSignalStack() {
|
|
||||||
// TODO(glider): the mapped stack should have the MAP_STACK flag in the
|
|
||||||
// future. It is not required by man 2 sigaltstack now (they're using
|
|
||||||
// malloc()).
|
|
||||||
- void* base = MmapOrDie(kAltStackSize, __func__);
|
|
||||||
- altstack.ss_sp = (char*) base;
|
|
||||||
+ altstack.ss_size = GetAltStackSize();
|
|
||||||
+ altstack.ss_sp = (char *)MmapOrDie(altstack.ss_size, __func__);
|
|
||||||
altstack.ss_flags = 0;
|
|
||||||
- altstack.ss_size = kAltStackSize;
|
|
||||||
CHECK_EQ(0, sigaltstack(&altstack, nullptr));
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -191,7 +194,7 @@ void UnsetAlternateSignalStack() {
|
|
||||||
stack_t altstack, oldstack;
|
|
||||||
altstack.ss_sp = nullptr;
|
|
||||||
altstack.ss_flags = SS_DISABLE;
|
|
||||||
- altstack.ss_size = kAltStackSize; // Some sane value required on Darwin.
|
|
||||||
+ altstack.ss_size = GetAltStackSize(); // Some sane value required on Darwin.
|
|
||||||
CHECK_EQ(0, sigaltstack(&altstack, &oldstack));
|
|
||||||
UnmapOrDie(oldstack.ss_sp, oldstack.ss_size);
|
|
||||||
}
|
|
||||||
--
|
|
||||||
2.25.1
|
|
||||||
|
|
||||||
@ -1,342 +0,0 @@
|
|||||||
From cf0f086ec274d794a2a180047123920bf8a5224b Mon Sep 17 00:00:00 2001
|
|
||||||
From: dingguangya <dingguangya1@huawei.com>
|
|
||||||
Date: Mon, 17 Jan 2022 21:03:47 +0800
|
|
||||||
Subject: [PATCH 01/12] [ccmp] Add another optimization opportunity for ccmp
|
|
||||||
instruction
|
|
||||||
|
|
||||||
Add flag -fccmp2.
|
|
||||||
Enables the use of the ccmp instruction by creating a new conflict
|
|
||||||
relationship for instances where temporary expressions replacement
|
|
||||||
cannot be effectively created.
|
|
||||||
---
|
|
||||||
gcc/ccmp.c | 33 ++++
|
|
||||||
gcc/ccmp.h | 1 +
|
|
||||||
gcc/common.opt | 4 +
|
|
||||||
gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 15 ++
|
|
||||||
gcc/tree-ssa-coalesce.c | 197 ++++++++++++++++++++++
|
|
||||||
5 files changed, 250 insertions(+)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
|
|
||||||
|
|
||||||
diff --git a/gcc/ccmp.c b/gcc/ccmp.c
|
|
||||||
index ca77375a9..8d2d73e52 100644
|
|
||||||
--- a/gcc/ccmp.c
|
|
||||||
+++ b/gcc/ccmp.c
|
|
||||||
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "cfgexpand.h"
|
|
||||||
#include "ccmp.h"
|
|
||||||
#include "predict.h"
|
|
||||||
+#include "gimple-iterator.h"
|
|
||||||
|
|
||||||
/* Check whether T is a simple boolean variable or a SSA name
|
|
||||||
set by a comparison operator in the same basic block. */
|
|
||||||
@@ -129,6 +130,38 @@ ccmp_candidate_p (gimple *g)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Check whether bb is a potential conditional compare candidate. */
|
|
||||||
+bool
|
|
||||||
+check_ccmp_candidate (basic_block bb)
|
|
||||||
+{
|
|
||||||
+ gimple_stmt_iterator gsi;
|
|
||||||
+ gimple *bb_last_stmt, *stmt;
|
|
||||||
+ tree op0, op1;
|
|
||||||
+
|
|
||||||
+ gsi = gsi_last_bb (bb);
|
|
||||||
+ bb_last_stmt = gsi_stmt (gsi);
|
|
||||||
+
|
|
||||||
+ if (bb_last_stmt && gimple_code (bb_last_stmt) == GIMPLE_COND)
|
|
||||||
+ {
|
|
||||||
+ op0 = gimple_cond_lhs (bb_last_stmt);
|
|
||||||
+ op1 = gimple_cond_rhs (bb_last_stmt);
|
|
||||||
+
|
|
||||||
+ if (TREE_CODE (op0) == SSA_NAME
|
|
||||||
+ && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
|
|
||||||
+ && TREE_CODE (op1) == INTEGER_CST
|
|
||||||
+ && ((gimple_cond_code (bb_last_stmt) == NE_EXPR)
|
|
||||||
+ || (gimple_cond_code (bb_last_stmt) == EQ_EXPR)))
|
|
||||||
+ {
|
|
||||||
+ stmt = SSA_NAME_DEF_STMT (op0);
|
|
||||||
+ if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN)
|
|
||||||
+ {
|
|
||||||
+ return ccmp_candidate_p (stmt);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Extract the comparison we want to do from the tree. */
|
|
||||||
void
|
|
||||||
get_compare_parts (tree t, int *up, rtx_code *rcode,
|
|
||||||
diff --git a/gcc/ccmp.h b/gcc/ccmp.h
|
|
||||||
index 199dd581d..ac862f0f6 100644
|
|
||||||
--- a/gcc/ccmp.h
|
|
||||||
+++ b/gcc/ccmp.h
|
|
||||||
@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#define GCC_CCMP_H
|
|
||||||
|
|
||||||
extern rtx expand_ccmp_expr (gimple *, machine_mode);
|
|
||||||
+extern bool check_ccmp_candidate (basic_block bb);
|
|
||||||
|
|
||||||
#endif /* GCC_CCMP_H */
|
|
||||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
||||||
index 24834cf60..4dd566def 100644
|
|
||||||
--- a/gcc/common.opt
|
|
||||||
+++ b/gcc/common.opt
|
|
||||||
@@ -1942,6 +1942,10 @@ fira-verbose=
|
|
||||||
Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5)
|
|
||||||
-fira-verbose=<number> Control IRA's level of diagnostic messages.
|
|
||||||
|
|
||||||
+fccmp2
|
|
||||||
+Common Report Var(flag_ccmp2) Init(0) Optimization
|
|
||||||
+Optimize potential ccmp instruction in complex scenarios.
|
|
||||||
+
|
|
||||||
fivopts
|
|
||||||
Common Report Var(flag_ivopts) Init(1) Optimization
|
|
||||||
Optimize induction variables on trees.
|
|
||||||
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..b509ba810
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
|
|
||||||
@@ -0,0 +1,15 @@
|
|
||||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
||||||
+/* { dg-options "-O -fdump-rtl-expand-details -fccmp2" } */
|
|
||||||
+
|
|
||||||
+int func (int a, int b, int c)
|
|
||||||
+{
|
|
||||||
+ while(1)
|
|
||||||
+ {
|
|
||||||
+ if(a-- == 0 || b >= c)
|
|
||||||
+ {
|
|
||||||
+ return 1;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-assembler-times "\tccmp\t" 1} } */
|
|
||||||
diff --git a/gcc/tree-ssa-coalesce.c b/gcc/tree-ssa-coalesce.c
|
|
||||||
index 0b0b1b18d..e0120a4a4 100644
|
|
||||||
--- a/gcc/tree-ssa-coalesce.c
|
|
||||||
+++ b/gcc/tree-ssa-coalesce.c
|
|
||||||
@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "explow.h"
|
|
||||||
#include "tree-dfa.h"
|
|
||||||
#include "stor-layout.h"
|
|
||||||
+#include "ccmp.h"
|
|
||||||
+#include "target.h"
|
|
||||||
+#include "tree-outof-ssa.h"
|
|
||||||
|
|
||||||
/* This set of routines implements a coalesce_list. This is an object which
|
|
||||||
is used to track pairs of ssa_names which are desirable to coalesce
|
|
||||||
@@ -854,6 +857,198 @@ live_track_clear_base_vars (live_track *ptr)
|
|
||||||
bitmap_clear (&ptr->live_base_var);
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Return true if gimple is a copy assignment. */
|
|
||||||
+
|
|
||||||
+static inline bool
|
|
||||||
+gimple_is_assign_copy_p (gimple *gs)
|
|
||||||
+{
|
|
||||||
+ return (is_gimple_assign (gs) && gimple_assign_copy_p (gs)
|
|
||||||
+ && TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME
|
|
||||||
+ && TREE_CODE (gimple_assign_rhs1 (gs)) == SSA_NAME);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+#define MAX_CCMP_CONFLICT_NUM 5
|
|
||||||
+
|
|
||||||
+/* Clear high-cost conflict graphs. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+remove_high_cost_graph_for_ccmp (ssa_conflicts *conflict_graph)
|
|
||||||
+{
|
|
||||||
+ unsigned x = 0;
|
|
||||||
+ int add_conflict_num = 0;
|
|
||||||
+ bitmap b;
|
|
||||||
+ FOR_EACH_VEC_ELT (conflict_graph->conflicts, x, b)
|
|
||||||
+ {
|
|
||||||
+ if (b)
|
|
||||||
+ {
|
|
||||||
+ add_conflict_num++;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ if (add_conflict_num >= MAX_CCMP_CONFLICT_NUM)
|
|
||||||
+ {
|
|
||||||
+ conflict_graph->conflicts.release ();
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Adding a new conflict graph to the original graph. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+process_add_graph (live_track *live, basic_block bb,
|
|
||||||
+ ssa_conflicts *conflict_graph)
|
|
||||||
+{
|
|
||||||
+ tree use, def;
|
|
||||||
+ ssa_op_iter iter;
|
|
||||||
+ gimple *first_visit_stmt = NULL;
|
|
||||||
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
|
|
||||||
+ gsi_next (&gsi))
|
|
||||||
+ {
|
|
||||||
+ if (gimple_visited_p (gsi_stmt (gsi)))
|
|
||||||
+ {
|
|
||||||
+ first_visit_stmt = gsi_stmt (gsi);
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ if (!first_visit_stmt)
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ for (gimple_stmt_iterator gsi = gsi_last_bb (bb);
|
|
||||||
+ gsi_stmt (gsi) != first_visit_stmt; gsi_prev (&gsi))
|
|
||||||
+ {
|
|
||||||
+ gimple *stmt = gsi_stmt (gsi);
|
|
||||||
+ if (gimple_visited_p (gsi_stmt (gsi)) && is_gimple_debug (stmt))
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ if (gimple_is_assign_copy_p (stmt))
|
|
||||||
+ {
|
|
||||||
+ live_track_clear_var (live, gimple_assign_rhs1 (stmt));
|
|
||||||
+ }
|
|
||||||
+ FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
|
|
||||||
+ {
|
|
||||||
+ live_track_process_def (live, def, conflict_graph);
|
|
||||||
+ }
|
|
||||||
+ FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
|
|
||||||
+ {
|
|
||||||
+ live_track_process_use (live, use);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Build a conflict graph based on ccmp candidate. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+add_ccmp_conflict_graph (ssa_conflicts *conflict_graph,
|
|
||||||
+ tree_live_info_p liveinfo, var_map map, basic_block bb)
|
|
||||||
+{
|
|
||||||
+ live_track *live;
|
|
||||||
+ tree use, def;
|
|
||||||
+ ssa_op_iter iter;
|
|
||||||
+ live = new_live_track (map);
|
|
||||||
+ live_track_init (live, live_on_exit (liveinfo, bb));
|
|
||||||
+
|
|
||||||
+ gimple *last_stmt = gsi_stmt (gsi_last_bb (bb));
|
|
||||||
+ gcc_assert (gimple_cond_lhs (last_stmt));
|
|
||||||
+
|
|
||||||
+ auto_vec<tree> stack;
|
|
||||||
+ stack.safe_push (gimple_cond_lhs (last_stmt));
|
|
||||||
+ while (!stack.is_empty ())
|
|
||||||
+ {
|
|
||||||
+ tree op = stack.pop ();
|
|
||||||
+ gimple *op_stmt = SSA_NAME_DEF_STMT (op);
|
|
||||||
+ if (!op_stmt || gimple_bb (op_stmt) != bb
|
|
||||||
+ || !is_gimple_assign (op_stmt)
|
|
||||||
+ || !ssa_is_replaceable_p (op_stmt))
|
|
||||||
+ {
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ if (gimple_is_assign_copy_p (op_stmt))
|
|
||||||
+ {
|
|
||||||
+ live_track_clear_var (live, gimple_assign_rhs1 (op_stmt));
|
|
||||||
+ }
|
|
||||||
+ gimple_set_visited (op_stmt, true);
|
|
||||||
+ FOR_EACH_SSA_TREE_OPERAND (def, op_stmt, iter, SSA_OP_DEF)
|
|
||||||
+ {
|
|
||||||
+ live_track_process_def (live, def, conflict_graph);
|
|
||||||
+ }
|
|
||||||
+ FOR_EACH_SSA_TREE_OPERAND (use, op_stmt, iter, SSA_OP_USE)
|
|
||||||
+ {
|
|
||||||
+ stack.safe_push (use);
|
|
||||||
+ live_track_process_use (live, use);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ process_add_graph (live, bb, conflict_graph);
|
|
||||||
+ delete_live_track (live);
|
|
||||||
+ remove_high_cost_graph_for_ccmp (conflict_graph);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Determine whether the ccmp conflict graph can be added.
|
|
||||||
+ i.e,
|
|
||||||
+
|
|
||||||
+ ;; basic block 3, loop depth 1
|
|
||||||
+ ;; pred: 2
|
|
||||||
+ ;; 3
|
|
||||||
+ # ivtmp.5_10 = PHI <ivtmp.5_12 (2), ivtmp.5_11 (3)>
|
|
||||||
+ _7 = b_4 (D) >= c_5 (D);
|
|
||||||
+ _8 = ivtmp.5_10 == 0;
|
|
||||||
+ _9 = _7 | _8;
|
|
||||||
+ ivtmp.5_11 = ivtmp.5_10 - 1;
|
|
||||||
+ if (_9 != 0)
|
|
||||||
+ goto <bb 4>; [10.70%]
|
|
||||||
+ else
|
|
||||||
+ goto <bb 3>; [89.30%]
|
|
||||||
+
|
|
||||||
+ In the above loop, the expression will be replaced:
|
|
||||||
+
|
|
||||||
+ _7 replaced by b_4 (D) >= c_5 (D)
|
|
||||||
+ _8 replaced by ivtmp.5_10 == 0
|
|
||||||
+
|
|
||||||
+ If the current case wants to use the ccmp instruction, then
|
|
||||||
+
|
|
||||||
+ _9 can be replaced by _7 | _8
|
|
||||||
+
|
|
||||||
+ So this requires that ivtmp.5_11 and ivtmp.5_10 be divided into different
|
|
||||||
+ partitions.
|
|
||||||
+
|
|
||||||
+ Now this function can achieve this ability. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+determine_add_ccmp_conflict_graph (basic_block bb, tree_live_info_p liveinfo,
|
|
||||||
+ var_map map, ssa_conflicts *graph)
|
|
||||||
+{
|
|
||||||
+ if (!flag_ccmp2 || !targetm.gen_ccmp_first || !check_ccmp_candidate (bb))
|
|
||||||
+ return;
|
|
||||||
+ for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
|
|
||||||
+ gsi_next (&bsi))
|
|
||||||
+ {
|
|
||||||
+ gimple_set_visited (gsi_stmt (bsi), false);
|
|
||||||
+ }
|
|
||||||
+ ssa_conflicts *ccmp_conflict_graph;
|
|
||||||
+ ccmp_conflict_graph = ssa_conflicts_new (num_var_partitions (map));
|
|
||||||
+ add_ccmp_conflict_graph (ccmp_conflict_graph, liveinfo, map, bb);
|
|
||||||
+ unsigned x;
|
|
||||||
+ bitmap b;
|
|
||||||
+ if (ccmp_conflict_graph)
|
|
||||||
+ {
|
|
||||||
+ FOR_EACH_VEC_ELT (ccmp_conflict_graph->conflicts, x, b)
|
|
||||||
+ {
|
|
||||||
+ if (!b)
|
|
||||||
+ continue;
|
|
||||||
+ unsigned y = bitmap_first_set_bit (b);
|
|
||||||
+ if (!graph->conflicts[x] || !bitmap_bit_p (graph->conflicts[x], y))
|
|
||||||
+ {
|
|
||||||
+ ssa_conflicts_add (graph, x, y);
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "potential ccmp: add additional "
|
|
||||||
+ "conflict-ssa : bb[%d] %d:%d\n",
|
|
||||||
+ bb->index, x, y);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ ssa_conflicts_delete (ccmp_conflict_graph);
|
|
||||||
+}
|
|
||||||
|
|
||||||
/* Build a conflict graph based on LIVEINFO. Any partitions which are in the
|
|
||||||
partition view of the var_map liveinfo is based on get entries in the
|
|
||||||
@@ -938,6 +1133,8 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo)
|
|
||||||
live_track_process_use (live, var);
|
|
||||||
}
|
|
||||||
|
|
||||||
+ determine_add_ccmp_conflict_graph (bb, liveinfo, map, graph);
|
|
||||||
+
|
|
||||||
/* If result of a PHI is unused, looping over the statements will not
|
|
||||||
record any conflicts since the def was never live. Since the PHI node
|
|
||||||
is going to be translated out of SSA form, it will insert a copy.
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,83 +0,0 @@
|
|||||||
From 897d637aec3b077eb9ef95b2f4a5f7656e36ebd6 Mon Sep 17 00:00:00 2001
|
|
||||||
From: benniaobufeijiushiji <linda7@huawei.com>
|
|
||||||
Date: Wed, 15 Jun 2022 11:33:03 +0800
|
|
||||||
Subject: [PATCH 03/12] [Backport] loop-invariant: Don't move cold bb
|
|
||||||
instructions to preheader in RTL
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dc1969dab392661cdac1170bbb8c9f83f388580d
|
|
||||||
|
|
||||||
When inner loop is unlikely to execute, loop invariant motion would move
|
|
||||||
cold instructions to a hotter loop. This patch adds profile count checking
|
|
||||||
to fix the problem.
|
|
||||||
---
|
|
||||||
gcc/loop-invariant.c | 17 ++++++++++++++---
|
|
||||||
gcc/testsuite/gcc.dg/loop-invariant-2.c | 20 ++++++++++++++++++++
|
|
||||||
2 files changed, 34 insertions(+), 3 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/loop-invariant-2.c
|
|
||||||
|
|
||||||
diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
|
|
||||||
index 37ae6549e..24b9bcb11 100644
|
|
||||||
--- a/gcc/loop-invariant.c
|
|
||||||
+++ b/gcc/loop-invariant.c
|
|
||||||
@@ -1184,9 +1184,21 @@ find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
|
|
||||||
call. */
|
|
||||||
|
|
||||||
static void
|
|
||||||
-find_invariants_bb (basic_block bb, bool always_reached, bool always_executed)
|
|
||||||
+find_invariants_bb (class loop *loop, basic_block bb, bool always_reached,
|
|
||||||
+ bool always_executed)
|
|
||||||
{
|
|
||||||
rtx_insn *insn;
|
|
||||||
+ basic_block preheader = loop_preheader_edge (loop)->src;
|
|
||||||
+
|
|
||||||
+ /* Don't move insn of cold BB out of loop to preheader to reduce calculations
|
|
||||||
+ and register live range in hot loop with cold BB. */
|
|
||||||
+ if (!always_executed && preheader->count > bb->count)
|
|
||||||
+ {
|
|
||||||
+ if (dump_file)
|
|
||||||
+ fprintf (dump_file, "Don't move invariant from bb: %d out of loop %d\n",
|
|
||||||
+ bb->index, loop->num);
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
FOR_BB_INSNS (bb, insn)
|
|
||||||
{
|
|
||||||
@@ -1215,8 +1227,7 @@ find_invariants_body (class loop *loop, basic_block *body,
|
|
||||||
unsigned i;
|
|
||||||
|
|
||||||
for (i = 0; i < loop->num_nodes; i++)
|
|
||||||
- find_invariants_bb (body[i],
|
|
||||||
- bitmap_bit_p (always_reached, i),
|
|
||||||
+ find_invariants_bb (loop, body[i], bitmap_bit_p (always_reached, i),
|
|
||||||
bitmap_bit_p (always_executed, i));
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/loop-invariant-2.c b/gcc/testsuite/gcc.dg/loop-invariant-2.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..df3d84585
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/loop-invariant-2.c
|
|
||||||
@@ -0,0 +1,20 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-rtl-loop2_invariant" } */
|
|
||||||
+
|
|
||||||
+volatile int x;
|
|
||||||
+void
|
|
||||||
+bar (int, char *, char *);
|
|
||||||
+void
|
|
||||||
+foo (int *a, int n, int k)
|
|
||||||
+{
|
|
||||||
+ int i;
|
|
||||||
+
|
|
||||||
+ for (i = 0; i < n; i++)
|
|
||||||
+ {
|
|
||||||
+ if (__builtin_expect (x, 0))
|
|
||||||
+ bar (k / 5, "one", "two");
|
|
||||||
+ a[i] = k;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-rtl-dump "Don't move invariant from bb: .*out of loop" "loop2_invariant" } } */
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,902 +0,0 @@
|
|||||||
From edd4200e2b3e94d5c124900657b91c22dfe9c557 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
|
|
||||||
Date: Wed, 15 Jun 2022 16:00:25 +0800
|
|
||||||
Subject: [PATCH 04/12] [DFE] Add Dead Field Elimination in Struct-Reorg.
|
|
||||||
|
|
||||||
We can transform gimple to eliminate fields that are never read
|
|
||||||
and remove their redundant stmts.
|
|
||||||
Also we adapted the partial escape_cast_another_ptr for struct relayout.
|
|
||||||
Add flag -fipa-struct-reorg=3 to enable dead field elimination.
|
|
||||||
---
|
|
||||||
gcc/common.opt | 4 +-
|
|
||||||
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 209 ++++++++++++++++--
|
|
||||||
gcc/ipa-struct-reorg/ipa-struct-reorg.h | 9 +-
|
|
||||||
gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c | 86 +++++++
|
|
||||||
.../gcc.dg/struct/dfe_ele_minus_verify.c | 60 +++++
|
|
||||||
.../gcc.dg/struct/dfe_mem_ref_offset.c | 58 +++++
|
|
||||||
.../struct/dfe_mul_layer_ptr_record_bug.c | 30 +++
|
|
||||||
gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c | 71 ++++++
|
|
||||||
.../gcc.dg/struct/dfe_ptr_negate_expr.c | 55 +++++
|
|
||||||
gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c | 55 +++++
|
|
||||||
gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 21 +-
|
|
||||||
11 files changed, 639 insertions(+), 19 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
|
|
||||||
|
|
||||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
||||||
index 7fc075d35..b5ea3c7a1 100644
|
|
||||||
--- a/gcc/common.opt
|
|
||||||
+++ b/gcc/common.opt
|
|
||||||
@@ -1884,8 +1884,8 @@ Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization
|
|
||||||
Perform structure layout optimizations.
|
|
||||||
|
|
||||||
fipa-struct-reorg=
|
|
||||||
-Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 2)
|
|
||||||
--fipa-struct-reorg=[0,1,2] adding none, struct-reorg, reorder-fields optimizations.
|
|
||||||
+Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 3)
|
|
||||||
+-fipa-struct-reorg=[0,1,2,3] adding none, struct-reorg, reorder-fields, dfe optimizations.
|
|
||||||
|
|
||||||
fipa-extend-auto-profile
|
|
||||||
Common Report Var(flag_ipa_extend_auto_profile)
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
index 9214ee74a..2fa560239 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "tree-pretty-print.h"
|
|
||||||
#include "gimple-pretty-print.h"
|
|
||||||
#include "gimple-iterator.h"
|
|
||||||
+#include "gimple-walk.h"
|
|
||||||
#include "cfg.h"
|
|
||||||
#include "ssa.h"
|
|
||||||
#include "tree-dfa.h"
|
|
||||||
@@ -238,11 +239,44 @@ enum srmode
|
|
||||||
STRUCT_LAYOUT_OPTIMIZE
|
|
||||||
};
|
|
||||||
|
|
||||||
+/* Enum the struct layout optimize level,
|
|
||||||
+ which should be the same as the option -fipa-struct-reorg=. */
|
|
||||||
+
|
|
||||||
+enum struct_layout_opt_level
|
|
||||||
+{
|
|
||||||
+ NONE = 0,
|
|
||||||
+ STRUCT_REORG,
|
|
||||||
+ STRUCT_REORDER_FIELDS,
|
|
||||||
+ DEAD_FIELD_ELIMINATION
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
static bool is_result_of_mult (tree arg, tree *num, tree struct_size);
|
|
||||||
bool isptrptr (tree type);
|
|
||||||
|
|
||||||
srmode current_mode;
|
|
||||||
|
|
||||||
+hash_map<tree, tree> replace_type_map;
|
|
||||||
+
|
|
||||||
+/* Return true if one of these types is created by struct-reorg. */
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+is_replace_type (tree type1, tree type2)
|
|
||||||
+{
|
|
||||||
+ if (replace_type_map.is_empty ())
|
|
||||||
+ return false;
|
|
||||||
+ if (type1 == NULL_TREE || type2 == NULL_TREE)
|
|
||||||
+ return false;
|
|
||||||
+ tree *type_value = replace_type_map.get (type1);
|
|
||||||
+ if (type_value)
|
|
||||||
+ if (types_compatible_p (*type_value, type2))
|
|
||||||
+ return true;
|
|
||||||
+ type_value = replace_type_map.get (type2);
|
|
||||||
+ if (type_value)
|
|
||||||
+ if (types_compatible_p (*type_value, type1))
|
|
||||||
+ return true;
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
} // anon namespace
|
|
||||||
|
|
||||||
namespace struct_reorg {
|
|
||||||
@@ -318,12 +352,13 @@ srfunction::simple_dump (FILE *file)
|
|
||||||
/* Constructor of FIELD. */
|
|
||||||
|
|
||||||
srfield::srfield (tree field, srtype *base)
|
|
||||||
- : offset(int_byte_position (field)),
|
|
||||||
+ : offset (int_byte_position (field)),
|
|
||||||
fieldtype (TREE_TYPE (field)),
|
|
||||||
fielddecl (field),
|
|
||||||
- base(base),
|
|
||||||
- type(NULL),
|
|
||||||
- clusternum(0)
|
|
||||||
+ base (base),
|
|
||||||
+ type (NULL),
|
|
||||||
+ clusternum (0),
|
|
||||||
+ field_access (EMPTY_FIELD)
|
|
||||||
{
|
|
||||||
for(int i = 0;i < max_split; i++)
|
|
||||||
newfield[i] = NULL_TREE;
|
|
||||||
@@ -362,6 +397,25 @@ srtype::srtype (tree type)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Return true if any field in the RECORD_TYPE is never read. */
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+srtype::has_dead_field (void)
|
|
||||||
+{
|
|
||||||
+ bool may_dfe = false;
|
|
||||||
+ srfield *this_field;
|
|
||||||
+ unsigned i;
|
|
||||||
+ FOR_EACH_VEC_ELT (fields, i, this_field)
|
|
||||||
+ {
|
|
||||||
+ if (!(this_field->field_access & READ_FIELD))
|
|
||||||
+ {
|
|
||||||
+ may_dfe = true;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return may_dfe;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Mark the type as escaping type E at statement STMT. */
|
|
||||||
|
|
||||||
void
|
|
||||||
@@ -833,6 +887,10 @@ srtype::create_new_type (void)
|
|
||||||
for (unsigned i = 0; i < fields.length (); i++)
|
|
||||||
{
|
|
||||||
srfield *f = fields[i];
|
|
||||||
+ if (current_mode == STRUCT_LAYOUT_OPTIMIZE
|
|
||||||
+ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION
|
|
||||||
+ && !(f->field_access & READ_FIELD))
|
|
||||||
+ continue;
|
|
||||||
f->create_new_fields (newtype, newfields, newlast);
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -854,6 +912,16 @@ srtype::create_new_type (void)
|
|
||||||
|
|
||||||
warn_padded = save_warn_padded;
|
|
||||||
|
|
||||||
+ if (current_mode == STRUCT_LAYOUT_OPTIMIZE
|
|
||||||
+ && replace_type_map.get (this->newtype[0]) == NULL)
|
|
||||||
+ replace_type_map.put (this->newtype[0], this->type);
|
|
||||||
+ if (dump_file)
|
|
||||||
+ {
|
|
||||||
+ if (current_mode == STRUCT_LAYOUT_OPTIMIZE
|
|
||||||
+ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION
|
|
||||||
+ && has_dead_field ())
|
|
||||||
+ fprintf (dump_file, "Dead field elimination.\n");
|
|
||||||
+ }
|
|
||||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
{
|
|
||||||
fprintf (dump_file, "Created %d types:\n", maxclusters);
|
|
||||||
@@ -1128,12 +1196,12 @@ csrtype::init_type_info (void)
|
|
||||||
|
|
||||||
/* Close enough to pad to improve performance.
|
|
||||||
33~63 should pad to 64 but 33~48 (first half) are too far away, and
|
|
||||||
- 65~127 should pad to 128 but 65~96 (first half) are too far away. */
|
|
||||||
+ 65~127 should pad to 128 but 65~80 (first half) are too far away. */
|
|
||||||
if (old_size > 48 && old_size < 64)
|
|
||||||
{
|
|
||||||
new_size = 64;
|
|
||||||
}
|
|
||||||
- if (old_size > 96 && old_size < 128)
|
|
||||||
+ if (old_size > 80 && old_size < 128)
|
|
||||||
{
|
|
||||||
new_size = 128;
|
|
||||||
}
|
|
||||||
@@ -1272,6 +1340,7 @@ public:
|
|
||||||
bool has_rewritten_type (srfunction*);
|
|
||||||
void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt);
|
|
||||||
unsigned execute_struct_relayout (void);
|
|
||||||
+ bool remove_dead_field_stmt (tree lhs);
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ipa_struct_relayout
|
|
||||||
@@ -3206,6 +3275,90 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Update field_access in srfield. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+update_field_access (tree record, tree field, unsigned access, void *data)
|
|
||||||
+{
|
|
||||||
+ srtype *this_srtype = ((ipa_struct_reorg *)data)->find_type (record);
|
|
||||||
+ if (this_srtype == NULL)
|
|
||||||
+ return;
|
|
||||||
+ srfield *this_srfield = this_srtype->find_field (int_byte_position (field));
|
|
||||||
+ if (this_srfield == NULL)
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ this_srfield->field_access |= access;
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "record field access %d:", access);
|
|
||||||
+ print_generic_expr (dump_file, record);
|
|
||||||
+ fprintf (dump_file, " field:");
|
|
||||||
+ print_generic_expr (dump_file, field);
|
|
||||||
+ fprintf (dump_file, "\n");
|
|
||||||
+ }
|
|
||||||
+ return;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* A callback for walk_stmt_load_store_ops to visit store. */
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+find_field_p_store (gimple *, tree node, tree op, void *data)
|
|
||||||
+{
|
|
||||||
+ if (TREE_CODE (op) != COMPONENT_REF)
|
|
||||||
+ return false;
|
|
||||||
+ tree node_type = TREE_TYPE (node);
|
|
||||||
+ if (!handled_type (node_type))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ update_field_access (node_type, TREE_OPERAND (op, 1), WRITE_FIELD, data);
|
|
||||||
+
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* A callback for walk_stmt_load_store_ops to visit load. */
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+find_field_p_load (gimple *, tree node, tree op, void *data)
|
|
||||||
+{
|
|
||||||
+ if (TREE_CODE (op) != COMPONENT_REF)
|
|
||||||
+ return false;
|
|
||||||
+ tree node_type = TREE_TYPE (node);
|
|
||||||
+ if (!handled_type (node_type))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ update_field_access (node_type, TREE_OPERAND (op, 1), READ_FIELD, data);
|
|
||||||
+
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Determine whether the stmt should be deleted. */
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+ipa_struct_reorg::remove_dead_field_stmt (tree lhs)
|
|
||||||
+{
|
|
||||||
+ tree base = NULL_TREE;
|
|
||||||
+ bool indirect = false;
|
|
||||||
+ srtype *t = NULL;
|
|
||||||
+ srfield *f = NULL;
|
|
||||||
+ bool realpart = false;
|
|
||||||
+ bool imagpart = false;
|
|
||||||
+ bool address = false;
|
|
||||||
+ bool escape_from_base = false;
|
|
||||||
+ if (!get_type_field (lhs, base, indirect, t, f, realpart, imagpart,
|
|
||||||
+ address, escape_from_base))
|
|
||||||
+ return false;
|
|
||||||
+ if (t ==NULL)
|
|
||||||
+ return false;
|
|
||||||
+ if (t->newtype[0] == t->type)
|
|
||||||
+ return false;
|
|
||||||
+ if (f == NULL)
|
|
||||||
+ return false;
|
|
||||||
+ if (f->newfield[0] == NULL
|
|
||||||
+ && (f->field_access & WRITE_FIELD))
|
|
||||||
+ return true;
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Maybe record access of statement for further analaysis. */
|
|
||||||
|
|
||||||
void
|
|
||||||
@@ -3227,6 +3380,13 @@ ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt)
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
+ if (current_mode == STRUCT_LAYOUT_OPTIMIZE
|
|
||||||
+ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION)
|
|
||||||
+ {
|
|
||||||
+ /* Look for loads and stores. */
|
|
||||||
+ walk_stmt_load_store_ops (stmt, this, find_field_p_load,
|
|
||||||
+ find_field_p_store);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Calculate the multiplier. */
|
|
||||||
@@ -3543,8 +3703,11 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple
|
|
||||||
}
|
|
||||||
else if (type != d->type)
|
|
||||||
{
|
|
||||||
- type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
- d->type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
+ if (!is_replace_type (d->type->type, type->type))
|
|
||||||
+ {
|
|
||||||
+ type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
+ d->type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
/* x_1 = y.x_nodes; void *x;
|
|
||||||
Directly mark the structure pointer type assigned
|
|
||||||
@@ -4131,8 +4294,9 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl,
|
|
||||||
}
|
|
||||||
/* If we have a non void* or a decl (which is hard to track),
|
|
||||||
then mark the type as escaping. */
|
|
||||||
- if (!VOID_POINTER_P (TREE_TYPE (newdecl))
|
|
||||||
- || DECL_P (newdecl))
|
|
||||||
+ if (replace_type_map.get (type->type) == NULL
|
|
||||||
+ && (!VOID_POINTER_P (TREE_TYPE (newdecl))
|
|
||||||
+ || DECL_P (newdecl)))
|
|
||||||
{
|
|
||||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
{
|
|
||||||
@@ -4142,7 +4306,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl,
|
|
||||||
print_generic_expr (dump_file, TREE_TYPE (newdecl));
|
|
||||||
fprintf (dump_file, "\n");
|
|
||||||
}
|
|
||||||
- type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
+ type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/* At this point there should only be unkown void* ssa names. */
|
|
||||||
@@ -4465,11 +4629,13 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec<
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
+ if (!is_replace_type (t1->type, type->type))
|
|
||||||
+ {
|
|
||||||
+ if (t1)
|
|
||||||
+ t1->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
|
|
||||||
- if (t1)
|
|
||||||
- t1->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
-
|
|
||||||
- type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
+ type->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@@ -5722,6 +5888,19 @@ bool
|
|
||||||
ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
|
|
||||||
{
|
|
||||||
bool remove = false;
|
|
||||||
+
|
|
||||||
+ if (current_mode == STRUCT_LAYOUT_OPTIMIZE
|
|
||||||
+ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION
|
|
||||||
+ && remove_dead_field_stmt (gimple_assign_lhs (stmt)))
|
|
||||||
+ {
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "\n rewriting statement (remove): \n");
|
|
||||||
+ print_gimple_stmt (dump_file, stmt, 0);
|
|
||||||
+ }
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
if (gimple_clobber_p (stmt))
|
|
||||||
{
|
|
||||||
tree lhs = gimple_assign_lhs (stmt);
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h
|
|
||||||
index 54b0dc655..936c0fa6f 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h
|
|
||||||
@@ -142,6 +142,7 @@ public:
|
|
||||||
|
|
||||||
bool create_new_type (void);
|
|
||||||
void analyze (void);
|
|
||||||
+ bool has_dead_field (void);
|
|
||||||
void mark_escape (escape_type, gimple *stmt);
|
|
||||||
bool has_escaped (void)
|
|
||||||
{
|
|
||||||
@@ -163,6 +164,12 @@ public:
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
+/* Bitflags used for determining if a field
|
|
||||||
+ is never accessed, read or written. */
|
|
||||||
+const unsigned EMPTY_FIELD = 0x0u;
|
|
||||||
+const unsigned READ_FIELD = 0x01u;
|
|
||||||
+const unsigned WRITE_FIELD = 0x02u;
|
|
||||||
+
|
|
||||||
struct srfield
|
|
||||||
{
|
|
||||||
unsigned HOST_WIDE_INT offset;
|
|
||||||
@@ -174,7 +181,7 @@ struct srfield
|
|
||||||
unsigned clusternum;
|
|
||||||
|
|
||||||
tree newfield[max_split];
|
|
||||||
-
|
|
||||||
+ unsigned field_access; /* FIELD_DECL -> bitflag (use for dfe). */
|
|
||||||
// Constructors
|
|
||||||
srfield (tree field, srtype *base);
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..4261d2352
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
|
|
||||||
@@ -0,0 +1,86 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+typedef struct node node_t;
|
|
||||||
+typedef struct node *node_p;
|
|
||||||
+
|
|
||||||
+typedef struct arc arc_t;
|
|
||||||
+typedef struct arc *arc_p;
|
|
||||||
+
|
|
||||||
+typedef struct network
|
|
||||||
+{
|
|
||||||
+ arc_p arcs;
|
|
||||||
+ arc_p sorted_arcs;
|
|
||||||
+ int x;
|
|
||||||
+ node_p nodes;
|
|
||||||
+ node_p stop_nodes;
|
|
||||||
+} network_t;
|
|
||||||
+
|
|
||||||
+struct node
|
|
||||||
+{
|
|
||||||
+ int64_t potential;
|
|
||||||
+ int orientation;
|
|
||||||
+ node_p child;
|
|
||||||
+ node_p pred;
|
|
||||||
+ node_p sibling;
|
|
||||||
+ node_p sibling_prev;
|
|
||||||
+ arc_p basic_arc;
|
|
||||||
+ arc_p firstout;
|
|
||||||
+ arc_p firstin;
|
|
||||||
+ arc_p arc_tmp;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t depth;
|
|
||||||
+ int number;
|
|
||||||
+ int time;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct arc
|
|
||||||
+{
|
|
||||||
+ int id;
|
|
||||||
+ int64_t cost;
|
|
||||||
+ node_p tail;
|
|
||||||
+ node_p head;
|
|
||||||
+ short ident;
|
|
||||||
+ arc_p nextout;
|
|
||||||
+ arc_p nextin;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t org_cost;
|
|
||||||
+ network_t* net_add;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+const int MAX = 100;
|
|
||||||
+
|
|
||||||
+/* let it escape_array, "Type is used in an array [not handled yet]". */
|
|
||||||
+network_t* net[2];
|
|
||||||
+arc_p stop_arcs = NULL;
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+main ()
|
|
||||||
+{
|
|
||||||
+ net[0] = (network_t*) calloc (1, sizeof(network_t));
|
|
||||||
+ net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t));
|
|
||||||
+ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
|
|
||||||
+
|
|
||||||
+ net[0]->arcs->id = 100;
|
|
||||||
+
|
|
||||||
+ for (unsigned i = 0; i < 3; i++)
|
|
||||||
+ {
|
|
||||||
+ net[0]->arcs->id = net[0]->arcs->id + 2;
|
|
||||||
+ stop_arcs->cost = net[0]->arcs->id / 2;
|
|
||||||
+ stop_arcs->net_add = net[0];
|
|
||||||
+ printf("stop_arcs->cost = %ld\n", stop_arcs->cost);
|
|
||||||
+ net[0]->arcs++;
|
|
||||||
+ stop_arcs++;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if( net[1] != 0 && stop_arcs != 0)
|
|
||||||
+ {
|
|
||||||
+ return -1;
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..42d38c63a
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
|
|
||||||
@@ -0,0 +1,60 @@
|
|
||||||
+// verify newarc[cmp-1].flow
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+typedef struct node node_t;
|
|
||||||
+typedef struct node *node_p;
|
|
||||||
+
|
|
||||||
+typedef struct arc arc_t;
|
|
||||||
+typedef struct arc *arc_p;
|
|
||||||
+
|
|
||||||
+struct node
|
|
||||||
+{
|
|
||||||
+ int64_t potential;
|
|
||||||
+ int orientation;
|
|
||||||
+ node_p child;
|
|
||||||
+ node_p pred;
|
|
||||||
+ node_p sibling;
|
|
||||||
+ node_p sibling_prev;
|
|
||||||
+ arc_p basic_arc;
|
|
||||||
+ arc_p firstout;
|
|
||||||
+ arc_p firstin;
|
|
||||||
+ arc_p arc_tmp;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t depth;
|
|
||||||
+ int number;
|
|
||||||
+ int time;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct arc
|
|
||||||
+{
|
|
||||||
+ int id;
|
|
||||||
+ int64_t cost;
|
|
||||||
+ node_p tail;
|
|
||||||
+ node_p head;
|
|
||||||
+ short ident;
|
|
||||||
+ arc_p nextout;
|
|
||||||
+ arc_p nextin;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t org_cost;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+const int MAX = 100;
|
|
||||||
+arc_p ap = NULL;
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+main ()
|
|
||||||
+{
|
|
||||||
+ ap = (arc_p) calloc(MAX, sizeof(arc_t));
|
|
||||||
+ printf("%d\n", ap[0].id);
|
|
||||||
+ for (int i = 1; i < MAX; i++)
|
|
||||||
+ {
|
|
||||||
+ ap[i-1].id = 500;
|
|
||||||
+ }
|
|
||||||
+ printf("%d\n", ap[0].id);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..53583fe82
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
|
|
||||||
@@ -0,0 +1,58 @@
|
|
||||||
+/* Supports the MEM_REF offset.
|
|
||||||
+ _1 = MEM[(struct arc *)ap_4 + 72B].flow;
|
|
||||||
+ Old rewrite:_1 = ap.reorder.0_8->flow;
|
|
||||||
+ New rewrite:_1 = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow. */
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+typedef struct node node_t;
|
|
||||||
+typedef struct node *node_p;
|
|
||||||
+
|
|
||||||
+typedef struct arc arc_t;
|
|
||||||
+typedef struct arc *arc_p;
|
|
||||||
+
|
|
||||||
+struct node
|
|
||||||
+{
|
|
||||||
+ int64_t potential;
|
|
||||||
+ int orientation;
|
|
||||||
+ node_p child;
|
|
||||||
+ node_p pred;
|
|
||||||
+ node_p sibling;
|
|
||||||
+ node_p sibling_prev;
|
|
||||||
+ arc_p basic_arc;
|
|
||||||
+ arc_p firstout;
|
|
||||||
+ arc_p firstin;
|
|
||||||
+ arc_p arc_tmp;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t depth;
|
|
||||||
+ int number;
|
|
||||||
+ int time;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct arc
|
|
||||||
+{
|
|
||||||
+ int id;
|
|
||||||
+ int64_t cost;
|
|
||||||
+ node_p tail;
|
|
||||||
+ node_p head;
|
|
||||||
+ short ident;
|
|
||||||
+ arc_p nextout;
|
|
||||||
+ arc_p nextin;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t org_cost;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+main ()
|
|
||||||
+{
|
|
||||||
+ const int MAX = 100;
|
|
||||||
+ /* A similar scenario can be reproduced only by using local variables. */
|
|
||||||
+ arc_p ap = NULL;
|
|
||||||
+ ap = (arc_p) calloc(MAX, sizeof(arc_t));
|
|
||||||
+ printf("%d\n", ap[1].flow);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..fd675ec2e
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
|
|
||||||
@@ -0,0 +1,30 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+typedef struct T_HASH_ENTRY
|
|
||||||
+{
|
|
||||||
+ unsigned int hash;
|
|
||||||
+ unsigned int klen;
|
|
||||||
+ char *key;
|
|
||||||
+} iHashEntry;
|
|
||||||
+
|
|
||||||
+typedef struct T_HASH
|
|
||||||
+{
|
|
||||||
+ unsigned int size;
|
|
||||||
+ unsigned int fill;
|
|
||||||
+ unsigned int keys;
|
|
||||||
+
|
|
||||||
+ iHashEntry **array;
|
|
||||||
+} uHash;
|
|
||||||
+
|
|
||||||
+uHash *retval;
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+main() {
|
|
||||||
+ retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..600e7908b
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
|
|
||||||
@@ -0,0 +1,71 @@
|
|
||||||
+// support POINTER_DIFF_EXPR & NOP_EXPR to avoid
|
|
||||||
+// escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt"
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+typedef struct node node_t;
|
|
||||||
+typedef struct node *node_p;
|
|
||||||
+
|
|
||||||
+typedef struct arc arc_t;
|
|
||||||
+typedef struct arc *arc_p;
|
|
||||||
+
|
|
||||||
+typedef struct network
|
|
||||||
+{
|
|
||||||
+ arc_p arcs;
|
|
||||||
+ arc_p sorted_arcs;
|
|
||||||
+ int x;
|
|
||||||
+ node_p nodes;
|
|
||||||
+ node_p stop_nodes;
|
|
||||||
+} network_t;
|
|
||||||
+
|
|
||||||
+struct node
|
|
||||||
+{
|
|
||||||
+ int64_t potential;
|
|
||||||
+ int orientation;
|
|
||||||
+ node_p child;
|
|
||||||
+ node_p pred;
|
|
||||||
+ node_p sibling;
|
|
||||||
+ node_p sibling_prev;
|
|
||||||
+ arc_p basic_arc;
|
|
||||||
+ arc_p firstout;
|
|
||||||
+ arc_p firstin;
|
|
||||||
+ arc_p arc_tmp;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t depth;
|
|
||||||
+ int number;
|
|
||||||
+ int time;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct arc
|
|
||||||
+{
|
|
||||||
+ int id;
|
|
||||||
+ int64_t cost;
|
|
||||||
+ node_p tail;
|
|
||||||
+ node_p head;
|
|
||||||
+ short ident;
|
|
||||||
+ arc_p nextout;
|
|
||||||
+ arc_p nextin;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t org_cost;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+main ()
|
|
||||||
+{
|
|
||||||
+ arc_t *old_arcs;
|
|
||||||
+ node_t *node;
|
|
||||||
+ node_t *stop;
|
|
||||||
+ size_t off;
|
|
||||||
+ network_t* net;
|
|
||||||
+
|
|
||||||
+ for( ; node->number < stop->number; node++ )
|
|
||||||
+ {
|
|
||||||
+ off = node->basic_arc - old_arcs;
|
|
||||||
+ node->basic_arc = (arc_t *)(net->arcs + off);
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..f411364a7
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
|
|
||||||
@@ -0,0 +1,55 @@
|
|
||||||
+// support NEGATE_EXPR rewriting
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+typedef struct node node_t;
|
|
||||||
+typedef struct node *node_p;
|
|
||||||
+
|
|
||||||
+typedef struct arc arc_t;
|
|
||||||
+typedef struct arc *arc_p;
|
|
||||||
+
|
|
||||||
+struct node
|
|
||||||
+{
|
|
||||||
+ int64_t potential;
|
|
||||||
+ int orientation;
|
|
||||||
+ node_p child;
|
|
||||||
+ node_p pred;
|
|
||||||
+ node_p sibling;
|
|
||||||
+ node_p sibling_prev;
|
|
||||||
+ arc_p basic_arc;
|
|
||||||
+ arc_p firstout;
|
|
||||||
+ arc_p firstin;
|
|
||||||
+ arc_p arc_tmp;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t depth;
|
|
||||||
+ int number;
|
|
||||||
+ int time;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct arc
|
|
||||||
+{
|
|
||||||
+ int id;
|
|
||||||
+ int64_t cost;
|
|
||||||
+ node_p tail;
|
|
||||||
+ node_p head;
|
|
||||||
+ short ident;
|
|
||||||
+ arc_p nextout;
|
|
||||||
+ arc_p nextin;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t org_cost;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+main ()
|
|
||||||
+{
|
|
||||||
+ int64_t susp = 0;
|
|
||||||
+ const int MAX = 100;
|
|
||||||
+ arc_p ap = (arc_p) calloc(MAX, sizeof(arc_t));
|
|
||||||
+ ap -= susp;
|
|
||||||
+ printf("%d\n", ap[1].flow);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..a4e723763
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
|
|
||||||
@@ -0,0 +1,55 @@
|
|
||||||
+// release escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]";
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+typedef struct node node_t;
|
|
||||||
+typedef struct node *node_p;
|
|
||||||
+
|
|
||||||
+typedef struct arc arc_t;
|
|
||||||
+typedef struct arc *arc_p;
|
|
||||||
+
|
|
||||||
+struct node
|
|
||||||
+{
|
|
||||||
+ int64_t potential;
|
|
||||||
+ int orientation;
|
|
||||||
+ node_p child;
|
|
||||||
+ node_p pred;
|
|
||||||
+ node_p sibling;
|
|
||||||
+ node_p sibling_prev;
|
|
||||||
+ arc_p basic_arc;
|
|
||||||
+ arc_p firstout;
|
|
||||||
+ arc_p firstin;
|
|
||||||
+ arc_p arc_tmp;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t depth;
|
|
||||||
+ int number;
|
|
||||||
+ int time;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct arc
|
|
||||||
+{
|
|
||||||
+ int id;
|
|
||||||
+ int64_t cost;
|
|
||||||
+ node_p tail;
|
|
||||||
+ node_p head;
|
|
||||||
+ short ident;
|
|
||||||
+ arc_p nextout;
|
|
||||||
+ arc_p nextin;
|
|
||||||
+ int64_t flow;
|
|
||||||
+ int64_t org_cost;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+const int MAX = 100;
|
|
||||||
+arc_t **ap = NULL;
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+main ()
|
|
||||||
+{
|
|
||||||
+ ap = (arc_t**) malloc(MAX * sizeof(arc_t*));
|
|
||||||
+ (*ap)[0].id = 300;
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
|
|
||||||
index 67b3ac2d5..ac5585813 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
|
|
||||||
@@ -64,8 +64,27 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout
|
|
||||||
"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
|
||||||
|
|
||||||
# -fipa-struct-reorg=2
|
|
||||||
-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
|
|
||||||
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \
|
|
||||||
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
|
||||||
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] \
|
|
||||||
"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
|
||||||
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] \
|
|
||||||
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
|
||||||
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] \
|
|
||||||
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
|
||||||
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/struct_reorg*.c]] \
|
|
||||||
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
|
||||||
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/sr_*.c]] \
|
|
||||||
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
|
||||||
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \
|
|
||||||
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
|
||||||
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout.c]] \
|
|
||||||
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
|
||||||
+
|
|
||||||
+# -fipa-struct-reorg=3
|
|
||||||
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \
|
|
||||||
+ "" "-fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
|
||||||
+
|
|
||||||
# All done.
|
|
||||||
torture-finish
|
|
||||||
dg-finish
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,143 +0,0 @@
|
|||||||
From d8753de2129d230afc9a887d5804747c69824a68 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhaowenyu <804544223@qq.com>
|
|
||||||
Date: Mon, 20 Jun 2022 11:24:45 +0800
|
|
||||||
Subject: [PATCH 05/12] [Backport] ipa-sra: Fix thinko when overriding
|
|
||||||
safe_to_import_accesses (PR 101066)
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=5aa28c8cf15cd254cc5a3a12278133b93b8b017f
|
|
||||||
|
|
||||||
ipa-sra: Fix thinko when overriding safe_to_import_accesses (PR 101066)
|
|
||||||
|
|
||||||
The "new" IPA-SRA has a more difficult job than the previous
|
|
||||||
not-truly-IPA version when identifying situations in which a parameter
|
|
||||||
passed by reference can be passed into a third function and only there
|
|
||||||
converted to one passed by value (and possibly "split" at the same
|
|
||||||
time).
|
|
||||||
|
|
||||||
In order to allow this, two conditions must be fulfilled. First the
|
|
||||||
call to the third function must happen before any modifications of
|
|
||||||
memory, because it could change the value passed by reference.
|
|
||||||
Second, in order to make sure we do not introduce new (invalid)
|
|
||||||
dereferences, the call must postdominate the entry BB.
|
|
||||||
|
|
||||||
The second condition is actually not necessary if the caller function
|
|
||||||
is also certain to dereference the pointer but the first one must
|
|
||||||
still hold. Unfortunately, the code making this overriding decision
|
|
||||||
also happens to trigger when the first condition is not fulfilled.
|
|
||||||
This is fixed in the following patch.
|
|
||||||
|
|
||||||
gcc/ChangeLog:
|
|
||||||
|
|
||||||
2021-06-16 Martin Jambor <mjambor@suse.cz>
|
|
||||||
|
|
||||||
(cherry picked from commit 763121ccd908f52bc666f277ea2cf42110b3aad9)
|
|
||||||
---
|
|
||||||
gcc/ipa-sra.c | 15 +++++++++++++--
|
|
||||||
gcc/testsuite/gcc.dg/ipa/pr101066.c | 20 ++++++++++++++++++++
|
|
||||||
2 files changed, 33 insertions(+), 2 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/ipa/pr101066.c
|
|
||||||
|
|
||||||
diff --git a/gcc/ipa-sra.c b/gcc/ipa-sra.c
|
|
||||||
index b706fceff..1cb30afc3 100644
|
|
||||||
--- a/gcc/ipa-sra.c
|
|
||||||
+++ b/gcc/ipa-sra.c
|
|
||||||
@@ -340,7 +340,7 @@ class isra_call_summary
|
|
||||||
public:
|
|
||||||
isra_call_summary ()
|
|
||||||
: m_arg_flow (), m_return_ignored (false), m_return_returned (false),
|
|
||||||
- m_bit_aligned_arg (false)
|
|
||||||
+ m_bit_aligned_arg (false), m_before_any_store (false)
|
|
||||||
{}
|
|
||||||
|
|
||||||
void init_inputs (unsigned arg_count);
|
|
||||||
@@ -359,6 +359,10 @@ public:
|
|
||||||
|
|
||||||
/* Set when any of the call arguments are not byte-aligned. */
|
|
||||||
unsigned m_bit_aligned_arg : 1;
|
|
||||||
+
|
|
||||||
+ /* Set to true if the call happend before any (other) store to memory in the
|
|
||||||
+ caller. */
|
|
||||||
+ unsigned m_before_any_store : 1;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Class to manage function summaries. */
|
|
||||||
@@ -472,6 +476,8 @@ isra_call_summary::dump (FILE *f)
|
|
||||||
fprintf (f, " return value ignored\n");
|
|
||||||
if (m_return_returned)
|
|
||||||
fprintf (f, " return value used only to compute caller return value\n");
|
|
||||||
+ if (m_before_any_store)
|
|
||||||
+ fprintf (f, " happens before any store to memory\n");
|
|
||||||
for (unsigned i = 0; i < m_arg_flow.length (); i++)
|
|
||||||
{
|
|
||||||
fprintf (f, " Parameter %u:\n", i);
|
|
||||||
@@ -516,6 +522,7 @@ ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
|
|
||||||
new_sum->m_return_ignored = old_sum->m_return_ignored;
|
|
||||||
new_sum->m_return_returned = old_sum->m_return_returned;
|
|
||||||
new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
|
|
||||||
+ new_sum->m_before_any_store = old_sum->m_before_any_store;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@@ -2355,6 +2362,7 @@ process_scan_results (cgraph_node *node, struct function *fun,
|
|
||||||
unsigned count = gimple_call_num_args (call_stmt);
|
|
||||||
isra_call_summary *csum = call_sums->get_create (cs);
|
|
||||||
csum->init_inputs (count);
|
|
||||||
+ csum->m_before_any_store = uses_memory_as_obtained;
|
|
||||||
for (unsigned argidx = 0; argidx < count; argidx++)
|
|
||||||
{
|
|
||||||
if (!csum->m_arg_flow[argidx].pointer_pass_through)
|
|
||||||
@@ -2601,6 +2609,7 @@ isra_write_edge_summary (output_block *ob, cgraph_edge *e)
|
|
||||||
bp_pack_value (&bp, csum->m_return_ignored, 1);
|
|
||||||
bp_pack_value (&bp, csum->m_return_returned, 1);
|
|
||||||
bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
|
|
||||||
+ bp_pack_value (&bp, csum->m_before_any_store, 1);
|
|
||||||
streamer_write_bitpack (&bp);
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -2719,6 +2728,7 @@ isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
|
|
||||||
csum->m_return_ignored = bp_unpack_value (&bp, 1);
|
|
||||||
csum->m_return_returned = bp_unpack_value (&bp, 1);
|
|
||||||
csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
|
|
||||||
+ csum->m_before_any_store = bp_unpack_value (&bp, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Read intraprocedural analysis information about NODE and all of its outgoing
|
|
||||||
@@ -3475,7 +3485,8 @@ param_splitting_across_edge (cgraph_edge *cs)
|
|
||||||
}
|
|
||||||
else if (!ipf->safe_to_import_accesses)
|
|
||||||
{
|
|
||||||
- if (!all_callee_accesses_present_p (param_desc, arg_desc))
|
|
||||||
+ if (!csum->m_before_any_store
|
|
||||||
+ || !all_callee_accesses_present_p (param_desc, arg_desc))
|
|
||||||
{
|
|
||||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
fprintf (dump_file, " %u->%u: cannot import accesses.\n",
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/ipa/pr101066.c b/gcc/testsuite/gcc.dg/ipa/pr101066.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..1ceb6e431
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/ipa/pr101066.c
|
|
||||||
@@ -0,0 +1,20 @@
|
|
||||||
+/* { dg-do run } */
|
|
||||||
+/* { dg-options "-Os -fno-ipa-cp -fno-inline" } */
|
|
||||||
+
|
|
||||||
+int a = 1, c, d, e;
|
|
||||||
+int *b = &a;
|
|
||||||
+static int g(int *h) {
|
|
||||||
+ c = *h;
|
|
||||||
+ return d;
|
|
||||||
+}
|
|
||||||
+static void f(int *h) {
|
|
||||||
+ e = *h;
|
|
||||||
+ *b = 0;
|
|
||||||
+ g(h);
|
|
||||||
+}
|
|
||||||
+int main() {
|
|
||||||
+ f(b);
|
|
||||||
+ if (c)
|
|
||||||
+ __builtin_abort();
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,59 +0,0 @@
|
|||||||
From ea059ab02ac79eba1c05d6e05cbb2590c47d7c1f Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhaowenyu <804544223@qq.com>
|
|
||||||
Date: Thu, 23 Jun 2022 10:16:08 +0800
|
|
||||||
Subject: [PATCH 06/12] [Backport] ifcvt: Allow constants for
|
|
||||||
noce_convert_multiple.
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=9b8eaa282250ad260e01d164093b597579db00d9
|
|
||||||
|
|
||||||
This lifts the restriction of not allowing constants for noce_convert_multiple.
|
|
||||||
The code later checks if a valid sequence is produced anyway.
|
|
||||||
gcc/ChangeLog:
|
|
||||||
|
|
||||||
* ifcvt.cc (noce_convert_multiple_sets): Allow constants.
|
|
||||||
(bb_ok_for_noce_convert_multiple_sets): Likewise.
|
|
||||||
---
|
|
||||||
gcc/ifcvt.c | 13 ++++++++-----
|
|
||||||
1 file changed, 8 insertions(+), 5 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
|
|
||||||
index 977dd1bd4..2452f231c 100644
|
|
||||||
--- a/gcc/ifcvt.c
|
|
||||||
+++ b/gcc/ifcvt.c
|
|
||||||
@@ -3252,7 +3252,9 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
|
|
||||||
we'll end up trying to emit r4:HI = cond ? (r1:SI) : (r3:HI).
|
|
||||||
Wrap the two cmove operands into subregs if appropriate to prevent
|
|
||||||
that. */
|
|
||||||
- if (GET_MODE (new_val) != GET_MODE (temp))
|
|
||||||
+
|
|
||||||
+ if (!CONSTANT_P (new_val)
|
|
||||||
+ && GET_MODE (new_val) != GET_MODE (temp))
|
|
||||||
{
|
|
||||||
machine_mode src_mode = GET_MODE (new_val);
|
|
||||||
machine_mode dst_mode = GET_MODE (temp);
|
|
||||||
@@ -3263,7 +3265,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
|
|
||||||
}
|
|
||||||
new_val = lowpart_subreg (dst_mode, new_val, src_mode);
|
|
||||||
}
|
|
||||||
- if (GET_MODE (old_val) != GET_MODE (temp))
|
|
||||||
+ if (!CONSTANT_P (old_val)
|
|
||||||
+ && GET_MODE (old_val) != GET_MODE (temp))
|
|
||||||
{
|
|
||||||
machine_mode src_mode = GET_MODE (old_val);
|
|
||||||
machine_mode dst_mode = GET_MODE (temp);
|
|
||||||
@@ -3392,9 +3395,9 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb)
|
|
||||||
if (!REG_P (dest))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
- if (!(REG_P (src)
|
|
||||||
- || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
|
|
||||||
- && subreg_lowpart_p (src))))
|
|
||||||
+ if (!((REG_P (src) || CONSTANT_P (src))
|
|
||||||
+ || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
|
|
||||||
+ && subreg_lowpart_p (src))))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* Destination must be appropriate for a conditional write. */
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,40 +0,0 @@
|
|||||||
From beeb0fb50c7e40ee3d79044abc6408f760d6584a Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhaowenyu <804544223@qq.com>
|
|
||||||
Date: Thu, 23 Jun 2022 10:40:46 +0800
|
|
||||||
Subject: [PATCH 07/12] [Backport] Register --sysroot in the driver switches
|
|
||||||
table
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=48e2d9b7b88dabed336cd098cd212d0e53c5125f
|
|
||||||
|
|
||||||
This change adjusts the processing of --sysroot to save the option in the internal "switches"
|
|
||||||
array, which lets self-specs test for it and provide a default value possibly dependent on
|
|
||||||
environment variables, as in
|
|
||||||
|
|
||||||
--with-specs=%{!-sysroot*:--sysroot=%:getenv("WIND_BASE" /target)}
|
|
||||||
|
|
||||||
2021-12-20 Olivier Hainque <hainque@adacore.com>
|
|
||||||
|
|
||||||
gcc/
|
|
||||||
* gcc.c (driver_handle_option): do_save --sysroot.
|
|
||||||
---
|
|
||||||
gcc/gcc.c | 4 +++-
|
|
||||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/gcc.c b/gcc/gcc.c
|
|
||||||
index b55075b14..655beffcc 100644
|
|
||||||
--- a/gcc/gcc.c
|
|
||||||
+++ b/gcc/gcc.c
|
|
||||||
@@ -4190,7 +4190,9 @@ driver_handle_option (struct gcc_options *opts,
|
|
||||||
case OPT__sysroot_:
|
|
||||||
target_system_root = arg;
|
|
||||||
target_system_root_changed = 1;
|
|
||||||
- do_save = false;
|
|
||||||
+ /* Saving this option is useful to let self-specs decide to
|
|
||||||
+ provide a default one. */
|
|
||||||
+ do_save = true;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OPT_time_:
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,665 +0,0 @@
|
|||||||
From f8308a2b440efe124cd6ff59924f135e85e53888 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
|
|
||||||
Date: Sat, 18 Jun 2022 17:51:04 +0800
|
|
||||||
Subject: [PATCH 08/12] [DFE] Fix bugs
|
|
||||||
|
|
||||||
Fix bugs:
|
|
||||||
1. Fixed a bug in check replace type.
|
|
||||||
2. Use new to update field access for ref.
|
|
||||||
3. We now replace the dead fields in stmt by creating a new ssa.
|
|
||||||
4. The replaced type is no longer optimized in NORMAL mode.
|
|
||||||
|
|
||||||
We also added 5 DejaGnu test cases.
|
|
||||||
---
|
|
||||||
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 77 ++++++---
|
|
||||||
gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c | 56 ++++++
|
|
||||||
gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c | 162 ++++++++++++++++++
|
|
||||||
gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c | 126 ++++++++++++++
|
|
||||||
.../gcc.dg/struct/dfe_extr_tcp_usrreq.c | 58 +++++++
|
|
||||||
.../gcc.dg/struct/dfe_extr_ui_main.c | 61 +++++++
|
|
||||||
6 files changed, 516 insertions(+), 24 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
|
|
||||||
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
index 2fa560239..00dc4bf1d 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
@@ -252,6 +252,7 @@ enum struct_layout_opt_level
|
|
||||||
|
|
||||||
static bool is_result_of_mult (tree arg, tree *num, tree struct_size);
|
|
||||||
bool isptrptr (tree type);
|
|
||||||
+void get_base (tree &base, tree expr);
|
|
||||||
|
|
||||||
srmode current_mode;
|
|
||||||
|
|
||||||
@@ -631,7 +632,15 @@ srtype::analyze (void)
|
|
||||||
into 2 different structures. In future we intend to add profile
|
|
||||||
info and/or static heuristics to differentiate splitting process. */
|
|
||||||
if (fields.length () == 2)
|
|
||||||
- fields[1]->clusternum = 1;
|
|
||||||
+ {
|
|
||||||
+ for (hash_map<tree, tree>::iterator it = replace_type_map.begin ();
|
|
||||||
+ it != replace_type_map.end (); ++it)
|
|
||||||
+ {
|
|
||||||
+ if (types_compatible_p ((*it).second, this->type))
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ fields[1]->clusternum = 1;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
/* Otherwise we do nothing. */
|
|
||||||
if (fields.length () >= 3)
|
|
||||||
@@ -3278,12 +3287,33 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
|
||||||
/* Update field_access in srfield. */
|
|
||||||
|
|
||||||
static void
|
|
||||||
-update_field_access (tree record, tree field, unsigned access, void *data)
|
|
||||||
+update_field_access (tree node, tree op, unsigned access, void *data)
|
|
||||||
{
|
|
||||||
- srtype *this_srtype = ((ipa_struct_reorg *)data)->find_type (record);
|
|
||||||
+ HOST_WIDE_INT offset = 0;
|
|
||||||
+ switch (TREE_CODE (op))
|
|
||||||
+ {
|
|
||||||
+ case COMPONENT_REF:
|
|
||||||
+ {
|
|
||||||
+ offset = int_byte_position (TREE_OPERAND (op, 1));
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ case MEM_REF:
|
|
||||||
+ {
|
|
||||||
+ offset = tree_to_uhwi (TREE_OPERAND (op, 1));
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ default:
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ tree base = node;
|
|
||||||
+ get_base (base, node);
|
|
||||||
+ srdecl *this_srdecl = ((ipa_struct_reorg *)data)->find_decl (base);
|
|
||||||
+ if (this_srdecl == NULL)
|
|
||||||
+ return;
|
|
||||||
+ srtype *this_srtype = this_srdecl->type;
|
|
||||||
if (this_srtype == NULL)
|
|
||||||
return;
|
|
||||||
- srfield *this_srfield = this_srtype->find_field (int_byte_position (field));
|
|
||||||
+ srfield *this_srfield = this_srtype->find_field (offset);
|
|
||||||
if (this_srfield == NULL)
|
|
||||||
return;
|
|
||||||
|
|
||||||
@@ -3291,9 +3321,9 @@ update_field_access (tree record, tree field, unsigned access, void *data)
|
|
||||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
{
|
|
||||||
fprintf (dump_file, "record field access %d:", access);
|
|
||||||
- print_generic_expr (dump_file, record);
|
|
||||||
+ print_generic_expr (dump_file, this_srtype->type);
|
|
||||||
fprintf (dump_file, " field:");
|
|
||||||
- print_generic_expr (dump_file, field);
|
|
||||||
+ print_generic_expr (dump_file, this_srfield->fielddecl);
|
|
||||||
fprintf (dump_file, "\n");
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
@@ -3302,15 +3332,10 @@ update_field_access (tree record, tree field, unsigned access, void *data)
|
|
||||||
/* A callback for walk_stmt_load_store_ops to visit store. */
|
|
||||||
|
|
||||||
static bool
|
|
||||||
-find_field_p_store (gimple *, tree node, tree op, void *data)
|
|
||||||
+find_field_p_store (gimple *stmt ATTRIBUTE_UNUSED,
|
|
||||||
+ tree node, tree op, void *data)
|
|
||||||
{
|
|
||||||
- if (TREE_CODE (op) != COMPONENT_REF)
|
|
||||||
- return false;
|
|
||||||
- tree node_type = TREE_TYPE (node);
|
|
||||||
- if (!handled_type (node_type))
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- update_field_access (node_type, TREE_OPERAND (op, 1), WRITE_FIELD, data);
|
|
||||||
+ update_field_access (node, op, WRITE_FIELD, data);
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
@@ -3318,15 +3343,10 @@ find_field_p_store (gimple *, tree node, tree op, void *data)
|
|
||||||
/* A callback for walk_stmt_load_store_ops to visit load. */
|
|
||||||
|
|
||||||
static bool
|
|
||||||
-find_field_p_load (gimple *, tree node, tree op, void *data)
|
|
||||||
+find_field_p_load (gimple *stmt ATTRIBUTE_UNUSED,
|
|
||||||
+ tree node, tree op, void *data)
|
|
||||||
{
|
|
||||||
- if (TREE_CODE (op) != COMPONENT_REF)
|
|
||||||
- return false;
|
|
||||||
- tree node_type = TREE_TYPE (node);
|
|
||||||
- if (!handled_type (node_type))
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- update_field_access (node_type, TREE_OPERAND (op, 1), READ_FIELD, data);
|
|
||||||
+ update_field_access (node, op, READ_FIELD, data);
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
@@ -4629,7 +4649,7 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec<
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
- if (!is_replace_type (t1->type, type->type))
|
|
||||||
+ if (!is_replace_type (inner_type (t), type->type))
|
|
||||||
{
|
|
||||||
if (t1)
|
|
||||||
t1->mark_escape (escape_cast_another_ptr, stmt);
|
|
||||||
@@ -5898,7 +5918,16 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
|
|
||||||
fprintf (dump_file, "\n rewriting statement (remove): \n");
|
|
||||||
print_gimple_stmt (dump_file, stmt, 0);
|
|
||||||
}
|
|
||||||
- return true;
|
|
||||||
+ /* Replace the dead field in stmt by creating a dummy ssa. */
|
|
||||||
+ tree dummy_ssa = make_ssa_name (TREE_TYPE (gimple_assign_lhs (stmt)));
|
|
||||||
+ gimple_assign_set_lhs (stmt, dummy_ssa);
|
|
||||||
+ update_stmt (stmt);
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "To: \n");
|
|
||||||
+ print_gimple_stmt (dump_file, stmt, 0);
|
|
||||||
+ }
|
|
||||||
+ return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gimple_clobber_p (stmt))
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..13a226ee8
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
|
|
||||||
@@ -0,0 +1,56 @@
|
|
||||||
+/* { dg-do compile} */
|
|
||||||
+
|
|
||||||
+#define NULL ((void*)0)
|
|
||||||
+typedef unsigned long size_t;
|
|
||||||
+typedef long intptr_t;
|
|
||||||
+typedef unsigned long uintptr_t;
|
|
||||||
+typedef long scalar_t__;
|
|
||||||
+typedef int bool;
|
|
||||||
+#define false 0
|
|
||||||
+#define true 1
|
|
||||||
+
|
|
||||||
+typedef struct TYPE_4__ TYPE_2__;
|
|
||||||
+typedef struct TYPE_3__ TYPE_1__;
|
|
||||||
+
|
|
||||||
+typedef int uint8_t;
|
|
||||||
+typedef int uint16_t;
|
|
||||||
+
|
|
||||||
+struct TYPE_4__
|
|
||||||
+{
|
|
||||||
+ size_t cpu_id;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct TYPE_3__
|
|
||||||
+{
|
|
||||||
+ int cpuc_dtrace_flags;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+TYPE_2__ *CPU;
|
|
||||||
+volatile int CPU_DTRACE_FAULT;
|
|
||||||
+TYPE_1__ *cpu_core;
|
|
||||||
+scalar_t__ dtrace_load8 (uintptr_t);
|
|
||||||
+
|
|
||||||
+__attribute__((used)) static int
|
|
||||||
+dtrace_bcmp (const void *s1, const void *s2, size_t len)
|
|
||||||
+{
|
|
||||||
+ volatile uint16_t *flags;
|
|
||||||
+ flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
|
|
||||||
+ if (s1 == s2)
|
|
||||||
+ return (0);
|
|
||||||
+ if (s1 == NULL || s2 == NULL)
|
|
||||||
+ return (1);
|
|
||||||
+ if (s1 != s2 && len != 0)
|
|
||||||
+ {
|
|
||||||
+ const uint8_t *ps1 = s1;
|
|
||||||
+ const uint8_t *ps2 = s2;
|
|
||||||
+ do
|
|
||||||
+ {
|
|
||||||
+ if (dtrace_load8 ((uintptr_t)ps1++) != *ps2++)
|
|
||||||
+ return (1);
|
|
||||||
+ }
|
|
||||||
+ while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
|
|
||||||
+ }
|
|
||||||
+ return (0);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..1fff2cb9d
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
|
|
||||||
@@ -0,0 +1,162 @@
|
|
||||||
+/* { dg-do compile} */
|
|
||||||
+
|
|
||||||
+#define NULL ((void*)0)
|
|
||||||
+typedef unsigned long size_t;
|
|
||||||
+typedef long intptr_t;
|
|
||||||
+typedef unsigned long uintptr_t;
|
|
||||||
+typedef long scalar_t__;
|
|
||||||
+typedef int bool;
|
|
||||||
+#define false 0
|
|
||||||
+#define true 1
|
|
||||||
+
|
|
||||||
+struct mrb_context
|
|
||||||
+{
|
|
||||||
+ size_t stack;
|
|
||||||
+ size_t stbase;
|
|
||||||
+ size_t stend;
|
|
||||||
+ size_t eidx;
|
|
||||||
+ int *ci;
|
|
||||||
+ int *cibase;
|
|
||||||
+ int status;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct RObject
|
|
||||||
+{
|
|
||||||
+ int dummy;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct RHash
|
|
||||||
+{
|
|
||||||
+ int dummy;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct RFiber
|
|
||||||
+{
|
|
||||||
+ struct mrb_context *cxt;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct RClass
|
|
||||||
+{
|
|
||||||
+ int dummy;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct RBasic
|
|
||||||
+{
|
|
||||||
+ int tt;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct RArray
|
|
||||||
+{
|
|
||||||
+ int dummy;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+typedef int mrb_state;
|
|
||||||
+typedef int mrb_gc;
|
|
||||||
+typedef int mrb_callinfo;
|
|
||||||
+size_t ARY_LEN (struct RArray *);
|
|
||||||
+size_t MRB_ENV_STACK_LEN (struct RBasic *);
|
|
||||||
+int MRB_FIBER_TERMINATED;
|
|
||||||
+
|
|
||||||
+#define MRB_TT_ARRAY 140
|
|
||||||
+#define MRB_TT_CLASS 139
|
|
||||||
+#define MRB_TT_DATA 138
|
|
||||||
+#define MRB_TT_ENV 137
|
|
||||||
+#define MRB_TT_EXCEPTION 136
|
|
||||||
+#define MRB_TT_FIBER 135
|
|
||||||
+#define MRB_TT_HASH 134
|
|
||||||
+#define MRB_TT_ICLASS 133
|
|
||||||
+#define MRB_TT_MODULE 132
|
|
||||||
+#define MRB_TT_OBJECT 131
|
|
||||||
+#define MRB_TT_PROC 130
|
|
||||||
+#define MRB_TT_RANGE 129
|
|
||||||
+#define MRB_TT_SCLASS 128
|
|
||||||
+
|
|
||||||
+size_t ci_nregs (int *);
|
|
||||||
+int gc_mark_children (int *, int *, struct RBasic *);
|
|
||||||
+size_t mrb_gc_mark_hash_size (int *, struct RHash *);
|
|
||||||
+size_t mrb_gc_mark_iv_size (int *, struct RObject *);
|
|
||||||
+size_t mrb_gc_mark_mt_size (int *, struct RClass *);
|
|
||||||
+
|
|
||||||
+__attribute__((used)) static size_t
|
|
||||||
+gc_gray_mark (mrb_state *mrb, mrb_gc *gc, struct RBasic *obj)
|
|
||||||
+{
|
|
||||||
+ size_t children = 0;
|
|
||||||
+ gc_mark_children (mrb, gc, obj);
|
|
||||||
+ switch (obj->tt)
|
|
||||||
+ {
|
|
||||||
+ case MRB_TT_ICLASS:
|
|
||||||
+ children++;
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case MRB_TT_CLASS:
|
|
||||||
+ case MRB_TT_SCLASS:
|
|
||||||
+ case MRB_TT_MODULE:
|
|
||||||
+ {
|
|
||||||
+ struct RClass *c = (struct RClass *)obj;
|
|
||||||
+ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj);
|
|
||||||
+ children += mrb_gc_mark_mt_size (mrb, c);
|
|
||||||
+ children ++;
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case MRB_TT_OBJECT:
|
|
||||||
+ case MRB_TT_DATA:
|
|
||||||
+ case MRB_TT_EXCEPTION:
|
|
||||||
+ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj);
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case MRB_TT_ENV:
|
|
||||||
+ children += MRB_ENV_STACK_LEN (obj);
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case MRB_TT_FIBER:
|
|
||||||
+ {
|
|
||||||
+ struct mrb_context *c = ((struct RFiber *)obj)->cxt;
|
|
||||||
+ size_t i;
|
|
||||||
+ mrb_callinfo *ci;
|
|
||||||
+ if (!c || c->status == MRB_FIBER_TERMINATED)
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ i = c->stack - c->stbase;
|
|
||||||
+ if (c->ci)
|
|
||||||
+ {
|
|
||||||
+ i += ci_nregs (c->ci);
|
|
||||||
+ }
|
|
||||||
+ if (c->stbase + i > c->stend)
|
|
||||||
+ i = c->stend - c->stbase;
|
|
||||||
+
|
|
||||||
+ children += i;
|
|
||||||
+ children += c->eidx;
|
|
||||||
+ if (c->cibase)
|
|
||||||
+ {
|
|
||||||
+ for (i = 0, ci = c->cibase; ci <= c->ci; i++, ci++)
|
|
||||||
+ ;
|
|
||||||
+ }
|
|
||||||
+ children += i;
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case MRB_TT_ARRAY:
|
|
||||||
+ {
|
|
||||||
+ struct RArray *a = (struct RArray *)obj;
|
|
||||||
+ children += ARY_LEN (a);
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case MRB_TT_HASH:
|
|
||||||
+ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj);
|
|
||||||
+ children += mrb_gc_mark_hash_size (mrb, (struct RHash *)obj);
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case MRB_TT_PROC:
|
|
||||||
+ case MRB_TT_RANGE:
|
|
||||||
+ children += 2;
|
|
||||||
+ break;
|
|
||||||
+ default:
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return children;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..0f577667c
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
|
|
||||||
@@ -0,0 +1,126 @@
|
|
||||||
+/* { dg-do compile} */
|
|
||||||
+
|
|
||||||
+#define NULL ((void*)0)
|
|
||||||
+typedef unsigned long size_t;
|
|
||||||
+typedef long intptr_t;
|
|
||||||
+typedef unsigned long uintptr_t;
|
|
||||||
+typedef long scalar_t__;
|
|
||||||
+typedef int bool;
|
|
||||||
+#define false 0
|
|
||||||
+#define true 1
|
|
||||||
+
|
|
||||||
+typedef struct TYPE_6__ TYPE_3__;
|
|
||||||
+typedef struct TYPE_5__ TYPE_2__;
|
|
||||||
+typedef struct TYPE_4__ TYPE_1__;
|
|
||||||
+
|
|
||||||
+struct io_accel2_cmd
|
|
||||||
+{
|
|
||||||
+ int dummy;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct hpsa_tmf_struct
|
|
||||||
+{
|
|
||||||
+ int it_nexus;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct hpsa_scsi_dev_t
|
|
||||||
+{
|
|
||||||
+ int nphysical_disks;
|
|
||||||
+ int ioaccel_handle;
|
|
||||||
+ struct hpsa_scsi_dev_t **phys_disk;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct ctlr_info
|
|
||||||
+{
|
|
||||||
+ TYPE_3__ *pdev;
|
|
||||||
+ struct io_accel2_cmd *ioaccel2_cmd_pool;
|
|
||||||
+};
|
|
||||||
+struct TYPE_4__
|
|
||||||
+{
|
|
||||||
+ int LunAddrBytes;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct TYPE_5__
|
|
||||||
+{
|
|
||||||
+ TYPE_1__ LUN;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct CommandList
|
|
||||||
+{
|
|
||||||
+ size_t cmdindex;
|
|
||||||
+ int cmd_type;
|
|
||||||
+ struct hpsa_scsi_dev_t *phys_disk;
|
|
||||||
+ TYPE_2__ Header;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct TYPE_6__
|
|
||||||
+{
|
|
||||||
+ int dev;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+int BUG ();
|
|
||||||
+#define CMD_IOACCEL1 132
|
|
||||||
+#define CMD_IOACCEL2 131
|
|
||||||
+#define CMD_IOCTL_PEND 130
|
|
||||||
+#define CMD_SCSI 129
|
|
||||||
+#define IOACCEL2_TMF 128
|
|
||||||
+int dev_err (int *, char *, int);
|
|
||||||
+scalar_t__ hpsa_is_cmd_idle (struct CommandList *);
|
|
||||||
+int le32_to_cpu (int);
|
|
||||||
+int test_memcmp (unsigned char *, int *, int);
|
|
||||||
+
|
|
||||||
+__attribute__((used)) static bool
|
|
||||||
+hpsa_cmd_dev_match (struct ctlr_info *h, struct CommandList *c,
|
|
||||||
+ struct hpsa_scsi_dev_t *dev, unsigned char *scsi3addr)
|
|
||||||
+{
|
|
||||||
+ int i;
|
|
||||||
+ bool match = false;
|
|
||||||
+ struct io_accel2_cmd * c2 = &h->ioaccel2_cmd_pool[c->cmdindex];
|
|
||||||
+ struct hpsa_tmf_struct *ac = (struct hpsa_tmf_struct *)c2;
|
|
||||||
+
|
|
||||||
+ if (hpsa_is_cmd_idle (c))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ switch (c->cmd_type)
|
|
||||||
+ {
|
|
||||||
+ case CMD_SCSI:
|
|
||||||
+ case CMD_IOCTL_PEND:
|
|
||||||
+ match = !test_memcmp (scsi3addr, &c->Header.LUN.LunAddrBytes,
|
|
||||||
+ sizeof (c->Header.LUN.LunAddrBytes));
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case CMD_IOACCEL1:
|
|
||||||
+ case CMD_IOACCEL2:
|
|
||||||
+ if (c->phys_disk == dev)
|
|
||||||
+ {
|
|
||||||
+ match = true;
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ for (i = 0; i < dev->nphysical_disks && !match; i++)
|
|
||||||
+ {
|
|
||||||
+ match = dev->phys_disk[i] == c->phys_disk;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case IOACCEL2_TMF:
|
|
||||||
+ for (i = 0; i < dev->nphysical_disks && !match; i++)
|
|
||||||
+ {
|
|
||||||
+ match = dev->phys_disk[i]->ioaccel_handle ==
|
|
||||||
+ le32_to_cpu (ac->it_nexus);
|
|
||||||
+ }
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case 0:
|
|
||||||
+ match = false;
|
|
||||||
+ break;
|
|
||||||
+ default:
|
|
||||||
+ dev_err (&h->pdev->dev, "unexpected cmd_type: %d\n", c->cmd_type);
|
|
||||||
+ BUG ();
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return match;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..5570c762e
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
|
|
||||||
@@ -0,0 +1,58 @@
|
|
||||||
+/* { dg-do compile} */
|
|
||||||
+
|
|
||||||
+#define NULL ((void*)0)
|
|
||||||
+typedef unsigned long size_t;
|
|
||||||
+typedef long intptr_t;
|
|
||||||
+typedef unsigned long uintptr_t;
|
|
||||||
+typedef long scalar_t__;
|
|
||||||
+typedef int bool;
|
|
||||||
+#define false 0
|
|
||||||
+#define true 1
|
|
||||||
+
|
|
||||||
+struct tcpcb
|
|
||||||
+{
|
|
||||||
+ int t_state;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct socket
|
|
||||||
+{
|
|
||||||
+ int dummy;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct proc
|
|
||||||
+{
|
|
||||||
+ int dummy;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct inpcb
|
|
||||||
+{
|
|
||||||
+ scalar_t__ inp_lport;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+int COMMON_END (int);
|
|
||||||
+int COMMON_START ();
|
|
||||||
+int PRU_LISTEN;
|
|
||||||
+int TCPS_LISTEN;
|
|
||||||
+int in_pcbbind (struct inpcb *, int *, struct proc *);
|
|
||||||
+struct inpcb* sotoinpcb (struct socket *);
|
|
||||||
+
|
|
||||||
+__attribute__((used)) static void
|
|
||||||
+tcp_usr_listen (struct socket *so, struct proc *p)
|
|
||||||
+{
|
|
||||||
+ int error = 0;
|
|
||||||
+ struct inpcb *inp = sotoinpcb (so);
|
|
||||||
+ struct tcpcb *tp;
|
|
||||||
+
|
|
||||||
+ COMMON_START ();
|
|
||||||
+ if (inp->inp_lport == 0)
|
|
||||||
+ {
|
|
||||||
+ error = in_pcbbind (inp, NULL, p);
|
|
||||||
+ }
|
|
||||||
+ if (error == 0)
|
|
||||||
+ {
|
|
||||||
+ tp->t_state = TCPS_LISTEN;
|
|
||||||
+ }
|
|
||||||
+ COMMON_END (PRU_LISTEN);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..50ab9cc24
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
|
|
||||||
@@ -0,0 +1,61 @@
|
|
||||||
+/* { dg-do compile} */
|
|
||||||
+
|
|
||||||
+#define NULL ((void*)0)
|
|
||||||
+typedef unsigned long size_t;
|
|
||||||
+typedef long intptr_t;
|
|
||||||
+typedef unsigned long uintptr_t;
|
|
||||||
+typedef long scalar_t__;
|
|
||||||
+typedef int bool;
|
|
||||||
+#define false 0
|
|
||||||
+#define true 1
|
|
||||||
+
|
|
||||||
+typedef struct TYPE_4__ TYPE_2__;
|
|
||||||
+typedef struct TYPE_3__ TYPE_1__;
|
|
||||||
+
|
|
||||||
+struct TYPE_4__
|
|
||||||
+{
|
|
||||||
+ size_t modCount;
|
|
||||||
+ TYPE_1__ *modList;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct TYPE_3__
|
|
||||||
+{
|
|
||||||
+ void *modDescr;
|
|
||||||
+ void *modName;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+size_t MAX_MODS;
|
|
||||||
+void *String_Alloc (char *);
|
|
||||||
+int test_strlen (char *);
|
|
||||||
+int trap_FD_GetFileList (char *, char *, char *, int);
|
|
||||||
+TYPE_2__ uiInfo;
|
|
||||||
+
|
|
||||||
+__attribute__((used)) static void
|
|
||||||
+UI_LoadMods ()
|
|
||||||
+{
|
|
||||||
+ int numdirs;
|
|
||||||
+ char dirlist[2048];
|
|
||||||
+ char *dirptr;
|
|
||||||
+ char *descptr;
|
|
||||||
+ int i;
|
|
||||||
+ int dirlen;
|
|
||||||
+
|
|
||||||
+ uiInfo.modCount = 0;
|
|
||||||
+ numdirs = trap_FD_GetFileList ("$modelist", "", dirlist, sizeof (dirlist));
|
|
||||||
+ dirptr = dirlist;
|
|
||||||
+ for (i = 0; i < numdirs; i++)
|
|
||||||
+ {
|
|
||||||
+ dirlen = test_strlen (dirptr) + 1;
|
|
||||||
+ descptr = dirptr + dirlen;
|
|
||||||
+ uiInfo.modList[uiInfo.modCount].modName = String_Alloc (dirptr);
|
|
||||||
+ uiInfo.modList[uiInfo.modCount].modDescr = String_Alloc (descptr);
|
|
||||||
+ dirptr += dirlen + test_strlen (descptr) + 1;
|
|
||||||
+ uiInfo.modCount++;
|
|
||||||
+ if (uiInfo.modCount >= MAX_MODS)
|
|
||||||
+ {
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,379 +0,0 @@
|
|||||||
From 8f51c8c83355cb1b69553e582fb512c6e37b71f5 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
|
|
||||||
Date: Thu, 18 Aug 2022 17:15:08 +0800
|
|
||||||
Subject: [PATCH] [DFE] Fix the bug caused by inconsistent types: 1. Update
|
|
||||||
some functions to fix the bug caused by inconsistent base and node types.
|
|
||||||
|
|
||||||
Also we added 3 dejaGNU test cases.
|
|
||||||
---
|
|
||||||
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 57 ++++++++-----
|
|
||||||
.../gcc.dg/struct/dfe_extr_board_init.c | 77 +++++++++++++++++
|
|
||||||
gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c | 77 +++++++++++++++++
|
|
||||||
.../gcc.dg/struct/dfe_extr_mv_udc_core.c | 82 +++++++++++++++++++
|
|
||||||
4 files changed, 273 insertions(+), 20 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c
|
|
||||||
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
index 00dc4bf1d..8d3da3540 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
@@ -3284,33 +3284,31 @@ ipa_struct_reorg::find_vars (gimple *stmt)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* Update field_access in srfield. */
|
|
||||||
-
|
|
||||||
-static void
|
|
||||||
-update_field_access (tree node, tree op, unsigned access, void *data)
|
|
||||||
+static HOST_WIDE_INT
|
|
||||||
+get_offset (tree op, HOST_WIDE_INT offset)
|
|
||||||
{
|
|
||||||
- HOST_WIDE_INT offset = 0;
|
|
||||||
switch (TREE_CODE (op))
|
|
||||||
{
|
|
||||||
case COMPONENT_REF:
|
|
||||||
{
|
|
||||||
- offset = int_byte_position (TREE_OPERAND (op, 1));
|
|
||||||
- break;
|
|
||||||
+ return int_byte_position (TREE_OPERAND (op, 1));
|
|
||||||
}
|
|
||||||
case MEM_REF:
|
|
||||||
{
|
|
||||||
- offset = tree_to_uhwi (TREE_OPERAND (op, 1));
|
|
||||||
- break;
|
|
||||||
+ return tree_to_uhwi (TREE_OPERAND (op, 1));
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
- return;
|
|
||||||
+ return offset;
|
|
||||||
}
|
|
||||||
- tree base = node;
|
|
||||||
- get_base (base, node);
|
|
||||||
- srdecl *this_srdecl = ((ipa_struct_reorg *)data)->find_decl (base);
|
|
||||||
- if (this_srdecl == NULL)
|
|
||||||
- return;
|
|
||||||
- srtype *this_srtype = this_srdecl->type;
|
|
||||||
+ return offset;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Record field access. */
|
|
||||||
+static void
|
|
||||||
+record_field_access (tree type, HOST_WIDE_INT offset,
|
|
||||||
+ unsigned access, void *data)
|
|
||||||
+{
|
|
||||||
+ srtype *this_srtype = ((ipa_struct_reorg *)data)->find_type (type);
|
|
||||||
if (this_srtype == NULL)
|
|
||||||
return;
|
|
||||||
srfield *this_srfield = this_srtype->find_field (offset);
|
|
||||||
@@ -3321,12 +3319,33 @@ update_field_access (tree node, tree op, unsigned access, void *data)
|
|
||||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
{
|
|
||||||
fprintf (dump_file, "record field access %d:", access);
|
|
||||||
- print_generic_expr (dump_file, this_srtype->type);
|
|
||||||
+ print_generic_expr (dump_file, type);
|
|
||||||
fprintf (dump_file, " field:");
|
|
||||||
print_generic_expr (dump_file, this_srfield->fielddecl);
|
|
||||||
fprintf (dump_file, "\n");
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
+
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Update field_access in srfield. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+update_field_access (tree node, tree op, unsigned access, void *data)
|
|
||||||
+{
|
|
||||||
+ HOST_WIDE_INT offset = 0;
|
|
||||||
+ offset = get_offset (op, offset);
|
|
||||||
+ tree node_type = inner_type (TREE_TYPE (node));
|
|
||||||
+ record_field_access (node_type, offset, access, data);
|
|
||||||
+ tree base = node;
|
|
||||||
+ get_base (base, node);
|
|
||||||
+ tree base_type = inner_type (TREE_TYPE (base));
|
|
||||||
+ if (!types_compatible_p (base_type, node_type))
|
|
||||||
+ {
|
|
||||||
+ record_field_access (base_type, get_offset (node, offset),
|
|
||||||
+ access, data);
|
|
||||||
+ }
|
|
||||||
+ return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* A callback for walk_stmt_load_store_ops to visit store. */
|
|
||||||
@@ -3373,8 +3392,7 @@ ipa_struct_reorg::remove_dead_field_stmt (tree lhs)
|
|
||||||
return false;
|
|
||||||
if (f == NULL)
|
|
||||||
return false;
|
|
||||||
- if (f->newfield[0] == NULL
|
|
||||||
- && (f->field_access & WRITE_FIELD))
|
|
||||||
+ if (f->newfield[0] == NULL)
|
|
||||||
return true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
@@ -5927,7 +5945,6 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
|
|
||||||
fprintf (dump_file, "To: \n");
|
|
||||||
print_gimple_stmt (dump_file, stmt, 0);
|
|
||||||
}
|
|
||||||
- return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gimple_clobber_p (stmt))
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..4e52564b6
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c
|
|
||||||
@@ -0,0 +1,77 @@
|
|
||||||
+/* { dg-do compile} */
|
|
||||||
+
|
|
||||||
+#define NULL ((void*)0)
|
|
||||||
+typedef unsigned long size_t;
|
|
||||||
+typedef long intptr_t;
|
|
||||||
+typedef unsigned long uintptr_t;
|
|
||||||
+typedef long scalar_t__;
|
|
||||||
+typedef int bool;
|
|
||||||
+#define false 0
|
|
||||||
+#define true 1
|
|
||||||
+
|
|
||||||
+typedef struct TYPE_5__ TYPE_2__;
|
|
||||||
+typedef struct TYPE_4__ TYPE_1__;
|
|
||||||
+
|
|
||||||
+struct TYPE_4__
|
|
||||||
+{
|
|
||||||
+ int Pin;
|
|
||||||
+ int Pull;
|
|
||||||
+ int Mode;
|
|
||||||
+ int Speed;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct TYPE_5__
|
|
||||||
+{
|
|
||||||
+ int MEMRMP;
|
|
||||||
+};
|
|
||||||
+typedef TYPE_1__ GPIO_InitTypeDef;
|
|
||||||
+
|
|
||||||
+int BT_RST_PIN;
|
|
||||||
+int BT_RST_PORT;
|
|
||||||
+int CONN_POS10_PIN;
|
|
||||||
+int CONN_POS10_PORT;
|
|
||||||
+int GPIO_HIGH (int, int);
|
|
||||||
+int GPIO_MODE_INPUT;
|
|
||||||
+int GPIO_MODE_OUTPUT_PP;
|
|
||||||
+int GPIO_NOPULL;
|
|
||||||
+int GPIO_PULLUP;
|
|
||||||
+int GPIO_SPEED_FREQ_LOW;
|
|
||||||
+int HAL_GPIO_Init (int, TYPE_1__ *);
|
|
||||||
+scalar_t__ IS_GPIO_RESET (int, int);
|
|
||||||
+TYPE_2__ *SYSCFG;
|
|
||||||
+int __HAL_RCC_GPIOB_CLK_ENABLE ();
|
|
||||||
+int __HAL_RCC_GPIOC_CLK_ENABLE ();
|
|
||||||
+
|
|
||||||
+__attribute__((used)) static void
|
|
||||||
+LBF_DFU_If_Needed (void)
|
|
||||||
+{
|
|
||||||
+ GPIO_InitTypeDef GPIO_InitStruct;
|
|
||||||
+ __HAL_RCC_GPIOC_CLK_ENABLE ();
|
|
||||||
+ GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP;
|
|
||||||
+ GPIO_InitStruct.Pull = GPIO_NOPULL;
|
|
||||||
+ GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
|
|
||||||
+ GPIO_InitStruct.Pin = BT_RST_PIN;
|
|
||||||
+ HAL_GPIO_Init (BT_RST_PORT, &GPIO_InitStruct);
|
|
||||||
+
|
|
||||||
+ GPIO_HIGH (BT_RST_PORT, BT_RST_PIN);
|
|
||||||
+ __HAL_RCC_GPIOB_CLK_ENABLE ();
|
|
||||||
+ GPIO_InitStruct.Mode = GPIO_MODE_INPUT;
|
|
||||||
+ GPIO_InitStruct.Pull = GPIO_PULLUP;
|
|
||||||
+ GPIO_InitStruct.Pin = CONN_POS10_PIN;
|
|
||||||
+ HAL_GPIO_Init (CONN_POS10_PORT, &GPIO_InitStruct);
|
|
||||||
+
|
|
||||||
+ if (IS_GPIO_RESET (CONN_POS10_PORT, CONN_POS10_PIN))
|
|
||||||
+ {
|
|
||||||
+ SYSCFG->MEMRMP = 0x00000001;
|
|
||||||
+ asm (
|
|
||||||
+ "LDR R0, =0x000000\n\t"
|
|
||||||
+ "LDR SP, [R0, #0]\n\t"
|
|
||||||
+ );
|
|
||||||
+ asm (
|
|
||||||
+ "LDR R0, [R0, #0]\n\t"
|
|
||||||
+ "BX R0\n\t"
|
|
||||||
+ );
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..894e9f460
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c
|
|
||||||
@@ -0,0 +1,77 @@
|
|
||||||
+/* { dg-do compile} */
|
|
||||||
+
|
|
||||||
+#define NULL ((void*)0)
|
|
||||||
+typedef unsigned long size_t;
|
|
||||||
+typedef long intptr_t;
|
|
||||||
+typedef unsigned long uintptr_t;
|
|
||||||
+typedef long scalar_t__;
|
|
||||||
+typedef int bool;
|
|
||||||
+#define false 0
|
|
||||||
+#define true 1
|
|
||||||
+
|
|
||||||
+typedef struct TYPE_2__ TYPE_1__;
|
|
||||||
+
|
|
||||||
+struct net_device
|
|
||||||
+{
|
|
||||||
+ struct claw_privbk* ml_priv;
|
|
||||||
+};
|
|
||||||
+struct clawctl
|
|
||||||
+{
|
|
||||||
+ int linkid;
|
|
||||||
+};
|
|
||||||
+struct claw_privbk
|
|
||||||
+{
|
|
||||||
+ int system_validate_comp;
|
|
||||||
+ TYPE_1__* p_env;
|
|
||||||
+ int ctl_bk;
|
|
||||||
+};
|
|
||||||
+typedef int __u8;
|
|
||||||
+struct TYPE_2__
|
|
||||||
+{
|
|
||||||
+ scalar_t__ packing;
|
|
||||||
+ int api_type;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+int CLAW_DBF_TEXT (int, int, char*);
|
|
||||||
+int CONNECTION_REQUEST;
|
|
||||||
+int HOST_APPL_NAME;
|
|
||||||
+scalar_t__ PACKING_ASK;
|
|
||||||
+scalar_t__ PACK_SEND;
|
|
||||||
+int WS_APPL_NAME_IP_NAME;
|
|
||||||
+int WS_APPL_NAME_PACKED;
|
|
||||||
+int claw_send_control (struct net_device*, int, int, int, int, int, int);
|
|
||||||
+int setup;
|
|
||||||
+
|
|
||||||
+__attribute__((used)) static int
|
|
||||||
+claw_snd_conn_req (struct net_device *dev, __u8 link)
|
|
||||||
+{
|
|
||||||
+ int rc;
|
|
||||||
+ struct claw_privbk *privptr = dev->ml_priv;
|
|
||||||
+ struct clawctl *p_ctl;
|
|
||||||
+ CLAW_DBF_TEXT (2, setup, "snd_conn");
|
|
||||||
+ rc = 1;
|
|
||||||
+ p_ctl = (struct clawctl *)&privptr->ctl_bk;
|
|
||||||
+ p_ctl->linkid = link;
|
|
||||||
+ if (privptr->system_validate_comp == 0x00)
|
|
||||||
+ {
|
|
||||||
+ return rc;
|
|
||||||
+ }
|
|
||||||
+ if (privptr->p_env->packing == PACKING_ASK)
|
|
||||||
+ {
|
|
||||||
+ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0,
|
|
||||||
+ WS_APPL_NAME_PACKED, WS_APPL_NAME_PACKED);
|
|
||||||
+ }
|
|
||||||
+ if (privptr->p_env->packing == PACK_SEND)
|
|
||||||
+ {
|
|
||||||
+ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0,
|
|
||||||
+ WS_APPL_NAME_IP_NAME, WS_APPL_NAME_IP_NAME);
|
|
||||||
+ }
|
|
||||||
+ if (privptr->p_env->packing == 0)
|
|
||||||
+ {
|
|
||||||
+ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0,
|
|
||||||
+ HOST_APPL_NAME, privptr->p_env->api_type);
|
|
||||||
+ }
|
|
||||||
+ return rc;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..9801f87f1
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c
|
|
||||||
@@ -0,0 +1,82 @@
|
|
||||||
+/* { dg-do compile} */
|
|
||||||
+
|
|
||||||
+#define NULL ((void*)0)
|
|
||||||
+typedef unsigned long size_t;
|
|
||||||
+typedef long intptr_t;
|
|
||||||
+typedef unsigned long uintptr_t;
|
|
||||||
+typedef long scalar_t__;
|
|
||||||
+typedef int bool;
|
|
||||||
+#define false 0
|
|
||||||
+#define true 1
|
|
||||||
+
|
|
||||||
+typedef struct TYPE_4__ TYPE_2__;
|
|
||||||
+typedef struct TYPE_3__ TYPE_1__;
|
|
||||||
+typedef int u32;
|
|
||||||
+
|
|
||||||
+struct mv_udc
|
|
||||||
+{
|
|
||||||
+ TYPE_2__ *op_regs;
|
|
||||||
+ TYPE_1__ *ep_dqh;
|
|
||||||
+ struct mv_ep *eps;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct mv_ep
|
|
||||||
+{
|
|
||||||
+ TYPE_1__ *dqh;
|
|
||||||
+ struct mv_udc *udc;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct TYPE_4__
|
|
||||||
+{
|
|
||||||
+ int *epctrlx;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct TYPE_3__
|
|
||||||
+{
|
|
||||||
+ int max_packet_length;
|
|
||||||
+ int next_dtd_ptr;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+int EP0_MAX_PKT_SIZE;
|
|
||||||
+int EPCTRL_RX_ENABLE;
|
|
||||||
+int EPCTRL_RX_EP_TYPE_SHIFT;
|
|
||||||
+int EPCTRL_TX_ENABLE;
|
|
||||||
+int EPCTRL_TX_EP_TYPE_SHIFT;
|
|
||||||
+int EP_QUEUE_HEAD_IOS;
|
|
||||||
+int EP_QUEUE_HEAD_MAX_PKT_LEN_POS;
|
|
||||||
+int EP_QUEUE_HEAD_NEXT_TERMINATE;
|
|
||||||
+int USB_ENDPOINT_XFER_CONTROL;
|
|
||||||
+int readl (int *);
|
|
||||||
+int writel (int, int *);
|
|
||||||
+
|
|
||||||
+__attribute__((used)) static void
|
|
||||||
+ep0_reset (struct mv_udc *udc)
|
|
||||||
+{
|
|
||||||
+ struct mv_ep *ep;
|
|
||||||
+ u32 epctrlx;
|
|
||||||
+ int i = 0;
|
|
||||||
+ for (i = 0; i < 2; i++)
|
|
||||||
+ {
|
|
||||||
+ ep = &udc->eps[i];
|
|
||||||
+ ep->udc = udc;
|
|
||||||
+ ep->dqh = &udc->ep_dqh[i];
|
|
||||||
+ ep->dqh->max_packet_length =
|
|
||||||
+ (EP0_MAX_PKT_SIZE << EP_QUEUE_HEAD_MAX_PKT_LEN_POS)
|
|
||||||
+ | EP_QUEUE_HEAD_IOS;
|
|
||||||
+ ep->dqh->next_dtd_ptr = EP_QUEUE_HEAD_NEXT_TERMINATE;
|
|
||||||
+ epctrlx = readl (&udc->op_regs->epctrlx[0]);
|
|
||||||
+ if (i)
|
|
||||||
+ {
|
|
||||||
+ epctrlx |= EPCTRL_TX_ENABLE
|
|
||||||
+ | (USB_ENDPOINT_XFER_CONTROL << EPCTRL_TX_EP_TYPE_SHIFT);
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ epctrlx |= EPCTRL_RX_ENABLE
|
|
||||||
+ | (USB_ENDPOINT_XFER_CONTROL << EPCTRL_RX_EP_TYPE_SHIFT);
|
|
||||||
+ }
|
|
||||||
+ writel (epctrlx, &udc->op_regs->epctrlx[0]);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
@ -1,146 +0,0 @@
|
|||||||
From b66a843505f32685f428c502f1a88e0f681b4acd Mon Sep 17 00:00:00 2001
|
|
||||||
From: eastb233 <xiezhiheng@huawei.com>
|
|
||||||
Date: Thu, 15 Sep 2022 17:57:00 +0800
|
|
||||||
Subject: [PATCH] [Struct Reorg] Type simplify limitation when in structure
|
|
||||||
optimizaiton
|
|
||||||
|
|
||||||
When enable structure optimization, we should not simplify
|
|
||||||
TYPE NODE. But now we unconditionally skip the simplification
|
|
||||||
under structure optimization regardless of whether it takes
|
|
||||||
effect. So add the same limitation as the optimization has.
|
|
||||||
---
|
|
||||||
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 72 ++++++++++++-------------
|
|
||||||
gcc/tree.c | 13 ++++-
|
|
||||||
2 files changed, 47 insertions(+), 38 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
index 8d3da35400d..54c20ca3f33 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
@@ -104,6 +104,42 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
|
|
||||||
#define VOID_POINTER_P(type) (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type)))
|
|
||||||
|
|
||||||
+/* Check whether in C language or LTO with only C language. */
|
|
||||||
+bool
|
|
||||||
+lang_c_p (void)
|
|
||||||
+{
|
|
||||||
+ const char *language_string = lang_hooks.name;
|
|
||||||
+
|
|
||||||
+ if (!language_string)
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (lang_GNU_C ())
|
|
||||||
+ {
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check
|
|
||||||
+ {
|
|
||||||
+ unsigned i = 0;
|
|
||||||
+ tree t = NULL_TREE;
|
|
||||||
+
|
|
||||||
+ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
|
|
||||||
+ {
|
|
||||||
+ language_string = TRANSLATION_UNIT_LANGUAGE (t);
|
|
||||||
+ if (language_string == NULL
|
|
||||||
+ || strncmp (language_string, "GNU C", 5)
|
|
||||||
+ || (language_string[5] != '\0'
|
|
||||||
+ && !(ISDIGIT (language_string[5]))))
|
|
||||||
+ {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
using namespace struct_reorg;
|
|
||||||
@@ -163,42 +199,6 @@ handled_type (tree type)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* Check whether in C language or LTO with only C language. */
|
|
||||||
-bool
|
|
||||||
-lang_c_p (void)
|
|
||||||
-{
|
|
||||||
- const char *language_string = lang_hooks.name;
|
|
||||||
-
|
|
||||||
- if (!language_string)
|
|
||||||
- {
|
|
||||||
- return false;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- if (lang_GNU_C ())
|
|
||||||
- {
|
|
||||||
- return true;
|
|
||||||
- }
|
|
||||||
- else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check
|
|
||||||
- {
|
|
||||||
- unsigned i = 0;
|
|
||||||
- tree t = NULL_TREE;
|
|
||||||
-
|
|
||||||
- FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
|
|
||||||
- {
|
|
||||||
- language_string = TRANSLATION_UNIT_LANGUAGE (t);
|
|
||||||
- if (language_string == NULL
|
|
||||||
- || strncmp (language_string, "GNU C", 5)
|
|
||||||
- || (language_string[5] != '\0'
|
|
||||||
- && !(ISDIGIT (language_string[5]))))
|
|
||||||
- {
|
|
||||||
- return false;
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- return true;
|
|
||||||
- }
|
|
||||||
- return false;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
/* Get the number of pointer layers. */
|
|
||||||
|
|
||||||
int
|
|
||||||
diff --git a/gcc/tree.c b/gcc/tree.c
|
|
||||||
index c2075d73586..84a440b3576 100644
|
|
||||||
--- a/gcc/tree.c
|
|
||||||
+++ b/gcc/tree.c
|
|
||||||
@@ -128,6 +128,9 @@ const char *const tree_code_class_strings[] =
|
|
||||||
/* obstack.[ch] explicitly declined to prototype this. */
|
|
||||||
extern int _obstack_allocated_p (struct obstack *h, void *obj);
|
|
||||||
|
|
||||||
+/* Check whether in C language or LTO with only C language. */
|
|
||||||
+extern bool lang_c_p (void);
|
|
||||||
+
|
|
||||||
/* Statistics-gathering stuff. */
|
|
||||||
|
|
||||||
static uint64_t tree_code_counts[MAX_TREE_CODES];
|
|
||||||
@@ -5219,7 +5222,10 @@ fld_simplified_type_name (tree type)
|
|
||||||
/* Simplify type will cause that struct A and struct A within
|
|
||||||
struct B are different type pointers, so skip it in structure
|
|
||||||
optimizations. */
|
|
||||||
- if (flag_ipa_struct_layout || flag_ipa_struct_reorg)
|
|
||||||
+ if ((flag_ipa_struct_layout || flag_ipa_struct_reorg)
|
|
||||||
+ && lang_c_p ()
|
|
||||||
+ && flag_lto_partition == LTO_PARTITION_ONE
|
|
||||||
+ && (in_lto_p || flag_whole_program))
|
|
||||||
return TYPE_NAME (type);
|
|
||||||
|
|
||||||
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
|
|
||||||
@@ -5463,7 +5469,10 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
|
|
||||||
/* Simplify type will cause that struct A and struct A within
|
|
||||||
struct B are different type pointers, so skip it in structure
|
|
||||||
optimizations. */
|
|
||||||
- if (flag_ipa_struct_layout || flag_ipa_struct_reorg)
|
|
||||||
+ if ((flag_ipa_struct_layout || flag_ipa_struct_reorg)
|
|
||||||
+ && lang_c_p ()
|
|
||||||
+ && flag_lto_partition == LTO_PARTITION_ONE
|
|
||||||
+ && (in_lto_p || flag_whole_program))
|
|
||||||
return t;
|
|
||||||
if (POINTER_TYPE_P (t))
|
|
||||||
return fld_incomplete_type_of (t, fld);
|
|
||||||
--
|
|
||||||
2.21.0.windows.1
|
|
||||||
|
|
||||||
@ -1,21 +0,0 @@
|
|||||||
From 7dffda64fcbbd522616d7dc9c70530d146f4fed6 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhongyunde <zhongyunde@huawei.com>
|
|
||||||
Date: Tue, 1 Nov 2022 16:38:38 +0800
|
|
||||||
Subject: [PATCH 01/35] [build] Add some file right to executable
|
|
||||||
|
|
||||||
---
|
|
||||||
libgcc/mkheader.sh | 0
|
|
||||||
move-if-change | 0
|
|
||||||
2 files changed, 0 insertions(+), 0 deletions(-)
|
|
||||||
mode change 100644 => 100755 libgcc/mkheader.sh
|
|
||||||
mode change 100644 => 100755 move-if-change
|
|
||||||
|
|
||||||
diff --git a/libgcc/mkheader.sh b/libgcc/mkheader.sh
|
|
||||||
old mode 100644
|
|
||||||
new mode 100755
|
|
||||||
diff --git a/move-if-change b/move-if-change
|
|
||||||
old mode 100644
|
|
||||||
new mode 100755
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,186 +0,0 @@
|
|||||||
From c690da762e873d0f5c66ea084e420ba4842354a6 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jakub Jelinek <jakub@redhat.com>
|
|
||||||
Date: Wed, 4 Nov 2020 11:55:29 +0100
|
|
||||||
Subject: [PATCH 02/35] [Backport] phiopt: Optimize x ? 1024 : 0 to (int) x <<
|
|
||||||
10 [PR97690]
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3e190757fa332d327bee27495f37beb01155cfab
|
|
||||||
|
|
||||||
The following patch generalizes the x ? 1 : 0 -> (int) x optimization
|
|
||||||
to handle also left shifts by constant.
|
|
||||||
|
|
||||||
During x86_64-linux and i686-linux bootstraps + regtests it triggered
|
|
||||||
in 1514 unique non-LTO -m64 cases (sort -u on log mentioning
|
|
||||||
filename, function name and shift count) and 1866 -m32 cases.
|
|
||||||
|
|
||||||
Unfortunately, the patch regresses (before the tests have been adjusted):
|
|
||||||
+FAIL: gcc.dg/tree-ssa/ssa-ccp-11.c scan-tree-dump-times optimized "if " 0
|
|
||||||
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c -flto -ffat-lto-objects scan-tree-dump-times slp1 "optimized: basic block" 1
|
|
||||||
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c scan-tree-dump-times slp1 "optimized: basic block" 1
|
|
||||||
and in both cases it actually results in worse code.
|
|
||||||
|
|
||||||
> > We'd need some optimization that would go through all PHI edges and
|
|
||||||
> > compute if some use of the phi results don't actually compute a constant
|
|
||||||
> > across all the PHI edges - 1 & 0 and 0 & 1 is always 0.
|
|
||||||
|
|
||||||
> PRE should do this, IMHO only optimizing it at -O2 is fine.
|
|
||||||
|
|
||||||
> > Similarly, in the slp vectorization test there is:
|
|
||||||
> > a[0] = b[0] ? 1 : 7;
|
|
||||||
|
|
||||||
> note this, carefully avoiding the already "optimized" b[0] ? 1 : 0 ...
|
|
||||||
|
|
||||||
> So the option is to put : 7 in the 2, 4 an 8 case as well. The testcase
|
|
||||||
> wasn't added for any real-world case but is artificial I guess for
|
|
||||||
> COND_EXPR handling of invariants.
|
|
||||||
|
|
||||||
> But yeah, for things like SLP it means we eventually have to
|
|
||||||
> implement reverse transforms for all of this to make the lanes
|
|
||||||
> matching. But that's true anyway for things like x + 1 vs. x + 0
|
|
||||||
> or x / 3 vs. x / 2 or other simplifications we do.
|
|
||||||
|
|
||||||
2020-11-04 Jakub Jelinek <jakub@redhat.com>
|
|
||||||
|
|
||||||
PR tree-optimization/97690
|
|
||||||
* tree-ssa-phiopt.c (conditional_replacement): Also optimize
|
|
||||||
cond ? pow2p_cst : 0 as ((type) cond) << cst.
|
|
||||||
|
|
||||||
* gcc.dg/tree-ssa/phi-opt-22.c: New test.
|
|
||||||
* gcc.dg/tree-ssa/ssa-ccp-11.c: Use -O2 instead of -O1.
|
|
||||||
* gcc.dg/vect/bb-slp-pattern-2.c (foo): Use ? 2 : 7, ? 4 : 7 and
|
|
||||||
? 8 : 7 instead of ? 2 : 0, ? 4 : 0, ? 8 : 0.
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c | 11 ++++++
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c | 6 ++--
|
|
||||||
gcc/tree-ssa-phiopt.c | 38 ++++++++++++++------
|
|
||||||
4 files changed, 43 insertions(+), 14 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..fd3706666
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
|
||||||
@@ -0,0 +1,11 @@
|
|
||||||
+/* PR tree-optimization/97690 */
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
|
||||||
+
|
|
||||||
+int foo (_Bool d) { return d ? 2 : 0; }
|
|
||||||
+int bar (_Bool d) { return d ? 1 : 0; }
|
|
||||||
+int baz (_Bool d) { return d ? -__INT_MAX__ - 1 : 0; }
|
|
||||||
+int qux (_Bool d) { return d ? 1024 : 0; }
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times " << " 3 "phiopt2" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
|
|
||||||
index 36b8e7fc8..d70ea5a01 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O1 -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
|
||||||
|
|
||||||
/* Test for CPROP across a DAG. */
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
|
|
||||||
index d32cb7585..e64f0115a 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
|
|
||||||
@@ -13,13 +13,13 @@ foo (short * __restrict__ a, int * __restrict__ b, int stride)
|
|
||||||
for (i = 0; i < N/stride; i++, a += stride, b += stride)
|
|
||||||
{
|
|
||||||
a[0] = b[0] ? 1 : 7;
|
|
||||||
- a[1] = b[1] ? 2 : 0;
|
|
||||||
+ a[1] = b[1] ? 2 : 7;
|
|
||||||
a[2] = b[2] ? 3 : 0;
|
|
||||||
- a[3] = b[3] ? 4 : 0;
|
|
||||||
+ a[3] = b[3] ? 4 : 7;
|
|
||||||
a[4] = b[4] ? 5 : 0;
|
|
||||||
a[5] = b[5] ? 6 : 0;
|
|
||||||
a[6] = b[6] ? 7 : 0;
|
|
||||||
- a[7] = b[7] ? 8 : 0;
|
|
||||||
+ a[7] = b[7] ? 8 : 7;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 591b6435f..85587e8d1 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -753,7 +753,9 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
gimple_stmt_iterator gsi;
|
|
||||||
edge true_edge, false_edge;
|
|
||||||
tree new_var, new_var2;
|
|
||||||
- bool neg;
|
|
||||||
+ bool neg = false;
|
|
||||||
+ int shift = 0;
|
|
||||||
+ tree nonzero_arg;
|
|
||||||
|
|
||||||
/* FIXME: Gimplification of complex type is too hard for now. */
|
|
||||||
/* We aren't prepared to handle vectors either (and it is a question
|
|
||||||
@@ -764,14 +766,22 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
|| POINTER_TYPE_P (TREE_TYPE (arg1))))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
- /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
|
|
||||||
- convert it to the conditional. */
|
|
||||||
- if ((integer_zerop (arg0) && integer_onep (arg1))
|
|
||||||
- || (integer_zerop (arg1) && integer_onep (arg0)))
|
|
||||||
- neg = false;
|
|
||||||
- else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
|
|
||||||
- || (integer_zerop (arg1) && integer_all_onesp (arg0)))
|
|
||||||
+ /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
|
|
||||||
+ 0 and (1 << cst), then convert it to the conditional. */
|
|
||||||
+ if (integer_zerop (arg0))
|
|
||||||
+ nonzero_arg = arg1;
|
|
||||||
+ else if (integer_zerop (arg1))
|
|
||||||
+ nonzero_arg = arg0;
|
|
||||||
+ else
|
|
||||||
+ return false;
|
|
||||||
+ if (integer_all_onesp (nonzero_arg))
|
|
||||||
neg = true;
|
|
||||||
+ else if (integer_pow2p (nonzero_arg))
|
|
||||||
+ {
|
|
||||||
+ shift = tree_log2 (nonzero_arg);
|
|
||||||
+ if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
|
|
||||||
@@ -783,12 +793,12 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
falls through into BB.
|
|
||||||
|
|
||||||
There is a single PHI node at the join point (BB) and its arguments
|
|
||||||
- are constants (0, 1) or (0, -1).
|
|
||||||
+ are constants (0, 1) or (0, -1) or (0, (1 << shift)).
|
|
||||||
|
|
||||||
So, given the condition COND, and the two PHI arguments, we can
|
|
||||||
rewrite this PHI into non-branching code:
|
|
||||||
|
|
||||||
- dest = (COND) or dest = COND'
|
|
||||||
+ dest = (COND) or dest = COND' or dest = (COND) << shift
|
|
||||||
|
|
||||||
We use the condition as-is if the argument associated with the
|
|
||||||
true edge has the value one or the argument associated with the
|
|
||||||
@@ -823,6 +833,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
cond = fold_build1_loc (gimple_location (stmt),
|
|
||||||
NEGATE_EXPR, TREE_TYPE (cond), cond);
|
|
||||||
}
|
|
||||||
+ else if (shift)
|
|
||||||
+ {
|
|
||||||
+ cond = fold_convert_loc (gimple_location (stmt),
|
|
||||||
+ TREE_TYPE (result), cond);
|
|
||||||
+ cond = fold_build2_loc (gimple_location (stmt),
|
|
||||||
+ LSHIFT_EXPR, TREE_TYPE (cond), cond,
|
|
||||||
+ build_int_cst (integer_type_node, shift));
|
|
||||||
+ }
|
|
||||||
|
|
||||||
/* Insert our new statements at the end of conditional block before the
|
|
||||||
COND_STMT. */
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,92 +0,0 @@
|
|||||||
From 79a974bc7bb67cf425a7839f3c1f5689e41c7ee8 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jakub Jelinek <jakub@redhat.com>
|
|
||||||
Date: Tue, 9 Mar 2021 19:13:11 +0100
|
|
||||||
Subject: [PATCH 03/35] [Backport] phiopt: Fix up conditional_replacement
|
|
||||||
[PR99305]
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b610c30453d8e4cc88693d85a5a100d089640be5
|
|
||||||
|
|
||||||
Before my PR97690 changes, conditional_replacement would not set neg
|
|
||||||
when the nonzero arg was boolean true.
|
|
||||||
I've simplified the testing, so that it first finds the zero argument
|
|
||||||
and then checks the other argument for all the handled cases
|
|
||||||
(1, -1 and 1 << X, where the last case is what the patch added support for).
|
|
||||||
But, unfortunately I've placed the integer_all_onesp test first.
|
|
||||||
For unsigned precision 1 types such as bool integer_all_onesp, integer_onep
|
|
||||||
and integer_pow2p can all be true and the code set neg to true in that case,
|
|
||||||
which is undesirable.
|
|
||||||
|
|
||||||
The following patch tests integer_pow2p first (which is trivially true
|
|
||||||
for integer_onep too and tree_log2 in that case gives shift == 0)
|
|
||||||
and only if that isn't the case, integer_all_onesp.
|
|
||||||
|
|
||||||
2021-03-09 Jakub Jelinek <jakub@redhat.com>
|
|
||||||
|
|
||||||
PR tree-optimization/99305
|
|
||||||
* tree-ssa-phiopt.c (conditional_replacement): Test integer_pow2p
|
|
||||||
before integer_all_onesp instead of vice versa.
|
|
||||||
|
|
||||||
* g++.dg/opt/pr99305.C: New test.
|
|
||||||
---
|
|
||||||
gcc/testsuite/g++.dg/opt/pr99305.C | 26 ++++++++++++++++++++++++++
|
|
||||||
gcc/tree-ssa-phiopt.c | 6 +++---
|
|
||||||
2 files changed, 29 insertions(+), 3 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/g++.dg/opt/pr99305.C
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/g++.dg/opt/pr99305.C b/gcc/testsuite/g++.dg/opt/pr99305.C
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..8a91277e7
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/g++.dg/opt/pr99305.C
|
|
||||||
@@ -0,0 +1,26 @@
|
|
||||||
+// PR tree-optimization/99305
|
|
||||||
+// { dg-do compile }
|
|
||||||
+// { dg-options "-O3 -fno-ipa-icf -fdump-tree-optimized" }
|
|
||||||
+// { dg-final { scan-tree-dump-times " = \\\(unsigned char\\\) c_\[0-9]*\\\(D\\\);" 3 "optimized" } }
|
|
||||||
+// { dg-final { scan-tree-dump-times " = \[^\n\r]* \\+ \[0-9]*;" 3 "optimized" } }
|
|
||||||
+// { dg-final { scan-tree-dump-times " = \[^\n\r]* <= 9;" 3 "optimized" } }
|
|
||||||
+// { dg-final { scan-tree-dump-not "if \\\(c_\[0-9]*\\\(D\\\) \[!=]= 0\\\)" "optimized" } }
|
|
||||||
+// { dg-final { scan-tree-dump-not " = PHI <" "optimized" } }
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+foo (char c)
|
|
||||||
+{
|
|
||||||
+ return c >= 48 && c <= 57;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+bar (char c)
|
|
||||||
+{
|
|
||||||
+ return c != 0 && foo (c);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+baz (char c)
|
|
||||||
+{
|
|
||||||
+ return c != 0 && c >= 48 && c <= 57;
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 85587e8d1..b9be28474 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -774,14 +774,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
nonzero_arg = arg0;
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
- if (integer_all_onesp (nonzero_arg))
|
|
||||||
- neg = true;
|
|
||||||
- else if (integer_pow2p (nonzero_arg))
|
|
||||||
+ if (integer_pow2p (nonzero_arg))
|
|
||||||
{
|
|
||||||
shift = tree_log2 (nonzero_arg);
|
|
||||||
if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
+ else if (integer_all_onesp (nonzero_arg))
|
|
||||||
+ neg = true;
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,122 +0,0 @@
|
|||||||
From 09263d5ed4d81a008ca8ffcc2883dc766e7874d5 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jakub Jelinek <jakub@redhat.com>
|
|
||||||
Date: Sun, 6 Dec 2020 10:58:10 +0100
|
|
||||||
Subject: [PATCH 04/35] [Backport] phiopt: Handle bool in two_value_replacement
|
|
||||||
[PR96232]
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=8c23434fdadcf4caa1f0e966294c5f67ccf4bcf9
|
|
||||||
|
|
||||||
The following patch improves code generation on the included testcase by
|
|
||||||
enabling two_value_replacement on booleans. It does that only for arg0/arg1
|
|
||||||
values that conditional_replacement doesn't handle. Additionally
|
|
||||||
it limits two_value_replacement optimization to the late phiopt like
|
|
||||||
conditional_replacement.
|
|
||||||
|
|
||||||
2020-12-06 Jakub Jelinek <jakub@redhat.com>
|
|
||||||
|
|
||||||
PR tree-optimization/96232
|
|
||||||
* tree-ssa-phiopt.c (two_value_replacement): Optimize even boolean lhs
|
|
||||||
cases as long as arg0 has wider precision and conditional_replacement
|
|
||||||
doesn't handle that case.
|
|
||||||
(tree_ssa_phiopt_worker): Don't call two_value_replacement during
|
|
||||||
early phiopt.
|
|
||||||
|
|
||||||
* gcc.dg/tree-ssa/pr96232-2.c: New test.
|
|
||||||
* gcc.dg/tree-ssa/pr88676-2.c: Check phiopt2 dump rather than phiopt1.
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c | 4 ++--
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c | 18 ++++++++++++++++++
|
|
||||||
gcc/tree-ssa-phiopt.c | 23 +++++++++++++++++++----
|
|
||||||
3 files changed, 39 insertions(+), 6 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
|
|
||||||
index 0e616365b..ea88407b6 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
|
|
||||||
@@ -1,7 +1,7 @@
|
|
||||||
/* PR tree-optimization/88676 */
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O2 -fdump-tree-phiopt1" } */
|
|
||||||
-/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt1" { target le } } } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
|
||||||
+/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt2" { target le } } } */
|
|
||||||
|
|
||||||
struct foo1 {
|
|
||||||
int i:1;
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..9f51820ed
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
|
|
||||||
@@ -0,0 +1,18 @@
|
|
||||||
+/* PR tree-optimization/96232 */
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-final { scan-tree-dump " 38 - " "optimized" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump " \\+ 97;" "optimized" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+foo (_Bool x)
|
|
||||||
+{
|
|
||||||
+ return x ? 37 : 38;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+bar (_Bool x)
|
|
||||||
+{
|
|
||||||
+ return x ? 98 : 97;
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index b9be28474..0623d740d 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -339,7 +339,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Do the replacement of conditional if it can be done. */
|
|
||||||
- if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
|
|
||||||
+ if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (!early_p
|
|
||||||
&& conditional_replacement (bb, bb1, e1, e2, phi,
|
|
||||||
@@ -636,7 +636,6 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
|
|
||||||
if (TREE_CODE (lhs) != SSA_NAME
|
|
||||||
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
|
|
||||||
- || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
|
|
||||||
|| TREE_CODE (rhs) != INTEGER_CST)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
@@ -649,9 +648,25 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ /* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to
|
|
||||||
+ conditional_replacement. */
|
|
||||||
+ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
|
|
||||||
+ && (integer_zerop (arg0)
|
|
||||||
+ || integer_zerop (arg1)
|
|
||||||
+ || TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE
|
|
||||||
+ || (TYPE_PRECISION (TREE_TYPE (arg0))
|
|
||||||
+ <= TYPE_PRECISION (TREE_TYPE (lhs)))))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
wide_int min, max;
|
|
||||||
- if (get_range_info (lhs, &min, &max) != VR_RANGE
|
|
||||||
- || min + 1 != max
|
|
||||||
+ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
|
|
||||||
+ {
|
|
||||||
+ min = wi::to_wide (boolean_false_node);
|
|
||||||
+ max = wi::to_wide (boolean_true_node);
|
|
||||||
+ }
|
|
||||||
+ else if (get_range_info (lhs, &min, &max) != VR_RANGE)
|
|
||||||
+ return false;
|
|
||||||
+ if (min + 1 != max
|
|
||||||
|| (wi::to_wide (rhs) != min
|
|
||||||
&& wi::to_wide (rhs) != max))
|
|
||||||
return false;
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,256 +0,0 @@
|
|||||||
From a92cf465f10585350f7cd5739457c3f2852cfc86 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jakub Jelinek <jakub@redhat.com>
|
|
||||||
Date: Wed, 21 Oct 2020 10:51:33 +0200
|
|
||||||
Subject: [PATCH 05/35] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32
|
|
||||||
in GIMPLE [PR97503]
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=5244b4af5e47bc98a2a9cf36f048981583a1b163
|
|
||||||
|
|
||||||
While we have at the RTL level noce_try_ifelse_collapse combined with
|
|
||||||
simplify_cond_clz_ctz, that optimization doesn't always trigger because
|
|
||||||
e.g. on powerpc there is a define_insn to compare a reg against zero and
|
|
||||||
copy that register to another one and so we end up with a different pseudo
|
|
||||||
in the simplify_cond_clz_ctz test and punt.
|
|
||||||
|
|
||||||
For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes,
|
|
||||||
we can optimize it already in phiopt though, just need to ensure that
|
|
||||||
we transform the __builtin_c?z* calls into .C?Z ifns because my recent
|
|
||||||
VRP changes codified that the builtin calls are always undefined at zero,
|
|
||||||
while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2.
|
|
||||||
And, in phiopt we already have popcount handling that does pretty much the
|
|
||||||
same thing, except for always using a zero value rather than the one set
|
|
||||||
by C?Z_DEFINED_VALUE_AT_ZERO.
|
|
||||||
|
|
||||||
So, this patch extends that function to handle not just popcount, but also
|
|
||||||
clz and ctz.
|
|
||||||
|
|
||||||
2020-10-21 Jakub Jelinek <jakub@redhat.com>
|
|
||||||
|
|
||||||
PR tree-optimization/97503
|
|
||||||
* tree-ssa-phiopt.c: Include internal-fn.h.
|
|
||||||
(cond_removal_in_popcount_pattern): Rename to ...
|
|
||||||
(cond_removal_in_popcount_clz_ctz_pattern): ... this. Handle not just
|
|
||||||
popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2.
|
|
||||||
|
|
||||||
* gcc.dg/tree-ssa/pr97503.c: New test.
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/pr97503.c | 19 +++++
|
|
||||||
gcc/tree-ssa-phiopt.c | 100 ++++++++++++++++++------
|
|
||||||
2 files changed, 95 insertions(+), 24 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..3a3dae6c7
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
|
|
||||||
@@ -0,0 +1,19 @@
|
|
||||||
+/* PR tree-optimization/97503 */
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+foo (int x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_clz (x) : 32;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+bar (unsigned long long x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_clzll (x) : 64;
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 0623d740d..c1e11916e 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "tree-inline.h"
|
|
||||||
#include "case-cfn-macros.h"
|
|
||||||
#include "tree-eh.h"
|
|
||||||
+#include "internal-fn.h"
|
|
||||||
|
|
||||||
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
|
|
||||||
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
|
|
||||||
@@ -60,8 +61,9 @@ static bool minmax_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gimple *, tree, tree);
|
|
||||||
static bool abs_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gimple *, tree, tree);
|
|
||||||
-static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
|
|
||||||
- edge, edge, gimple *, tree, tree);
|
|
||||||
+static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
|
||||||
+ edge, edge, gimple *,
|
|
||||||
+ tree, tree);
|
|
||||||
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
|
|
||||||
hash_set<tree> *);
|
|
||||||
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
|
|
||||||
@@ -348,8 +350,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (!early_p
|
|
||||||
- && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
|
|
||||||
- phi, arg0, arg1))
|
|
||||||
+ && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
|
|
||||||
+ e2, phi, arg0,
|
|
||||||
+ arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
@@ -1771,16 +1774,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
|
|
||||||
<bb 4>
|
|
||||||
c_12 = PHI <_9(2)>
|
|
||||||
-*/
|
|
||||||
+
|
|
||||||
+ Similarly for __builtin_clz or __builtin_ctz if
|
|
||||||
+ C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and
|
|
||||||
+ instead of 0 above it uses the value from that macro. */
|
|
||||||
|
|
||||||
static bool
|
|
||||||
-cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
- edge e1, edge e2,
|
|
||||||
- gimple *phi, tree arg0, tree arg1)
|
|
||||||
+cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
|
||||||
+ basic_block middle_bb,
|
|
||||||
+ edge e1, edge e2, gimple *phi,
|
|
||||||
+ tree arg0, tree arg1)
|
|
||||||
{
|
|
||||||
gimple *cond;
|
|
||||||
gimple_stmt_iterator gsi, gsi_from;
|
|
||||||
- gimple *popcount;
|
|
||||||
+ gimple *call;
|
|
||||||
gimple *cast = NULL;
|
|
||||||
tree lhs, arg;
|
|
||||||
|
|
||||||
@@ -1798,35 +1805,67 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
gsi_next_nondebug (&gsi);
|
|
||||||
if (!gsi_end_p (gsi))
|
|
||||||
{
|
|
||||||
- popcount = gsi_stmt (gsi);
|
|
||||||
+ call = gsi_stmt (gsi);
|
|
||||||
gsi_next_nondebug (&gsi);
|
|
||||||
if (!gsi_end_p (gsi))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
- popcount = cast;
|
|
||||||
+ call = cast;
|
|
||||||
cast = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
- /* Check that we have a popcount builtin. */
|
|
||||||
- if (!is_gimple_call (popcount))
|
|
||||||
+ /* Check that we have a popcount/clz/ctz builtin. */
|
|
||||||
+ if (!is_gimple_call (call) || gimple_call_num_args (call) != 1)
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ arg = gimple_call_arg (call, 0);
|
|
||||||
+ lhs = gimple_get_lhs (call);
|
|
||||||
+
|
|
||||||
+ if (lhs == NULL_TREE)
|
|
||||||
return false;
|
|
||||||
- combined_fn cfn = gimple_call_combined_fn (popcount);
|
|
||||||
+
|
|
||||||
+ combined_fn cfn = gimple_call_combined_fn (call);
|
|
||||||
+ internal_fn ifn = IFN_LAST;
|
|
||||||
+ int val = 0;
|
|
||||||
switch (cfn)
|
|
||||||
{
|
|
||||||
CASE_CFN_POPCOUNT:
|
|
||||||
break;
|
|
||||||
+ CASE_CFN_CLZ:
|
|
||||||
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
|
|
||||||
+ {
|
|
||||||
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
|
|
||||||
+ if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
|
|
||||||
+ OPTIMIZE_FOR_BOTH)
|
|
||||||
+ && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
|
|
||||||
+ {
|
|
||||||
+ ifn = IFN_CLZ;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return false;
|
|
||||||
+ CASE_CFN_CTZ:
|
|
||||||
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
|
|
||||||
+ {
|
|
||||||
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
|
|
||||||
+ if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg),
|
|
||||||
+ OPTIMIZE_FOR_BOTH)
|
|
||||||
+ && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
|
|
||||||
+ {
|
|
||||||
+ ifn = IFN_CTZ;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return false;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
- arg = gimple_call_arg (popcount, 0);
|
|
||||||
- lhs = gimple_get_lhs (popcount);
|
|
||||||
-
|
|
||||||
if (cast)
|
|
||||||
{
|
|
||||||
- /* We have a cast stmt feeding popcount builtin. */
|
|
||||||
+ /* We have a cast stmt feeding popcount/clz/ctz builtin. */
|
|
||||||
/* Check that we have a cast prior to that. */
|
|
||||||
if (gimple_code (cast) != GIMPLE_ASSIGN
|
|
||||||
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
|
|
||||||
@@ -1839,7 +1878,7 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
|
|
||||||
cond = last_stmt (cond_bb);
|
|
||||||
|
|
||||||
- /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
|
|
||||||
+ /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz
|
|
||||||
builtin. */
|
|
||||||
if (gimple_code (cond) != GIMPLE_COND
|
|
||||||
|| (gimple_cond_code (cond) != NE_EXPR
|
|
||||||
@@ -1859,10 +1898,13 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check PHI arguments. */
|
|
||||||
- if (lhs != arg0 || !integer_zerop (arg1))
|
|
||||||
+ if (lhs != arg0
|
|
||||||
+ || TREE_CODE (arg1) != INTEGER_CST
|
|
||||||
+ || wi::to_wide (arg1) != val)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
- /* And insert the popcount builtin and cast stmt before the cond_bb. */
|
|
||||||
+ /* And insert the popcount/clz/ctz builtin and cast stmt before the
|
|
||||||
+ cond_bb. */
|
|
||||||
gsi = gsi_last_bb (cond_bb);
|
|
||||||
if (cast)
|
|
||||||
{
|
|
||||||
@@ -1870,9 +1912,19 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
gsi_move_before (&gsi_from, &gsi);
|
|
||||||
reset_flow_sensitive_info (gimple_get_lhs (cast));
|
|
||||||
}
|
|
||||||
- gsi_from = gsi_for_stmt (popcount);
|
|
||||||
- gsi_move_before (&gsi_from, &gsi);
|
|
||||||
- reset_flow_sensitive_info (gimple_get_lhs (popcount));
|
|
||||||
+ gsi_from = gsi_for_stmt (call);
|
|
||||||
+ if (ifn == IFN_LAST || gimple_call_internal_p (call))
|
|
||||||
+ gsi_move_before (&gsi_from, &gsi);
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only
|
|
||||||
+ the latter is well defined at zero. */
|
|
||||||
+ call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0));
|
|
||||||
+ gimple_call_set_lhs (call, lhs);
|
|
||||||
+ gsi_insert_before (&gsi, call, GSI_SAME_STMT);
|
|
||||||
+ gsi_remove (&gsi_from, true);
|
|
||||||
+ }
|
|
||||||
+ reset_flow_sensitive_info (lhs);
|
|
||||||
|
|
||||||
/* Now update the PHI and remove unneeded bbs. */
|
|
||||||
replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,69 +0,0 @@
|
|||||||
From 7d5d2ab082ce9986db4f3313013b44faa46bc412 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jakub Jelinek <jakub@redhat.com>
|
|
||||||
Date: Thu, 22 Oct 2020 09:34:28 +0200
|
|
||||||
Subject: [PATCH 06/35] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32
|
|
||||||
in GIMPLE fallout [PR97503]
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=ef2d3ec325b1b720df5da20784eba46249af2294
|
|
||||||
|
|
||||||
> this broke sparc-sun-solaris2.11 bootstrap
|
|
||||||
>
|
|
||||||
> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c: In function 'bool cond_removal_in_popcount_clz_ctz_pattern(basic_block, basic_block, edge, edge, gimple*, tree, tree)':
|
|
||||||
> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c:1858:27: error: variable 'mode' set but not used [-Werror=unused-but-set-variable]
|
|
||||||
> 1858 | scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
|
|
||||||
> | ^~~~
|
|
||||||
>
|
|
||||||
>
|
|
||||||
> and doubtlessly several other targets that use the defaults.h definition of
|
|
||||||
>
|
|
||||||
> #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) 0
|
|
||||||
|
|
||||||
Ugh, seems many of those macros do not evaluate the first argument.
|
|
||||||
This got broken by the change to direct_internal_fn_supported_p, previously
|
|
||||||
it used mode also in the optab test.
|
|
||||||
|
|
||||||
2020-10-22 Jakub Jelinek <jakub@redhat.com>
|
|
||||||
|
|
||||||
* tree-ssa-phiopt.c (cond_removal_in_popcount_clz_ctz_pattern):
|
|
||||||
For CLZ and CTZ tests, use type temporary instead of mode.
|
|
||||||
---
|
|
||||||
gcc/tree-ssa-phiopt.c | 16 ++++++++--------
|
|
||||||
1 file changed, 8 insertions(+), 8 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index c1e11916e..707a5882e 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -1836,10 +1836,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
|
||||||
CASE_CFN_CLZ:
|
|
||||||
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
|
|
||||||
{
|
|
||||||
- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
|
|
||||||
- if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
|
|
||||||
- OPTIMIZE_FOR_BOTH)
|
|
||||||
- && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
|
|
||||||
+ tree type = TREE_TYPE (arg);
|
|
||||||
+ if (direct_internal_fn_supported_p (IFN_CLZ, type, OPTIMIZE_FOR_BOTH)
|
|
||||||
+ && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
|
|
||||||
+ val) == 2)
|
|
||||||
{
|
|
||||||
ifn = IFN_CLZ;
|
|
||||||
break;
|
|
||||||
@@ -1849,10 +1849,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
|
||||||
CASE_CFN_CTZ:
|
|
||||||
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
|
|
||||||
{
|
|
||||||
- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
|
|
||||||
- if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg),
|
|
||||||
- OPTIMIZE_FOR_BOTH)
|
|
||||||
- && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
|
|
||||||
+ tree type = TREE_TYPE (arg);
|
|
||||||
+ if (direct_internal_fn_supported_p (IFN_CTZ, type, OPTIMIZE_FOR_BOTH)
|
|
||||||
+ && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
|
|
||||||
+ val) == 2)
|
|
||||||
{
|
|
||||||
ifn = IFN_CTZ;
|
|
||||||
break;
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,218 +0,0 @@
|
|||||||
From 018523df11698dd0e2d42326c57bdf724a7a1aa5 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jakub Jelinek <jakub@redhat.com>
|
|
||||||
Date: Tue, 5 Jan 2021 16:35:22 +0100
|
|
||||||
Subject: [PATCH 07/35] [Backport] phiopt: Optimize x < 0 ? ~y : y to (x >> 31)
|
|
||||||
^ y [PR96928]
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=576714b309b330df0e80e34114bcdf0bba35e146
|
|
||||||
|
|
||||||
As requested in the PR, the one's complement abs can be done more
|
|
||||||
efficiently without cmov or branching.
|
|
||||||
|
|
||||||
Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize
|
|
||||||
it in ifcvt, on x86_64 with -m32 we generate in the end the exact same
|
|
||||||
code, but with -m64:
|
|
||||||
movl %edi, %eax
|
|
||||||
- notl %eax
|
|
||||||
- cmpl %edi, %eax
|
|
||||||
- cmovl %edi, %eax
|
|
||||||
+ sarl $31, %eax
|
|
||||||
+ xorl %edi, %eax
|
|
||||||
ret
|
|
||||||
|
|
||||||
2021-01-05 Jakub Jelinek <jakub@redhat.com>
|
|
||||||
|
|
||||||
PR tree-optimization/96928
|
|
||||||
* tree-ssa-phiopt.c (xor_replacement): New function.
|
|
||||||
(tree_ssa_phiopt_worker): Call it.
|
|
||||||
|
|
||||||
* gcc.dg/tree-ssa/pr96928.c: New test.
|
|
||||||
* gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1,
|
|
||||||
instead of scanning rtl dump for ifcvt message check assembly
|
|
||||||
for xor instruction.
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 38 +++++++++
|
|
||||||
gcc/tree-ssa-phiopt.c | 108 ++++++++++++++++++++++++
|
|
||||||
2 files changed, 146 insertions(+)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..209135726
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
|
||||||
@@ -0,0 +1,38 @@
|
|
||||||
+/* PR tree-optimization/96928 */
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+foo (int a)
|
|
||||||
+{
|
|
||||||
+ return a < 0 ? ~a : a;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+bar (int a, int b)
|
|
||||||
+{
|
|
||||||
+ return a < 0 ? ~b : b;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+unsigned
|
|
||||||
+baz (int a, unsigned int b)
|
|
||||||
+{
|
|
||||||
+ return a < 0 ? ~b : b;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+unsigned
|
|
||||||
+qux (int a, unsigned int c)
|
|
||||||
+{
|
|
||||||
+ return a >= 0 ? ~c : c;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+corge (int a, int b)
|
|
||||||
+{
|
|
||||||
+ return a >= 0 ? b : ~b;
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 707a5882e..b9cd07a60 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -61,6 +61,8 @@ static bool minmax_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gimple *, tree, tree);
|
|
||||||
static bool abs_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gimple *, tree, tree);
|
|
||||||
+static bool xor_replacement (basic_block, basic_block,
|
|
||||||
+ edge, edge, gimple *, tree, tree);
|
|
||||||
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
|
||||||
edge, edge, gimple *,
|
|
||||||
tree, tree);
|
|
||||||
@@ -349,6 +351,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
+ else if (!early_p
|
|
||||||
+ && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
+ cfgchanged = true;
|
|
||||||
else if (!early_p
|
|
||||||
&& cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
|
|
||||||
e2, phi, arg0,
|
|
||||||
@@ -2059,6 +2064,109 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+xor_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
+ edge e0 ATTRIBUTE_UNUSED, edge e1,
|
|
||||||
+ gimple *phi, tree arg0, tree arg1)
|
|
||||||
+{
|
|
||||||
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ /* OTHER_BLOCK must have only one executable statement which must have the
|
|
||||||
+ form arg0 = ~arg1 or arg1 = ~arg0. */
|
|
||||||
+
|
|
||||||
+ gimple *assign = last_and_only_stmt (middle_bb);
|
|
||||||
+ /* If we did not find the proper one's complement assignment, then we cannot
|
|
||||||
+ optimize. */
|
|
||||||
+ if (assign == NULL)
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ /* If we got here, then we have found the only executable statement
|
|
||||||
+ in OTHER_BLOCK. If it is anything other than arg0 = ~arg1 or
|
|
||||||
+ arg1 = ~arg0, then we cannot optimize. */
|
|
||||||
+ if (!is_gimple_assign (assign))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ tree lhs = gimple_assign_lhs (assign);
|
|
||||||
+ tree rhs = gimple_assign_rhs1 (assign);
|
|
||||||
+
|
|
||||||
+ /* The assignment has to be arg0 = ~arg1 or arg1 = ~arg0. */
|
|
||||||
+ if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ gimple *cond = last_stmt (cond_bb);
|
|
||||||
+ tree result = PHI_RESULT (phi);
|
|
||||||
+
|
|
||||||
+ /* Only relationals comparing arg[01] against zero are interesting. */
|
|
||||||
+ enum tree_code cond_code = gimple_cond_code (cond);
|
|
||||||
+ if (cond_code != LT_EXPR && cond_code != GE_EXPR)
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ /* Make sure the conditional is x OP 0. */
|
|
||||||
+ tree clhs = gimple_cond_lhs (cond);
|
|
||||||
+ if (TREE_CODE (clhs) != SSA_NAME
|
|
||||||
+ || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
|
|
||||||
+ || TYPE_UNSIGNED (TREE_TYPE (clhs))
|
|
||||||
+ || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
|
|
||||||
+ || !integer_zerop (gimple_cond_rhs (cond)))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ /* We need to know which is the true edge and which is the false
|
|
||||||
+ edge so that we know if we have xor or inverted xor. */
|
|
||||||
+ edge true_edge, false_edge;
|
|
||||||
+ extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
|
|
||||||
+
|
|
||||||
+ /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
|
|
||||||
+ will need to invert the result. Similarly for LT_EXPR if
|
|
||||||
+ the false edge goes to OTHER_BLOCK. */
|
|
||||||
+ edge e;
|
|
||||||
+ if (cond_code == GE_EXPR)
|
|
||||||
+ e = true_edge;
|
|
||||||
+ else
|
|
||||||
+ e = false_edge;
|
|
||||||
+
|
|
||||||
+ bool invert = e->dest == middle_bb;
|
|
||||||
+
|
|
||||||
+ result = duplicate_ssa_name (result, NULL);
|
|
||||||
+
|
|
||||||
+ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
|
|
||||||
+
|
|
||||||
+ int prec = TYPE_PRECISION (TREE_TYPE (clhs));
|
|
||||||
+ gimple *new_stmt
|
|
||||||
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
|
|
||||||
+ build_int_cst (integer_type_node, prec - 1));
|
|
||||||
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
|
||||||
+
|
|
||||||
+ if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs)))
|
|
||||||
+ {
|
|
||||||
+ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
|
|
||||||
+ NOP_EXPR, gimple_assign_lhs (new_stmt));
|
|
||||||
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
|
||||||
+ }
|
|
||||||
+ lhs = gimple_assign_lhs (new_stmt);
|
|
||||||
+
|
|
||||||
+ if (invert)
|
|
||||||
+ {
|
|
||||||
+ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
|
|
||||||
+ BIT_NOT_EXPR, rhs);
|
|
||||||
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
|
||||||
+ rhs = gimple_assign_lhs (new_stmt);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs);
|
|
||||||
+ gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
|
|
||||||
+
|
|
||||||
+ replace_phi_edge_with_variable (cond_bb, e1, phi, result);
|
|
||||||
+
|
|
||||||
+ /* Note that we optimized this PHI. */
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Auxiliary functions to determine the set of memory accesses which
|
|
||||||
can't trap because they are preceded by accesses to the same memory
|
|
||||||
portion. We do that for MEM_REFs, so we only need to track
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,253 +0,0 @@
|
|||||||
From 96afd5b761a74e9eef40a2e843810c503c669de8 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Eric Botcazou <ebotcazou@gcc.gnu.org>
|
|
||||||
Date: Thu, 28 May 2020 00:31:15 +0200
|
|
||||||
Subject: [PATCH 09/35] [Backport] Add support for __builtin_bswap128
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=fe7ebef7fe4f9acb79658ed9db0749b07efc3105
|
|
||||||
|
|
||||||
This patch introduces a new builtin named __builtin_bswap128 on targets
|
|
||||||
where TImode is supported, i.e. 64-bit targets only in practice. The
|
|
||||||
implementation simply reuses the existing double word path in optab, so
|
|
||||||
no routine is added to libgcc (which means that you get two calls to
|
|
||||||
_bswapdi2 in the worst case).
|
|
||||||
|
|
||||||
gcc/ChangeLog:
|
|
||||||
|
|
||||||
* builtin-types.def (BT_UINT128): New primitive type.
|
|
||||||
(BT_FN_UINT128_UINT128): New function type.
|
|
||||||
* builtins.def (BUILT_IN_BSWAP128): New GCC builtin.
|
|
||||||
* doc/extend.texi (__builtin_bswap128): Document it.
|
|
||||||
* builtins.c (expand_builtin): Deal with BUILT_IN_BSWAP128.
|
|
||||||
(is_inexpensive_builtin): Likewise.
|
|
||||||
* fold-const-call.c (fold_const_call_ss): Likewise.
|
|
||||||
* fold-const.c (tree_call_nonnegative_warnv_p): Likewise.
|
|
||||||
* tree-ssa-ccp.c (evaluate_stmt): Likewise.
|
|
||||||
* tree-vect-stmts.c (vect_get_data_ptr_increment): Likewise.
|
|
||||||
(vectorizable_call): Likewise.
|
|
||||||
* optabs.c (expand_unop): Always use the double word path for it.
|
|
||||||
* tree-core.h (enum tree_index): Add TI_UINT128_TYPE.
|
|
||||||
* tree.h (uint128_type_node): New global type.
|
|
||||||
* tree.c (build_common_tree_nodes): Build it if TImode is supported.
|
|
||||||
|
|
||||||
gcc/testsuite/ChangeLog:
|
|
||||||
|
|
||||||
* gcc.dg/builtin-bswap-10.c: New test.
|
|
||||||
* gcc.dg/builtin-bswap-11.c: Likewise.
|
|
||||||
* gcc.dg/builtin-bswap-12.c: Likewise.
|
|
||||||
* gcc.target/i386/builtin-bswap-5.c: Likewise.
|
|
||||||
---
|
|
||||||
gcc/builtin-types.def | 4 ++++
|
|
||||||
gcc/builtins.c | 2 ++
|
|
||||||
gcc/builtins.def | 2 ++
|
|
||||||
gcc/doc/extend.texi | 10 ++++++++--
|
|
||||||
gcc/fold-const-call.c | 1 +
|
|
||||||
gcc/fold-const.c | 2 ++
|
|
||||||
gcc/optabs.c | 5 ++++-
|
|
||||||
gcc/tree-core.h | 1 +
|
|
||||||
gcc/tree-ssa-ccp.c | 1 +
|
|
||||||
gcc/tree-vect-stmts.c | 5 +++--
|
|
||||||
gcc/tree.c | 2 ++
|
|
||||||
gcc/tree.h | 1 +
|
|
||||||
12 files changed, 31 insertions(+), 5 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
|
|
||||||
index c7aa691b2..c46b1bc5c 100644
|
|
||||||
--- a/gcc/builtin-types.def
|
|
||||||
+++ b/gcc/builtin-types.def
|
|
||||||
@@ -73,6 +73,9 @@ DEF_PRIMITIVE_TYPE (BT_UINT8, unsigned_char_type_node)
|
|
||||||
DEF_PRIMITIVE_TYPE (BT_UINT16, uint16_type_node)
|
|
||||||
DEF_PRIMITIVE_TYPE (BT_UINT32, uint32_type_node)
|
|
||||||
DEF_PRIMITIVE_TYPE (BT_UINT64, uint64_type_node)
|
|
||||||
+DEF_PRIMITIVE_TYPE (BT_UINT128, uint128_type_node
|
|
||||||
+ ? uint128_type_node
|
|
||||||
+ : error_mark_node)
|
|
||||||
DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1))
|
|
||||||
DEF_PRIMITIVE_TYPE (BT_UNWINDWORD, (*lang_hooks.types.type_for_mode)
|
|
||||||
(targetm.unwind_word_mode (), 1))
|
|
||||||
@@ -300,6 +303,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_UINT8_FLOAT, BT_UINT8, BT_FLOAT)
|
|
||||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT16_UINT16, BT_UINT16, BT_UINT16)
|
|
||||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_UINT32, BT_UINT32, BT_UINT32)
|
|
||||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_UINT64, BT_UINT64, BT_UINT64)
|
|
||||||
+DEF_FUNCTION_TYPE_1 (BT_FN_UINT128_UINT128, BT_UINT128, BT_UINT128)
|
|
||||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_FLOAT, BT_UINT64, BT_FLOAT)
|
|
||||||
DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT)
|
|
||||||
DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_PTR, BT_BOOL, BT_PTR)
|
|
||||||
diff --git a/gcc/builtins.c b/gcc/builtins.c
|
|
||||||
index 10b6fd3bb..1b1c75cc1 100644
|
|
||||||
--- a/gcc/builtins.c
|
|
||||||
+++ b/gcc/builtins.c
|
|
||||||
@@ -8015,6 +8015,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
|
|
||||||
case BUILT_IN_BSWAP16:
|
|
||||||
case BUILT_IN_BSWAP32:
|
|
||||||
case BUILT_IN_BSWAP64:
|
|
||||||
+ case BUILT_IN_BSWAP128:
|
|
||||||
target = expand_builtin_bswap (target_mode, exp, target, subtarget);
|
|
||||||
if (target)
|
|
||||||
return target;
|
|
||||||
@@ -11732,6 +11733,7 @@ is_inexpensive_builtin (tree decl)
|
|
||||||
case BUILT_IN_BSWAP16:
|
|
||||||
case BUILT_IN_BSWAP32:
|
|
||||||
case BUILT_IN_BSWAP64:
|
|
||||||
+ case BUILT_IN_BSWAP128:
|
|
||||||
case BUILT_IN_CLZ:
|
|
||||||
case BUILT_IN_CLZIMAX:
|
|
||||||
case BUILT_IN_CLZL:
|
|
||||||
diff --git a/gcc/builtins.def b/gcc/builtins.def
|
|
||||||
index fa8b0641a..ee67ac15d 100644
|
|
||||||
--- a/gcc/builtins.def
|
|
||||||
+++ b/gcc/builtins.def
|
|
||||||
@@ -834,6 +834,8 @@ DEF_GCC_BUILTIN (BUILT_IN_APPLY_ARGS, "apply_args", BT_FN_PTR_VAR, ATTR_L
|
|
||||||
DEF_GCC_BUILTIN (BUILT_IN_BSWAP16, "bswap16", BT_FN_UINT16_UINT16, ATTR_CONST_NOTHROW_LEAF_LIST)
|
|
||||||
DEF_GCC_BUILTIN (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST)
|
|
||||||
DEF_GCC_BUILTIN (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST)
|
|
||||||
+DEF_GCC_BUILTIN (BUILT_IN_BSWAP128, "bswap128", BT_FN_UINT128_UINT128, ATTR_CONST_NOTHROW_LEAF_LIST)
|
|
||||||
+
|
|
||||||
DEF_EXT_LIB_BUILTIN (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST)
|
|
||||||
/* [trans-mem]: Adjust BUILT_IN_TM_CALLOC if BUILT_IN_CALLOC is changed. */
|
|
||||||
DEF_LIB_BUILTIN (BUILT_IN_CALLOC, "calloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_WARN_UNUSED_RESULT_SIZE_1_2_NOTHROW_LEAF_LIST)
|
|
||||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
|
||||||
index 9c7345959..a7bd772de 100644
|
|
||||||
--- a/gcc/doc/extend.texi
|
|
||||||
+++ b/gcc/doc/extend.texi
|
|
||||||
@@ -13727,14 +13727,20 @@ exactly 8 bits.
|
|
||||||
|
|
||||||
@deftypefn {Built-in Function} uint32_t __builtin_bswap32 (uint32_t x)
|
|
||||||
Similar to @code{__builtin_bswap16}, except the argument and return types
|
|
||||||
-are 32 bit.
|
|
||||||
+are 32-bit.
|
|
||||||
@end deftypefn
|
|
||||||
|
|
||||||
@deftypefn {Built-in Function} uint64_t __builtin_bswap64 (uint64_t x)
|
|
||||||
Similar to @code{__builtin_bswap32}, except the argument and return types
|
|
||||||
-are 64 bit.
|
|
||||||
+are 64-bit.
|
|
||||||
@end deftypefn
|
|
||||||
|
|
||||||
+@deftypefn {Built-in Function} uint128_t __builtin_bswap128 (uint128_t x)
|
|
||||||
+Similar to @code{__builtin_bswap64}, except the argument and return types
|
|
||||||
+are 128-bit. Only supported on targets where 128-bit types are supported.
|
|
||||||
+@end deftypefn
|
|
||||||
+
|
|
||||||
+
|
|
||||||
@deftypefn {Built-in Function} Pmode __builtin_extend_pointer (void * x)
|
|
||||||
On targets where the user visible pointer size is smaller than the size
|
|
||||||
of an actual hardware address this function returns the extended user
|
|
||||||
diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
|
|
||||||
index 6150d7ada..da01759d9 100644
|
|
||||||
--- a/gcc/fold-const-call.c
|
|
||||||
+++ b/gcc/fold-const-call.c
|
|
||||||
@@ -1032,6 +1032,7 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg,
|
|
||||||
case CFN_BUILT_IN_BSWAP16:
|
|
||||||
case CFN_BUILT_IN_BSWAP32:
|
|
||||||
case CFN_BUILT_IN_BSWAP64:
|
|
||||||
+ case CFN_BUILT_IN_BSWAP128:
|
|
||||||
*result = wide_int::from (arg, precision, TYPE_SIGN (arg_type)).bswap ();
|
|
||||||
return true;
|
|
||||||
|
|
||||||
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
|
|
||||||
index 6e635382f..78227a83d 100644
|
|
||||||
--- a/gcc/fold-const.c
|
|
||||||
+++ b/gcc/fold-const.c
|
|
||||||
@@ -13889,8 +13889,10 @@ tree_call_nonnegative_warnv_p (tree type, combined_fn fn, tree arg0, tree arg1,
|
|
||||||
CASE_CFN_POPCOUNT:
|
|
||||||
CASE_CFN_CLZ:
|
|
||||||
CASE_CFN_CLRSB:
|
|
||||||
+ case CFN_BUILT_IN_BSWAP16:
|
|
||||||
case CFN_BUILT_IN_BSWAP32:
|
|
||||||
case CFN_BUILT_IN_BSWAP64:
|
|
||||||
+ case CFN_BUILT_IN_BSWAP128:
|
|
||||||
/* Always true. */
|
|
||||||
return true;
|
|
||||||
|
|
||||||
diff --git a/gcc/optabs.c b/gcc/optabs.c
|
|
||||||
index 049a18ceb..c3751fdf7 100644
|
|
||||||
--- a/gcc/optabs.c
|
|
||||||
+++ b/gcc/optabs.c
|
|
||||||
@@ -2896,8 +2896,11 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
|
|
||||||
if (temp)
|
|
||||||
return temp;
|
|
||||||
|
|
||||||
+ /* We do not provide a 128-bit bswap in libgcc so force the use of
|
|
||||||
+ a double bswap for 64-bit targets (UNITS_PER_WORD is in bytes). */
|
|
||||||
if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
|
|
||||||
- && optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
|
|
||||||
+ && (UNITS_PER_WORD == 8
|
|
||||||
+ || optab_handler (unoptab, word_mode) != CODE_FOR_nothing))
|
|
||||||
{
|
|
||||||
temp = expand_doubleword_bswap (mode, op0, target);
|
|
||||||
if (temp)
|
|
||||||
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
|
|
||||||
index eb01c2434..058e046aa 100644
|
|
||||||
--- a/gcc/tree-core.h
|
|
||||||
+++ b/gcc/tree-core.h
|
|
||||||
@@ -600,6 +600,7 @@ enum tree_index {
|
|
||||||
TI_UINT16_TYPE,
|
|
||||||
TI_UINT32_TYPE,
|
|
||||||
TI_UINT64_TYPE,
|
|
||||||
+ TI_UINT128_TYPE,
|
|
||||||
|
|
||||||
TI_VOID,
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
|
|
||||||
index 952fd9cd4..dcdf10369 100644
|
|
||||||
--- a/gcc/tree-ssa-ccp.c
|
|
||||||
+++ b/gcc/tree-ssa-ccp.c
|
|
||||||
@@ -2005,6 +2005,7 @@ evaluate_stmt (gimple *stmt)
|
|
||||||
case BUILT_IN_BSWAP16:
|
|
||||||
case BUILT_IN_BSWAP32:
|
|
||||||
case BUILT_IN_BSWAP64:
|
|
||||||
+ case BUILT_IN_BSWAP128:
|
|
||||||
val = get_value_for_expr (gimple_call_arg (stmt, 0), true);
|
|
||||||
if (val.lattice_val == UNDEFINED)
|
|
||||||
break;
|
|
||||||
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
|
||||||
index b872cfc8d..4636b7ba2 100644
|
|
||||||
--- a/gcc/tree-vect-stmts.c
|
|
||||||
+++ b/gcc/tree-vect-stmts.c
|
|
||||||
@@ -3085,7 +3085,7 @@ vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
|
|
||||||
return iv_step;
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
|
|
||||||
+/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
|
|
||||||
|
|
||||||
static bool
|
|
||||||
vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
||||||
@@ -3454,7 +3454,8 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
||||||
else if (modifier == NONE
|
|
||||||
&& (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
|
|
||||||
|| gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
|
|
||||||
- || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
|
|
||||||
+ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
|
|
||||||
+ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
|
|
||||||
return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
|
|
||||||
vectype_in, cost_vec);
|
|
||||||
else
|
|
||||||
diff --git a/gcc/tree.c b/gcc/tree.c
|
|
||||||
index 84a440b35..3e6647ae0 100644
|
|
||||||
--- a/gcc/tree.c
|
|
||||||
+++ b/gcc/tree.c
|
|
||||||
@@ -10394,6 +10394,8 @@ build_common_tree_nodes (bool signed_char)
|
|
||||||
uint16_type_node = make_or_reuse_type (16, 1);
|
|
||||||
uint32_type_node = make_or_reuse_type (32, 1);
|
|
||||||
uint64_type_node = make_or_reuse_type (64, 1);
|
|
||||||
+ if (targetm.scalar_mode_supported_p (TImode))
|
|
||||||
+ uint128_type_node = make_or_reuse_type (128, 1);
|
|
||||||
|
|
||||||
/* Decimal float types. */
|
|
||||||
if (targetm.decimal_float_supported_p ())
|
|
||||||
diff --git a/gcc/tree.h b/gcc/tree.h
|
|
||||||
index 328a2d5d2..bddc6e528 100644
|
|
||||||
--- a/gcc/tree.h
|
|
||||||
+++ b/gcc/tree.h
|
|
||||||
@@ -4035,6 +4035,7 @@ tree_strip_any_location_wrapper (tree exp)
|
|
||||||
#define uint16_type_node global_trees[TI_UINT16_TYPE]
|
|
||||||
#define uint32_type_node global_trees[TI_UINT32_TYPE]
|
|
||||||
#define uint64_type_node global_trees[TI_UINT64_TYPE]
|
|
||||||
+#define uint128_type_node global_trees[TI_UINT128_TYPE]
|
|
||||||
|
|
||||||
#define void_node global_trees[TI_VOID]
|
|
||||||
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,113 +0,0 @@
|
|||||||
From b9ac0cc69aab3c8d662d5b0a9ed43d971c13ac70 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Richard Biener <rguenther@suse.de>
|
|
||||||
Date: Fri, 29 May 2020 09:25:53 +0200
|
|
||||||
Subject: [PATCH 10/35] [Backport] tree-optimization/95393 - fold MIN/MAX_EXPR
|
|
||||||
generated by phiopt
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=07852a81f58532c63a57631d7c3757fc6bcea17d
|
|
||||||
|
|
||||||
This makes sure to fold generated stmts so they do not survive
|
|
||||||
until RTL expansion and cause awkward code generation.
|
|
||||||
|
|
||||||
2020-05-29 Richard Biener <rguenther@suse.de>
|
|
||||||
|
|
||||||
PR tree-optimization/95393
|
|
||||||
* tree-ssa-phiopt.c (minmax_replacement): Use gimple_build
|
|
||||||
to build the min/max expression so we simplify cases like
|
|
||||||
MAX(0, s) immediately.
|
|
||||||
|
|
||||||
* gcc.dg/tree-ssa/phi-opt-21.c: New testcase.
|
|
||||||
* g++.dg/vect/slp-pr87105.cc: Adjust.
|
|
||||||
---
|
|
||||||
gcc/testsuite/g++.dg/vect/slp-pr87105.cc | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c | 15 +++++++++++++
|
|
||||||
gcc/tree-ssa-phiopt.c | 25 +++++++++++-----------
|
|
||||||
3 files changed, 29 insertions(+), 13 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
|
|
||||||
index 5518f319b..d07b1cd46 100644
|
|
||||||
--- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
|
|
||||||
+++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
|
|
||||||
@@ -102,4 +102,4 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept {
|
|
||||||
// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
|
|
||||||
// It's a bit awkward to detect that all stores were vectorized but the
|
|
||||||
// following more or less does the trick
|
|
||||||
-// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
|
|
||||||
+// { dg-final { scan-tree-dump "vect_\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..9f3d56957
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
|
|
||||||
@@ -0,0 +1,15 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-phiopt4-details" } */
|
|
||||||
+
|
|
||||||
+int f(unsigned s)
|
|
||||||
+{
|
|
||||||
+ int i;
|
|
||||||
+ for (i = 0; i < s; ++i)
|
|
||||||
+ ;
|
|
||||||
+
|
|
||||||
+ return i;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump "converted to straightline code" "phiopt4" } } */
|
|
||||||
+/* Make sure we fold the detected MAX<s, 0>. */
|
|
||||||
+/* { dg-final { scan-tree-dump-not "MAX" "phiopt4" } } */
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index fca32222f..269eda21c 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "tree-inline.h"
|
|
||||||
#include "case-cfn-macros.h"
|
|
||||||
#include "tree-eh.h"
|
|
||||||
+#include "gimple-fold.h"
|
|
||||||
#include "internal-fn.h"
|
|
||||||
|
|
||||||
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
|
|
||||||
@@ -1414,7 +1415,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
{
|
|
||||||
tree result, type, rhs;
|
|
||||||
gcond *cond;
|
|
||||||
- gassign *new_stmt;
|
|
||||||
edge true_edge, false_edge;
|
|
||||||
enum tree_code cmp, minmax, ass_code;
|
|
||||||
tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
|
|
||||||
@@ -1738,19 +1738,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
gsi_move_before (&gsi_from, &gsi);
|
|
||||||
}
|
|
||||||
|
|
||||||
- /* Create an SSA var to hold the min/max result. If we're the only
|
|
||||||
- things setting the target PHI, then we can clone the PHI
|
|
||||||
- variable. Otherwise we must create a new one. */
|
|
||||||
- result = PHI_RESULT (phi);
|
|
||||||
- if (EDGE_COUNT (gimple_bb (phi)->preds) == 2)
|
|
||||||
- result = duplicate_ssa_name (result, NULL);
|
|
||||||
- else
|
|
||||||
- result = make_ssa_name (TREE_TYPE (result));
|
|
||||||
-
|
|
||||||
/* Emit the statement to compute min/max. */
|
|
||||||
- new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
|
|
||||||
+ gimple_seq stmts = NULL;
|
|
||||||
+ tree phi_result = PHI_RESULT (phi);
|
|
||||||
+ result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1);
|
|
||||||
+ /* Duplicate range info if we're the only things setting the target PHI. */
|
|
||||||
+ if (!gimple_seq_empty_p (stmts)
|
|
||||||
+ && EDGE_COUNT (gimple_bb (phi)->preds) == 2
|
|
||||||
+ && !POINTER_TYPE_P (TREE_TYPE (phi_result))
|
|
||||||
+ && SSA_NAME_RANGE_INFO (phi_result))
|
|
||||||
+ duplicate_ssa_name_range_info (result, SSA_NAME_RANGE_TYPE (phi_result),
|
|
||||||
+ SSA_NAME_RANGE_INFO (phi_result));
|
|
||||||
+
|
|
||||||
gsi = gsi_last_bb (cond_bb);
|
|
||||||
- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
|
|
||||||
+ gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
|
|
||||||
|
|
||||||
replace_phi_edge_with_variable (cond_bb, e1, phi, result);
|
|
||||||
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,91 +0,0 @@
|
|||||||
From 9f3a8c600abe16f172b36d8113862e8f7aea940c Mon Sep 17 00:00:00 2001
|
|
||||||
From: Andrew Pinski <apinski@marvell.com>
|
|
||||||
Date: Sun, 16 May 2021 13:07:06 -0700
|
|
||||||
Subject: [PATCH 11/35] [Backport] Add a couple of A?CST1:CST2 match and
|
|
||||||
simplify optimizations
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b6bdd7a4cb41ee057f2d064fffcb00f23ce6b497
|
|
||||||
|
|
||||||
Instead of some of the more manual optimizations inside phi-opt,
|
|
||||||
it would be a good idea to do a lot of the heavy lifting inside match
|
|
||||||
and simplify instead. In the process, this moves the three simple
|
|
||||||
A?CST1:CST2 (where CST1 or CST2 is zero) simplifications.
|
|
||||||
|
|
||||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
|
|
||||||
|
|
||||||
Differences from V1:
|
|
||||||
* Use bit_xor 1 instead of bit_not to fix the problem with boolean types
|
|
||||||
which are not 1 bit precision.
|
|
||||||
|
|
||||||
Thanks,
|
|
||||||
Andrew Pinski
|
|
||||||
|
|
||||||
gcc:
|
|
||||||
* match.pd (A?CST1:CST2): Add simplifications for A?0:+-1, A?+-1:0,
|
|
||||||
A?POW2:0 and A?0:POW2.
|
|
||||||
---
|
|
||||||
gcc/match.pd | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
1 file changed, 48 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
|
||||||
index 660d5c268..032830b0d 100644
|
|
||||||
--- a/gcc/match.pd
|
|
||||||
+++ b/gcc/match.pd
|
|
||||||
@@ -3334,6 +3334,54 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|
||||||
(if (cst1 && cst2)
|
|
||||||
(vec_cond @0 { cst1; } { cst2; })))))
|
|
||||||
|
|
||||||
+/* A few simplifications of "a ? CST1 : CST2". */
|
|
||||||
+/* NOTE: Only do this on gimple as the if-chain-to-switch
|
|
||||||
+ optimization depends on the gimple to have if statements in it. */
|
|
||||||
+#if GIMPLE
|
|
||||||
+(simplify
|
|
||||||
+ (cond @0 INTEGER_CST@1 INTEGER_CST@2)
|
|
||||||
+ (switch
|
|
||||||
+ (if (integer_zerop (@2))
|
|
||||||
+ (switch
|
|
||||||
+ /* a ? 1 : 0 -> a if 0 and 1 are integral types. */
|
|
||||||
+ (if (integer_onep (@1))
|
|
||||||
+ (convert (convert:boolean_type_node @0)))
|
|
||||||
+ /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
|
|
||||||
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
|
|
||||||
+ (with {
|
|
||||||
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
|
|
||||||
+ }
|
|
||||||
+ (lshift (convert (convert:boolean_type_node @0)) { shift; })))
|
|
||||||
+ /* a ? -1 : 0 -> -a. No need to check the TYPE_PRECISION not being 1
|
|
||||||
+ here as the powerof2cst case above will handle that case correctly. */
|
|
||||||
+ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1))
|
|
||||||
+ (negate (convert (convert:boolean_type_node @0))))))
|
|
||||||
+ (if (integer_zerop (@1))
|
|
||||||
+ (with {
|
|
||||||
+ tree booltrue = constant_boolean_node (true, boolean_type_node);
|
|
||||||
+ }
|
|
||||||
+ (switch
|
|
||||||
+ /* a ? 0 : 1 -> !a. */
|
|
||||||
+ (if (integer_onep (@2))
|
|
||||||
+ (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))
|
|
||||||
+ /* a ? 0 : powerof2cst -> (!a) << (log2(powerof2cst)) */
|
|
||||||
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2))
|
|
||||||
+ (with {
|
|
||||||
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
|
|
||||||
+ }
|
|
||||||
+ (lshift (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))
|
|
||||||
+ { shift; })))
|
|
||||||
+ /* a ? 0 : -1 -> -(!a). No need to check the TYPE_PRECISION not being 1
|
|
||||||
+ here as the powerof2cst case above will handle that case correctly. */
|
|
||||||
+ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2))
|
|
||||||
+ (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))))
|
|
||||||
+ )
|
|
||||||
+ )
|
|
||||||
+ )
|
|
||||||
+ )
|
|
||||||
+)
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
/* Simplification moved from fold_cond_expr_with_comparison. It may also
|
|
||||||
be extended. */
|
|
||||||
/* This pattern implements two kinds simplification:
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,155 +0,0 @@
|
|||||||
From 4352b952ba24c413697fcfc191d06165a8a31ced Mon Sep 17 00:00:00 2001
|
|
||||||
From: Andrew Pinski <apinski@marvell.com>
|
|
||||||
Date: Sat, 22 May 2021 19:49:50 +0000
|
|
||||||
Subject: [PATCH 12/35] [Backport] Optimize x < 0 ? ~y : y to (x >> 31) ^ y in
|
|
||||||
match.pd
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=1fd76b24306ed4df4cf9e797d900699ed59ce7f7
|
|
||||||
|
|
||||||
This copies the optimization that is done in phiopt for
|
|
||||||
"x < 0 ? ~y : y to (x >> 31) ^ y" into match.pd. The code
|
|
||||||
for phiopt is kept around until phiopt uses match.pd (which
|
|
||||||
I am working towards).
|
|
||||||
|
|
||||||
Note the original testcase is now optimized early on and I added a
|
|
||||||
new testcase to optimize during phiopt.
|
|
||||||
|
|
||||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
|
|
||||||
|
|
||||||
Thanks,
|
|
||||||
Andrew Pinski
|
|
||||||
|
|
||||||
Differences from v1:
|
|
||||||
V2: Add check for integral type to make sure vector types are not done.
|
|
||||||
|
|
||||||
gcc:
|
|
||||||
* match.pd (x < 0 ? ~y : y): New patterns.
|
|
||||||
|
|
||||||
gcc/testsuite:
|
|
||||||
* gcc.dg/tree-ssa/pr96928.c: Update test for slightly different IR.
|
|
||||||
* gcc.dg/tree-ssa/pr96928-1.c: New testcase.
|
|
||||||
---
|
|
||||||
gcc/match.pd | 32 +++++++++++++++
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c | 48 +++++++++++++++++++++++
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 7 +++-
|
|
||||||
3 files changed, 85 insertions(+), 2 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
|
|
||||||
|
|
||||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
|
||||||
index 032830b0d..5899eea95 100644
|
|
||||||
--- a/gcc/match.pd
|
|
||||||
+++ b/gcc/match.pd
|
|
||||||
@@ -4390,6 +4390,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|
||||||
(cmp (bit_and@2 @0 integer_pow2p@1) @1)
|
|
||||||
(icmp @2 { build_zero_cst (TREE_TYPE (@0)); })))
|
|
||||||
|
|
||||||
+(for cmp (ge lt)
|
|
||||||
+/* x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
|
|
||||||
+/* x >= 0 ? ~y : y into ~((x >> (prec-1)) ^ y). */
|
|
||||||
+ (simplify
|
|
||||||
+ (cond (cmp @0 integer_zerop) (bit_not @1) @1)
|
|
||||||
+ (if (INTEGRAL_TYPE_P (type)
|
|
||||||
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
|
||||||
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
|
|
||||||
+ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type))
|
|
||||||
+ (with
|
|
||||||
+ {
|
|
||||||
+ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1);
|
|
||||||
+ }
|
|
||||||
+ (if (cmp == LT_EXPR)
|
|
||||||
+ (bit_xor (convert (rshift @0 {shifter;})) @1)
|
|
||||||
+ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1))))))
|
|
||||||
+/* x < 0 ? y : ~y into ~((x >> (prec-1)) ^ y). */
|
|
||||||
+/* x >= 0 ? y : ~y into (x >> (prec-1)) ^ y. */
|
|
||||||
+ (simplify
|
|
||||||
+ (cond (cmp @0 integer_zerop) @1 (bit_not @1))
|
|
||||||
+ (if (INTEGRAL_TYPE_P (type)
|
|
||||||
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
|
||||||
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
|
|
||||||
+ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type))
|
|
||||||
+ (with
|
|
||||||
+ {
|
|
||||||
+ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1);
|
|
||||||
+ }
|
|
||||||
+ (if (cmp == GE_EXPR)
|
|
||||||
+ (bit_xor (convert (rshift @0 {shifter;})) @1)
|
|
||||||
+ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1)))))))
|
|
||||||
+
|
|
||||||
/* If we have (A & C) != 0 ? D : 0 where C and D are powers of 2,
|
|
||||||
convert this into a shift followed by ANDing with D. */
|
|
||||||
(simplify
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..a2770e5e8
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
|
|
||||||
@@ -0,0 +1,48 @@
|
|
||||||
+/* PR tree-optimization/96928 */
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+foo (int a)
|
|
||||||
+{
|
|
||||||
+ if (a < 0)
|
|
||||||
+ return ~a;
|
|
||||||
+ return a;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+bar (int a, int b)
|
|
||||||
+{
|
|
||||||
+ if (a < 0)
|
|
||||||
+ return ~b;
|
|
||||||
+ return b;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+unsigned
|
|
||||||
+baz (int a, unsigned int b)
|
|
||||||
+{
|
|
||||||
+ if (a < 0)
|
|
||||||
+ return ~b;
|
|
||||||
+ return b;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+unsigned
|
|
||||||
+qux (int a, unsigned int c)
|
|
||||||
+{
|
|
||||||
+ if (a >= 0)
|
|
||||||
+ return ~c;
|
|
||||||
+ return c;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+corge (int a, int b)
|
|
||||||
+{
|
|
||||||
+ if (a >= 0)
|
|
||||||
+ return b;
|
|
||||||
+ return ~b;
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
|
||||||
index 209135726..e8fd82fc2 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
|
||||||
@@ -1,8 +1,11 @@
|
|
||||||
/* PR tree-optimization/96928 */
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-phiopt2 -fdump-tree-optimized" } */
|
|
||||||
/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
|
|
||||||
-/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
|
|
||||||
+/* The following check is done at optimized because a ^ (~b) is rewritten as ~(a^b)
|
|
||||||
+ and in the case of match.pd optimizing these ?:, the ~ is moved out already
|
|
||||||
+ by the time we get to phiopt2. */
|
|
||||||
+/* { dg-final { scan-tree-dump-times "\\\^ c_\[0-9]*\\\(D\\\);" 1 "optimized" } } */
|
|
||||||
/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
|
|
||||||
/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
|
|
||||||
/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,249 +0,0 @@
|
|||||||
From 406071e8c1838c824f06c35ef3cf9419aa543e6e Mon Sep 17 00:00:00 2001
|
|
||||||
From: Andrew Pinski <apinski@marvell.com>
|
|
||||||
Date: Tue, 1 Jun 2021 01:05:09 +0000
|
|
||||||
Subject: [PATCH 13/35] [Backport] Replace conditional_replacement with match
|
|
||||||
and simplify
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=9f55df63154a39d67ef5b24def7044bf87300831
|
|
||||||
|
|
||||||
This is the first of series of patches to simplify phi-opt
|
|
||||||
to use match and simplify in many cases. This simplification
|
|
||||||
will allow more things to be optimized.
|
|
||||||
|
|
||||||
This is what Richard requested in
|
|
||||||
https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571197.html
|
|
||||||
and I think it is the right thing to do too.
|
|
||||||
|
|
||||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
|
|
||||||
|
|
||||||
gcc/ChangeLog:
|
|
||||||
|
|
||||||
PR tree-optimization/25290
|
|
||||||
* tree-ssa-phiopt.c (match_simplify_replacement):
|
|
||||||
New function.
|
|
||||||
(tree_ssa_phiopt_worker): Use match_simplify_replacement.
|
|
||||||
(two_value_replacement): Change the comment about
|
|
||||||
conditional_replacement.
|
|
||||||
(conditional_replacement): Delete.
|
|
||||||
---
|
|
||||||
gcc/tree-ssa-phiopt.c | 144 ++++++++++++------------------------------
|
|
||||||
1 file changed, 39 insertions(+), 105 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 269eda21c..9fa6363b6 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -52,8 +52,8 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
|
|
||||||
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
|
|
||||||
tree, tree);
|
|
||||||
-static bool conditional_replacement (basic_block, basic_block,
|
|
||||||
- edge, edge, gphi *, tree, tree);
|
|
||||||
+static bool match_simplify_replacement (basic_block, basic_block,
|
|
||||||
+ edge, edge, gphi *, tree, tree);
|
|
||||||
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
|
|
||||||
gimple *);
|
|
||||||
static int value_replacement (basic_block, basic_block,
|
|
||||||
@@ -349,8 +349,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (!early_p
|
|
||||||
- && conditional_replacement (bb, bb1, e1, e2, phi,
|
|
||||||
- arg0, arg1))
|
|
||||||
+ && match_simplify_replacement (bb, bb1, e1, e2, phi,
|
|
||||||
+ arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
@@ -662,7 +662,7 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to
|
|
||||||
- conditional_replacement. */
|
|
||||||
+ match_simplify_replacement. */
|
|
||||||
if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
|
|
||||||
&& (integer_zerop (arg0)
|
|
||||||
|| integer_zerop (arg1)
|
|
||||||
@@ -763,137 +763,71 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* The function conditional_replacement does the main work of doing the
|
|
||||||
- conditional replacement. Return true if the replacement is done.
|
|
||||||
+/* The function match_simplify_replacement does the main work of doing the
|
|
||||||
+ replacement using match and simplify. Return true if the replacement is done.
|
|
||||||
Otherwise return false.
|
|
||||||
BB is the basic block where the replacement is going to be done on. ARG0
|
|
||||||
is argument 0 from PHI. Likewise for ARG1. */
|
|
||||||
|
|
||||||
static bool
|
|
||||||
-conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
- edge e0, edge e1, gphi *phi,
|
|
||||||
- tree arg0, tree arg1)
|
|
||||||
+match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
+ edge e0, edge e1, gphi *phi,
|
|
||||||
+ tree arg0, tree arg1)
|
|
||||||
{
|
|
||||||
- tree result;
|
|
||||||
gimple *stmt;
|
|
||||||
- gassign *new_stmt;
|
|
||||||
tree cond;
|
|
||||||
gimple_stmt_iterator gsi;
|
|
||||||
edge true_edge, false_edge;
|
|
||||||
- tree new_var, new_var2;
|
|
||||||
- bool neg = false;
|
|
||||||
- int shift = 0;
|
|
||||||
- tree nonzero_arg;
|
|
||||||
-
|
|
||||||
- /* FIXME: Gimplification of complex type is too hard for now. */
|
|
||||||
- /* We aren't prepared to handle vectors either (and it is a question
|
|
||||||
- if it would be worthwhile anyway). */
|
|
||||||
- if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
|
|
||||||
- || POINTER_TYPE_P (TREE_TYPE (arg0)))
|
|
||||||
- || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
|
|
||||||
- || POINTER_TYPE_P (TREE_TYPE (arg1))))
|
|
||||||
- return false;
|
|
||||||
+ gimple_seq seq = NULL;
|
|
||||||
+ tree result;
|
|
||||||
|
|
||||||
- /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
|
|
||||||
- 0 and (1 << cst), then convert it to the conditional. */
|
|
||||||
- if (integer_zerop (arg0))
|
|
||||||
- nonzero_arg = arg1;
|
|
||||||
- else if (integer_zerop (arg1))
|
|
||||||
- nonzero_arg = arg0;
|
|
||||||
- else
|
|
||||||
- return false;
|
|
||||||
- if (integer_pow2p (nonzero_arg))
|
|
||||||
- {
|
|
||||||
- shift = tree_log2 (nonzero_arg);
|
|
||||||
- if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
|
|
||||||
- return false;
|
|
||||||
- }
|
|
||||||
- else if (integer_all_onesp (nonzero_arg))
|
|
||||||
- neg = true;
|
|
||||||
- else
|
|
||||||
+ if (!empty_block_p (middle_bb))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
- if (!empty_block_p (middle_bb))
|
|
||||||
+ /* Special case A ? B : B as this will always simplify to B. */
|
|
||||||
+ if (operand_equal_for_phi_arg_p (arg0, arg1))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
- /* At this point we know we have a GIMPLE_COND with two successors.
|
|
||||||
+ /* At this point we know we have a GIMPLE_COND with two successors.
|
|
||||||
One successor is BB, the other successor is an empty block which
|
|
||||||
falls through into BB.
|
|
||||||
|
|
||||||
- There is a single PHI node at the join point (BB) and its arguments
|
|
||||||
- are constants (0, 1) or (0, -1) or (0, (1 << shift)).
|
|
||||||
-
|
|
||||||
- So, given the condition COND, and the two PHI arguments, we can
|
|
||||||
- rewrite this PHI into non-branching code:
|
|
||||||
+ There is a single PHI node at the join point (BB).
|
|
||||||
|
|
||||||
- dest = (COND) or dest = COND' or dest = (COND) << shift
|
|
||||||
-
|
|
||||||
- We use the condition as-is if the argument associated with the
|
|
||||||
- true edge has the value one or the argument associated with the
|
|
||||||
- false edge as the value zero. Note that those conditions are not
|
|
||||||
- the same since only one of the outgoing edges from the GIMPLE_COND
|
|
||||||
- will directly reach BB and thus be associated with an argument. */
|
|
||||||
+ So, given the condition COND, and the two PHI arguments, match and simplify
|
|
||||||
+ can happen on (COND) ? arg0 : arg1. */
|
|
||||||
|
|
||||||
stmt = last_stmt (cond_bb);
|
|
||||||
- result = PHI_RESULT (phi);
|
|
||||||
|
|
||||||
/* To handle special cases like floating point comparison, it is easier and
|
|
||||||
less error-prone to build a tree and gimplify it on the fly though it is
|
|
||||||
- less efficient. */
|
|
||||||
- cond = fold_build2_loc (gimple_location (stmt),
|
|
||||||
- gimple_cond_code (stmt), boolean_type_node,
|
|
||||||
- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
|
|
||||||
+ less efficient.
|
|
||||||
+ Don't use fold_build2 here as that might create (bool)a instead of just
|
|
||||||
+ "a != 0". */
|
|
||||||
+ cond = build2_loc (gimple_location (stmt),
|
|
||||||
+ gimple_cond_code (stmt), boolean_type_node,
|
|
||||||
+ gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
|
|
||||||
|
|
||||||
/* We need to know which is the true edge and which is the false
|
|
||||||
edge so that we know when to invert the condition below. */
|
|
||||||
extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
|
|
||||||
- if ((e0 == true_edge && integer_zerop (arg0))
|
|
||||||
- || (e0 == false_edge && !integer_zerop (arg0))
|
|
||||||
- || (e1 == true_edge && integer_zerop (arg1))
|
|
||||||
- || (e1 == false_edge && !integer_zerop (arg1)))
|
|
||||||
- cond = fold_build1_loc (gimple_location (stmt),
|
|
||||||
- TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
|
|
||||||
-
|
|
||||||
- if (neg)
|
|
||||||
- {
|
|
||||||
- cond = fold_convert_loc (gimple_location (stmt),
|
|
||||||
- TREE_TYPE (result), cond);
|
|
||||||
- cond = fold_build1_loc (gimple_location (stmt),
|
|
||||||
- NEGATE_EXPR, TREE_TYPE (cond), cond);
|
|
||||||
- }
|
|
||||||
- else if (shift)
|
|
||||||
- {
|
|
||||||
- cond = fold_convert_loc (gimple_location (stmt),
|
|
||||||
- TREE_TYPE (result), cond);
|
|
||||||
- cond = fold_build2_loc (gimple_location (stmt),
|
|
||||||
- LSHIFT_EXPR, TREE_TYPE (cond), cond,
|
|
||||||
- build_int_cst (integer_type_node, shift));
|
|
||||||
- }
|
|
||||||
+ if (e1 == true_edge || e0 == false_edge)
|
|
||||||
+ std::swap (arg0, arg1);
|
|
||||||
|
|
||||||
- /* Insert our new statements at the end of conditional block before the
|
|
||||||
- COND_STMT. */
|
|
||||||
- gsi = gsi_for_stmt (stmt);
|
|
||||||
- new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
|
|
||||||
- GSI_SAME_STMT);
|
|
||||||
+ tree type = TREE_TYPE (gimple_phi_result (phi));
|
|
||||||
+ result = gimple_simplify (COND_EXPR, type,
|
|
||||||
+ cond,
|
|
||||||
+ arg0, arg1,
|
|
||||||
+ &seq, NULL);
|
|
||||||
+ if (!result)
|
|
||||||
+ return false;
|
|
||||||
|
|
||||||
- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
|
|
||||||
- {
|
|
||||||
- location_t locus_0, locus_1;
|
|
||||||
+ gsi = gsi_last_bb (cond_bb);
|
|
||||||
|
|
||||||
- new_var2 = make_ssa_name (TREE_TYPE (result));
|
|
||||||
- new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
|
|
||||||
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
|
||||||
- new_var = new_var2;
|
|
||||||
-
|
|
||||||
- /* Set the locus to the first argument, unless is doesn't have one. */
|
|
||||||
- locus_0 = gimple_phi_arg_location (phi, 0);
|
|
||||||
- locus_1 = gimple_phi_arg_location (phi, 1);
|
|
||||||
- if (locus_0 == UNKNOWN_LOCATION)
|
|
||||||
- locus_0 = locus_1;
|
|
||||||
- gimple_set_location (new_stmt, locus_0);
|
|
||||||
- }
|
|
||||||
+ if (seq)
|
|
||||||
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
|
|
||||||
|
|
||||||
- replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);
|
|
||||||
+ replace_phi_edge_with_variable (cond_bb, e1, phi, result);
|
|
||||||
|
|
||||||
/* Note that we optimized this PHI. */
|
|
||||||
return true;
|
|
||||||
@@ -3905,7 +3839,7 @@ gate_hoist_loads (void)
|
|
||||||
Conditional Replacement
|
|
||||||
-----------------------
|
|
||||||
|
|
||||||
- This transformation, implemented in conditional_replacement,
|
|
||||||
+ This transformation, implemented in match_simplify_replacement,
|
|
||||||
replaces
|
|
||||||
|
|
||||||
bb0:
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,174 +0,0 @@
|
|||||||
From fabbe6ccc798d3cb097c6371b4d53cd6dfde6c7c Mon Sep 17 00:00:00 2001
|
|
||||||
From: Andrew Pinski <apinski@marvell.com>
|
|
||||||
Date: Fri, 11 Jun 2021 13:21:34 -0700
|
|
||||||
Subject: [PATCH 14/35] [Backport] Allow match-and-simplified phiopt to run in
|
|
||||||
early phiopt
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cd48e550d1dc58307ab1c0ab490745673f748ccc
|
|
||||||
|
|
||||||
To move a few things more to match-and-simplify from phiopt,
|
|
||||||
we need to allow match_simplify_replacement to run in early
|
|
||||||
phiopt. To do this we add a replacement for gimple_simplify
|
|
||||||
that is explicitly for phiopt.
|
|
||||||
|
|
||||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no
|
|
||||||
regressions.
|
|
||||||
|
|
||||||
gcc/ChangeLog:
|
|
||||||
|
|
||||||
* tree-ssa-phiopt.c (match_simplify_replacement):
|
|
||||||
Add early_p argument. Call gimple_simplify_phiopt
|
|
||||||
instead of gimple_simplify.
|
|
||||||
(tree_ssa_phiopt_worker): Update call to
|
|
||||||
match_simplify_replacement and allow unconditionally.
|
|
||||||
(phiopt_early_allow): New function.
|
|
||||||
(gimple_simplify_phiopt): New function.
|
|
||||||
---
|
|
||||||
gcc/tree-ssa-phiopt.c | 89 ++++++++++++++++++++++++++++++++++---------
|
|
||||||
1 file changed, 70 insertions(+), 19 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 9fa6363b6..92aeb8415 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -48,12 +48,13 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "tree-eh.h"
|
|
||||||
#include "gimple-fold.h"
|
|
||||||
#include "internal-fn.h"
|
|
||||||
+#include "gimple-match.h"
|
|
||||||
|
|
||||||
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
|
|
||||||
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
|
|
||||||
tree, tree);
|
|
||||||
static bool match_simplify_replacement (basic_block, basic_block,
|
|
||||||
- edge, edge, gphi *, tree, tree);
|
|
||||||
+ edge, edge, gphi *, tree, tree, bool);
|
|
||||||
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
|
|
||||||
gimple *);
|
|
||||||
static int value_replacement (basic_block, basic_block,
|
|
||||||
@@ -348,9 +349,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
/* Do the replacement of conditional if it can be done. */
|
|
||||||
if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
- else if (!early_p
|
|
||||||
- && match_simplify_replacement (bb, bb1, e1, e2, phi,
|
|
||||||
- arg0, arg1))
|
|
||||||
+ else if (match_simplify_replacement (bb, bb1, e1, e2, phi,
|
|
||||||
+ arg0, arg1,
|
|
||||||
+ early_p))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
@@ -763,6 +764,67 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Return TRUE if CODE should be allowed during early phiopt.
|
|
||||||
+ Currently this is to allow MIN/MAX and ABS/NEGATE. */
|
|
||||||
+static bool
|
|
||||||
+phiopt_early_allow (enum tree_code code)
|
|
||||||
+{
|
|
||||||
+ switch (code)
|
|
||||||
+ {
|
|
||||||
+ case MIN_EXPR:
|
|
||||||
+ case MAX_EXPR:
|
|
||||||
+ case ABS_EXPR:
|
|
||||||
+ case ABSU_EXPR:
|
|
||||||
+ case NEGATE_EXPR:
|
|
||||||
+ case SSA_NAME:
|
|
||||||
+ return true;
|
|
||||||
+ default:
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* gimple_simplify_phiopt is like gimple_simplify but designed for PHIOPT.
|
|
||||||
+ Return NULL if nothing can be simplified or the resulting simplified value
|
|
||||||
+ with parts pushed if EARLY_P was true. Also rejects non allowed tree code
|
|
||||||
+ if EARLY_P is set.
|
|
||||||
+ Takes the comparison from COMP_STMT and two args, ARG0 and ARG1 and tries
|
|
||||||
+ to simplify CMP ? ARG0 : ARG1. */
|
|
||||||
+static tree
|
|
||||||
+gimple_simplify_phiopt (bool early_p, tree type, gimple *comp_stmt,
|
|
||||||
+ tree arg0, tree arg1,
|
|
||||||
+ gimple_seq *seq)
|
|
||||||
+{
|
|
||||||
+ tree result;
|
|
||||||
+ enum tree_code comp_code = gimple_cond_code (comp_stmt);
|
|
||||||
+ location_t loc = gimple_location (comp_stmt);
|
|
||||||
+ tree cmp0 = gimple_cond_lhs (comp_stmt);
|
|
||||||
+ tree cmp1 = gimple_cond_rhs (comp_stmt);
|
|
||||||
+ /* To handle special cases like floating point comparison, it is easier and
|
|
||||||
+ less error-prone to build a tree and gimplify it on the fly though it is
|
|
||||||
+ less efficient.
|
|
||||||
+ Don't use fold_build2 here as that might create (bool)a instead of just
|
|
||||||
+ "a != 0". */
|
|
||||||
+ tree cond = build2_loc (loc, comp_code, boolean_type_node,
|
|
||||||
+ cmp0, cmp1);
|
|
||||||
+ gimple_match_op op (gimple_match_cond::UNCOND,
|
|
||||||
+ COND_EXPR, type, cond, arg0, arg1);
|
|
||||||
+
|
|
||||||
+ if (op.resimplify (early_p ? NULL : seq, follow_all_ssa_edges))
|
|
||||||
+ {
|
|
||||||
+ /* Early we want only to allow some generated tree codes. */
|
|
||||||
+ if (!early_p
|
|
||||||
+ || op.code.is_tree_code ()
|
|
||||||
+ || phiopt_early_allow ((tree_code)op.code))
|
|
||||||
+ {
|
|
||||||
+ result = maybe_push_res_to_seq (&op, seq);
|
|
||||||
+ if (result)
|
|
||||||
+ return result;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return NULL;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* The function match_simplify_replacement does the main work of doing the
|
|
||||||
replacement using match and simplify. Return true if the replacement is done.
|
|
||||||
Otherwise return false.
|
|
||||||
@@ -772,10 +834,9 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
static bool
|
|
||||||
match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
edge e0, edge e1, gphi *phi,
|
|
||||||
- tree arg0, tree arg1)
|
|
||||||
+ tree arg0, tree arg1, bool early_p)
|
|
||||||
{
|
|
||||||
gimple *stmt;
|
|
||||||
- tree cond;
|
|
||||||
gimple_stmt_iterator gsi;
|
|
||||||
edge true_edge, false_edge;
|
|
||||||
gimple_seq seq = NULL;
|
|
||||||
@@ -799,15 +860,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
|
|
||||||
stmt = last_stmt (cond_bb);
|
|
||||||
|
|
||||||
- /* To handle special cases like floating point comparison, it is easier and
|
|
||||||
- less error-prone to build a tree and gimplify it on the fly though it is
|
|
||||||
- less efficient.
|
|
||||||
- Don't use fold_build2 here as that might create (bool)a instead of just
|
|
||||||
- "a != 0". */
|
|
||||||
- cond = build2_loc (gimple_location (stmt),
|
|
||||||
- gimple_cond_code (stmt), boolean_type_node,
|
|
||||||
- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
|
|
||||||
-
|
|
||||||
/* We need to know which is the true edge and which is the false
|
|
||||||
edge so that we know when to invert the condition below. */
|
|
||||||
extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
|
|
||||||
@@ -815,10 +867,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
std::swap (arg0, arg1);
|
|
||||||
|
|
||||||
tree type = TREE_TYPE (gimple_phi_result (phi));
|
|
||||||
- result = gimple_simplify (COND_EXPR, type,
|
|
||||||
- cond,
|
|
||||||
- arg0, arg1,
|
|
||||||
- &seq, NULL);
|
|
||||||
+ result = gimple_simplify_phiopt (early_p, type, stmt,
|
|
||||||
+ arg0, arg1,
|
|
||||||
+ &seq);
|
|
||||||
if (!result)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,259 +0,0 @@
|
|||||||
From d212d216be0752370dbe7bc63bd75b3a9249e0b5 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Andrew Pinski <apinski@marvell.com>
|
|
||||||
Date: Tue, 1 Jun 2021 06:48:05 +0000
|
|
||||||
Subject: [PATCH 15/35] [Backport] Improve match_simplify_replacement in
|
|
||||||
phi-opt
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=c4574d23cb07340918793a5a98ae7bb2988b3791
|
|
||||||
|
|
||||||
This improves match_simplify_replacement in phi-opt to handle the
|
|
||||||
case where there is one cheap (non-call) preparation statement in the
|
|
||||||
middle basic block similar to xor_replacement and others.
|
|
||||||
This also allows xor_replacement to be removed, which this patch does.
|
|
||||||
|
|
||||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
|
|
||||||
|
|
||||||
Thanks,
|
|
||||||
Andrew Pinski
|
|
||||||
|
|
||||||
Changes since v1:
|
|
||||||
v3 - Just minor changes to using gimple_assign_lhs
|
|
||||||
instead of gimple_lhs and fixing a comment.
|
|
||||||
v2 - change the check on the preparation statement to
|
|
||||||
allow only assignments and no calls and only assignments
|
|
||||||
that feed into the phi.
|
|
||||||
|
|
||||||
gcc/ChangeLog:
|
|
||||||
|
|
||||||
PR tree-optimization/25290
|
|
||||||
* tree-ssa-phiopt.c (xor_replacement): Delete.
|
|
||||||
(tree_ssa_phiopt_worker): Delete use of xor_replacement.
|
|
||||||
(match_simplify_replacement): Allow one cheap preparation
|
|
||||||
statement that can be moved to before the if.
|
|
||||||
|
|
||||||
gcc/testsuite/ChangeLog:
|
|
||||||
|
|
||||||
* gcc.dg/tree-ssa/pr96928-1.c: Fix testcase for now that ~
|
|
||||||
happens on the outside of the bit_xor.
|
|
||||||
---
|
|
||||||
gcc/tree-ssa-phiopt.c | 164 ++++++++++++++----------------------------
|
|
||||||
1 file changed, 52 insertions(+), 112 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 92aeb8415..51a2d3684 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
#include "cfghooks.h"
|
|
||||||
#include "tree-pass.h"
|
|
||||||
#include "ssa.h"
|
|
||||||
+#include "tree-ssa.h"
|
|
||||||
#include "optabs-tree.h"
|
|
||||||
#include "insn-config.h"
|
|
||||||
#include "gimple-pretty-print.h"
|
|
||||||
@@ -63,8 +64,6 @@ static bool minmax_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gimple *, tree, tree);
|
|
||||||
static bool abs_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gimple *, tree, tree);
|
|
||||||
-static bool xor_replacement (basic_block, basic_block,
|
|
||||||
- edge, edge, gimple *, tree, tree);
|
|
||||||
static bool spaceship_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gphi *, tree, tree);
|
|
||||||
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
|
||||||
@@ -355,9 +354,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
- else if (!early_p
|
|
||||||
- && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
- cfgchanged = true;
|
|
||||||
else if (!early_p
|
|
||||||
&& cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
|
|
||||||
e2, phi, arg0,
|
|
||||||
@@ -841,14 +837,51 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
edge true_edge, false_edge;
|
|
||||||
gimple_seq seq = NULL;
|
|
||||||
tree result;
|
|
||||||
-
|
|
||||||
- if (!empty_block_p (middle_bb))
|
|
||||||
- return false;
|
|
||||||
+ gimple *stmt_to_move = NULL;
|
|
||||||
|
|
||||||
/* Special case A ? B : B as this will always simplify to B. */
|
|
||||||
if (operand_equal_for_phi_arg_p (arg0, arg1))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
+ /* If the basic block only has a cheap preparation statement,
|
|
||||||
+ allow it and move it once the transformation is done. */
|
|
||||||
+ if (!empty_block_p (middle_bb))
|
|
||||||
+ {
|
|
||||||
+ stmt_to_move = last_and_only_stmt (middle_bb);
|
|
||||||
+ if (!stmt_to_move)
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ if (gimple_vuse (stmt_to_move))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ if (gimple_could_trap_p (stmt_to_move)
|
|
||||||
+ || gimple_has_side_effects (stmt_to_move))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ if (gimple_uses_undefined_value_p (stmt_to_move))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ /* Allow assignments but not calls.
|
|
||||||
+ As const calls don't match any of the above, yet they could
|
|
||||||
+ still have some side-effects - they could contain
|
|
||||||
+ gimple_could_trap_p statements, like floating point
|
|
||||||
+ exceptions or integer division by zero. See PR70586.
|
|
||||||
+ FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p
|
|
||||||
+ should handle this. */
|
|
||||||
+ if (!is_gimple_assign (stmt_to_move))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ tree lhs = gimple_assign_lhs (stmt_to_move);
|
|
||||||
+ gimple *use_stmt;
|
|
||||||
+ use_operand_p use_p;
|
|
||||||
+
|
|
||||||
+ /* Allow only a statement which feeds into the phi. */
|
|
||||||
+ if (!lhs || TREE_CODE (lhs) != SSA_NAME
|
|
||||||
+ || !single_imm_use (lhs, &use_p, &use_stmt)
|
|
||||||
+ || use_stmt != phi)
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
/* At this point we know we have a GIMPLE_COND with two successors.
|
|
||||||
One successor is BB, the other successor is an empty block which
|
|
||||||
falls through into BB.
|
|
||||||
@@ -874,7 +907,17 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
return false;
|
|
||||||
|
|
||||||
gsi = gsi_last_bb (cond_bb);
|
|
||||||
-
|
|
||||||
+ if (stmt_to_move)
|
|
||||||
+ {
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "statement un-sinked:\n");
|
|
||||||
+ print_gimple_stmt (dump_file, stmt_to_move, 0,
|
|
||||||
+ TDF_VOPS|TDF_MEMSYMS);
|
|
||||||
+ }
|
|
||||||
+ gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt_to_move);
|
|
||||||
+ gsi_move_before (&gsi1, &gsi);
|
|
||||||
+ }
|
|
||||||
if (seq)
|
|
||||||
gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
|
|
||||||
|
|
||||||
@@ -2474,109 +2517,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
|
|
||||||
-
|
|
||||||
-static bool
|
|
||||||
-xor_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
- edge e0 ATTRIBUTE_UNUSED, edge e1,
|
|
||||||
- gimple *phi, tree arg0, tree arg1)
|
|
||||||
-{
|
|
||||||
- if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- /* OTHER_BLOCK must have only one executable statement which must have the
|
|
||||||
- form arg0 = ~arg1 or arg1 = ~arg0. */
|
|
||||||
-
|
|
||||||
- gimple *assign = last_and_only_stmt (middle_bb);
|
|
||||||
- /* If we did not find the proper one's complement assignment, then we cannot
|
|
||||||
- optimize. */
|
|
||||||
- if (assign == NULL)
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- /* If we got here, then we have found the only executable statement
|
|
||||||
- in OTHER_BLOCK. If it is anything other than arg = ~arg1 or
|
|
||||||
- arg1 = ~arg0, then we cannot optimize. */
|
|
||||||
- if (!is_gimple_assign (assign))
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- tree lhs = gimple_assign_lhs (assign);
|
|
||||||
- tree rhs = gimple_assign_rhs1 (assign);
|
|
||||||
-
|
|
||||||
- /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
|
|
||||||
- if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- gimple *cond = last_stmt (cond_bb);
|
|
||||||
- tree result = PHI_RESULT (phi);
|
|
||||||
-
|
|
||||||
- /* Only relationals comparing arg[01] against zero are interesting. */
|
|
||||||
- enum tree_code cond_code = gimple_cond_code (cond);
|
|
||||||
- if (cond_code != LT_EXPR && cond_code != GE_EXPR)
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- /* Make sure the conditional is x OP 0. */
|
|
||||||
- tree clhs = gimple_cond_lhs (cond);
|
|
||||||
- if (TREE_CODE (clhs) != SSA_NAME
|
|
||||||
- || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
|
|
||||||
- || TYPE_UNSIGNED (TREE_TYPE (clhs))
|
|
||||||
- || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
|
|
||||||
- || !integer_zerop (gimple_cond_rhs (cond)))
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
- /* We need to know which is the true edge and which is the false
|
|
||||||
- edge so that we know if have xor or inverted xor. */
|
|
||||||
- edge true_edge, false_edge;
|
|
||||||
- extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
|
|
||||||
-
|
|
||||||
- /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
|
|
||||||
- will need to invert the result. Similarly for LT_EXPR if
|
|
||||||
- the false edge goes to OTHER_BLOCK. */
|
|
||||||
- edge e;
|
|
||||||
- if (cond_code == GE_EXPR)
|
|
||||||
- e = true_edge;
|
|
||||||
- else
|
|
||||||
- e = false_edge;
|
|
||||||
-
|
|
||||||
- bool invert = e->dest == middle_bb;
|
|
||||||
-
|
|
||||||
- result = duplicate_ssa_name (result, NULL);
|
|
||||||
-
|
|
||||||
- gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
|
|
||||||
-
|
|
||||||
- int prec = TYPE_PRECISION (TREE_TYPE (clhs));
|
|
||||||
- gimple *new_stmt
|
|
||||||
- = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
|
|
||||||
- build_int_cst (integer_type_node, prec - 1));
|
|
||||||
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
|
||||||
-
|
|
||||||
- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs)))
|
|
||||||
- {
|
|
||||||
- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
|
|
||||||
- NOP_EXPR, gimple_assign_lhs (new_stmt));
|
|
||||||
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
|
||||||
- }
|
|
||||||
- lhs = gimple_assign_lhs (new_stmt);
|
|
||||||
-
|
|
||||||
- if (invert)
|
|
||||||
- {
|
|
||||||
- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
|
|
||||||
- BIT_NOT_EXPR, rhs);
|
|
||||||
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
|
||||||
- rhs = gimple_assign_lhs (new_stmt);
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs);
|
|
||||||
- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
|
|
||||||
-
|
|
||||||
- replace_phi_edge_with_variable (cond_bb, e1, phi, result);
|
|
||||||
-
|
|
||||||
- /* Note that we optimized this PHI. */
|
|
||||||
- return true;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
/* Auxiliary functions to determine the set of memory accesses which
|
|
||||||
can't trap because they are preceded by accesses to the same memory
|
|
||||||
portion. We do that for MEM_REFs, so we only need to track
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,103 +0,0 @@
|
|||||||
From 0d55d24aa4e47c40f74e0281d023089cfaafcf74 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jakub Jelinek <jakub@redhat.com>
|
|
||||||
Date: Thu, 6 May 2021 14:05:06 +0200
|
|
||||||
Subject: [PATCH 16/35] [Backport] phiopt: Use gphi *phi instead of gimple *phi
|
|
||||||
some more
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cfd65e8d5299a7cf7d2ecd92b0e24ea4cfb697d9
|
|
||||||
|
|
||||||
Various functions in phiopt are also called with a gphi * but use
|
|
||||||
gimple * argument for it.
|
|
||||||
|
|
||||||
2021-05-06 Jakub Jelinek <jakub@redhat.com>
|
|
||||||
|
|
||||||
* tree-ssa-phiopt.c (value_replacement, minmax_replacement,
|
|
||||||
abs_replacement, xor_replacement,
|
|
||||||
cond_removal_in_popcount_clz_ctz_pattern,
|
|
||||||
replace_phi_edge_with_variable): Change type of phi argument from
|
|
||||||
gimple * to gphi *.
|
|
||||||
---
|
|
||||||
gcc/tree-ssa-phiopt.c | 22 ++++++++++------------
|
|
||||||
1 file changed, 10 insertions(+), 12 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 51a2d3684..045a7b1b8 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -59,21 +59,21 @@ static bool match_simplify_replacement (basic_block, basic_block,
|
|
||||||
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
|
|
||||||
gimple *);
|
|
||||||
static int value_replacement (basic_block, basic_block,
|
|
||||||
- edge, edge, gimple *, tree, tree);
|
|
||||||
+ edge, edge, gphi *, tree, tree);
|
|
||||||
static bool minmax_replacement (basic_block, basic_block,
|
|
||||||
- edge, edge, gimple *, tree, tree);
|
|
||||||
+ edge, edge, gphi *, tree, tree);
|
|
||||||
static bool abs_replacement (basic_block, basic_block,
|
|
||||||
- edge, edge, gimple *, tree, tree);
|
|
||||||
+ edge, edge, gphi *, tree, tree);
|
|
||||||
static bool spaceship_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gphi *, tree, tree);
|
|
||||||
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
|
||||||
- edge, edge, gimple *,
|
|
||||||
+ edge, edge, gphi *,
|
|
||||||
tree, tree);
|
|
||||||
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
|
|
||||||
hash_set<tree> *);
|
|
||||||
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
|
|
||||||
static hash_set<tree> * get_non_trapping ();
|
|
||||||
-static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
|
|
||||||
+static void replace_phi_edge_with_variable (basic_block, edge, gphi *, tree);
|
|
||||||
static void hoist_adjacent_loads (basic_block, basic_block,
|
|
||||||
basic_block, basic_block);
|
|
||||||
static bool do_phiopt_pattern (basic_block, basic_block, basic_block);
|
|
||||||
@@ -389,7 +389,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
|
|
||||||
static void
|
|
||||||
replace_phi_edge_with_variable (basic_block cond_block,
|
|
||||||
- edge e, gimple *phi, tree new_tree)
|
|
||||||
+ edge e, gphi *phi, tree new_tree)
|
|
||||||
{
|
|
||||||
basic_block bb = gimple_bb (phi);
|
|
||||||
basic_block block_to_remove;
|
|
||||||
@@ -1129,8 +1129,7 @@ absorbing_element_p (tree_code code, tree arg, bool right, tree rval)
|
|
||||||
|
|
||||||
static int
|
|
||||||
value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
- edge e0, edge e1, gimple *phi,
|
|
||||||
- tree arg0, tree arg1)
|
|
||||||
+ edge e0, edge e1, gphi *phi, tree arg0, tree arg1)
|
|
||||||
{
|
|
||||||
gimple_stmt_iterator gsi;
|
|
||||||
gimple *cond;
|
|
||||||
@@ -1438,8 +1437,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
|
|
||||||
static bool
|
|
||||||
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
- edge e0, edge e1, gimple *phi,
|
|
||||||
- tree arg0, tree arg1)
|
|
||||||
+ edge e0, edge e1, gphi *phi, tree arg0, tree arg1)
|
|
||||||
{
|
|
||||||
tree result, type, rhs;
|
|
||||||
gcond *cond;
|
|
||||||
@@ -2240,7 +2238,7 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
static bool
|
|
||||||
cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
|
||||||
basic_block middle_bb,
|
|
||||||
- edge e1, edge e2, gimple *phi,
|
|
||||||
+ edge e1, edge e2, gphi *phi,
|
|
||||||
tree arg0, tree arg1)
|
|
||||||
{
|
|
||||||
gimple *cond;
|
|
||||||
@@ -2398,7 +2396,7 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
|
||||||
static bool
|
|
||||||
abs_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
edge e0 ATTRIBUTE_UNUSED, edge e1,
|
|
||||||
- gimple *phi, tree arg0, tree arg1)
|
|
||||||
+ gphi *phi, tree arg0, tree arg1)
|
|
||||||
{
|
|
||||||
tree result;
|
|
||||||
gassign *new_stmt;
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,212 +0,0 @@
|
|||||||
From 33dc778a34d7b93978efe922bb1b4583d8e6c4bb Mon Sep 17 00:00:00 2001
|
|
||||||
From: Roger Sayle <roger@nextmovesoftware.com>
|
|
||||||
Date: Mon, 2 Aug 2021 13:27:53 +0100
|
|
||||||
Subject: [PATCH 17/35] [Backport] Optimize x ? bswap(x) : 0 in tree-ssa-phiopt
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f9fcf754825a1e01033336f84c18690aaa971a6f
|
|
||||||
|
|
||||||
Many thanks again to Jakub Jelinek for a speedy fix for PR 101642.
|
|
||||||
Interestingly, that test case "bswap16(x) ? : x" also reveals a
|
|
||||||
missed optimization opportunity. The resulting "x ? bswap(x) : 0"
|
|
||||||
can be further simplified to just bswap(x).
|
|
||||||
|
|
||||||
Conveniently, tree-ssa-phiopt.c already recognizes/optimizes the
|
|
||||||
related "x ? popcount(x) : 0", so this patch simply makes that
|
|
||||||
transformation more general, additionally handling bswap, parity,
|
|
||||||
ffs and clrsb. All of the required infrastructure is already
|
|
||||||
present thanks to Jakub previously adding support for clz/ctz.
|
|
||||||
To reflect this generalization, the name of the function is changed
|
|
||||||
from cond_removal_in_popcount_clz_ctz_pattern to the hopefully
|
|
||||||
equally descriptive cond_removal_in_builtin_zero_pattern.
|
|
||||||
|
|
||||||
2021-08-02 Roger Sayle <roger@nextmovesoftware.com>
|
|
||||||
|
|
||||||
gcc/ChangeLog
|
|
||||||
* tree-ssa-phiopt.c (cond_removal_in_builtin_zero_pattern):
|
|
||||||
Renamed from cond_removal_in_popcount_clz_ctz_pattern.
|
|
||||||
Add support for BSWAP, FFS, PARITY and CLRSB builtins.
|
|
||||||
(tree_ssa_phiopt_worker): Update call to function above.
|
|
||||||
|
|
||||||
gcc/testsuite/ChangeLog
|
|
||||||
* gcc.dg/tree-ssa/phi-opt-25.c: New test case.
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c | 83 ++++++++++++++++++++++
|
|
||||||
gcc/tree-ssa-phiopt.c | 37 +++++++---
|
|
||||||
2 files changed, 109 insertions(+), 11 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..c52c92e1d
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
|
|
||||||
@@ -0,0 +1,83 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
|
||||||
+
|
|
||||||
+unsigned short test_bswap16(unsigned short x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_bswap16(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+unsigned int test_bswap32(unsigned int x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_bswap32(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+unsigned long long test_bswap64(unsigned long long x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_bswap64(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int test_clrsb(int x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_clrsb(x) : (__SIZEOF_INT__*8-1);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int test_clrsbl(long x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_clrsbl(x) : (__SIZEOF_LONG__*8-1);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int test_clrsbll(long long x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_clrsbll(x) : (__SIZEOF_LONG_LONG__*8-1);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+#if 0
|
|
||||||
+/* BUILT_IN_FFS is transformed by match.pd */
|
|
||||||
+int test_ffs(unsigned int x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_ffs(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int test_ffsl(unsigned long x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_ffsl(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int test_ffsll(unsigned long long x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_ffsll(x) : 0;
|
|
||||||
+}
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+int test_parity(int x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_parity(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int test_parityl(long x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_parityl(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int test_parityll(long long x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_parityll(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int test_popcount(int x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_popcount(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int test_popcountl(long x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_popcountl(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int test_popcountll(long long x)
|
|
||||||
+{
|
|
||||||
+ return x ? __builtin_popcountll(x) : 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
|
|
||||||
+
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 045a7b1b8..21ac08145 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -66,9 +66,9 @@ static bool abs_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gphi *, tree, tree);
|
|
||||||
static bool spaceship_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gphi *, tree, tree);
|
|
||||||
-static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
|
||||||
- edge, edge, gphi *,
|
|
||||||
- tree, tree);
|
|
||||||
+static bool cond_removal_in_builtin_zero_pattern (basic_block, basic_block,
|
|
||||||
+ edge, edge, gphi *,
|
|
||||||
+ tree, tree);
|
|
||||||
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
|
|
||||||
hash_set<tree> *);
|
|
||||||
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
|
|
||||||
@@ -355,9 +355,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (!early_p
|
|
||||||
- && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
|
|
||||||
- e2, phi, arg0,
|
|
||||||
- arg1))
|
|
||||||
+ && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
|
|
||||||
+ phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
@@ -2204,7 +2203,8 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* Convert
|
|
||||||
+/* Optimize x ? __builtin_fun (x) : C, where C is __builtin_fun (0).
|
|
||||||
+ Convert
|
|
||||||
|
|
||||||
<bb 2>
|
|
||||||
if (b_4(D) != 0)
|
|
||||||
@@ -2236,10 +2236,10 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
instead of 0 above it uses the value from that macro. */
|
|
||||||
|
|
||||||
static bool
|
|
||||||
-cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
|
||||||
- basic_block middle_bb,
|
|
||||||
- edge e1, edge e2, gphi *phi,
|
|
||||||
- tree arg0, tree arg1)
|
|
||||||
+cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
|
|
||||||
+ basic_block middle_bb,
|
|
||||||
+ edge e1, edge e2, gphi *phi,
|
|
||||||
+ tree arg0, tree arg1)
|
|
||||||
{
|
|
||||||
gimple *cond;
|
|
||||||
gimple_stmt_iterator gsi, gsi_from;
|
|
||||||
@@ -2287,6 +2287,12 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
|
||||||
int val = 0;
|
|
||||||
switch (cfn)
|
|
||||||
{
|
|
||||||
+ case CFN_BUILT_IN_BSWAP16:
|
|
||||||
+ case CFN_BUILT_IN_BSWAP32:
|
|
||||||
+ case CFN_BUILT_IN_BSWAP64:
|
|
||||||
+ case CFN_BUILT_IN_BSWAP128:
|
|
||||||
+ CASE_CFN_FFS:
|
|
||||||
+ CASE_CFN_PARITY:
|
|
||||||
CASE_CFN_POPCOUNT:
|
|
||||||
break;
|
|
||||||
CASE_CFN_CLZ:
|
|
||||||
@@ -2315,6 +2321,15 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
+ case BUILT_IN_CLRSB:
|
|
||||||
+ val = TYPE_PRECISION (integer_type_node) - 1;
|
|
||||||
+ break;
|
|
||||||
+ case BUILT_IN_CLRSBL:
|
|
||||||
+ val = TYPE_PRECISION (long_integer_type_node) - 1;
|
|
||||||
+ break;
|
|
||||||
+ case BUILT_IN_CLRSBLL:
|
|
||||||
+ val = TYPE_PRECISION (long_long_integer_type_node) - 1;
|
|
||||||
+ break;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,251 +0,0 @@
|
|||||||
From 77398954ce517aa011b7a254c7aa2858521b2093 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Richard Biener <rguenther@suse.de>
|
|
||||||
Date: Mon, 15 Nov 2021 15:19:36 +0100
|
|
||||||
Subject: [PATCH 18/35] [Backport] tree-optimization/102880 - make PHI-OPT
|
|
||||||
recognize more CFGs
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f98f373dd822b35c52356b753d528924e9f89678
|
|
||||||
|
|
||||||
This allows extra edges into the middle BB for the PHI-OPT
|
|
||||||
transforms using replace_phi_edge_with_variable that do not
|
|
||||||
end up moving stmts from that middle BB. This avoids regressing
|
|
||||||
gcc.dg/tree-ssa/ssa-hoist-4.c with the actual fix for PR102880
|
|
||||||
where CFG cleanup has the choice to remove two forwarders and
|
|
||||||
picks "the wrong" leading to
|
|
||||||
|
|
||||||
if (a > b) /
|
|
||||||
/\ /
|
|
||||||
/ <BB>
|
|
||||||
/ |
|
|
||||||
# PHI <a, b>
|
|
||||||
|
|
||||||
rather than
|
|
||||||
|
|
||||||
if (a > b) |
|
|
||||||
/\ |
|
|
||||||
<BB> \ |
|
|
||||||
/ \ |
|
|
||||||
# PHI <a, b, b>
|
|
||||||
|
|
||||||
but it's relatively straight-forward to support extra edges
|
|
||||||
into the middle-BB in paths ending in replace_phi_edge_with_variable
|
|
||||||
and that do not require moving stmts. That's because we really
|
|
||||||
only want to remove the edge from the condition to the middle BB.
|
|
||||||
Of course actually doing that means updating dominators in non-trivial
|
|
||||||
ways which is why I kept the original code for the single edge
|
|
||||||
case and simply defer to CFG cleanup by adjusting the condition for
|
|
||||||
the complicated case.
|
|
||||||
|
|
||||||
The testcase needs to be a GIMPLE one since it's quite unreliable
|
|
||||||
to produce the desired CFG.
|
|
||||||
|
|
||||||
2021-11-15 Richard Biener <rguenther@suse.de>
|
|
||||||
|
|
||||||
PR tree-optimization/102880
|
|
||||||
* tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Push
|
|
||||||
single_pred (bb1) condition to places that really need it.
|
|
||||||
(match_simplify_replacement): Likewise.
|
|
||||||
(value_replacement): Likewise.
|
|
||||||
(replace_phi_edge_with_variable): Deal with extra edges
|
|
||||||
into the middle BB.
|
|
||||||
|
|
||||||
* gcc.dg/tree-ssa/phi-opt-26.c: New testcase.
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 +++++++++
|
|
||||||
gcc/tree-ssa-phiopt.c | 73 +++++++++++++---------
|
|
||||||
2 files changed, 75 insertions(+), 29 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..21aa66e38
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
|
||||||
@@ -0,0 +1,31 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */
|
|
||||||
+
|
|
||||||
+int __GIMPLE (ssa,startwith("phiopt"))
|
|
||||||
+foo (int a, int b, int flag)
|
|
||||||
+{
|
|
||||||
+ int res;
|
|
||||||
+
|
|
||||||
+ __BB(2):
|
|
||||||
+ if (flag_2(D) != 0)
|
|
||||||
+ goto __BB6;
|
|
||||||
+ else
|
|
||||||
+ goto __BB4;
|
|
||||||
+
|
|
||||||
+ __BB(4):
|
|
||||||
+ if (a_3(D) > b_4(D))
|
|
||||||
+ goto __BB7;
|
|
||||||
+ else
|
|
||||||
+ goto __BB6;
|
|
||||||
+
|
|
||||||
+ __BB(6):
|
|
||||||
+ goto __BB7;
|
|
||||||
+
|
|
||||||
+ __BB(7):
|
|
||||||
+ res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D));
|
|
||||||
+ return res_1;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* We should be able to detect MAX despite the extra edge into
|
|
||||||
+ the middle BB. */
|
|
||||||
+/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 21ac08145..079d29e74 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -219,7 +219,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
|
|
||||||
/* If either bb1's succ or bb2 or bb2's succ is non NULL. */
|
|
||||||
if (EDGE_COUNT (bb1->succs) == 0
|
|
||||||
- || bb2 == NULL
|
|
||||||
|| EDGE_COUNT (bb2->succs) == 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
@@ -279,14 +278,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
|| (e1->flags & EDGE_FALLTHRU) == 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
- /* Also make sure that bb1 only have one predecessor and that it
|
|
||||||
- is bb. */
|
|
||||||
- if (!single_pred_p (bb1)
|
|
||||||
- || single_pred (bb1) != bb)
|
|
||||||
- continue;
|
|
||||||
-
|
|
||||||
if (do_store_elim)
|
|
||||||
{
|
|
||||||
+ /* Also make sure that bb1 only have one predecessor and that it
|
|
||||||
+ is bb. */
|
|
||||||
+ if (!single_pred_p (bb1)
|
|
||||||
+ || single_pred (bb1) != bb)
|
|
||||||
+ continue;
|
|
||||||
+
|
|
||||||
/* bb1 is the middle block, bb2 the join block, bb the split block,
|
|
||||||
e1 the fallthrough edge from bb1 to bb2. We can't do the
|
|
||||||
optimization if the join block has more than two predecessors. */
|
|
||||||
@@ -331,10 +330,11 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
node. */
|
|
||||||
gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
|
|
||||||
|
|
||||||
- gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
|
|
||||||
- arg0, arg1,
|
|
||||||
- cond_stmt);
|
|
||||||
- if (newphi != NULL)
|
|
||||||
+ gphi *newphi;
|
|
||||||
+ if (single_pred_p (bb1)
|
|
||||||
+ && (newphi = factor_out_conditional_conversion (e1, e2, phi,
|
|
||||||
+ arg0, arg1,
|
|
||||||
+ cond_stmt)))
|
|
||||||
{
|
|
||||||
phi = newphi;
|
|
||||||
/* factor_out_conditional_conversion may create a new PHI in
|
|
||||||
@@ -355,12 +355,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (!early_p
|
|
||||||
+ && single_pred_p (bb1)
|
|
||||||
&& cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
|
|
||||||
phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
- else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
+ else if (single_pred_p (bb1)
|
|
||||||
+ && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -391,35 +393,41 @@ replace_phi_edge_with_variable (basic_block cond_block,
|
|
||||||
edge e, gphi *phi, tree new_tree)
|
|
||||||
{
|
|
||||||
basic_block bb = gimple_bb (phi);
|
|
||||||
- basic_block block_to_remove;
|
|
||||||
gimple_stmt_iterator gsi;
|
|
||||||
|
|
||||||
/* Change the PHI argument to new. */
|
|
||||||
SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
|
|
||||||
|
|
||||||
/* Remove the empty basic block. */
|
|
||||||
+ edge edge_to_remove;
|
|
||||||
if (EDGE_SUCC (cond_block, 0)->dest == bb)
|
|
||||||
+ edge_to_remove = EDGE_SUCC (cond_block, 1);
|
|
||||||
+ else
|
|
||||||
+ edge_to_remove = EDGE_SUCC (cond_block, 0);
|
|
||||||
+ if (EDGE_COUNT (edge_to_remove->dest->preds) == 1)
|
|
||||||
{
|
|
||||||
- EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
|
|
||||||
- EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
|
||||||
- EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();
|
|
||||||
-
|
|
||||||
- block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
|
|
||||||
+ e->flags |= EDGE_FALLTHRU;
|
|
||||||
+ e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
|
||||||
+ e->probability = profile_probability::always ();
|
|
||||||
+ delete_basic_block (edge_to_remove->dest);
|
|
||||||
+
|
|
||||||
+ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
|
|
||||||
+ gsi = gsi_last_bb (cond_block);
|
|
||||||
+ gsi_remove (&gsi, true);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
- EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
|
|
||||||
- EDGE_SUCC (cond_block, 1)->flags
|
|
||||||
- &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
|
||||||
- EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();
|
|
||||||
-
|
|
||||||
- block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
|
|
||||||
+ /* If there are other edges into the middle block make
|
|
||||||
+ CFG cleanup deal with the edge removal to avoid
|
|
||||||
+ updating dominators here in a non-trivial way. */
|
|
||||||
+ gcond *cond = as_a <gcond *> (last_stmt (cond_block));
|
|
||||||
+ if (edge_to_remove->flags & EDGE_TRUE_VALUE)
|
|
||||||
+ gimple_cond_make_false (cond);
|
|
||||||
+ else
|
|
||||||
+ gimple_cond_make_true (cond);
|
|
||||||
}
|
|
||||||
- delete_basic_block (block_to_remove);
|
|
||||||
|
|
||||||
- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
|
|
||||||
- gsi = gsi_last_bb (cond_block);
|
|
||||||
- gsi_remove (&gsi, true);
|
|
||||||
+ statistics_counter_event (cfun, "Replace PHI with variable", 1);
|
|
||||||
|
|
||||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
fprintf (dump_file,
|
|
||||||
@@ -846,6 +854,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
allow it and move it once the transformation is done. */
|
|
||||||
if (!empty_block_p (middle_bb))
|
|
||||||
{
|
|
||||||
+ if (!single_pred_p (middle_bb))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
stmt_to_move = last_and_only_stmt (middle_bb);
|
|
||||||
if (!stmt_to_move)
|
|
||||||
return false;
|
|
||||||
@@ -1225,6 +1236,11 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
+ if (!single_pred_p (middle_bb))
|
|
||||||
+ return 0;
|
|
||||||
+ statistics_counter_event (cfun, "Replace PHI with "
|
|
||||||
+ "variable/value_replacement", 1);
|
|
||||||
+
|
|
||||||
/* Replace the PHI arguments with arg. */
|
|
||||||
SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
|
|
||||||
SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
|
|
||||||
@@ -1239,7 +1255,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
-
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Now optimize (x != 0) ? x + y : y to just x + y. */
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,250 +0,0 @@
|
|||||||
From a2f5e6f38fe7b5b32a252643b00dd2d7ab0e3fac Mon Sep 17 00:00:00 2001
|
|
||||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
|
||||||
Date: Tue, 12 May 2020 09:01:10 +0100
|
|
||||||
Subject: [PATCH 19/35] [Backport] tree: Add vector_element_bits(_tree)
|
|
||||||
[PR94980 1/3]
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d17a896da1e898928d337596d029f0ece0039d55
|
|
||||||
|
|
||||||
A lot of code that wants to know the number of bits in a vector
|
|
||||||
element gets that information from the element's TYPE_SIZE,
|
|
||||||
which is always equal to TYPE_SIZE_UNIT * BITS_PER_UNIT.
|
|
||||||
This doesn't work for SVE and AVX512-style packed boolean vectors,
|
|
||||||
where several elements can occupy a single byte.
|
|
||||||
|
|
||||||
This patch introduces a new pair of helpers for getting the true
|
|
||||||
(possibly sub-byte) size. I made a token attempt to convert obvious
|
|
||||||
element size calculations, but I'm sure I missed some.
|
|
||||||
|
|
||||||
2020-05-12 Richard Sandiford <richard.sandiford@arm.com>
|
|
||||||
|
|
||||||
gcc/
|
|
||||||
PR tree-optimization/94980
|
|
||||||
* tree.h (vector_element_bits, vector_element_bits_tree): Declare.
|
|
||||||
* tree.c (vector_element_bits, vector_element_bits_tree): New.
|
|
||||||
* match.pd: Use the new functions instead of determining the
|
|
||||||
vector element size directly from TYPE_SIZE(_UNIT).
|
|
||||||
* tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise.
|
|
||||||
* tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise.
|
|
||||||
* tree-vect-stmts.c (vect_is_simple_cond): Likewise.
|
|
||||||
* tree-vect-generic.c (expand_vector_piecewise): Likewise.
|
|
||||||
(expand_vector_conversion): Likewise.
|
|
||||||
(expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as
|
|
||||||
a divisor. Convert the dividend to bits to compensate.
|
|
||||||
* tree-vect-loop.c (vectorizable_live_operation): Call
|
|
||||||
vector_element_bits instead of open-coding it.
|
|
||||||
---
|
|
||||||
gcc/ChangeLog | 17 +++++++++++++++++
|
|
||||||
gcc/match.pd | 2 +-
|
|
||||||
gcc/tree-vect-data-refs.c | 2 +-
|
|
||||||
gcc/tree-vect-generic.c | 19 +++++++------------
|
|
||||||
gcc/tree-vect-loop.c | 4 +---
|
|
||||||
gcc/tree-vect-patterns.c | 3 +--
|
|
||||||
gcc/tree-vect-stmts.c | 3 +--
|
|
||||||
gcc/tree.c | 24 ++++++++++++++++++++++++
|
|
||||||
gcc/tree.h | 2 ++
|
|
||||||
9 files changed, 55 insertions(+), 21 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
|
|
||||||
index 3b1384e70..07aea9b86 100644
|
|
||||||
--- a/gcc/ChangeLog
|
|
||||||
+++ b/gcc/ChangeLog
|
|
||||||
@@ -1,3 +1,20 @@
|
|
||||||
+2020-05-12 Richard Sandiford <richard.sandiford@arm.com>
|
|
||||||
+
|
|
||||||
+ PR tree-optimization/94980
|
|
||||||
+ * tree.h (vector_element_bits, vector_element_bits_tree): Declare.
|
|
||||||
+ * tree.c (vector_element_bits, vector_element_bits_tree): New.
|
|
||||||
+ * match.pd: Use the new functions instead of determining the
|
|
||||||
+ vector element size directly from TYPE_SIZE(_UNIT).
|
|
||||||
+ * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise.
|
|
||||||
+ * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise.
|
|
||||||
+ * tree-vect-stmts.c (vect_is_simple_cond): Likewise.
|
|
||||||
+ * tree-vect-generic.c (expand_vector_piecewise): Likewise.
|
|
||||||
+ (expand_vector_conversion): Likewise.
|
|
||||||
+ (expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as
|
|
||||||
+ a divisor. Convert the dividend to bits to compensate.
|
|
||||||
+ * tree-vect-loop.c (vectorizable_live_operation): Call
|
|
||||||
+ vector_element_bits instead of open-coding it.
|
|
||||||
+
|
|
||||||
2021-04-08 Release Manager
|
|
||||||
|
|
||||||
* GCC 10.3.0 released.
|
|
||||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
|
||||||
index 5899eea95..79a0228d2 100644
|
|
||||||
--- a/gcc/match.pd
|
|
||||||
+++ b/gcc/match.pd
|
|
||||||
@@ -6236,7 +6236,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|
||||||
}
|
|
||||||
(if (ins)
|
|
||||||
(bit_insert { op0; } { ins; }
|
|
||||||
- { bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); })
|
|
||||||
+ { bitsize_int (at * vector_element_bits (type)); })
|
|
||||||
(if (changed)
|
|
||||||
(vec_perm { op0; } { op1; } { op2; }))))))))))
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
|
|
||||||
index d78b06455..e4466a4f3 100644
|
|
||||||
--- a/gcc/tree-vect-data-refs.c
|
|
||||||
+++ b/gcc/tree-vect-data-refs.c
|
|
||||||
@@ -3709,7 +3709,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
|
|
||||||
tree *offset_vectype_out)
|
|
||||||
{
|
|
||||||
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
|
|
||||||
- unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
|
|
||||||
+ unsigned int element_bits = vector_element_bits (vectype);
|
|
||||||
if (element_bits != memory_bits)
|
|
||||||
/* For now the vector elements must be the same width as the
|
|
||||||
memory elements. */
|
|
||||||
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
|
|
||||||
index c10492034..37c3956a4 100644
|
|
||||||
--- a/gcc/tree-vect-generic.c
|
|
||||||
+++ b/gcc/tree-vect-generic.c
|
|
||||||
@@ -276,8 +276,7 @@ expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
|
|
||||||
tree part_width = TYPE_SIZE (inner_type);
|
|
||||||
tree index = bitsize_int (0);
|
|
||||||
int nunits = nunits_for_known_piecewise_op (type);
|
|
||||||
- int delta = tree_to_uhwi (part_width)
|
|
||||||
- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
|
|
||||||
+ int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
|
|
||||||
int i;
|
|
||||||
location_t loc = gimple_location (gsi_stmt (*gsi));
|
|
||||||
|
|
||||||
@@ -357,8 +356,7 @@ expand_vector_addition (gimple_stmt_iterator *gsi,
|
|
||||||
elem_op_func f, elem_op_func f_parallel,
|
|
||||||
tree type, tree a, tree b, enum tree_code code)
|
|
||||||
{
|
|
||||||
- int parts_per_word = UNITS_PER_WORD
|
|
||||||
- / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
|
|
||||||
+ int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
|
|
||||||
|
|
||||||
if (INTEGRAL_TYPE_P (TREE_TYPE (type))
|
|
||||||
&& parts_per_word >= 4
|
|
||||||
@@ -1733,19 +1731,17 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
|
|
||||||
optab optab1 = unknown_optab;
|
|
||||||
|
|
||||||
gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
|
|
||||||
- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (ret_type))));
|
|
||||||
- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (arg_type))));
|
|
||||||
if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
|
|
||||||
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
|
|
||||||
code = FIX_TRUNC_EXPR;
|
|
||||||
else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
|
|
||||||
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
|
|
||||||
code = FLOAT_EXPR;
|
|
||||||
- if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
|
|
||||||
- < tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
|
|
||||||
+ unsigned int ret_elt_bits = vector_element_bits (ret_type);
|
|
||||||
+ unsigned int arg_elt_bits = vector_element_bits (arg_type);
|
|
||||||
+ if (ret_elt_bits < arg_elt_bits)
|
|
||||||
modifier = NARROW;
|
|
||||||
- else if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
|
|
||||||
- > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
|
|
||||||
+ else if (ret_elt_bits > arg_elt_bits)
|
|
||||||
modifier = WIDEN;
|
|
||||||
|
|
||||||
if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
|
|
||||||
@@ -1908,8 +1904,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
|
|
||||||
tree part_width = TYPE_SIZE (compute_type);
|
|
||||||
tree index = bitsize_int (0);
|
|
||||||
int nunits = nunits_for_known_piecewise_op (arg_type);
|
|
||||||
- int delta = tree_to_uhwi (part_width)
|
|
||||||
- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)));
|
|
||||||
+ int delta = tree_to_uhwi (part_width) / arg_elt_bits;
|
|
||||||
int i;
|
|
||||||
location_t loc = gimple_location (gsi_stmt (*gsi));
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
|
||||||
index 899b56087..7990e31de 100644
|
|
||||||
--- a/gcc/tree-vect-loop.c
|
|
||||||
+++ b/gcc/tree-vect-loop.c
|
|
||||||
@@ -8059,9 +8059,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
|
|
||||||
: gimple_get_lhs (stmt);
|
|
||||||
lhs_type = TREE_TYPE (lhs);
|
|
||||||
|
|
||||||
- bitsize = (VECTOR_BOOLEAN_TYPE_P (vectype)
|
|
||||||
- ? bitsize_int (TYPE_PRECISION (TREE_TYPE (vectype)))
|
|
||||||
- : TYPE_SIZE (TREE_TYPE (vectype)));
|
|
||||||
+ bitsize = vector_element_bits_tree (vectype);
|
|
||||||
vec_bitsize = TYPE_SIZE (vectype);
|
|
||||||
|
|
||||||
/* Get the vectorized lhs of STMT and the lane to use (counted in bits). */
|
|
||||||
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
|
|
||||||
index 84d7ddb17..b076740ef 100644
|
|
||||||
--- a/gcc/tree-vect-patterns.c
|
|
||||||
+++ b/gcc/tree-vect-patterns.c
|
|
||||||
@@ -4406,8 +4406,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
||||||
|| dt == vect_constant_def))
|
|
||||||
{
|
|
||||||
tree wide_scalar_type = build_nonstandard_integer_type
|
|
||||||
- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
|
|
||||||
- TYPE_UNSIGNED (rhs1_type));
|
|
||||||
+ (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
|
|
||||||
tree vectype3 = get_vectype_for_scalar_type (vinfo,
|
|
||||||
wide_scalar_type);
|
|
||||||
if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
|
|
||||||
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
|
||||||
index 4636b7ba2..0bdf9a547 100644
|
|
||||||
--- a/gcc/tree-vect-stmts.c
|
|
||||||
+++ b/gcc/tree-vect-stmts.c
|
|
||||||
@@ -10717,8 +10717,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
|
|
||||||
&& tree_int_cst_lt (TYPE_SIZE (scalar_type),
|
|
||||||
TYPE_SIZE (TREE_TYPE (vectype))))
|
|
||||||
scalar_type = build_nonstandard_integer_type
|
|
||||||
- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
|
|
||||||
- TYPE_UNSIGNED (scalar_type));
|
|
||||||
+ (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
|
|
||||||
*comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
|
|
||||||
slp_node);
|
|
||||||
}
|
|
||||||
diff --git a/gcc/tree.c b/gcc/tree.c
|
|
||||||
index 3e6647ae0..9a0cedf10 100644
|
|
||||||
--- a/gcc/tree.c
|
|
||||||
+++ b/gcc/tree.c
|
|
||||||
@@ -13892,6 +13892,30 @@ vector_type_mode (const_tree t)
|
|
||||||
return mode;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Return the size in bits of each element of vector type TYPE. */
|
|
||||||
+
|
|
||||||
+unsigned int
|
|
||||||
+vector_element_bits (const_tree type)
|
|
||||||
+{
|
|
||||||
+ gcc_checking_assert (VECTOR_TYPE_P (type));
|
|
||||||
+ if (VECTOR_BOOLEAN_TYPE_P (type))
|
|
||||||
+ return vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (type)),
|
|
||||||
+ TYPE_VECTOR_SUBPARTS (type));
|
|
||||||
+ return tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Calculate the size in bits of each element of vector type TYPE
|
|
||||||
+ and return the result as a tree of type bitsizetype. */
|
|
||||||
+
|
|
||||||
+tree
|
|
||||||
+vector_element_bits_tree (const_tree type)
|
|
||||||
+{
|
|
||||||
+ gcc_checking_assert (VECTOR_TYPE_P (type));
|
|
||||||
+ if (VECTOR_BOOLEAN_TYPE_P (type))
|
|
||||||
+ return bitsize_int (vector_element_bits (type));
|
|
||||||
+ return TYPE_SIZE (TREE_TYPE (type));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Verify that basic properties of T match TV and thus T can be a variant of
|
|
||||||
TV. TV should be the more specified variant (i.e. the main variant). */
|
|
||||||
|
|
||||||
diff --git a/gcc/tree.h b/gcc/tree.h
|
|
||||||
index bddc6e528..c66207fa0 100644
|
|
||||||
--- a/gcc/tree.h
|
|
||||||
+++ b/gcc/tree.h
|
|
||||||
@@ -1996,6 +1996,8 @@ class auto_suppress_location_wrappers
|
|
||||||
|
|
||||||
extern machine_mode element_mode (const_tree);
|
|
||||||
extern machine_mode vector_type_mode (const_tree);
|
|
||||||
+extern unsigned int vector_element_bits (const_tree);
|
|
||||||
+extern tree vector_element_bits_tree (const_tree);
|
|
||||||
|
|
||||||
/* The "canonical" type for this type node, which is used by frontends to
|
|
||||||
compare the type for equality with another type. If two types are
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,379 +0,0 @@
|
|||||||
From 21d265af074726b166e08301a2f847c474fcb680 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
|
||||||
Date: Tue, 30 Nov 2021 09:52:24 +0000
|
|
||||||
Subject: [PATCH 21/35] [Backport] gimple-match: Add a gimple_extract_op
|
|
||||||
function
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=33973fa754de1f95d459bfca66c0d80deec36537
|
|
||||||
|
|
||||||
code_helper and gimple_match_op seem like generally useful ways
|
|
||||||
of summing up a gimple_assign or gimple_call (or gimple_cond).
|
|
||||||
This patch adds a gimple_extract_op function that can be used
|
|
||||||
for that.
|
|
||||||
|
|
||||||
gcc/
|
|
||||||
* gimple-match.h (code_helper): Add functions for querying whether
|
|
||||||
the code represents an internal_fn or a built_in_function.
|
|
||||||
Provide explicit conversion operators for both cases.
|
|
||||||
(gimple_extract_op): Declare.
|
|
||||||
* gimple-match-head.c (gimple_extract): New function, extracted from...
|
|
||||||
(gimple_simplify): ...here.
|
|
||||||
(gimple_extract_op): New function.
|
|
||||||
---
|
|
||||||
gcc/gimple-match-head.c | 219 ++++++++++++++++++++--------------------
|
|
||||||
gcc/gimple-match.h | 27 +++++
|
|
||||||
2 files changed, 135 insertions(+), 111 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
|
|
||||||
index 9b3e7298d..c1dea1734 100644
|
|
||||||
--- a/gcc/gimple-match-head.c
|
|
||||||
+++ b/gcc/gimple-match-head.c
|
|
||||||
@@ -884,12 +884,20 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* The main STMT based simplification entry. It is used by the fold_stmt
|
|
||||||
- and the fold_stmt_to_constant APIs. */
|
|
||||||
+/* Common subroutine of gimple_extract_op and gimple_simplify. Try to
|
|
||||||
+ describe STMT in RES_OP, returning true on success. Before recording
|
|
||||||
+ an operand, call:
|
|
||||||
|
|
||||||
-bool
|
|
||||||
-gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
|
||||||
- tree (*valueize)(tree), tree (*top_valueize)(tree))
|
|
||||||
+ - VALUEIZE_CONDITION for a COND_EXPR condition
|
|
||||||
+ - VALUEIZE_OP for every other top-level operand
|
|
||||||
+
|
|
||||||
+ Both routines take a tree argument and returns a tree. */
|
|
||||||
+
|
|
||||||
+template<typename ValueizeOp, typename ValueizeCondition>
|
|
||||||
+inline bool
|
|
||||||
+gimple_extract (gimple *stmt, gimple_match_op *res_op,
|
|
||||||
+ ValueizeOp valueize_op,
|
|
||||||
+ ValueizeCondition valueize_condition)
|
|
||||||
{
|
|
||||||
switch (gimple_code (stmt))
|
|
||||||
{
|
|
||||||
@@ -905,101 +913,50 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
|
||||||
|| code == VIEW_CONVERT_EXPR)
|
|
||||||
{
|
|
||||||
tree op0 = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
|
|
||||||
- bool valueized = false;
|
|
||||||
- op0 = do_valueize (op0, top_valueize, valueized);
|
|
||||||
- res_op->set_op (code, type, op0);
|
|
||||||
- return (gimple_resimplify1 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
+ res_op->set_op (code, type, valueize_op (op0));
|
|
||||||
+ return true;
|
|
||||||
}
|
|
||||||
else if (code == BIT_FIELD_REF)
|
|
||||||
{
|
|
||||||
tree rhs1 = gimple_assign_rhs1 (stmt);
|
|
||||||
- tree op0 = TREE_OPERAND (rhs1, 0);
|
|
||||||
- bool valueized = false;
|
|
||||||
- op0 = do_valueize (op0, top_valueize, valueized);
|
|
||||||
+ tree op0 = valueize_op (TREE_OPERAND (rhs1, 0));
|
|
||||||
res_op->set_op (code, type, op0,
|
|
||||||
TREE_OPERAND (rhs1, 1),
|
|
||||||
TREE_OPERAND (rhs1, 2),
|
|
||||||
REF_REVERSE_STORAGE_ORDER (rhs1));
|
|
||||||
- if (res_op->reverse)
|
|
||||||
- return valueized;
|
|
||||||
- return (gimple_resimplify3 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
+ return true;
|
|
||||||
}
|
|
||||||
- else if (code == SSA_NAME
|
|
||||||
- && top_valueize)
|
|
||||||
+ else if (code == SSA_NAME)
|
|
||||||
{
|
|
||||||
tree op0 = gimple_assign_rhs1 (stmt);
|
|
||||||
- tree valueized = top_valueize (op0);
|
|
||||||
- if (!valueized || op0 == valueized)
|
|
||||||
- return false;
|
|
||||||
- res_op->set_op (TREE_CODE (op0), type, valueized);
|
|
||||||
+ res_op->set_op (TREE_CODE (op0), type, valueize_op (op0));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case GIMPLE_UNARY_RHS:
|
|
||||||
{
|
|
||||||
tree rhs1 = gimple_assign_rhs1 (stmt);
|
|
||||||
- bool valueized = false;
|
|
||||||
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
|
|
||||||
- res_op->set_op (code, type, rhs1);
|
|
||||||
- return (gimple_resimplify1 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
+ res_op->set_op (code, type, valueize_op (rhs1));
|
|
||||||
+ return true;
|
|
||||||
}
|
|
||||||
case GIMPLE_BINARY_RHS:
|
|
||||||
{
|
|
||||||
- tree rhs1 = gimple_assign_rhs1 (stmt);
|
|
||||||
- tree rhs2 = gimple_assign_rhs2 (stmt);
|
|
||||||
- bool valueized = false;
|
|
||||||
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
|
|
||||||
- rhs2 = do_valueize (rhs2, top_valueize, valueized);
|
|
||||||
+ tree rhs1 = valueize_op (gimple_assign_rhs1 (stmt));
|
|
||||||
+ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt));
|
|
||||||
res_op->set_op (code, type, rhs1, rhs2);
|
|
||||||
- return (gimple_resimplify2 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
+ return true;
|
|
||||||
}
|
|
||||||
case GIMPLE_TERNARY_RHS:
|
|
||||||
{
|
|
||||||
- bool valueized = false;
|
|
||||||
tree rhs1 = gimple_assign_rhs1 (stmt);
|
|
||||||
- /* If this is a [VEC_]COND_EXPR first try to simplify an
|
|
||||||
- embedded GENERIC condition. */
|
|
||||||
- if (code == COND_EXPR
|
|
||||||
- || code == VEC_COND_EXPR)
|
|
||||||
- {
|
|
||||||
- if (COMPARISON_CLASS_P (rhs1))
|
|
||||||
- {
|
|
||||||
- tree lhs = TREE_OPERAND (rhs1, 0);
|
|
||||||
- tree rhs = TREE_OPERAND (rhs1, 1);
|
|
||||||
- lhs = do_valueize (lhs, top_valueize, valueized);
|
|
||||||
- rhs = do_valueize (rhs, top_valueize, valueized);
|
|
||||||
- gimple_match_op res_op2 (res_op->cond, TREE_CODE (rhs1),
|
|
||||||
- TREE_TYPE (rhs1), lhs, rhs);
|
|
||||||
- if ((gimple_resimplify2 (seq, &res_op2, valueize)
|
|
||||||
- || valueized)
|
|
||||||
- && res_op2.code.is_tree_code ())
|
|
||||||
- {
|
|
||||||
- valueized = true;
|
|
||||||
- if (TREE_CODE_CLASS ((enum tree_code) res_op2.code)
|
|
||||||
- == tcc_comparison)
|
|
||||||
- rhs1 = build2 (res_op2.code, TREE_TYPE (rhs1),
|
|
||||||
- res_op2.ops[0], res_op2.ops[1]);
|
|
||||||
- else if (res_op2.code == SSA_NAME
|
|
||||||
- || res_op2.code == INTEGER_CST
|
|
||||||
- || res_op2.code == VECTOR_CST)
|
|
||||||
- rhs1 = res_op2.ops[0];
|
|
||||||
- else
|
|
||||||
- valueized = false;
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- tree rhs2 = gimple_assign_rhs2 (stmt);
|
|
||||||
- tree rhs3 = gimple_assign_rhs3 (stmt);
|
|
||||||
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
|
|
||||||
- rhs2 = do_valueize (rhs2, top_valueize, valueized);
|
|
||||||
- rhs3 = do_valueize (rhs3, top_valueize, valueized);
|
|
||||||
+ if (code == COND_EXPR && COMPARISON_CLASS_P (rhs1))
|
|
||||||
+ rhs1 = valueize_condition (rhs1);
|
|
||||||
+ else
|
|
||||||
+ rhs1 = valueize_op (rhs1);
|
|
||||||
+ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt));
|
|
||||||
+ tree rhs3 = valueize_op (gimple_assign_rhs3 (stmt));
|
|
||||||
res_op->set_op (code, type, rhs1, rhs2, rhs3);
|
|
||||||
- return (gimple_resimplify3 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
+ return true;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
gcc_unreachable ();
|
|
||||||
@@ -1013,7 +970,6 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
|
||||||
&& gimple_call_num_args (stmt) >= 1
|
|
||||||
&& gimple_call_num_args (stmt) <= 5)
|
|
||||||
{
|
|
||||||
- bool valueized = false;
|
|
||||||
combined_fn cfn;
|
|
||||||
if (gimple_call_internal_p (stmt))
|
|
||||||
cfn = as_combined_fn (gimple_call_internal_fn (stmt));
|
|
||||||
@@ -1023,7 +979,7 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
|
||||||
if (!fn)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
- fn = do_valueize (fn, top_valueize, valueized);
|
|
||||||
+ fn = valueize_op (fn);
|
|
||||||
if (TREE_CODE (fn) != ADDR_EXPR
|
|
||||||
|| TREE_CODE (TREE_OPERAND (fn, 0)) != FUNCTION_DECL)
|
|
||||||
return false;
|
|
||||||
@@ -1039,47 +995,17 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
|
||||||
unsigned int num_args = gimple_call_num_args (stmt);
|
|
||||||
res_op->set_op (cfn, TREE_TYPE (gimple_call_lhs (stmt)), num_args);
|
|
||||||
for (unsigned i = 0; i < num_args; ++i)
|
|
||||||
- {
|
|
||||||
- tree arg = gimple_call_arg (stmt, i);
|
|
||||||
- res_op->ops[i] = do_valueize (arg, top_valueize, valueized);
|
|
||||||
- }
|
|
||||||
- if (internal_fn_p (cfn)
|
|
||||||
- && try_conditional_simplification (as_internal_fn (cfn),
|
|
||||||
- res_op, seq, valueize))
|
|
||||||
- return true;
|
|
||||||
- switch (num_args)
|
|
||||||
- {
|
|
||||||
- case 1:
|
|
||||||
- return (gimple_resimplify1 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
- case 2:
|
|
||||||
- return (gimple_resimplify2 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
- case 3:
|
|
||||||
- return (gimple_resimplify3 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
- case 4:
|
|
||||||
- return (gimple_resimplify4 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
- case 5:
|
|
||||||
- return (gimple_resimplify5 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
- default:
|
|
||||||
- gcc_unreachable ();
|
|
||||||
- }
|
|
||||||
+ res_op->ops[i] = valueize_op (gimple_call_arg (stmt, i));
|
|
||||||
+ return true;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case GIMPLE_COND:
|
|
||||||
{
|
|
||||||
- tree lhs = gimple_cond_lhs (stmt);
|
|
||||||
- tree rhs = gimple_cond_rhs (stmt);
|
|
||||||
- bool valueized = false;
|
|
||||||
- lhs = do_valueize (lhs, top_valueize, valueized);
|
|
||||||
- rhs = do_valueize (rhs, top_valueize, valueized);
|
|
||||||
+ tree lhs = valueize_op (gimple_cond_lhs (stmt));
|
|
||||||
+ tree rhs = valueize_op (gimple_cond_rhs (stmt));
|
|
||||||
res_op->set_op (gimple_cond_code (stmt), boolean_type_node, lhs, rhs);
|
|
||||||
- return (gimple_resimplify2 (seq, res_op, valueize)
|
|
||||||
- || valueized);
|
|
||||||
+ return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
@@ -1089,6 +1015,77 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Try to describe STMT in RES_OP, returning true on success.
|
|
||||||
+ For GIMPLE_CONDs, describe the condition that is being tested.
|
|
||||||
+ For GIMPLE_ASSIGNs, describe the rhs of the assignment.
|
|
||||||
+ For GIMPLE_CALLs, describe the call. */
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+gimple_extract_op (gimple *stmt, gimple_match_op *res_op)
|
|
||||||
+{
|
|
||||||
+ auto nop = [](tree op) { return op; };
|
|
||||||
+ return gimple_extract (stmt, res_op, nop, nop);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* The main STMT based simplification entry. It is used by the fold_stmt
|
|
||||||
+ and the fold_stmt_to_constant APIs. */
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
|
||||||
+ tree (*valueize)(tree), tree (*top_valueize)(tree))
|
|
||||||
+{
|
|
||||||
+ bool valueized = false;
|
|
||||||
+ auto valueize_op = [&](tree op)
|
|
||||||
+ {
|
|
||||||
+ return do_valueize (op, top_valueize, valueized);
|
|
||||||
+ };
|
|
||||||
+ auto valueize_condition = [&](tree op) -> tree
|
|
||||||
+ {
|
|
||||||
+ bool cond_valueized = false;
|
|
||||||
+ tree lhs = do_valueize (TREE_OPERAND (op, 0), top_valueize,
|
|
||||||
+ cond_valueized);
|
|
||||||
+ tree rhs = do_valueize (TREE_OPERAND (op, 1), top_valueize,
|
|
||||||
+ cond_valueized);
|
|
||||||
+ gimple_match_op res_op2 (res_op->cond, TREE_CODE (op),
|
|
||||||
+ TREE_TYPE (op), lhs, rhs);
|
|
||||||
+ if ((gimple_resimplify2 (seq, &res_op2, valueize)
|
|
||||||
+ || cond_valueized)
|
|
||||||
+ && res_op2.code.is_tree_code ())
|
|
||||||
+ {
|
|
||||||
+ if (TREE_CODE_CLASS ((tree_code) res_op2.code) == tcc_comparison)
|
|
||||||
+ {
|
|
||||||
+ valueized = true;
|
|
||||||
+ return build2 (res_op2.code, TREE_TYPE (op),
|
|
||||||
+ res_op2.ops[0], res_op2.ops[1]);
|
|
||||||
+ }
|
|
||||||
+ else if (res_op2.code == SSA_NAME
|
|
||||||
+ || res_op2.code == INTEGER_CST
|
|
||||||
+ || res_op2.code == VECTOR_CST)
|
|
||||||
+ {
|
|
||||||
+ valueized = true;
|
|
||||||
+ return res_op2.ops[0];
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return valueize_op (op);
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ if (!gimple_extract (stmt, res_op, valueize_op, valueize_condition))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ if (res_op->code.is_internal_fn ())
|
|
||||||
+ {
|
|
||||||
+ internal_fn ifn = internal_fn (res_op->code);
|
|
||||||
+ if (try_conditional_simplification (ifn, res_op, seq, valueize))
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (!res_op->reverse
|
|
||||||
+ && res_op->num_ops
|
|
||||||
+ && res_op->resimplify (seq, valueize))
|
|
||||||
+ return true;
|
|
||||||
+
|
|
||||||
+ return valueized;
|
|
||||||
+}
|
|
||||||
|
|
||||||
/* Helper for the autogenerated code, valueize OP. */
|
|
||||||
|
|
||||||
diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
|
|
||||||
index 097898aed..39858c45f 100644
|
|
||||||
--- a/gcc/gimple-match.h
|
|
||||||
+++ b/gcc/gimple-match.h
|
|
||||||
@@ -33,13 +33,39 @@ public:
|
|
||||||
code_helper (combined_fn fn) : rep (-(int) fn) {}
|
|
||||||
operator tree_code () const { return (tree_code) rep; }
|
|
||||||
operator combined_fn () const { return (combined_fn) -rep; }
|
|
||||||
+ explicit operator internal_fn () const;
|
|
||||||
+ explicit operator built_in_function () const;
|
|
||||||
bool is_tree_code () const { return rep > 0; }
|
|
||||||
bool is_fn_code () const { return rep < 0; }
|
|
||||||
+ bool is_internal_fn () const;
|
|
||||||
+ bool is_builtin_fn () const;
|
|
||||||
int get_rep () const { return rep; }
|
|
||||||
private:
|
|
||||||
int rep;
|
|
||||||
};
|
|
||||||
|
|
||||||
+inline code_helper::operator internal_fn () const
|
|
||||||
+{
|
|
||||||
+ return as_internal_fn (combined_fn (*this));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+inline code_helper::operator built_in_function () const
|
|
||||||
+{
|
|
||||||
+ return as_builtin_fn (combined_fn (*this));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+inline bool
|
|
||||||
+code_helper::is_internal_fn () const
|
|
||||||
+{
|
|
||||||
+ return is_fn_code () && internal_fn_p (combined_fn (*this));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+inline bool
|
|
||||||
+code_helper::is_builtin_fn () const
|
|
||||||
+{
|
|
||||||
+ return is_fn_code () && builtin_fn_p (combined_fn (*this));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* Represents the condition under which an operation should happen,
|
|
||||||
and the value to use otherwise. The condition applies elementwise
|
|
||||||
(as for VEC_COND_EXPR) if the values are vectors. */
|
|
||||||
@@ -333,6 +359,7 @@ gimple_simplified_result_is_gimple_val (const gimple_match_op *op)
|
|
||||||
|
|
||||||
extern tree (*mprts_hook) (gimple_match_op *);
|
|
||||||
|
|
||||||
+bool gimple_extract_op (gimple *, gimple_match_op *);
|
|
||||||
bool gimple_simplify (gimple *, gimple_match_op *, gimple_seq *,
|
|
||||||
tree (*)(tree), tree (*)(tree));
|
|
||||||
tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *,
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,31 +0,0 @@
|
|||||||
From b57c55b282e7a9a7b2cc0d3843e58fd7998685e6 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhongyunde <zhongyunde@huawei.com>
|
|
||||||
Date: Fri, 4 Nov 2022 23:19:44 +0800
|
|
||||||
Subject: [PATCH 23/35] [PHIOPT] Disable the match A?CST1:0 when the CST1 is
|
|
||||||
negative value
|
|
||||||
|
|
||||||
Fix the regression of gcc.target/aarch64/sve/vcond_3.c
|
|
||||||
|
|
||||||
gcc:
|
|
||||||
* match.pd (A?CST1:CST2): Disable the simplifications A? (-CST1):0
|
|
||||||
---
|
|
||||||
gcc/match.pd | 3 ++-
|
|
||||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
|
||||||
index 79a0228d2..fc1a34dd3 100644
|
|
||||||
--- a/gcc/match.pd
|
|
||||||
+++ b/gcc/match.pd
|
|
||||||
@@ -3347,7 +3347,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|
||||||
(if (integer_onep (@1))
|
|
||||||
(convert (convert:boolean_type_node @0)))
|
|
||||||
/* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
|
|
||||||
- (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
|
|
||||||
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (TREE_TYPE (@1))
|
|
||||||
+ && integer_pow2p (@1))
|
|
||||||
(with {
|
|
||||||
tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
|
|
||||||
}
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,89 +0,0 @@
|
|||||||
From 9af03694082c462bee86c167c78717089a93a188 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhongyunde <zhongyunde@huawei.com>
|
|
||||||
Date: Sat, 5 Nov 2022 13:22:33 +0800
|
|
||||||
Subject: [PATCH 25/35] [PHIOPT] Add A ? B op CST : B match and simplify
|
|
||||||
optimizations
|
|
||||||
|
|
||||||
Refer to commit b6bdd7a4; use pattern matching to implement the
|
|
||||||
A ? B op CST : B (where CST is power of 2) simplifications.
|
|
||||||
Fixes the 1st issue of https://gitee.com/openeuler/gcc/issues/I5TSG0?from=project-issue.
|
|
||||||
|
|
||||||
gcc/
|
|
||||||
* match.pd (A ? B op CST : B): Add simplifications for A ? B op POW2 : B
|
|
||||||
|
|
||||||
gcc/testsuite/
|
|
||||||
* gcc.dg/pr107190.c: New test.
|
|
||||||
---
|
|
||||||
gcc/match.pd | 21 +++++++++++++++++++++
|
|
||||||
gcc/testsuite/gcc.dg/pr107190.c | 27 +++++++++++++++++++++++++++
|
|
||||||
2 files changed, 48 insertions(+)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/pr107190.c
|
|
||||||
|
|
||||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
|
||||||
index fc1a34dd3..5c5b5f89e 100644
|
|
||||||
--- a/gcc/match.pd
|
|
||||||
+++ b/gcc/match.pd
|
|
||||||
@@ -3383,6 +3383,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|
||||||
)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#if GIMPLE
|
|
||||||
+(if (canonicalize_math_p ())
|
|
||||||
+/* These patterns are mostly used by PHIOPT to move some operations outside of
|
|
||||||
+ the if statements. They should be done late because it gives jump threading
|
|
||||||
+ and few other passes to reduce what is going on. */
|
|
||||||
+/* a ? x op C : x -> x op (a << log2(C)) when C is power of 2. */
|
|
||||||
+ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate)
|
|
||||||
+ (simplify
|
|
||||||
+ (cond @0 (op:s @1 integer_pow2p@2) @1)
|
|
||||||
+ /* powerof2cst */
|
|
||||||
+ (if (INTEGRAL_TYPE_P (type))
|
|
||||||
+ (with {
|
|
||||||
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
|
|
||||||
+ }
|
|
||||||
+ (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; })))
|
|
||||||
+ )
|
|
||||||
+ )
|
|
||||||
+ )
|
|
||||||
+)
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
/* Simplification moved from fold_cond_expr_with_comparison. It may also
|
|
||||||
be extended. */
|
|
||||||
/* This pattern implements two kinds simplification:
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..235b2761a
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/pr107190.c
|
|
||||||
@@ -0,0 +1,27 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
|
|
||||||
+
|
|
||||||
+# define BN_BITS4 32
|
|
||||||
+# define BN_MASK2 (0xffffffffffffffffL)
|
|
||||||
+# define BN_MASK2l (0xffffffffL)
|
|
||||||
+# define BN_MASK2h (0xffffffff00000000L)
|
|
||||||
+# define BN_MASK2h1 (0xffffffff80000000L)
|
|
||||||
+# define LBITS(a) ((a)&BN_MASK2l)
|
|
||||||
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
|
|
||||||
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
|
|
||||||
+
|
|
||||||
+unsigned int test_m(unsigned long in0, unsigned long in1) {
|
|
||||||
+ unsigned long m, m1, lt, ht, bl, bh;
|
|
||||||
+ lt = LBITS(in0);
|
|
||||||
+ ht = HBITS(in0);
|
|
||||||
+ bl = LBITS(in1);
|
|
||||||
+ bh = HBITS(in1);
|
|
||||||
+ m = bh * lt;
|
|
||||||
+ m1 = bl * ht;
|
|
||||||
+ ht = bh * ht;
|
|
||||||
+ m = (m + m1) & BN_MASK2;
|
|
||||||
+ if (m < m1) ht += L2HBITS((unsigned long)1);
|
|
||||||
+ return ht + m;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump "COND_EXPR in block 2 and PHI in block 4 converted to straightline code" "phiopt2" } } */
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,130 +0,0 @@
|
|||||||
From 2a2d0ba6a26d64f4c1f9352bb2c69dea8b67d6a6 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhongyunde <zhongyunde@huawei.com>
|
|
||||||
Date: Wed, 9 Nov 2022 17:04:13 +0800
|
|
||||||
Subject: [PATCH 26/35] [FORWPROP] Fold series of instructions into mul
|
|
||||||
|
|
||||||
Merge the low part of a series of instructions into mul
|
|
||||||
|
|
||||||
gcc/
|
|
||||||
* match.pd: Add simplifications for the low part of mul
|
|
||||||
* common.opt: Add new option fmerge-mull enable with -O2
|
|
||||||
* opts.c: default_options_table
|
|
||||||
|
|
||||||
gcc/testsuite/
|
|
||||||
* g++.dg/tree-ssa/mull64.C: New test.
|
|
||||||
---
|
|
||||||
gcc/common.opt | 4 +++
|
|
||||||
gcc/match.pd | 27 ++++++++++++++++++++
|
|
||||||
gcc/opts.c | 1 +
|
|
||||||
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++
|
|
||||||
4 files changed, 66 insertions(+)
|
|
||||||
create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
|
||||||
|
|
||||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
||||||
index ad147f7a9..6a7f66624 100644
|
|
||||||
--- a/gcc/common.opt
|
|
||||||
+++ b/gcc/common.opt
|
|
||||||
@@ -2069,6 +2069,10 @@ fmerge-debug-strings
|
|
||||||
Common Report Var(flag_merge_debug_strings) Init(1)
|
|
||||||
Attempt to merge identical debug strings across compilation units.
|
|
||||||
|
|
||||||
+fmerge-mull
|
|
||||||
+Common Report Var(flag_merge_mull) Init(0) Optimization
|
|
||||||
+Attempt to merge series instructions into mul.
|
|
||||||
+
|
|
||||||
fmessage-length=
|
|
||||||
Common RejectNegative Joined UInteger
|
|
||||||
-fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping.
|
|
||||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
|
||||||
index 5c5b5f89e..f6c5befd7 100644
|
|
||||||
--- a/gcc/match.pd
|
|
||||||
+++ b/gcc/match.pd
|
|
||||||
@@ -3404,6 +3404,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|
||||||
)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#if GIMPLE
|
|
||||||
+/* These patterns are mostly used by FORWPROP1 to fold some operations into more
|
|
||||||
+ simple IR. The following scenario should be matched:
|
|
||||||
+ In0Lo = In0(D) & 4294967295;
|
|
||||||
+ In0Hi = In0(D) >> 32;
|
|
||||||
+ In1Lo = In1(D) & 4294967295;
|
|
||||||
+ In1Hi = In1(D) >> 32;
|
|
||||||
+ Addc = In0Lo * In1Hi + In0Hi * In1Lo;
|
|
||||||
+ addc32 = Addc << 32;
|
|
||||||
+ ResLo = In0Lo * In1Lo + addc32 */
|
|
||||||
+(simplify
|
|
||||||
+ (plus:c (mult @4 @5)
|
|
||||||
+ (lshift
|
|
||||||
+ (plus:c
|
|
||||||
+ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3))
|
|
||||||
+ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2)))
|
|
||||||
+ INTEGER_CST@3
|
|
||||||
+ )
|
|
||||||
+ )
|
|
||||||
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
|
|
||||||
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
|
|
||||||
+ && TYPE_PRECISION (type) == 64)
|
|
||||||
+ (mult (convert:type @0) (convert:type @1))
|
|
||||||
+ )
|
|
||||||
+)
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
/* Simplification moved from fold_cond_expr_with_comparison. It may also
|
|
||||||
be extended. */
|
|
||||||
/* This pattern implements two kinds simplification:
|
|
||||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
|
||||||
index f12b13599..751965e46 100644
|
|
||||||
--- a/gcc/opts.c
|
|
||||||
+++ b/gcc/opts.c
|
|
||||||
@@ -511,6 +511,7 @@ static const struct default_options default_options_table[] =
|
|
||||||
{ OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
|
|
||||||
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
|
|
||||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
|
|
||||||
+ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
|
|
||||||
|
|
||||||
/* -O2 and above optimizations, but not -Os or -Og. */
|
|
||||||
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
|
|
||||||
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..2a3b74604
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
|
||||||
@@ -0,0 +1,34 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
|
|
||||||
+
|
|
||||||
+# define BN_BITS4 32
|
|
||||||
+# define BN_MASK2 (0xffffffffffffffffL)
|
|
||||||
+# define BN_MASK2l (0xffffffffL)
|
|
||||||
+# define BN_MASK2h (0xffffffff00000000L)
|
|
||||||
+# define BN_MASK2h1 (0xffffffff80000000L)
|
|
||||||
+# define LBITS(a) ((a)&BN_MASK2l)
|
|
||||||
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
|
|
||||||
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
|
|
||||||
+
|
|
||||||
+void mul64(unsigned long in0, unsigned long in1,
|
|
||||||
+ unsigned long &retLo, unsigned long &retHi) {
|
|
||||||
+ unsigned long m00, m01, m10, m11, al, ah, bl, bh;
|
|
||||||
+ unsigned long Addc, addc32, low;
|
|
||||||
+ al = LBITS(in0);
|
|
||||||
+ ah = HBITS(in0);
|
|
||||||
+ bl = LBITS(in1);
|
|
||||||
+ bh = HBITS(in1);
|
|
||||||
+ m10 = bh * al;
|
|
||||||
+ m00 = bl * al;
|
|
||||||
+ m01 = bl * ah;
|
|
||||||
+ m11 = bh * ah;
|
|
||||||
+ Addc = (m10 + m01) & BN_MASK2;
|
|
||||||
+ if (Addc < m01) m11 += L2HBITS((unsigned long)1);
|
|
||||||
+ m11 += HBITS(Addc);
|
|
||||||
+ addc32 = L2HBITS(Addc);
|
|
||||||
+ low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++;
|
|
||||||
+ retLo = low;
|
|
||||||
+ retHi = m11;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,105 +0,0 @@
|
|||||||
From 315911bd3ae6f42366779e262ab76d9ed79359a0 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhongyunde <zhongyunde@huawei.com>
|
|
||||||
Date: Fri, 11 Nov 2022 11:30:37 +0800
|
|
||||||
Subject: [PATCH 27/35] [FORWPROP] Fold series of instructions into umulh
|
|
||||||
|
|
||||||
Merge the high part of a series of instructions into umulh
|
|
||||||
|
|
||||||
gcc/
|
|
||||||
* match.pd: Add simplifications for the high part of umulh
|
|
||||||
|
|
||||||
gcc/testsuite/
|
|
||||||
* g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4
|
|
||||||
---
|
|
||||||
gcc/match.pd | 56 ++++++++++++++++++++++++++
|
|
||||||
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++-
|
|
||||||
2 files changed, 59 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
|
||||||
index f6c5befd7..433682afb 100644
|
|
||||||
--- a/gcc/match.pd
|
|
||||||
+++ b/gcc/match.pd
|
|
||||||
@@ -3404,6 +3404,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|
||||||
)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#if GIMPLE
|
|
||||||
+/* These patterns are mostly used by FORWPROP4 to move some operations outside of
|
|
||||||
+ the if statements. They should be done late because it gives jump threading
|
|
||||||
+ and a few other passes a chance to reduce what is going on. */
|
|
||||||
+/* Mul64 is defined as a multiplication algorithm which multiplies two 64-bit
|
|
||||||
+ integers to one 128-bit integer. Try to match the high part of mul pattern
|
|
||||||
+ after the low part of mul pattern is simplified. The following scenario
|
|
||||||
+ should be matched:
|
|
||||||
+ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
|
|
||||||
+ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2
|
|
||||||
+ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3
|
|
||||||
+ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2
|
|
||||||
+ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3
|
|
||||||
+ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6
|
|
||||||
+ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult @4 @7) @8
|
|
||||||
+ AddH = (Addc >> 32) + In0Hi * In1Hi -- (plus@11 (rshift @9 @3) (mult @5 @7))
|
|
||||||
+ addc32 = Addc << 32; -- lshift@10 @9 @3
|
|
||||||
+ ResLo = In0(D) * In1(D); -- mult @0 @1
|
|
||||||
+ ResHi = ((long unsigned int) (addc32 > ResLo)) +
|
|
||||||
+ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH;
|
|
||||||
+ } */
|
|
||||||
+(simplify
|
|
||||||
+ (plus:c
|
|
||||||
+ (plus:c
|
|
||||||
+ (convert
|
|
||||||
+ (gt (lshift@10 @9 @3)
|
|
||||||
+ (mult:c @0 @1)))
|
|
||||||
+ (lshift
|
|
||||||
+ (convert
|
|
||||||
+ (gt @8 @9))
|
|
||||||
+ @3))
|
|
||||||
+ (plus:c@11
|
|
||||||
+ (rshift
|
|
||||||
+ (plus:c@9
|
|
||||||
+ (mult:c (bit_and@4 SSA_NAME@0 @2) @7)
|
|
||||||
+ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2)))
|
|
||||||
+ @3)
|
|
||||||
+ (mult:c (rshift@5 SSA_NAME@0 @3)
|
|
||||||
+ (rshift@7 SSA_NAME@1 INTEGER_CST@3))
|
|
||||||
+ )
|
|
||||||
+ )
|
|
||||||
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
|
|
||||||
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
|
|
||||||
+ && TYPE_PRECISION (type) == 64)
|
|
||||||
+ (with {
|
|
||||||
+ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
|
|
||||||
+ tree shift = build_int_cst (integer_type_node, 64);
|
|
||||||
+ }
|
|
||||||
+ (convert:type (rshift
|
|
||||||
+ (mult (convert:i128_type @0)
|
|
||||||
+ (convert:i128_type @1))
|
|
||||||
+ { shift; })))
|
|
||||||
+ )
|
|
||||||
+)
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
#if GIMPLE
|
|
||||||
/* These patterns are mostly used by FORWPROP1 to fold some operations into more
|
|
||||||
simple IR. The following scenario should be matched:
|
|
||||||
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
|
||||||
index 2a3b74604..f61cf5e6f 100644
|
|
||||||
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
|
||||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
|
|
||||||
+/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
|
|
||||||
|
|
||||||
# define BN_BITS4 32
|
|
||||||
# define BN_MASK2 (0xffffffffffffffffL)
|
|
||||||
@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1,
|
|
||||||
retHi = m11;
|
|
||||||
}
|
|
||||||
|
|
||||||
-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,38 +0,0 @@
|
|||||||
From b669b4512e8425f4d752ef76bf61097cf40d9b35 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zgat <1071107108@qq.com>
|
|
||||||
Date: Thu, 17 Nov 2022 02:55:48 +0000
|
|
||||||
Subject: [PATCH 28/35] [Struct Reorg] Fix speccpu2006 462 double free #I60YUV
|
|
||||||
modify gcc/tree.c. Normal operation speccpu 462 after modified
|
|
||||||
|
|
||||||
Signed-off-by: zgat <1071107108@qq.com>
|
|
||||||
---
|
|
||||||
gcc/tree.c | 6 ++----
|
|
||||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/tree.c b/gcc/tree.c
|
|
||||||
index 2a532d15a..a61788651 100644
|
|
||||||
--- a/gcc/tree.c
|
|
||||||
+++ b/gcc/tree.c
|
|
||||||
@@ -5224,8 +5224,7 @@ fld_simplified_type_name (tree type)
|
|
||||||
optimizations. */
|
|
||||||
if (flag_ipa_struct_reorg
|
|
||||||
&& lang_c_p ()
|
|
||||||
- && flag_lto_partition == LTO_PARTITION_ONE
|
|
||||||
- && (in_lto_p || flag_whole_program))
|
|
||||||
+ && flag_lto_partition == LTO_PARTITION_ONE)
|
|
||||||
return TYPE_NAME (type);
|
|
||||||
|
|
||||||
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
|
|
||||||
@@ -5471,8 +5470,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
|
|
||||||
optimizations. */
|
|
||||||
if (flag_ipa_struct_reorg
|
|
||||||
&& lang_c_p ()
|
|
||||||
- && flag_lto_partition == LTO_PARTITION_ONE
|
|
||||||
- && (in_lto_p || flag_whole_program))
|
|
||||||
+ && flag_lto_partition == LTO_PARTITION_ONE)
|
|
||||||
return t;
|
|
||||||
if (POINTER_TYPE_P (t))
|
|
||||||
return fld_incomplete_type_of (t, fld);
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,267 +0,0 @@
|
|||||||
From 013544d0b477647c8835a8806c75e7b09155b8ed Mon Sep 17 00:00:00 2001
|
|
||||||
From: benniaobufeijiushiji <linda7@huawei.com>
|
|
||||||
Date: Mon, 8 Aug 2022 09:13:53 +0800
|
|
||||||
Subject: [PATCH 31/35] [loop-vect] Transfer arrays using registers between
|
|
||||||
loops For vectorized stores in loop, if all succeeding loops immediately use the
|
|
||||||
data, transfer data using registers instead of load store to prevent overhead
|
|
||||||
from memory access.
|
|
||||||
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/vect/vect-perm-1.c | 45 ++++++
|
|
||||||
gcc/tree-vect-stmts.c | 181 ++++++++++++++++++++++++
|
|
||||||
2 files changed, 226 insertions(+)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-perm-1.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/vect/vect-perm-1.c b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..d8b29fbd5
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
|
|
||||||
@@ -0,0 +1,45 @@
|
|
||||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
||||||
+/* { dg-options "-O3 -fdump-tree-vect-all-details -save-temps" } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+static unsigned inline abs2 (unsigned a)
|
|
||||||
+{
|
|
||||||
+ unsigned s = ((a>>15)&0x10001)*0xffff;
|
|
||||||
+ return (a+s)^s;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int foo (unsigned *a00, unsigned *a11, unsigned *a22, unsigned *a33)
|
|
||||||
+{
|
|
||||||
+ unsigned tmp[4][4];
|
|
||||||
+ unsigned a0, a1, a2, a3;
|
|
||||||
+ int sum = 0;
|
|
||||||
+ for (int i = 0; i < 4; i++)
|
|
||||||
+ {
|
|
||||||
+ int t0 = a00[i] + a11[i];
|
|
||||||
+ int t1 = a00[i] - a11[i];
|
|
||||||
+ int t2 = a22[i] + a33[i];
|
|
||||||
+ int t3 = a22[i] - a33[i];
|
|
||||||
+ tmp[i][0] = t0 + t2;
|
|
||||||
+ tmp[i][2] = t0 - t2;
|
|
||||||
+ tmp[i][1] = t1 + t3;
|
|
||||||
+ tmp[i][3] = t1 - t3;
|
|
||||||
+ }
|
|
||||||
+ for (int i = 0; i < 4; i++)
|
|
||||||
+ {
|
|
||||||
+ int t0 = tmp[0][i] + tmp[1][i];
|
|
||||||
+ int t1 = tmp[0][i] - tmp[1][i];
|
|
||||||
+ int t2 = tmp[2][i] + tmp[3][i];
|
|
||||||
+ int t3 = tmp[2][i] - tmp[3][i];
|
|
||||||
+ a0 = t0 + t2;
|
|
||||||
+ a2 = t0 - t2;
|
|
||||||
+ a1 = t1 + t3;
|
|
||||||
+ a3 = t1 - t3;
|
|
||||||
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
|
|
||||||
+ }
|
|
||||||
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 16 "vect" } } */
|
|
||||||
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
|
||||||
index 2c2197022..98b233718 100644
|
|
||||||
--- a/gcc/tree-vect-stmts.c
|
|
||||||
+++ b/gcc/tree-vect-stmts.c
|
|
||||||
@@ -2276,6 +2276,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
|
|
||||||
return NULL_TREE;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Check successor BB, BB without load is regarded as empty BB. Ignore empty
|
|
||||||
+ BB in DFS. */
|
|
||||||
+
|
|
||||||
+static unsigned
|
|
||||||
+mem_refs_in_bb (basic_block bb, vec<gimple *> &stmts)
|
|
||||||
+{
|
|
||||||
+ unsigned num = 0;
|
|
||||||
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
|
|
||||||
+ !gsi_end_p (gsi); gsi_next (&gsi))
|
|
||||||
+ {
|
|
||||||
+ gimple *stmt = gsi_stmt (gsi);
|
|
||||||
+ if (is_gimple_debug (stmt))
|
|
||||||
+ continue;
|
|
||||||
+ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt)
|
|
||||||
+ && !gimple_has_volatile_ops (stmt))
|
|
||||||
+ {
|
|
||||||
+ if (gimple_assign_rhs_code (stmt) == MEM_REF
|
|
||||||
+ || gimple_assign_rhs_code (stmt) == ARRAY_REF)
|
|
||||||
+ {
|
|
||||||
+ stmts.safe_push (stmt);
|
|
||||||
+ num++;
|
|
||||||
+ }
|
|
||||||
+ else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF
|
|
||||||
+ || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF)
|
|
||||||
+ num++;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return num;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+check_same_base (vec<data_reference_p> *datarefs, data_reference_p dr)
|
|
||||||
+{
|
|
||||||
+ for (unsigned ui = 0; ui < datarefs->length (); ui++)
|
|
||||||
+ {
|
|
||||||
+ tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0);
|
|
||||||
+ tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0);
|
|
||||||
+ if (TREE_CODE (op1) != TREE_CODE (op2))
|
|
||||||
+ continue;
|
|
||||||
+ if (TREE_CODE (op1) == ADDR_EXPR)
|
|
||||||
+ {
|
|
||||||
+ op1 = TREE_OPERAND (op1, 0);
|
|
||||||
+ op2 = TREE_OPERAND (op2, 0);
|
|
||||||
+ }
|
|
||||||
+ enum tree_code code = TREE_CODE (op1);
|
|
||||||
+ switch (code)
|
|
||||||
+ {
|
|
||||||
+ case VAR_DECL:
|
|
||||||
+ if (DECL_NAME (op1) == DECL_NAME (op2)
|
|
||||||
+ && DR_IS_READ ((*datarefs)[ui]))
|
|
||||||
+ return true;
|
|
||||||
+ break;
|
|
||||||
+ case SSA_NAME:
|
|
||||||
+ if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2)
|
|
||||||
+ && DR_IS_READ ((*datarefs)[ui]))
|
|
||||||
+ return true;
|
|
||||||
+ break;
|
|
||||||
+ default:
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Iterate all load STMTS, if satisfying same base vectorized stmt, then return,
|
|
||||||
+ Otherwise, set false to SUCCESS. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts,
|
|
||||||
+ stmt_vec_info stmt_info, bool &success)
|
|
||||||
+{
|
|
||||||
+ if (stmt_info == NULL)
|
|
||||||
+ {
|
|
||||||
+ success = false;
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ if (DR_IS_READ (stmt_info->dr_aux.dr))
|
|
||||||
+ {
|
|
||||||
+ success = false;
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ unsigned ui = 0;
|
|
||||||
+ gimple *candidate = NULL;
|
|
||||||
+ FOR_EACH_VEC_ELT (stmts, ui, candidate)
|
|
||||||
+ {
|
|
||||||
+ if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE)
|
|
||||||
+ continue;
|
|
||||||
+
|
|
||||||
+ if (candidate->bb != candidate->bb->loop_father->header)
|
|
||||||
+ {
|
|
||||||
+ success = false;
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ auto_vec<data_reference_p> datarefs;
|
|
||||||
+ tree res = find_data_references_in_bb (candidate->bb->loop_father,
|
|
||||||
+ candidate->bb, &datarefs);
|
|
||||||
+ if (res == chrec_dont_know)
|
|
||||||
+ {
|
|
||||||
+ success = false;
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ if (check_same_base (&datarefs, stmt_info->dr_aux.dr))
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ success = false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Depth-first search from present BB. If successor has load STMTS,
|
|
||||||
+ stop further searching. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info,
|
|
||||||
+ bool &success, vec<basic_block> &visited_bbs)
|
|
||||||
+{
|
|
||||||
+ if (bb == cfun->cfg->x_exit_block_ptr)
|
|
||||||
+ {
|
|
||||||
+ success = false;
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch)
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ visited_bbs.safe_push (bb);
|
|
||||||
+ auto_vec<gimple *> stmts;
|
|
||||||
+ unsigned num = mem_refs_in_bb (bb, stmts);
|
|
||||||
+ /* Empty BB. */
|
|
||||||
+ if (num == 0)
|
|
||||||
+ {
|
|
||||||
+ edge e;
|
|
||||||
+ edge_iterator ei;
|
|
||||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
|
||||||
+ {
|
|
||||||
+ dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs);
|
|
||||||
+ if (!success)
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ /* Non-empty BB. */
|
|
||||||
+ check_vec_use (loop_vinfo, stmts, stmt_info, success);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* For grouped store, if all successors of present BB have vectorized load
|
|
||||||
+ from same base of store. If so, set memory_access_type using
|
|
||||||
+ VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES. */
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo)
|
|
||||||
+{
|
|
||||||
+ gimple *stmt = stmt_vinfo->stmt;
|
|
||||||
+ if (gimple_code (stmt) != GIMPLE_ASSIGN)
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ if (DR_IS_READ (stmt_vinfo->dr_aux.dr))
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ basic_block bb = stmt->bb;
|
|
||||||
+ bool success = true;
|
|
||||||
+ auto_vec<basic_block> visited_bbs;
|
|
||||||
+ visited_bbs.safe_push (bb);
|
|
||||||
+ edge e;
|
|
||||||
+ edge_iterator ei;
|
|
||||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
|
||||||
+ dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs);
|
|
||||||
+ return success;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/* A subroutine of get_load_store_type, with a subset of the same
|
|
||||||
arguments. Handle the case where STMT_INFO is part of a grouped load
|
|
||||||
or store.
|
|
||||||
@@ -2434,6 +2601,20 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
|
|
||||||
*memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
|
|
||||||
overrun_p = would_overrun_p;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+ if (*memory_access_type == VMAT_LOAD_STORE_LANES
|
|
||||||
+ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST
|
|
||||||
+ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters),
|
|
||||||
+ loop_vinfo->vectorization_factor)
|
|
||||||
+ && conti_perm (stmt_info, loop_vinfo)
|
|
||||||
+ && (vls_type == VLS_LOAD
|
|
||||||
+ ? vect_grouped_load_supported (vectype, single_element_p,
|
|
||||||
+ group_size)
|
|
||||||
+ : vect_grouped_store_supported (vectype, group_size)))
|
|
||||||
+ {
|
|
||||||
+ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
|
|
||||||
+ overrun_p = would_overrun_p;
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
/* As a last resort, trying using a gather load or scatter store.
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,826 +0,0 @@
|
|||||||
From ca2a541ed3425bec64f97fe277c6c02bf4f20049 Mon Sep 17 00:00:00 2001
|
|
||||||
From: benniaobufeijiushiji <linda7@huawei.com>
|
|
||||||
Date: Thu, 27 Oct 2022 10:26:34 +0800
|
|
||||||
Subject: [PATCH 33/35] [Loop-distribution] Insert temp arrays built from
|
|
||||||
isomorphic stmts Use option -ftree-slp-transpose-vectorize Build temp arrays
|
|
||||||
for isomorphic stmt and regard them as new seed_stmts for loop distribution.
|
|
||||||
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c | 67 +++
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c | 17 +
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c | 19 +
|
|
||||||
gcc/tree-loop-distribution.c | 577 +++++++++++++++++++-
|
|
||||||
4 files changed, 663 insertions(+), 17 deletions(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..649463647
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
|
|
||||||
@@ -0,0 +1,67 @@
|
|
||||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
||||||
+/* { dg-do run { target { aarch64*-*-linux* } } } */
|
|
||||||
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details -save-temps" } */
|
|
||||||
+
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+
|
|
||||||
+static unsigned inline abs2 (unsigned a)
|
|
||||||
+{
|
|
||||||
+ unsigned s = ((a>>15)&0x10001)*0xffff;
|
|
||||||
+ return (a+s)^s;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
|
|
||||||
+{
|
|
||||||
+ unsigned tmp[4][4];
|
|
||||||
+ unsigned a0, a1, a2, a3;
|
|
||||||
+ int sum = 0;
|
|
||||||
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
|
|
||||||
+ {
|
|
||||||
+ a0 = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16);
|
|
||||||
+ a1 = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16);
|
|
||||||
+ a2 = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16);
|
|
||||||
+ a3 = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16);
|
|
||||||
+ int t0 = a0 + a1;
|
|
||||||
+ int t1 = a0 - a1;
|
|
||||||
+ int t2 = a2 + a3;
|
|
||||||
+ int t3 = a2 - a3;
|
|
||||||
+ tmp[i][0] = t0 + t2;
|
|
||||||
+ tmp[i][2] = t0 - t2;
|
|
||||||
+ tmp[i][1] = t1 + t3;
|
|
||||||
+ tmp[i][3] = t1 - t3;
|
|
||||||
+ }
|
|
||||||
+ for (int i = 0; i < 4; i++)
|
|
||||||
+ {
|
|
||||||
+ int t0 = tmp[0][i] + tmp[1][i];
|
|
||||||
+ int t1 = tmp[0][i] - tmp[1][i];
|
|
||||||
+ int t2 = tmp[2][i] + tmp[3][i];
|
|
||||||
+ int t3 = tmp[2][i] - tmp[3][i];
|
|
||||||
+ a0 = t0 + t2;
|
|
||||||
+ a2 = t0 - t2;
|
|
||||||
+ a1 = t1 + t3;
|
|
||||||
+ a3 = t1 - t3;
|
|
||||||
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
|
|
||||||
+ }
|
|
||||||
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int main ()
|
|
||||||
+{
|
|
||||||
+ unsigned char oxa[128] = {0};
|
|
||||||
+ unsigned char oxb[128] = {0};
|
|
||||||
+ for (int i = 0; i < 128; i++)
|
|
||||||
+ {
|
|
||||||
+ oxa[i] += i * 3;
|
|
||||||
+ oxb[i] = i * 2;
|
|
||||||
+ }
|
|
||||||
+ int sum = foo (oxa, 16, oxb, 32);
|
|
||||||
+ if (sum != 736)
|
|
||||||
+ {
|
|
||||||
+ abort ();
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..1b50fd27d
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
|
|
||||||
@@ -0,0 +1,17 @@
|
|
||||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
||||||
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */
|
|
||||||
+
|
|
||||||
+unsigned a0[4], a1[4], a2[4], a3[4];
|
|
||||||
+
|
|
||||||
+void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
|
|
||||||
+{
|
|
||||||
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
|
|
||||||
+ {
|
|
||||||
+ a0[i] = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16);
|
|
||||||
+ a1[i] = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16);
|
|
||||||
+ a2[i] = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16);
|
|
||||||
+ a3[i] = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..94b992b05
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
|
|
||||||
@@ -0,0 +1,19 @@
|
|
||||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
|
||||||
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */
|
|
||||||
+
|
|
||||||
+unsigned a0[4], a1[4], a2[4], a3[4];
|
|
||||||
+
|
|
||||||
+void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
|
|
||||||
+{
|
|
||||||
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
|
|
||||||
+ {
|
|
||||||
+ a0[i] = ((oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16)) + 1;
|
|
||||||
+ a1[i] = ((oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16)) - 2;
|
|
||||||
+ a2[i] = ((oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16)) * 3;
|
|
||||||
+ a3[i] = ((oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16)) / 4;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times "Insertion removed" 1 "ldist" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */
|
|
||||||
\ No newline at end of file
|
|
||||||
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
|
|
||||||
index c08af6562..88b56379c 100644
|
|
||||||
--- a/gcc/tree-loop-distribution.c
|
|
||||||
+++ b/gcc/tree-loop-distribution.c
|
|
||||||
@@ -36,6 +36,47 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
| D(I) = A(I-1)*E
|
|
||||||
|ENDDO
|
|
||||||
|
|
||||||
+ If an unvectorizable loop has grouped loads, and calculations from grouped
|
|
||||||
+ loads are isomorphic, build temp arrays using stmts where isomorphic
|
|
||||||
+ calculations end. After distribution, the partition built from temp
|
|
||||||
+ arrays can be vectorized in pass SLP after loop unrolling. For example,
|
|
||||||
+
|
|
||||||
+ |DO I = 1, N
|
|
||||||
+ | A = FOO (ARG_1);
|
|
||||||
+ | B = FOO (ARG_2);
|
|
||||||
+ | C = BAR_0 (A);
|
|
||||||
+ | D = BAR_1 (B);
|
|
||||||
+ |ENDDO
|
|
||||||
+
|
|
||||||
+ is transformed to
|
|
||||||
+
|
|
||||||
+ |DO I = 1, N
|
|
||||||
+ | J = FOO (ARG_1);
|
|
||||||
+ | K = FOO (ARG_2);
|
|
||||||
+ | X[I] = J;
|
|
||||||
+ | Y[I] = K;
|
|
||||||
+ | A = X[I];
|
|
||||||
+ | B = Y[I];
|
|
||||||
+ | C = BAR_0 (A);
|
|
||||||
+ | D = BAR_1 (B);
|
|
||||||
+ |ENDDO
|
|
||||||
+
|
|
||||||
+ and is then distributed to
|
|
||||||
+
|
|
||||||
+ |DO I = 1, N
|
|
||||||
+ | J = FOO (ARG_1);
|
|
||||||
+ | K = FOO (ARG_2);
|
|
||||||
+ | X[I] = J;
|
|
||||||
+ | Y[I] = K;
|
|
||||||
+ |ENDDO
|
|
||||||
+
|
|
||||||
+ |DO I = 1, N
|
|
||||||
+ | A = X[I];
|
|
||||||
+ | B = Y[I];
|
|
||||||
+ | C = BAR_0 (A);
|
|
||||||
+ | D = BAR_1 (B);
|
|
||||||
+ |ENDDO
|
|
||||||
+
|
|
||||||
Loop distribution is the dual of loop fusion. It separates statements
|
|
||||||
of a loop (or loop nest) into multiple loops (or loop nests) with the
|
|
||||||
same loop header. The major goal is to separate statements which may
|
|
||||||
@@ -44,7 +85,9 @@ along with GCC; see the file COPYING3. If not see
|
|
||||||
|
|
||||||
1) Seed partitions with specific type statements. For now we support
|
|
||||||
two types seed statements: statement defining variable used outside
|
|
||||||
- of loop; statement storing to memory.
|
|
||||||
+ of loop; statement storing to memory. Moreover, for unvectorizable
|
|
||||||
+ loops, we try to find isomorphic stmts from grouped load and build
|
|
||||||
+ temp arrays as new seed statements.
|
|
||||||
2) Build reduced dependence graph (RDG) for loop to be distributed.
|
|
||||||
The vertices (RDG:V) model all statements in the loop and the edges
|
|
||||||
(RDG:E) model flow and control dependencies between statements.
|
|
||||||
@@ -643,7 +686,8 @@ class loop_distribution
|
|
||||||
/* Returns true when PARTITION1 and PARTITION2 access the same memory
|
|
||||||
object in RDG. */
|
|
||||||
bool share_memory_accesses (struct graph *rdg,
|
|
||||||
- partition *partition1, partition *partition2);
|
|
||||||
+ partition *partition1, partition *partition2,
|
|
||||||
+ hash_set<tree> *excluded_arrays);
|
|
||||||
|
|
||||||
/* For each seed statement in STARTING_STMTS, this function builds
|
|
||||||
partition for it by adding depended statements according to RDG.
|
|
||||||
@@ -686,8 +730,9 @@ class loop_distribution
|
|
||||||
|
|
||||||
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
|
|
||||||
ALIAS_DDRS contains ddrs which need runtime alias check. */
|
|
||||||
- void finalize_partitions (class loop *loop, vec<struct partition *>
|
|
||||||
- *partitions, vec<ddr_p> *alias_ddrs);
|
|
||||||
+ void finalize_partitions (class loop *loop,
|
|
||||||
+ vec<struct partition *> *partitions,
|
|
||||||
+ vec<ddr_p> *alias_ddrs, bitmap producers);
|
|
||||||
|
|
||||||
/* Analyze loop form and if it's vectorizable to decide if we need to
|
|
||||||
insert temp arrays to distribute it. */
|
|
||||||
@@ -701,6 +746,28 @@ class loop_distribution
|
|
||||||
|
|
||||||
inline void rebuild_rdg (loop_p loop, struct graph *&rdg,
|
|
||||||
control_dependences *cd);
|
|
||||||
+
|
|
||||||
+ /* If loop is not distributed, remove inserted temp arrays. */
|
|
||||||
+ void remove_insertion (loop_p loop, struct graph *flow_only_rdg,
|
|
||||||
+ bitmap producers, struct partition *partition);
|
|
||||||
+
|
|
||||||
+ /* Insert temp arrays if isomorphic computation exists. Temp arrays will be
|
|
||||||
+ regarded as SEED_STMTS for building partitions in succeeding processes. */
|
|
||||||
+ bool insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
|
|
||||||
+ hash_set<tree> *tmp_array_vars, bitmap producers);
|
|
||||||
+
|
|
||||||
+ void build_producers (loop_p loop, bitmap producers,
|
|
||||||
+ vec<gimple *> &transformed);
|
|
||||||
+
|
|
||||||
+ void do_insertion (loop_p loop, struct graph *flow_only_rdg, tree iv,
|
|
||||||
+ bitmap cut_points, hash_set <tree> *tmp_array_vars,
|
|
||||||
+ bitmap producers);
|
|
||||||
+
|
|
||||||
+ /* Fuse PARTITIONS built from inserted temp arrays into one partition,
|
|
||||||
+ fuse the rest into another. */
|
|
||||||
+ void merge_remaining_partitions (vec<struct partition *> *partitions,
|
|
||||||
+ bitmap producers);
|
|
||||||
+
|
|
||||||
/* Distributes the code from LOOP in such a way that producer statements
|
|
||||||
are placed before consumer statements. Tries to separate only the
|
|
||||||
statements from STMTS into separate loops. Returns the number of
|
|
||||||
@@ -1913,7 +1980,8 @@ loop_distribution::classify_partition (loop_p loop,
|
|
||||||
|
|
||||||
bool
|
|
||||||
loop_distribution::share_memory_accesses (struct graph *rdg,
|
|
||||||
- partition *partition1, partition *partition2)
|
|
||||||
+ partition *partition1, partition *partition2,
|
|
||||||
+ hash_set <tree> *excluded_arrays)
|
|
||||||
{
|
|
||||||
unsigned i, j;
|
|
||||||
bitmap_iterator bi, bj;
|
|
||||||
@@ -1947,7 +2015,10 @@ loop_distribution::share_memory_accesses (struct graph *rdg,
|
|
||||||
if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
|
|
||||||
&& operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
|
|
||||||
&& operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
|
|
||||||
- && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
|
|
||||||
+ && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0)
|
|
||||||
+ /* An exception, if PARTITION1 and PARTITION2 contain the
|
|
||||||
+ temp array we inserted, do not merge them. */
|
|
||||||
+ && !excluded_arrays->contains (DR_REF (dr1)))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -2909,13 +2980,47 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+void
|
|
||||||
+loop_distribution::merge_remaining_partitions
|
|
||||||
+ (vec<struct partition *> *partitions,
|
|
||||||
+ bitmap producers)
|
|
||||||
+{
|
|
||||||
+ struct partition *partition = NULL;
|
|
||||||
+ struct partition *p1 = NULL, *p2 = NULL;
|
|
||||||
+ for (unsigned i = 0; partitions->iterate (i, &partition); i++)
|
|
||||||
+ {
|
|
||||||
+ if (bitmap_intersect_p (producers, partition->stmts))
|
|
||||||
+ {
|
|
||||||
+ if (p1 == NULL)
|
|
||||||
+ {
|
|
||||||
+ p1 = partition;
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ partition_merge_into (NULL, p1, partition, FUSE_FINALIZE);
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ if (p2 == NULL)
|
|
||||||
+ {
|
|
||||||
+ p2 = partition;
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ partition_merge_into (NULL, p2, partition, FUSE_FINALIZE);
|
|
||||||
+ }
|
|
||||||
+ partitions->unordered_remove (i);
|
|
||||||
+ partition_free (partition);
|
|
||||||
+ i--;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
void
|
|
||||||
loop_distribution::finalize_partitions (class loop *loop,
|
|
||||||
vec<struct partition *> *partitions,
|
|
||||||
- vec<ddr_p> *alias_ddrs)
|
|
||||||
+ vec<ddr_p> *alias_ddrs,
|
|
||||||
+ bitmap producers)
|
|
||||||
{
|
|
||||||
unsigned i;
|
|
||||||
- struct partition *partition, *a;
|
|
||||||
+ struct partition *partition;
|
|
||||||
|
|
||||||
if (partitions->length () == 1
|
|
||||||
|| alias_ddrs->length () > 0)
|
|
||||||
@@ -2947,13 +3052,7 @@ loop_distribution::finalize_partitions (class loop *loop,
|
|
||||||
|| (loop->inner == NULL
|
|
||||||
&& i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
|
|
||||||
{
|
|
||||||
- a = (*partitions)[0];
|
|
||||||
- for (i = 1; partitions->iterate (i, &partition); ++i)
|
|
||||||
- {
|
|
||||||
- partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
|
|
||||||
- partition_free (partition);
|
|
||||||
- }
|
|
||||||
- partitions->truncate (1);
|
|
||||||
+ merge_remaining_partitions (partitions, producers);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Fuse memset builtins if possible. */
|
|
||||||
@@ -3758,6 +3857,404 @@ find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts)
|
|
||||||
return decide_stmts_by_profit (candi_stmts, stmts);
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Get iv from SEED_STMTS and make sure each seed_stmt has only one iv as index
|
|
||||||
+ and all indices are the same. */
|
|
||||||
+
|
|
||||||
+static tree
|
|
||||||
+find_index (vec<gimple *> seed_stmts)
|
|
||||||
+{
|
|
||||||
+ if (seed_stmts.length () == 0)
|
|
||||||
+ return NULL;
|
|
||||||
+ bool found_index = false;
|
|
||||||
+ tree index = NULL;
|
|
||||||
+ unsigned ui = 0;
|
|
||||||
+ for (ui = 0; ui < seed_stmts.length (); ui++)
|
|
||||||
+ {
|
|
||||||
+ if (!gimple_vdef (seed_stmts[ui]))
|
|
||||||
+ return NULL;
|
|
||||||
+ tree lhs = gimple_assign_lhs (seed_stmts[ui]);
|
|
||||||
+ unsigned num_index = 0;
|
|
||||||
+ while (TREE_CODE (lhs) == ARRAY_REF)
|
|
||||||
+ {
|
|
||||||
+ if (TREE_CODE (TREE_OPERAND (lhs, 1)) == SSA_NAME)
|
|
||||||
+ {
|
|
||||||
+ num_index++;
|
|
||||||
+ if (num_index > 1)
|
|
||||||
+ return NULL;
|
|
||||||
+ if (index == NULL)
|
|
||||||
+ {
|
|
||||||
+ index = TREE_OPERAND (lhs, 1);
|
|
||||||
+ found_index = true;
|
|
||||||
+ }
|
|
||||||
+ else if (index != TREE_OPERAND (lhs, 1))
|
|
||||||
+ return NULL;
|
|
||||||
+ }
|
|
||||||
+ lhs = TREE_OPERAND (lhs, 0);
|
|
||||||
+ }
|
|
||||||
+ if (!found_index)
|
|
||||||
+ return NULL;
|
|
||||||
+ }
|
|
||||||
+ return index;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Check if expression of phi is an increment of a const. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+check_phi_inc (struct vertex *v_phi, struct graph *rdg, bool &found_inc)
|
|
||||||
+{
|
|
||||||
+ struct graph_edge *e_phi;
|
|
||||||
+ for (e_phi = v_phi->succ; e_phi; e_phi = e_phi->succ_next)
|
|
||||||
+ {
|
|
||||||
+ struct vertex *v_inc = &(rdg->vertices[e_phi->dest]);
|
|
||||||
+ if (!is_gimple_assign (RDGV_STMT (v_inc))
|
|
||||||
+ || gimple_expr_code (RDGV_STMT (v_inc)) != PLUS_EXPR)
|
|
||||||
+ continue;
|
|
||||||
+ tree rhs1 = gimple_assign_rhs1 (RDGV_STMT (v_inc));
|
|
||||||
+ tree rhs2 = gimple_assign_rhs2 (RDGV_STMT (v_inc));
|
|
||||||
+ if (!(integer_onep (rhs1) || integer_onep (rhs2)))
|
|
||||||
+ continue;
|
|
||||||
+ struct graph_edge *e_inc;
|
|
||||||
+ /* find cycle with only two vertices inc and phi: inc <--> phi. */
|
|
||||||
+ bool found_cycle = false;
|
|
||||||
+ for (e_inc = v_inc->succ; e_inc; e_inc = e_inc->succ_next)
|
|
||||||
+ {
|
|
||||||
+ if (e_inc->dest == e_phi->src)
|
|
||||||
+ {
|
|
||||||
+ found_cycle = true;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ if (!found_cycle)
|
|
||||||
+ continue;
|
|
||||||
+ found_inc = true;
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Check if phi satisfies form like PHI <0, i>. */
|
|
||||||
+
|
|
||||||
+static inline bool
|
|
||||||
+iv_check_phi_stmt (gimple *phi_stmt)
|
|
||||||
+{
|
|
||||||
+ return gimple_phi_num_args (phi_stmt) == 2
|
|
||||||
+ && (integer_zerop (gimple_phi_arg_def (phi_stmt, 0))
|
|
||||||
+ || integer_zerop (gimple_phi_arg_def (phi_stmt, 1)));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Make sure the iteration variable is a phi. */
|
|
||||||
+
|
|
||||||
+static tree
|
|
||||||
+get_iv_from_seed (struct graph *flow_only_rdg, vec<gimple *> seed_stmts)
|
|
||||||
+{
|
|
||||||
+ tree index = find_index (seed_stmts);
|
|
||||||
+ if (index == NULL)
|
|
||||||
+ return NULL;
|
|
||||||
+ for (int i = 0; i < flow_only_rdg->n_vertices; i++)
|
|
||||||
+ {
|
|
||||||
+ struct vertex *v = &(flow_only_rdg->vertices[i]);
|
|
||||||
+ if (RDGV_STMT (v) != seed_stmts[0])
|
|
||||||
+ continue;
|
|
||||||
+ struct graph_edge *e;
|
|
||||||
+ bool found_phi = false;
|
|
||||||
+ for (e = v->pred; e; e = e->pred_next)
|
|
||||||
+ {
|
|
||||||
+ struct vertex *v_phi = &(flow_only_rdg->vertices[e->src]);
|
|
||||||
+ gimple *phi_stmt = RDGV_STMT (v_phi);
|
|
||||||
+ if (gimple_code (phi_stmt) != GIMPLE_PHI
|
|
||||||
+ || gimple_phi_result (phi_stmt) != index)
|
|
||||||
+ continue;
|
|
||||||
+ if (!iv_check_phi_stmt (phi_stmt))
|
|
||||||
+ return NULL;
|
|
||||||
+ /* find inc expr in succ of phi. */
|
|
||||||
+ bool found_inc = false;
|
|
||||||
+ check_phi_inc (v_phi, flow_only_rdg, found_inc);
|
|
||||||
+ if (!found_inc)
|
|
||||||
+ return NULL;
|
|
||||||
+ found_phi = true;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ if (!found_phi)
|
|
||||||
+ return NULL;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ return index;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Do not distribute loop if vertexes in ROOT_MAP have antidependence within
|
|
||||||
+ FLOW_ONLY_RDG. */
|
|
||||||
+
|
|
||||||
+static bool
|
|
||||||
+check_no_dependency (struct graph *flow_only_rdg, bitmap root_map)
|
|
||||||
+{
|
|
||||||
+ bitmap_iterator bi;
|
|
||||||
+ unsigned ui;
|
|
||||||
+ auto_vec<unsigned, 16> visited_nodes;
|
|
||||||
+ auto_bitmap visited_map;
|
|
||||||
+ EXECUTE_IF_SET_IN_BITMAP (root_map, 0, ui, bi)
|
|
||||||
+ visited_nodes.safe_push (ui);
|
|
||||||
+ for (ui = 0; ui < visited_nodes.length (); ui++)
|
|
||||||
+ {
|
|
||||||
+ struct vertex *v = &(flow_only_rdg->vertices[visited_nodes[ui]]);
|
|
||||||
+ struct graph_edge *e;
|
|
||||||
+ for (e = v->succ; e; e = e->succ_next)
|
|
||||||
+ {
|
|
||||||
+ if (bitmap_bit_p (root_map, e->dest))
|
|
||||||
+ return false;
|
|
||||||
+ if (bitmap_bit_p (visited_map, e->dest))
|
|
||||||
+ continue;
|
|
||||||
+ visited_nodes.safe_push (e->dest);
|
|
||||||
+ bitmap_set_bit (visited_map, e->dest);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Find isomorphic stmts from GROUPED_LOADS in VINFO and make sure
|
|
||||||
+ there is no dependency among those STMT we found. */
|
|
||||||
+
|
|
||||||
+static unsigned
|
|
||||||
+get_cut_points (struct graph *flow_only_rdg, bitmap cut_points,
|
|
||||||
+ loop_vec_info vinfo)
|
|
||||||
+{
|
|
||||||
+ unsigned n_stmts = 0;
|
|
||||||
+
|
|
||||||
+ /* STMTS that may be CUT_POINTS. */
|
|
||||||
+ auto_vec<gimple *> stmts;
|
|
||||||
+ if (!find_isomorphic_stmts (vinfo, stmts))
|
|
||||||
+ {
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ fprintf (dump_file, "No temp array insertion: no isomorphic stmts"
|
|
||||||
+ " were found.\n");
|
|
||||||
+ return 0;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ for (int i = 0; i < flow_only_rdg->n_vertices; i++)
|
|
||||||
+ {
|
|
||||||
+ if (stmts.contains (RDG_STMT (flow_only_rdg, i)))
|
|
||||||
+ bitmap_set_bit (cut_points, i);
|
|
||||||
+ }
|
|
||||||
+ n_stmts = bitmap_count_bits (cut_points);
|
|
||||||
+
|
|
||||||
+ bool succ = check_no_dependency (flow_only_rdg, cut_points);
|
|
||||||
+ if (!succ)
|
|
||||||
+ {
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ fprintf (dump_file, "No temp array inserted: data dependency"
|
|
||||||
+ " among isomorphic stmts.\n");
|
|
||||||
+ return 0;
|
|
||||||
+ }
|
|
||||||
+ return n_stmts;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+build_temp_array (struct vertex *v, gimple_stmt_iterator &gsi,
|
|
||||||
+ poly_uint64 array_extent, tree iv,
|
|
||||||
+ hash_set<tree> *tmp_array_vars, vec<gimple *> *transformed)
|
|
||||||
+{
|
|
||||||
+ gimple *stmt = RDGV_STMT (v);
|
|
||||||
+ tree lhs = gimple_assign_lhs (stmt);
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "original stmt:\t");
|
|
||||||
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
|
|
||||||
+ }
|
|
||||||
+ tree var_ssa = duplicate_ssa_name (lhs, stmt);
|
|
||||||
+ gimple_assign_set_lhs (stmt, var_ssa);
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "changed to:\t");
|
|
||||||
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS);
|
|
||||||
+ }
|
|
||||||
+ gimple_set_uid (gsi_stmt (gsi), -1);
|
|
||||||
+ tree vect_elt_type = TREE_TYPE (lhs);
|
|
||||||
+ tree array_type = build_array_type_nelts (vect_elt_type, array_extent);
|
|
||||||
+ tree array = create_tmp_var (array_type);
|
|
||||||
+ tree array_ssa = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
|
|
||||||
+ tmp_array_vars->add (array_ssa);
|
|
||||||
+ gimple *store = gimple_build_assign (array_ssa, var_ssa);
|
|
||||||
+ tree new_vdef = make_ssa_name (gimple_vop (cfun), store);
|
|
||||||
+ gsi_insert_after (&gsi, store, GSI_NEW_STMT);
|
|
||||||
+ gimple_set_vdef (store, new_vdef);
|
|
||||||
+ transformed->safe_push (store);
|
|
||||||
+ gimple_set_uid (gsi_stmt (gsi), -1);
|
|
||||||
+ tree array_ssa2 = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
|
|
||||||
+ tmp_array_vars->add (array_ssa2);
|
|
||||||
+ gimple *load = gimple_build_assign (lhs, array_ssa2);
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ {
|
|
||||||
+ fprintf (dump_file, "insert stmt:\t");
|
|
||||||
+ print_gimple_stmt (dump_file, store, 0, TDF_VOPS|TDF_MEMSYMS);
|
|
||||||
+ fprintf (dump_file, " and stmt:\t");
|
|
||||||
+ print_gimple_stmt (dump_file, load, 0, TDF_VOPS|TDF_MEMSYMS);
|
|
||||||
+ }
|
|
||||||
+ gimple_set_vuse (load, new_vdef);
|
|
||||||
+ gsi_insert_after (&gsi, load, GSI_NEW_STMT);
|
|
||||||
+ gimple_set_uid (gsi_stmt (gsi), -1);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Set bitmap PRODUCERS based on vec TRANSFORMED. */
|
|
||||||
+
|
|
||||||
+void
|
|
||||||
+loop_distribution::build_producers (loop_p loop, bitmap producers,
|
|
||||||
+ vec<gimple *> &transformed)
|
|
||||||
+{
|
|
||||||
+ auto_vec<gimple *, 10> stmts;
|
|
||||||
+ stmts_from_loop (loop, &stmts);
|
|
||||||
+ int i = 0;
|
|
||||||
+ gimple *stmt = NULL;
|
|
||||||
+
|
|
||||||
+ FOR_EACH_VEC_ELT (stmts, i, stmt)
|
|
||||||
+ gimple_set_uid (stmt, i);
|
|
||||||
+ i = 0;
|
|
||||||
+ FOR_EACH_VEC_ELT (transformed, i, stmt)
|
|
||||||
+ bitmap_set_bit (producers, stmt->uid);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Transform stmt
|
|
||||||
+
|
|
||||||
+ A = FOO (ARG_1);
|
|
||||||
+
|
|
||||||
+ to
|
|
||||||
+
|
|
||||||
+ STMT_1: A1 = FOO (ARG_1);
|
|
||||||
+ STMT_2: X[I] = A1;
|
|
||||||
+ STMT_3: A = X[I];
|
|
||||||
+
|
|
||||||
+ Producer is STMT_2 who defines the temp array and consumer is
|
|
||||||
+ STMT_3 who uses the temp array. */
|
|
||||||
+
|
|
||||||
+void
|
|
||||||
+loop_distribution::do_insertion (loop_p loop, struct graph *flow_only_rdg,
|
|
||||||
+ tree iv, bitmap cut_points,
|
|
||||||
+ hash_set<tree> *tmp_array_vars,
|
|
||||||
+ bitmap producers)
|
|
||||||
+{
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ fprintf (dump_file, "=== do insertion ===\n");
|
|
||||||
+
|
|
||||||
+ auto_vec<gimple *> transformed;
|
|
||||||
+
|
|
||||||
+ /* Execution times of loop. */
|
|
||||||
+ poly_uint64 array_extent
|
|
||||||
+ = tree_to_poly_uint64 (number_of_latch_executions (loop)) + 1;
|
|
||||||
+
|
|
||||||
+ basic_block *bbs = get_loop_body_in_custom_order (loop, this,
|
|
||||||
+ bb_top_order_cmp_r);
|
|
||||||
+
|
|
||||||
+ for (int i = 0; i < int (loop->num_nodes); i++)
|
|
||||||
+ {
|
|
||||||
+ basic_block bb = bbs[i];
|
|
||||||
+
|
|
||||||
+ /* Find all cut points in bb and transform them. */
|
|
||||||
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
|
|
||||||
+ gsi_next (&gsi))
|
|
||||||
+ {
|
|
||||||
+ unsigned j = gimple_uid (gsi_stmt (gsi));
|
|
||||||
+ if (bitmap_bit_p (cut_points, j))
|
|
||||||
+ {
|
|
||||||
+ struct vertex *v = &(flow_only_rdg->vertices[j]);
|
|
||||||
+ build_temp_array (v, gsi, array_extent, iv, tmp_array_vars,
|
|
||||||
+ &transformed);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ build_producers (loop, producers, transformed);
|
|
||||||
+ update_ssa (TODO_update_ssa);
|
|
||||||
+ free (bbs);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* After temp array insertion, given stmts
|
|
||||||
+ STMT_1: M = FOO (ARG_1);
|
|
||||||
+ STMT_2: X[I] = M;
|
|
||||||
+ STMT_3: A = X[I];
|
|
||||||
+ STMT_2 is the producer, STMT_1 is its prev and STMT_3 is its next.
|
|
||||||
+ Replace M with A, and remove STMT_2 and STMT_3. */
|
|
||||||
+
|
|
||||||
+static void
|
|
||||||
+reset_gimple_assign (struct graph *flow_only_rdg, struct partition *partition,
|
|
||||||
+ gimple_stmt_iterator &gsi, int j)
|
|
||||||
+{
|
|
||||||
+ struct vertex *v = &(flow_only_rdg->vertices[j]);
|
|
||||||
+ gimple *stmt = RDGV_STMT (v);
|
|
||||||
+ gimple *prev = stmt->prev;
|
|
||||||
+ gimple *next = stmt->next;
|
|
||||||
+ tree n_lhs = gimple_assign_lhs (next);
|
|
||||||
+ gimple_assign_set_lhs (prev, n_lhs);
|
|
||||||
+ unlink_stmt_vdef (stmt);
|
|
||||||
+ if (partition)
|
|
||||||
+ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
|
|
||||||
+ gsi_remove (&gsi, true);
|
|
||||||
+ release_defs (stmt);
|
|
||||||
+ if (partition)
|
|
||||||
+ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
|
|
||||||
+ gsi_remove (&gsi, true);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void
|
|
||||||
+loop_distribution::remove_insertion (loop_p loop, struct graph *flow_only_rdg,
|
|
||||||
+ bitmap producers, struct partition *partition)
|
|
||||||
+{
|
|
||||||
+ basic_block *bbs = get_loop_body_in_custom_order (loop, this,
|
|
||||||
+ bb_top_order_cmp_r);
|
|
||||||
+ for (int i = 0; i < int (loop->num_nodes); i++)
|
|
||||||
+ {
|
|
||||||
+ basic_block bb = bbs[i];
|
|
||||||
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
|
|
||||||
+ gsi_next (&gsi))
|
|
||||||
+ {
|
|
||||||
+ unsigned j = gimple_uid (gsi_stmt (gsi));
|
|
||||||
+ if (bitmap_bit_p (producers, j))
|
|
||||||
+ reset_gimple_assign (flow_only_rdg, partition, gsi, j);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ update_ssa (TODO_update_ssa);
|
|
||||||
+ free (bbs);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* Insert temp arrays if isomorphic computation exists. Temp arrays will be
|
|
||||||
+ regarded as SEED_STMTS for building partitions in succeeding processes. */
|
|
||||||
+
|
|
||||||
+bool
|
|
||||||
+loop_distribution::insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
|
|
||||||
+ hash_set<tree> *tmp_array_vars, bitmap producers)
|
|
||||||
+{
|
|
||||||
+ struct graph *flow_only_rdg = build_rdg (loop, NULL);
|
|
||||||
+ gcc_checking_assert (flow_only_rdg != NULL);
|
|
||||||
+ tree iv = get_iv_from_seed (flow_only_rdg, seed_stmts);
|
|
||||||
+ if (iv == NULL)
|
|
||||||
+ {
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ fprintf (dump_file, "Loop %d no temp array insertion: failed to get"
|
|
||||||
+ " iteration variable.\n", loop->num);
|
|
||||||
+ free_rdg (flow_only_rdg);
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ auto_bitmap cut_points;
|
|
||||||
+ loop_vec_info vinfo = loop_vec_info_for_loop (loop);
|
|
||||||
+ unsigned n_cut_points = get_cut_points (flow_only_rdg, cut_points, vinfo);
|
|
||||||
+ delete vinfo;
|
|
||||||
+ loop->aux = NULL;
|
|
||||||
+ if (n_cut_points == 0)
|
|
||||||
+ {
|
|
||||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
+ fprintf (dump_file, "Loop %d no temp array insertion: no cut points"
|
|
||||||
+ " found.\n", loop->num);
|
|
||||||
+ free_rdg (flow_only_rdg);
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ do_insertion (loop, flow_only_rdg, iv, cut_points, tmp_array_vars, producers);
|
|
||||||
+ if (dump_enabled_p ())
|
|
||||||
+ {
|
|
||||||
+ dump_user_location_t loc = find_loop_location (loop);
|
|
||||||
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion done:"
|
|
||||||
+ " %d temp arrays inserted in Loop %d.\n",
|
|
||||||
+ n_cut_points, loop->num);
|
|
||||||
+ }
|
|
||||||
+ free_rdg (flow_only_rdg);
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static bool find_seed_stmts_for_distribution (class loop *, vec<gimple *> *);
|
|
||||||
+
|
|
||||||
/* Distributes the code from LOOP in such a way that producer statements
|
|
||||||
are placed before consumer statements. Tries to separate only the
|
|
||||||
statements from STMTS into separate loops. Returns the number of
|
|
||||||
@@ -3814,6 +4311,34 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ /* Try to distribute LOOP if LOOP is simple enough and unable to vectorize.
|
|
||||||
+ If LOOP has grouped loads, recursively find isomorphic stmts and insert
|
|
||||||
+ temp arrays, rebuild RDG and call find_seed_stmts_for_distribution
|
|
||||||
+ to replace STMTS. */
|
|
||||||
+
|
|
||||||
+ hash_set<tree> tmp_array_vars;
|
|
||||||
+
|
|
||||||
+ /* STMTs that define those inserted TMP_ARRAYs. */
|
|
||||||
+ auto_bitmap producers;
|
|
||||||
+
|
|
||||||
+ /* New SEED_STMTS after insertion. */
|
|
||||||
+ auto_vec<gimple *> work_list;
|
|
||||||
+ bool insert_success = false;
|
|
||||||
+ if (may_insert_temp_arrays (loop, rdg, cd))
|
|
||||||
+ {
|
|
||||||
+ if (insert_temp_arrays (loop, stmts, &tmp_array_vars, producers))
|
|
||||||
+ {
|
|
||||||
+ if (find_seed_stmts_for_distribution (loop, &work_list))
|
|
||||||
+ {
|
|
||||||
+ insert_success = true;
|
|
||||||
+ stmts = work_list;
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ remove_insertion (loop, rdg, producers, NULL);
|
|
||||||
+ rebuild_rdg (loop, rdg, cd);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
data_reference_p dref;
|
|
||||||
for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
|
|
||||||
dref->aux = (void *) (uintptr_t) i;
|
|
||||||
@@ -3894,7 +4419,7 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
|
|
||||||
for (int j = i + 1;
|
|
||||||
partitions.iterate (j, &partition); ++j)
|
|
||||||
{
|
|
||||||
- if (share_memory_accesses (rdg, into, partition))
|
|
||||||
+ if (share_memory_accesses (rdg, into, partition, &tmp_array_vars))
|
|
||||||
{
|
|
||||||
partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
|
|
||||||
partitions.unordered_remove (j);
|
|
||||||
@@ -3944,7 +4469,7 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
- finalize_partitions (loop, &partitions, &alias_ddrs);
|
|
||||||
+ finalize_partitions (loop, &partitions, &alias_ddrs, producers);
|
|
||||||
|
|
||||||
/* If there is a reduction in all partitions make sure the last one
|
|
||||||
is not classified for builtin code generation. */
|
|
||||||
@@ -3962,6 +4487,24 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
|
|
||||||
}
|
|
||||||
|
|
||||||
nbp = partitions.length ();
|
|
||||||
+
|
|
||||||
+ /* If we have inserted TMP_ARRAYs but there is only one partition left in
|
|
||||||
+ the succeeding processes, remove those inserted TMP_ARRAYs to restore the
|
|
||||||
+ original version. */
|
|
||||||
+
|
|
||||||
+ if (nbp == 1 && insert_success)
|
|
||||||
+ {
|
|
||||||
+ struct partition *partition = NULL;
|
|
||||||
+ partitions.iterate (0, &partition);
|
|
||||||
+ remove_insertion (loop, rdg, producers, partition);
|
|
||||||
+ if (dump_enabled_p ())
|
|
||||||
+ {
|
|
||||||
+ dump_user_location_t loc = find_loop_location (loop);
|
|
||||||
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion removed:"
|
|
||||||
+ " unable to distribute loop %d.\n", loop->num);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
if (nbp == 0
|
|
||||||
|| (nbp == 1 && !partition_builtin_p (partitions[0]))
|
|
||||||
|| (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,206 +0,0 @@
|
|||||||
From 717782ec36469eb81650b07e8b5536281a59993d Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhongyunde <zhongyunde@huawei.com>
|
|
||||||
Date: Tue, 29 Nov 2022 22:12:29 +0800
|
|
||||||
Subject: [PATCH 34/35] Revert "[Backport] tree-optimization/102880 - make
|
|
||||||
PHI-OPT recognize more CFGs"
|
|
||||||
|
|
||||||
This reverts commit 77398954ce517aa011b7a254c7aa2858521b2093.
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 ---------
|
|
||||||
gcc/tree-ssa-phiopt.c | 73 +++++++++-------------
|
|
||||||
2 files changed, 29 insertions(+), 75 deletions(-)
|
|
||||||
delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
|
||||||
deleted file mode 100644
|
|
||||||
index 21aa66e38..000000000
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
|
||||||
+++ /dev/null
|
|
||||||
@@ -1,31 +0,0 @@
|
|
||||||
-/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */
|
|
||||||
-
|
|
||||||
-int __GIMPLE (ssa,startwith("phiopt"))
|
|
||||||
-foo (int a, int b, int flag)
|
|
||||||
-{
|
|
||||||
- int res;
|
|
||||||
-
|
|
||||||
- __BB(2):
|
|
||||||
- if (flag_2(D) != 0)
|
|
||||||
- goto __BB6;
|
|
||||||
- else
|
|
||||||
- goto __BB4;
|
|
||||||
-
|
|
||||||
- __BB(4):
|
|
||||||
- if (a_3(D) > b_4(D))
|
|
||||||
- goto __BB7;
|
|
||||||
- else
|
|
||||||
- goto __BB6;
|
|
||||||
-
|
|
||||||
- __BB(6):
|
|
||||||
- goto __BB7;
|
|
||||||
-
|
|
||||||
- __BB(7):
|
|
||||||
- res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D));
|
|
||||||
- return res_1;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-/* We should be able to detect MAX despite the extra edge into
|
|
||||||
- the middle BB. */
|
|
||||||
-/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 079d29e74..21ac08145 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -219,6 +219,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
|
|
||||||
/* If either bb1's succ or bb2 or bb2's succ is non NULL. */
|
|
||||||
if (EDGE_COUNT (bb1->succs) == 0
|
|
||||||
+ || bb2 == NULL
|
|
||||||
|| EDGE_COUNT (bb2->succs) == 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
@@ -278,14 +279,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
|| (e1->flags & EDGE_FALLTHRU) == 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
+ /* Also make sure that bb1 only have one predecessor and that it
|
|
||||||
+ is bb. */
|
|
||||||
+ if (!single_pred_p (bb1)
|
|
||||||
+ || single_pred (bb1) != bb)
|
|
||||||
+ continue;
|
|
||||||
+
|
|
||||||
if (do_store_elim)
|
|
||||||
{
|
|
||||||
- /* Also make sure that bb1 only have one predecessor and that it
|
|
||||||
- is bb. */
|
|
||||||
- if (!single_pred_p (bb1)
|
|
||||||
- || single_pred (bb1) != bb)
|
|
||||||
- continue;
|
|
||||||
-
|
|
||||||
/* bb1 is the middle block, bb2 the join block, bb the split block,
|
|
||||||
e1 the fallthrough edge from bb1 to bb2. We can't do the
|
|
||||||
optimization if the join block has more than two predecessors. */
|
|
||||||
@@ -330,11 +331,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
node. */
|
|
||||||
gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
|
|
||||||
|
|
||||||
- gphi *newphi;
|
|
||||||
- if (single_pred_p (bb1)
|
|
||||||
- && (newphi = factor_out_conditional_conversion (e1, e2, phi,
|
|
||||||
- arg0, arg1,
|
|
||||||
- cond_stmt)))
|
|
||||||
+ gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
|
|
||||||
+ arg0, arg1,
|
|
||||||
+ cond_stmt);
|
|
||||||
+ if (newphi != NULL)
|
|
||||||
{
|
|
||||||
phi = newphi;
|
|
||||||
/* factor_out_conditional_conversion may create a new PHI in
|
|
||||||
@@ -355,14 +355,12 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
|
||||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (!early_p
|
|
||||||
- && single_pred_p (bb1)
|
|
||||||
&& cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
|
|
||||||
phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
- else if (single_pred_p (bb1)
|
|
||||||
- && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
+ else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -393,41 +391,35 @@ replace_phi_edge_with_variable (basic_block cond_block,
|
|
||||||
edge e, gphi *phi, tree new_tree)
|
|
||||||
{
|
|
||||||
basic_block bb = gimple_bb (phi);
|
|
||||||
+ basic_block block_to_remove;
|
|
||||||
gimple_stmt_iterator gsi;
|
|
||||||
|
|
||||||
/* Change the PHI argument to new. */
|
|
||||||
SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
|
|
||||||
|
|
||||||
/* Remove the empty basic block. */
|
|
||||||
- edge edge_to_remove;
|
|
||||||
if (EDGE_SUCC (cond_block, 0)->dest == bb)
|
|
||||||
- edge_to_remove = EDGE_SUCC (cond_block, 1);
|
|
||||||
- else
|
|
||||||
- edge_to_remove = EDGE_SUCC (cond_block, 0);
|
|
||||||
- if (EDGE_COUNT (edge_to_remove->dest->preds) == 1)
|
|
||||||
{
|
|
||||||
- e->flags |= EDGE_FALLTHRU;
|
|
||||||
- e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
|
||||||
- e->probability = profile_probability::always ();
|
|
||||||
- delete_basic_block (edge_to_remove->dest);
|
|
||||||
-
|
|
||||||
- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
|
|
||||||
- gsi = gsi_last_bb (cond_block);
|
|
||||||
- gsi_remove (&gsi, true);
|
|
||||||
+ EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
|
|
||||||
+ EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
|
||||||
+ EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();
|
|
||||||
+
|
|
||||||
+ block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
- /* If there are other edges into the middle block make
|
|
||||||
- CFG cleanup deal with the edge removal to avoid
|
|
||||||
- updating dominators here in a non-trivial way. */
|
|
||||||
- gcond *cond = as_a <gcond *> (last_stmt (cond_block));
|
|
||||||
- if (edge_to_remove->flags & EDGE_TRUE_VALUE)
|
|
||||||
- gimple_cond_make_false (cond);
|
|
||||||
- else
|
|
||||||
- gimple_cond_make_true (cond);
|
|
||||||
+ EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
|
|
||||||
+ EDGE_SUCC (cond_block, 1)->flags
|
|
||||||
+ &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
|
||||||
+ EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();
|
|
||||||
+
|
|
||||||
+ block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
|
|
||||||
}
|
|
||||||
+ delete_basic_block (block_to_remove);
|
|
||||||
|
|
||||||
- statistics_counter_event (cfun, "Replace PHI with variable", 1);
|
|
||||||
+ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
|
|
||||||
+ gsi = gsi_last_bb (cond_block);
|
|
||||||
+ gsi_remove (&gsi, true);
|
|
||||||
|
|
||||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
||||||
fprintf (dump_file,
|
|
||||||
@@ -854,9 +846,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
allow it and move it once the transformation is done. */
|
|
||||||
if (!empty_block_p (middle_bb))
|
|
||||||
{
|
|
||||||
- if (!single_pred_p (middle_bb))
|
|
||||||
- return false;
|
|
||||||
-
|
|
||||||
stmt_to_move = last_and_only_stmt (middle_bb);
|
|
||||||
if (!stmt_to_move)
|
|
||||||
return false;
|
|
||||||
@@ -1236,11 +1225,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
- if (!single_pred_p (middle_bb))
|
|
||||||
- return 0;
|
|
||||||
- statistics_counter_event (cfun, "Replace PHI with "
|
|
||||||
- "variable/value_replacement", 1);
|
|
||||||
-
|
|
||||||
/* Replace the PHI arguments with arg. */
|
|
||||||
SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
|
|
||||||
SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
|
|
||||||
@@ -1255,6 +1239,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Now optimize (x != 0) ? x + y : y to just x + y. */
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,64 +0,0 @@
|
|||||||
From fb86109ebb10cdb82e1e3ffa37bb7e770fb7c066 Mon Sep 17 00:00:00 2001
|
|
||||||
From: eastb233 <xiezhiheng@huawei.com>
|
|
||||||
Date: Wed, 7 Dec 2022 09:43:15 +0800
|
|
||||||
Subject: [PATCH] [MULL64] Disable mull64 transformation by default
|
|
||||||
|
|
||||||
This commit disables mull64 transformation by default since
|
|
||||||
it shows some runtime failure in workloads.
|
|
||||||
---
|
|
||||||
gcc/match.pd | 2 +-
|
|
||||||
gcc/opts.c | 1 -
|
|
||||||
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/pr107190.c | 2 +-
|
|
||||||
4 files changed, 3 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
|
||||||
index 433682afb..01f81b063 100644
|
|
||||||
--- a/gcc/match.pd
|
|
||||||
+++ b/gcc/match.pd
|
|
||||||
@@ -3393,7 +3393,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|
||||||
(simplify
|
|
||||||
(cond @0 (op:s @1 integer_pow2p@2) @1)
|
|
||||||
/* powerof2cst */
|
|
||||||
- (if (INTEGRAL_TYPE_P (type))
|
|
||||||
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type))
|
|
||||||
(with {
|
|
||||||
tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
|
|
||||||
}
|
|
||||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
|
||||||
index 751965e46..f12b13599 100644
|
|
||||||
--- a/gcc/opts.c
|
|
||||||
+++ b/gcc/opts.c
|
|
||||||
@@ -511,7 +511,6 @@ static const struct default_options default_options_table[] =
|
|
||||||
{ OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
|
|
||||||
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
|
|
||||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
|
|
||||||
- { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
|
|
||||||
|
|
||||||
/* -O2 and above optimizations, but not -Os or -Og. */
|
|
||||||
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
|
|
||||||
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
|
||||||
index f61cf5e6f..cad891e62 100644
|
|
||||||
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
|
||||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
|
|
||||||
+/* { dg-options "-O2 -fmerge-mull -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
|
|
||||||
|
|
||||||
# define BN_BITS4 32
|
|
||||||
# define BN_MASK2 (0xffffffffffffffffL)
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
|
|
||||||
index 235b2761a..d1e72e5df 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/pr107190.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/pr107190.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
|
|
||||||
+/* { dg-options "-O2 -fmerge-mull -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
|
|
||||||
|
|
||||||
# define BN_BITS4 32
|
|
||||||
# define BN_MASK2 (0xffffffffffffffffL)
|
|
||||||
--
|
|
||||||
2.25.1
|
|
||||||
|
|
||||||
@ -1,58 +0,0 @@
|
|||||||
From d73cd8783ca930724def3e9909fc484ec15404f5 Mon Sep 17 00:00:00 2001
|
|
||||||
From: benniaobufeijiushiji <linda7@huawei.com>
|
|
||||||
Date: Mon, 19 Dec 2022 11:48:12 +0800
|
|
||||||
Subject: [PATCH 1/3] [loop-distribution] Bugfix for loop-distribution Add
|
|
||||||
exception in function BUILD_QUEUE when there is a null pointer in
|
|
||||||
grouped_loads.
|
|
||||||
|
|
||||||
---
|
|
||||||
gcc/tree-loop-distribution.c | 16 ++++++++++++++++
|
|
||||||
1 file changed, 16 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
|
|
||||||
index 88b56379c..b68b9c7eb 100644
|
|
||||||
--- a/gcc/tree-loop-distribution.c
|
|
||||||
+++ b/gcc/tree-loop-distribution.c
|
|
||||||
@@ -3208,16 +3208,31 @@ build_queue (loop_vec_info vinfo, unsigned vf,
|
|
||||||
{
|
|
||||||
unsigned group_size = stmt_info->size;
|
|
||||||
stmt_vec_info c_stmt_info = stmt_info;
|
|
||||||
+ bool succ = true;
|
|
||||||
while (group_size >= vf)
|
|
||||||
{
|
|
||||||
vec_alloc (worklist, vf);
|
|
||||||
for (unsigned j = 0; j < vf; ++j)
|
|
||||||
{
|
|
||||||
+ if (c_stmt_info == NULL)
|
|
||||||
+ {
|
|
||||||
+ succ = false;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
ginfo = new _group_info ();
|
|
||||||
ginfo->stmt = c_stmt_info->stmt;
|
|
||||||
worklist->safe_push (ginfo);
|
|
||||||
c_stmt_info = c_stmt_info->next_element;
|
|
||||||
}
|
|
||||||
+ if (!succ)
|
|
||||||
+ {
|
|
||||||
+ unsigned k = 0;
|
|
||||||
+ ginfo = NULL;
|
|
||||||
+ FOR_EACH_VEC_ELT (*worklist, k, ginfo)
|
|
||||||
+ delete ginfo;
|
|
||||||
+ vec_free (worklist);
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
worklists.safe_push (worklist);
|
|
||||||
group_size -= vf;
|
|
||||||
}
|
|
||||||
@@ -3711,6 +3726,7 @@ free_ginfos (vec<vec<group_info> *> &worklists)
|
|
||||||
unsigned j = 0;
|
|
||||||
FOR_EACH_VEC_ELT (*worklist, j, ginfo)
|
|
||||||
delete ginfo;
|
|
||||||
+ vec_free (worklist);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,26 +0,0 @@
|
|||||||
From b2b710238e13eb2fced77d89cd8dcc86f77b6c6c Mon Sep 17 00:00:00 2001
|
|
||||||
From: benniaobufeijiushiji <linda7@huawei.com>
|
|
||||||
Date: Mon, 19 Dec 2022 15:12:24 +0800
|
|
||||||
Subject: [PATCH 2/3] [semi-relayout] Bugfix for struct semi-relayout Bugfix
|
|
||||||
when relayout candidate type is null.
|
|
||||||
|
|
||||||
---
|
|
||||||
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 2 ++
|
|
||||||
1 file changed, 2 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
index 4751711fe..2cac340c7 100644
|
|
||||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
|
||||||
@@ -6408,6 +6408,8 @@ ipa_struct_reorg::is_semi_relayout_candidate (tree xhs)
|
|
||||||
{
|
|
||||||
tree type = TREE_TYPE (mem);
|
|
||||||
srtype *old_type = get_relayout_candidate_type (type);
|
|
||||||
+ if (!old_type)
|
|
||||||
+ return false;
|
|
||||||
if (types_compatible_p (type, old_type->type)
|
|
||||||
&& old_type->semi_relayout)
|
|
||||||
return true;
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,55 +0,0 @@
|
|||||||
From ae15300352b0fa47a533af852f88e7244c2820cc Mon Sep 17 00:00:00 2001
|
|
||||||
From: Richard Biener <rguenther@suse.de>
|
|
||||||
Date: Tue, 29 Sep 2020 14:38:06 +0200
|
|
||||||
Subject: [PATCH 3/3] [Backport] tree-optimization/97238 - fix typo causing ICE
|
|
||||||
|
|
||||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=29aef377d814bd342dd5a306f99e0d614623ce0e
|
|
||||||
|
|
||||||
This fixes a typo causing a NULL dereference.
|
|
||||||
|
|
||||||
2020-09-29 Richard Biener <rguenther@suse.de>
|
|
||||||
|
|
||||||
PR tree-optimization/97238
|
|
||||||
* tree-ssa-reassoc.c (ovce_extract_ops): Fix typo.
|
|
||||||
|
|
||||||
* gcc.dg/pr97238.c: New testcase.
|
|
||||||
---
|
|
||||||
gcc/testsuite/gcc.dg/pr97238.c | 12 ++++++++++++
|
|
||||||
gcc/tree-ssa-reassoc.c | 2 +-
|
|
||||||
2 files changed, 13 insertions(+), 1 deletion(-)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/pr97238.c
|
|
||||||
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/pr97238.c b/gcc/testsuite/gcc.dg/pr97238.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..746e93a97
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/pr97238.c
|
|
||||||
@@ -0,0 +1,12 @@
|
|
||||||
+/* { dg-do compile } */
|
|
||||||
+/* { dg-options "-O -Wno-psabi -w" } */
|
|
||||||
+
|
|
||||||
+typedef int __attribute__ ((__vector_size__ (8))) V;
|
|
||||||
+int b, c, e;
|
|
||||||
+V d;
|
|
||||||
+
|
|
||||||
+V
|
|
||||||
+foo (void)
|
|
||||||
+{
|
|
||||||
+ return (b || e) | c > d | ((b || e) | c > d);
|
|
||||||
+}
|
|
||||||
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
|
|
||||||
index 5f978ac78..62e7c8dca 100644
|
|
||||||
--- a/gcc/tree-ssa-reassoc.c
|
|
||||||
+++ b/gcc/tree-ssa-reassoc.c
|
|
||||||
@@ -3853,7 +3853,7 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
|
|
||||||
return ERROR_MARK;
|
|
||||||
|
|
||||||
gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
|
|
||||||
- if (stmt == NULL
|
|
||||||
+ if (assign == NULL
|
|
||||||
|| TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
|
|
||||||
return ERROR_MARK;
|
|
||||||
|
|
||||||
--
|
|
||||||
2.27.0.windows.1
|
|
||||||
|
|
||||||
@ -1,25 +0,0 @@
|
|||||||
From d631be52d401d834261f86113b3a738014540b6c Mon Sep 17 00:00:00 2001
|
|
||||||
From: xiongzhou4 <xiongzhou4@huawei.com>
|
|
||||||
Date: Fri, 30 Dec 2022 20:15:11 +0800
|
|
||||||
Subject: [PATCH] Replace *vcond with vcond as we check for NULL pointer.
|
|
||||||
|
|
||||||
---
|
|
||||||
gcc/tree-ssa-reassoc.c | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
|
|
||||||
index 62e7c8dca..1ad43dba1 100644
|
|
||||||
--- a/gcc/tree-ssa-reassoc.c
|
|
||||||
+++ b/gcc/tree-ssa-reassoc.c
|
|
||||||
@@ -3839,7 +3839,7 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
|
|
||||||
gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
|
|
||||||
if (stmt == NULL)
|
|
||||||
return ERROR_MARK;
|
|
||||||
- if (*vcond)
|
|
||||||
+ if (vcond)
|
|
||||||
*vcond = stmt;
|
|
||||||
|
|
||||||
/* ??? If we start creating more COND_EXPR, we could perform
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
@ -1,46 +0,0 @@
|
|||||||
From 655fcd3bad37d6d601c869a6471284cc8255e220 Mon Sep 17 00:00:00 2001
|
|
||||||
From: liyancheng <412998149@qq.com>
|
|
||||||
Date: Wed, 23 Nov 2022 21:12:51 +0800
|
|
||||||
Subject: [PATCH] [Backport] libsanitizer: cherry-pick 9cf13067cb5088626ba7
|
|
||||||
from upstream
|
|
||||||
|
|
||||||
[Reference] https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=2701442d0cf6292f6624443c15813d6d1a3562fe
|
|
||||||
|
|
||||||
9cf13067cb5088626ba7ee1ec4c42ec59c7995a0
|
|
||||||
[sanitizer] Remove #include <linux/fs.h> to resolve fsconfig_command/mount_attr conflict with glibc 2.36
|
|
||||||
---
|
|
||||||
.../sanitizer_platform_limits_posix.cpp | 10 ++++++----
|
|
||||||
1 file changed, 6 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp
|
|
||||||
index aa845df4d..8b4162bcd 100644
|
|
||||||
--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp
|
|
||||||
+++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp
|
|
||||||
@@ -72,7 +72,9 @@
|
|
||||||
#include <sys/vt.h>
|
|
||||||
#include <linux/cdrom.h>
|
|
||||||
#include <linux/fd.h>
|
|
||||||
+#if SANITIZER_ANDROID
|
|
||||||
#include <linux/fs.h>
|
|
||||||
+#endif
|
|
||||||
#include <linux/hdreg.h>
|
|
||||||
#include <linux/input.h>
|
|
||||||
#include <linux/ioctl.h>
|
|
||||||
@@ -833,10 +835,10 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
|
|
||||||
unsigned IOCTL_EVIOCGPROP = IOCTL_NOT_PRESENT;
|
|
||||||
unsigned IOCTL_EVIOCSKEYCODE_V2 = IOCTL_NOT_PRESENT;
|
|
||||||
#endif
|
|
||||||
- unsigned IOCTL_FS_IOC_GETFLAGS = FS_IOC_GETFLAGS;
|
|
||||||
- unsigned IOCTL_FS_IOC_GETVERSION = FS_IOC_GETVERSION;
|
|
||||||
- unsigned IOCTL_FS_IOC_SETFLAGS = FS_IOC_SETFLAGS;
|
|
||||||
- unsigned IOCTL_FS_IOC_SETVERSION = FS_IOC_SETVERSION;
|
|
||||||
+ unsigned IOCTL_FS_IOC_GETFLAGS = _IOR('f', 1, long);
|
|
||||||
+ unsigned IOCTL_FS_IOC_GETVERSION = _IOR('v', 1, long);
|
|
||||||
+ unsigned IOCTL_FS_IOC_SETFLAGS = _IOW('f', 2, long);
|
|
||||||
+ unsigned IOCTL_FS_IOC_SETVERSION = _IOW('v', 2, long);
|
|
||||||
unsigned IOCTL_GIO_CMAP = GIO_CMAP;
|
|
||||||
unsigned IOCTL_GIO_FONT = GIO_FONT;
|
|
||||||
unsigned IOCTL_GIO_UNIMAP = GIO_UNIMAP;
|
|
||||||
--
|
|
||||||
2.25.1
|
|
||||||
|
|
||||||
@ -1,30 +0,0 @@
|
|||||||
From a7c23eb36641d605df37f5942d188a764a2480f9 Mon Sep 17 00:00:00 2001
|
|
||||||
From: huitailangzju <804544223@qq.com>
|
|
||||||
Date: Tue, 14 Feb 2023 10:54:10 +0800
|
|
||||||
Subject: [PATCH] State --sysroot option as validated once processed
|
|
||||||
|
|
||||||
[Reference] https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=8e86086bd33134467cc9c2a75327d1238dc71df9
|
|
||||||
|
|
||||||
Since we now save the option in the "switches" table
|
|
||||||
to let specs use it more generally, we need to explicitly
|
|
||||||
state that the option was validated else the driver
|
|
||||||
will consider it "unrecognized".
|
|
||||||
---
|
|
||||||
gcc/gcc.c | 1 +
|
|
||||||
1 file changed, 1 insertion(+)
|
|
||||||
|
|
||||||
diff --git a/gcc/gcc.c b/gcc/gcc.c
|
|
||||||
index 655beffcc..efa0b53ce 100644
|
|
||||||
--- a/gcc/gcc.c
|
|
||||||
+++ b/gcc/gcc.c
|
|
||||||
@@ -4193,6 +4193,7 @@ driver_handle_option (struct gcc_options *opts,
|
|
||||||
/* Saving this option is useful to let self-specs decide to
|
|
||||||
provide a default one. */
|
|
||||||
do_save = true;
|
|
||||||
+ validated = true;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OPT_time_:
|
|
||||||
--
|
|
||||||
2.28.0.windows.1
|
|
||||||
|
|
||||||
@ -1,129 +0,0 @@
|
|||||||
From bf537e82d452ee9b79f438df721c2e0dfaae12a0 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Xiong Zhou <xiongzhou4@huawei.com>
|
|
||||||
Date: Fri, 5 May 2023 11:57:40 +0800
|
|
||||||
Subject: [PATCH 1/2] - bogus -Wstringop-overflow with VLA of elements larger
|
|
||||||
than byte
|
|
||||||
|
|
||||||
---
|
|
||||||
gcc/calls.c | 5 ++
|
|
||||||
gcc/testsuite/gcc.dg/Wstringop-overflow-67.c | 92 ++++++++++++++++++++
|
|
||||||
2 files changed, 97 insertions(+)
|
|
||||||
create mode 100644 gcc/testsuite/gcc.dg/Wstringop-overflow-67.c
|
|
||||||
|
|
||||||
diff --git a/gcc/calls.c b/gcc/calls.c
|
|
||||||
index 26894342c..45c137cee 100644
|
|
||||||
--- a/gcc/calls.c
|
|
||||||
+++ b/gcc/calls.c
|
|
||||||
@@ -2112,6 +2112,11 @@ maybe_warn_rdwr_sizes (rdwr_map *rwm, tree fndecl, tree fntype, tree exp)
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
+ /* If the size cannot be determined clear it to keep it from
|
|
||||||
+ being taken as real (and excessive). */
|
|
||||||
+ if (objsize && integer_all_onesp (objsize))
|
|
||||||
+ objsize = NULL_TREE;
|
|
||||||
+
|
|
||||||
/* For read-only and read-write attributes also set the source
|
|
||||||
size. */
|
|
||||||
srcsize = objsize;
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-67.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-67.c
|
|
||||||
new file mode 100644
|
|
||||||
index 000000000..7b8f3f014
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-67.c
|
|
||||||
@@ -0,0 +1,92 @@
|
|
||||||
+/* PR middle-end/100571 - bogus -Wstringop-overflow with VLA of elements
|
|
||||||
+ larger than byte
|
|
||||||
+ { dg-do compile }
|
|
||||||
+ { dg-options "-O2 -Wall" } */
|
|
||||||
+
|
|
||||||
+__attribute__ ((access (read_only, 1, 2))) void fro (int *, int);
|
|
||||||
+__attribute__ ((access (write_only, 1, 2))) void fwo (int *, int);
|
|
||||||
+__attribute__ ((access (read_write, 1, 2))) void frw (int *, int);
|
|
||||||
+
|
|
||||||
+extern __SIZE_TYPE__ n;
|
|
||||||
+
|
|
||||||
+void alloca_ro (void)
|
|
||||||
+{
|
|
||||||
+ int *a = __builtin_alloca (n * sizeof *a);
|
|
||||||
+ a[0] = 0;
|
|
||||||
+ fro (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void alloca_wo (void)
|
|
||||||
+{
|
|
||||||
+ int *a = __builtin_alloca (n * sizeof *a);
|
|
||||||
+ fwo (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void alloca_rw (void)
|
|
||||||
+{
|
|
||||||
+ int *a = __builtin_alloca (n * sizeof *a);
|
|
||||||
+ a[0] = 0;
|
|
||||||
+ frw (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+void calloc_ro (void)
|
|
||||||
+{
|
|
||||||
+ int *a = __builtin_calloc (n, sizeof *a);
|
|
||||||
+ fro (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void calloc_wo (void)
|
|
||||||
+{
|
|
||||||
+ int *a = __builtin_calloc (n, sizeof *a);
|
|
||||||
+ fwo (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void calloc_rw (void)
|
|
||||||
+{
|
|
||||||
+ int *a = __builtin_calloc (n, sizeof *a);
|
|
||||||
+ a[0] = 0;
|
|
||||||
+ frw (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+void malloc_ro (void)
|
|
||||||
+{
|
|
||||||
+ int *a = __builtin_malloc (n * sizeof *a);
|
|
||||||
+ a[0] = 0;
|
|
||||||
+ fro (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void malloc_wo (void)
|
|
||||||
+{
|
|
||||||
+ int *a = __builtin_malloc (n * sizeof *a);
|
|
||||||
+ fwo (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void malloc_rw (void)
|
|
||||||
+{
|
|
||||||
+ int *a = __builtin_malloc (n * sizeof *a);
|
|
||||||
+ a[0] = 0;
|
|
||||||
+ frw (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+void vla_ro (void)
|
|
||||||
+{
|
|
||||||
+ int a[n];
|
|
||||||
+ a[0] = 0;
|
|
||||||
+ fro (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void vla_wo (void)
|
|
||||||
+{
|
|
||||||
+ int a[n];
|
|
||||||
+ fwo (a, n);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void vla_rw (void)
|
|
||||||
+{
|
|
||||||
+ int a[n];
|
|
||||||
+ a[0] = 0;
|
|
||||||
+ frw (a, n);
|
|
||||||
+}
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
@ -1,183 +0,0 @@
|
|||||||
From bc6537191e91c854cc6bee3319290d7a86768957 Mon Sep 17 00:00:00 2001
|
|
||||||
From: zhongyunde <zhongyunde@huawei.com>
|
|
||||||
Date: Wed, 10 May 2023 18:39:47 +0800
|
|
||||||
Subject: [PATCH 2/2] [phiopt2] Add option to control the simplify
|
|
||||||
|
|
||||||
This phiopt change was brought in by https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=c4574d23cb07340918793a5a98ae7bb2988b3791
|
|
||||||
But it may also have bugs that were fixed by later commits, so disable it by default temporarily.
|
|
||||||
This optimization is expected to be enabled after we update the GCC base to the GCC 12 release version.
|
|
||||||
---
|
|
||||||
gcc/common.opt | 4 ++++
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/20040514-1.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/bool-1.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/bool-2.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-10.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-4.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-7.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-8.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/pr18134.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/pr21829.c | 2 +-
|
|
||||||
gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c | 4 ++--
|
|
||||||
gcc/tree-ssa-phiopt.c | 3 +++
|
|
||||||
13 files changed, 19 insertions(+), 12 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
|
||||||
index be7bfee60..5ad2def18 100644
|
|
||||||
--- a/gcc/common.opt
|
|
||||||
+++ b/gcc/common.opt
|
|
||||||
@@ -2781,6 +2781,10 @@ ftree-store-ccp
|
|
||||||
Common Ignore
|
|
||||||
Does nothing. Preserved for backward compatibility.
|
|
||||||
|
|
||||||
+ftree-fold-phiopt
|
|
||||||
+Common Report Var(flag_fold_phiopt) Init(0) Optimization
|
|
||||||
+Attempt to simply the phi node with ssa form.
|
|
||||||
+
|
|
||||||
ftree-ch
|
|
||||||
Common Report Var(flag_tree_ch) Optimization
|
|
||||||
Enable loop header copying on trees.
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040514-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040514-1.c
|
|
||||||
index 364ce6a69..b04316d55 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/20040514-1.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/20040514-1.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O1 -fdump-tree-phiopt2-details" } */
|
|
||||||
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-phiopt2-details" } */
|
|
||||||
|
|
||||||
int t( int i)
|
|
||||||
{
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bool-1.c b/gcc/testsuite/gcc.dg/tree-ssa/bool-1.c
|
|
||||||
index 401357f2f..892654108 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/bool-1.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bool-1.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O1 -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
|
|
||||||
|
|
||||||
int f(_Bool x)
|
|
||||||
{
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bool-2.c b/gcc/testsuite/gcc.dg/tree-ssa/bool-2.c
|
|
||||||
index add9cca1e..5ead90f06 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/bool-2.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bool-2.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O1 -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
|
|
||||||
|
|
||||||
int f(_Bool x)
|
|
||||||
{
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-10.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-10.c
|
|
||||||
index 4c190e6af..7b678fafc 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-10.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-10.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O1 -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
|
|
||||||
|
|
||||||
int nem1_phi (unsigned long a) { return a ? -1 : 0; }
|
|
||||||
int eqm1_phi (unsigned long a) { return a ? 0 : -1; }
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
|
||||||
index fd3706666..23b679644 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
|
||||||
@@ -1,6 +1,6 @@
|
|
||||||
/* PR tree-optimization/97690 */
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
|
||||||
+/* { dg-options "-O2 -ftree-fold-phiopt -fdump-tree-phiopt2" } */
|
|
||||||
|
|
||||||
int foo (_Bool d) { return d ? 2 : 0; }
|
|
||||||
int bar (_Bool d) { return d ? 1 : 0; }
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-4.c
|
|
||||||
index 3bdb85609..4efd9afc4 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-4.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-4.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O1 -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
|
|
||||||
|
|
||||||
_Bool t();
|
|
||||||
_Bool t1();
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-7.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-7.c
|
|
||||||
index 18ecbd52a..60dcc6733 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-7.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-7.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O1 -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
|
|
||||||
|
|
||||||
int g(int,int);
|
|
||||||
int f(int t, int c)
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-8.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-8.c
|
|
||||||
index 98c596b6a..aaa71a317 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-8.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-8.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O -fdump-tree-optimized -fdump-tree-phiopt2" } */
|
|
||||||
+/* { dg-options "-O -ftree-fold-phiopt -fdump-tree-optimized -fdump-tree-phiopt2" } */
|
|
||||||
|
|
||||||
int g(int,int);
|
|
||||||
int f(int t, int c)
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr18134.c b/gcc/testsuite/gcc.dg/tree-ssa/pr18134.c
|
|
||||||
index cd40ab2c1..efb1907cf 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr18134.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr18134.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O1 -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
|
|
||||||
|
|
||||||
int foo (int a)
|
|
||||||
{
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c b/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c
|
|
||||||
index 8f5ae5127..8c8ada905 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O2 -fdump-tree-optimized" } */
|
|
||||||
+/* { dg-options "-O2 -ftree-fold-phiopt -fdump-tree-optimized" } */
|
|
||||||
|
|
||||||
int test(int v)
|
|
||||||
{
|
|
||||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
|
|
||||||
index a2770e5e8..88c13806a 100644
|
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
|
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
|
|
||||||
@@ -1,9 +1,9 @@
|
|
||||||
/* PR tree-optimization/96928 */
|
|
||||||
/* { dg-do compile } */
|
|
||||||
-/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
|
||||||
+/* { dg-options "-O2 -ftree-fold-phiopt -fdump-tree-phiopt2 -fdump-tree-optimized" } */
|
|
||||||
/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
|
|
||||||
/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
|
|
||||||
-/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
|
|
||||||
+/* { dg-final { scan-tree-dump-times " = ~" 1 "optimized" } } */
|
|
||||||
/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
|
|
||||||
/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
|
|
||||||
|
|
||||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
|
||||||
index 51a2d3684..b7012932f 100644
|
|
||||||
--- a/gcc/tree-ssa-phiopt.c
|
|
||||||
+++ b/gcc/tree-ssa-phiopt.c
|
|
||||||
@@ -839,6 +839,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
tree result;
|
|
||||||
gimple *stmt_to_move = NULL;
|
|
||||||
|
|
||||||
+ if (!flag_fold_phiopt)
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
/* Special case A ? B : B as this will always simplify to B. */
|
|
||||||
if (operand_equal_for_phi_arg_p (arg0, arg1))
|
|
||||||
return false;
|
|
||||||
--
|
|
||||||
2.33.0
|
|
||||||
|
|
||||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user