!315 [Init] Init GCC 12.3.0 repository

From: @huang-xiaoquan 
Reviewed-by: @eastb233 
Signed-off-by: @eastb233
This commit is contained in:
openeuler-ci-bot 2023-07-12 01:21:06 +00:00 committed by Gitee
commit 577a463f57
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
93 changed files with 281 additions and 69282 deletions

View File

@@ -0,0 +1,19 @@
From 73ee6351353b036f466ba1aab9a9e7d7865bf972 Mon Sep 17 00:00:00 2001
From: eastb233 <xiezhiheng@huawei.com>
Date: Tue, 11 Jul 2023 16:07:51 +0800
Subject: [PATCH] [Version] Set version to 12.3.1
---
gcc/BASE-VER | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/BASE-VER b/gcc/BASE-VER
index 4d23cb8e0..9c028e25d 100644
--- a/gcc/BASE-VER
+++ b/gcc/BASE-VER
@@ -1 +1 @@
-12.3.0
+12.3.1
--
2.33.0

View File

@@ -1,473 +0,0 @@
From 85740d3cc56fda699beae689b5d73233d16097af Mon Sep 17 00:00:00 2001
From: bule <bule1@huawei.com>
Date: Thu, 8 Jul 2021 11:52:47 +0800
Subject: [PATCH 01/13] [libquadmath] Enable libquadmath on kunpeng
This enables libquadmath on the Kunpeng platform for the convenience of
users migrating from the x86 platform. libquadmath uses "__float128"
as its quad-precision floating-point type, with math functions carrying a "q"
suffix such as "cosq". Users who do not need x86 compatibility
can instead use "long double" as the quad-precision floating-point type and the
libm math functions with an "l" suffix, such as "cosl", for quad-precision math.
diff --git a/libquadmath/Makefile.in b/libquadmath/Makefile.in
index 8c011212258..66df9c922f8 100644
--- a/libquadmath/Makefile.in
+++ b/libquadmath/Makefile.in
@@ -90,7 +90,7 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
-@BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES =
+#libquadmath_la_DEPENDENCIES =
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -147,68 +147,68 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \
"$(DESTDIR)$(libsubincludedir)"
LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
am__dirstamp = $(am__leading_dot)dirstamp
-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/acoshq.lo math/fmodq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/acosq.lo math/frexpq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.lo math/asinhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/hypotq.lo math/remainderq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/asinq.lo math/rintq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/atan2q.lo math/isinfq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/roundq.lo math/atanhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/isnanq.lo math/scalblnq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/atanq.lo math/j0q.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/scalbnq.lo math/cbrtq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/j1q.lo math/signbitq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/ceilq.lo math/jnq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/sincos_table.lo math/complex.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/ldexpq.lo math/sincosq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/copysignq.lo math/lgammaq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/sincosq_kernel.lo math/coshq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/llroundq.lo math/sinhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/cosq.lo math/log10q.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/sinq.lo math/cosq_kernel.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/log1pq.lo math/sinq_kernel.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/erfq.lo math/logq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/sqrtq.lo math/expm1q.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/lroundq.lo math/tanhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/expq.lo math/modfq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/tanq.lo math/fabsq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/nanq.lo math/tgammaq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/finiteq.lo math/nextafterq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/truncq.lo math/floorq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/powq.lo math/fmaq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/logbq.lo math/exp2q.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/issignalingq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_neg.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_product.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/tgammaq_product.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/casinhq_kernel.lo math/cacoshq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/cacosq.lo math/casinhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/casinq.lo math/catanhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/catanq.lo math/cimagq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/conjq.lo math/cprojq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/crealq.lo math/fdimq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/fmaxq.lo math/fminq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/ilogbq.lo math/llrintq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/log2q.lo math/lrintq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/nearbyintq.lo math/remquoq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.lo math/cexpq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/clog10q.lo math/clogq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/csinq.lo math/csinhq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/csqrtq.lo math/ctanq.lo \
-@BUILD_LIBQUADMATH_TRUE@ math/ctanhq.lo printf/addmul_1.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/add_n.lo printf/cmp.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/divrem.lo printf/flt1282mpn.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/fpioconst.lo printf/lshift.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/mul_1.lo printf/mul_n.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/mul.lo printf/printf_fphex.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/printf_fp.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/rshift.lo printf/submul_1.lo \
-@BUILD_LIBQUADMATH_TRUE@ printf/sub_n.lo strtod/strtoflt128.lo \
-@BUILD_LIBQUADMATH_TRUE@ strtod/mpn2flt128.lo \
-@BUILD_LIBQUADMATH_TRUE@ strtod/tens_in_limb.lo
+am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \
+ math/acoshq.lo math/fmodq.lo \
+ math/acosq.lo math/frexpq.lo \
+ math/rem_pio2q.lo math/asinhq.lo \
+ math/hypotq.lo math/remainderq.lo \
+ math/asinq.lo math/rintq.lo \
+ math/atan2q.lo math/isinfq.lo \
+ math/roundq.lo math/atanhq.lo \
+ math/isnanq.lo math/scalblnq.lo \
+ math/atanq.lo math/j0q.lo \
+ math/scalbnq.lo math/cbrtq.lo \
+ math/j1q.lo math/signbitq.lo \
+ math/ceilq.lo math/jnq.lo \
+ math/sincos_table.lo math/complex.lo \
+ math/ldexpq.lo math/sincosq.lo \
+ math/copysignq.lo math/lgammaq.lo \
+ math/sincosq_kernel.lo math/coshq.lo \
+ math/llroundq.lo math/sinhq.lo \
+ math/cosq.lo math/log10q.lo \
+ math/sinq.lo math/cosq_kernel.lo \
+ math/log1pq.lo math/sinq_kernel.lo \
+ math/erfq.lo math/logq.lo \
+ math/sqrtq.lo math/expm1q.lo \
+ math/lroundq.lo math/tanhq.lo \
+ math/expq.lo math/modfq.lo \
+ math/tanq.lo math/fabsq.lo \
+ math/nanq.lo math/tgammaq.lo \
+ math/finiteq.lo math/nextafterq.lo \
+ math/truncq.lo math/floorq.lo \
+ math/powq.lo math/fmaq.lo \
+ math/logbq.lo math/exp2q.lo \
+ math/issignalingq.lo \
+ math/lgammaq_neg.lo \
+ math/lgammaq_product.lo \
+ math/tanq_kernel.lo \
+ math/tgammaq_product.lo \
+ math/casinhq_kernel.lo math/cacoshq.lo \
+ math/cacosq.lo math/casinhq.lo \
+ math/casinq.lo math/catanhq.lo \
+ math/catanq.lo math/cimagq.lo \
+ math/conjq.lo math/cprojq.lo \
+ math/crealq.lo math/fdimq.lo \
+ math/fmaxq.lo math/fminq.lo \
+ math/ilogbq.lo math/llrintq.lo \
+ math/log2q.lo math/lrintq.lo \
+ math/nearbyintq.lo math/remquoq.lo \
+ math/ccoshq.lo math/cexpq.lo \
+ math/clog10q.lo math/clogq.lo \
+ math/csinq.lo math/csinhq.lo \
+ math/csqrtq.lo math/ctanq.lo \
+ math/ctanhq.lo printf/addmul_1.lo \
+ printf/add_n.lo printf/cmp.lo \
+ printf/divrem.lo printf/flt1282mpn.lo \
+ printf/fpioconst.lo printf/lshift.lo \
+ printf/mul_1.lo printf/mul_n.lo \
+ printf/mul.lo printf/printf_fphex.lo \
+ printf/printf_fp.lo \
+ printf/quadmath-printf.lo \
+ printf/rshift.lo printf/submul_1.lo \
+ printf/sub_n.lo strtod/strtoflt128.lo \
+ strtod/mpn2flt128.lo \
+ strtod/tens_in_limb.lo
libquadmath_la_OBJECTS = $(am_libquadmath_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -218,8 +218,8 @@ libquadmath_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
$(AM_CFLAGS) $(CFLAGS) $(libquadmath_la_LDFLAGS) $(LDFLAGS) -o \
$@
-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_rpath = -rpath \
-@BUILD_LIBQUADMATH_TRUE@ $(toolexeclibdir)
+am_libquadmath_la_rpath = -rpath \
+ $(toolexeclibdir)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
@@ -337,7 +337,7 @@ CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
-DEFS = @DEFS@
+DEFS = @DEFS@ -D__float128="long double"
DEPDIR = @DEPDIR@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
@@ -409,7 +409,7 @@ datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
-enable_shared = @enable_shared@
+enable_shared = yes
enable_static = @enable_static@
exec_prefix = @exec_prefix@
get_gcc_base_ver = @get_gcc_base_ver@
@@ -451,109 +451,109 @@ top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = foreign info-in-builddir
-@BUILD_LIBQUADMATH_TRUE@ACLOCAL_AMFLAGS = -I .. -I ../config
-@BUILD_LIBQUADMATH_TRUE@AM_CPPFLAGS = -I $(top_srcdir)/../include
-@BUILD_LIBQUADMATH_TRUE@AM_CFLAGS = $(XCFLAGS)
-@BUILD_LIBQUADMATH_TRUE@gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_arg =
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_dep =
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun
-@BUILD_LIBQUADMATH_TRUE@toolexeclib_LTLIBRARIES = libquadmath.la
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LIBADD =
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
-@BUILD_LIBQUADMATH_TRUE@ $(version_arg) $(lt_host_flags) -lm
-
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD)
-@BUILD_LIBQUADMATH_TRUE@nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h
-@BUILD_LIBQUADMATH_TRUE@libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_SOURCES = \
-@BUILD_LIBQUADMATH_TRUE@ math/x2y2m1q.c math/acoshq.c math/fmodq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/acosq.c math/frexpq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \
-@BUILD_LIBQUADMATH_TRUE@ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \
-@BUILD_LIBQUADMATH_TRUE@ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \
-@BUILD_LIBQUADMATH_TRUE@ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \
-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \
-@BUILD_LIBQUADMATH_TRUE@ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \
-@BUILD_LIBQUADMATH_TRUE@ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \
-@BUILD_LIBQUADMATH_TRUE@ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \
-@BUILD_LIBQUADMATH_TRUE@ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \
-@BUILD_LIBQUADMATH_TRUE@ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \
-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \
-@BUILD_LIBQUADMATH_TRUE@ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c
+ACLOCAL_AMFLAGS = -I .. -I ../config
+AM_CPPFLAGS = -I $(top_srcdir)/../include
+AM_CFLAGS = $(XCFLAGS)
+gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
+@LIBQUAD_USE_SYMVER_FALSE@version_arg =
+@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun
+@LIBQUAD_USE_SYMVER_FALSE@version_dep =
+@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun
+toolexeclib_LTLIBRARIES = libquadmath.la
+libquadmath_la_LIBADD =
+libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
+ $(version_arg) $(lt_host_flags) -lm
+
+libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD)
+nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h
+libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
+libquadmath_la_SOURCES = \
+ math/x2y2m1q.c math/acoshq.c math/fmodq.c \
+ math/acosq.c math/frexpq.c \
+ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \
+ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \
+ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \
+ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \
+ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \
+ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \
+ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \
+ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \
+ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \
+ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \
+ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \
+ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \
+ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \
+ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \
+ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \
+ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \
+ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \
+ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \
+ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \
+ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \
+ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \
+ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \
+ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \
+ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \
+ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c
# Work around what appears to be a GNU make bug handling MAKEFLAGS
# values defined in terms of make variables, as is the case for CC and
# friends when we are called from the top level Makefile.
-@BUILD_LIBQUADMATH_TRUE@AM_MAKEFLAGS = \
-@BUILD_LIBQUADMATH_TRUE@ "AR_FLAGS=$(AR_FLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS=$(CFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "CXXFLAGS=$(CXXFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL=$(INSTALL)" \
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_DATA=$(INSTALL_DATA)" \
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \
-@BUILD_LIBQUADMATH_TRUE@ "JC1FLAGS=$(JC1FLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "LDFLAGS=$(LDFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \
-@BUILD_LIBQUADMATH_TRUE@ "MAKE=$(MAKE)" \
-@BUILD_LIBQUADMATH_TRUE@ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \
-@BUILD_LIBQUADMATH_TRUE@ "SHELL=$(SHELL)" \
-@BUILD_LIBQUADMATH_TRUE@ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "exec_prefix=$(exec_prefix)" \
-@BUILD_LIBQUADMATH_TRUE@ "infodir=$(infodir)" \
-@BUILD_LIBQUADMATH_TRUE@ "libdir=$(libdir)" \
-@BUILD_LIBQUADMATH_TRUE@ "prefix=$(prefix)" \
-@BUILD_LIBQUADMATH_TRUE@ "includedir=$(includedir)" \
-@BUILD_LIBQUADMATH_TRUE@ "AR=$(AR)" \
-@BUILD_LIBQUADMATH_TRUE@ "AS=$(AS)" \
-@BUILD_LIBQUADMATH_TRUE@ "CC=$(CC)" \
-@BUILD_LIBQUADMATH_TRUE@ "CXX=$(CXX)" \
-@BUILD_LIBQUADMATH_TRUE@ "LD=$(LD)" \
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \
-@BUILD_LIBQUADMATH_TRUE@ "NM=$(NM)" \
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \
-@BUILD_LIBQUADMATH_TRUE@ "RANLIB=$(RANLIB)" \
-@BUILD_LIBQUADMATH_TRUE@ "DESTDIR=$(DESTDIR)"
+AM_MAKEFLAGS = \
+ "AR_FLAGS=$(AR_FLAGS)" \
+ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \
+ "CFLAGS=$(CFLAGS)" \
+ "CXXFLAGS=$(CXXFLAGS)" \
+ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
+ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \
+ "INSTALL=$(INSTALL)" \
+ "INSTALL_DATA=$(INSTALL_DATA)" \
+ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \
+ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \
+ "JC1FLAGS=$(JC1FLAGS)" \
+ "LDFLAGS=$(LDFLAGS)" \
+ "LIBCFLAGS=$(LIBCFLAGS)" \
+ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \
+ "MAKE=$(MAKE)" \
+ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \
+ "PICFLAG=$(PICFLAG)" \
+ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \
+ "SHELL=$(SHELL)" \
+ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \
+ "exec_prefix=$(exec_prefix)" \
+ "infodir=$(infodir)" \
+ "libdir=$(libdir)" \
+ "prefix=$(prefix)" \
+ "includedir=$(includedir)" \
+ "AR=$(AR)" \
+ "AS=$(AS)" \
+ "CC=$(CC)" \
+ "CXX=$(CXX)" \
+ "LD=$(LD)" \
+ "LIBCFLAGS=$(LIBCFLAGS)" \
+ "NM=$(NM)" \
+ "PICFLAG=$(PICFLAG)" \
+ "RANLIB=$(RANLIB)" \
+ "DESTDIR=$(DESTDIR)"
# Subdir rules rely on $(FLAGS_TO_PASS)
-@BUILD_LIBQUADMATH_TRUE@FLAGS_TO_PASS = $(AM_MAKEFLAGS)
-@BUILD_LIBQUADMATH_TRUE@MAKEOVERRIDES =
-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_FALSE@STAMP_GENINSRC =
+FLAGS_TO_PASS = $(AM_MAKEFLAGS)
+MAKEOVERRIDES =
+@GENINSRC_FALSE@STAMP_GENINSRC =
# AM_CONDITIONAL on configure option --generated-files-in-srcdir
-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc
-@BUILD_LIBQUADMATH_TRUE@ALL_LOCAL_DEPS = $(STAMP_GENINSRC)
-@BUILD_INFO_FALSE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO =
+@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc
+ALL_LOCAL_DEPS = $(STAMP_GENINSRC)
+@BUILD_INFO_FALSE@STAMP_BUILD_INFO =
# AM_CONDITIONAL on configure check ACX_CHECK_PROG_VER([MAKEINFO])
-@BUILD_INFO_TRUE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = stamp-build-info
-@BUILD_LIBQUADMATH_TRUE@CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO)
-@BUILD_LIBQUADMATH_TRUE@MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info
+@BUILD_INFO_TRUE@STAMP_BUILD_INFO = stamp-build-info
+CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO)
+MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info
# Automake Documentation:
# If your package has Texinfo files in many directories, you can use the
@@ -564,8 +564,8 @@ TEXINFO_TEX = ../gcc/doc/include/texinfo.tex
# Defines info, dvi, pdf and html targets
MAKEINFOFLAGS = -I $(srcdir)/../gcc/doc/include
-@BUILD_LIBQUADMATH_FALSE@info_TEXINFOS =
-@BUILD_LIBQUADMATH_TRUE@info_TEXINFOS = libquadmath.texi
+info_TEXINFOS =
+info_TEXINFOS = libquadmath.texi
libquadmath_TEXINFOS = libquadmath-vers.texi
MULTISRCTOP =
MULTIBUILDTOP =
@@ -1187,6 +1187,7 @@ distclean-tags:
-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
check-am: all-am
check: check-am
+#all-local
all-am: Makefile $(INFO_DEPS) $(LTLIBRARIES) $(HEADERS) config.h \
all-local
installdirs:
@@ -1425,22 +1426,22 @@ uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \
.PRECIOUS: Makefile
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD)
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1)
-
-@BUILD_LIBQUADMATH_TRUE@stamp-geninsrc: libquadmath.info
-@BUILD_LIBQUADMATH_TRUE@ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info
-@BUILD_LIBQUADMATH_TRUE@ @touch $@
-
-@BUILD_LIBQUADMATH_TRUE@stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS)
-@BUILD_LIBQUADMATH_TRUE@ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi
-@BUILD_LIBQUADMATH_TRUE@ @touch $@
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD)
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1)
+
+stamp-geninsrc: libquadmath.info
+ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info
+ @touch $@
+
+stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS)
+ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi
+ @touch $@
all-local: $(ALL_LOCAL_DEPS)
diff --git a/libquadmath/quadmath.h b/libquadmath/quadmath.h
index 81eb957d2fa..faa5977cbc9 100644
--- a/libquadmath/quadmath.h
+++ b/libquadmath/quadmath.h
@@ -27,6 +27,9 @@ Boston, MA 02110-1301, USA. */
extern "C" {
#endif
+#ifdef AARCH64_QUADMATH
+typedef long double __float128;
+#endif
/* Define the complex type corresponding to __float128
("_Complex __float128" is not allowed) */
#if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
@@ -160,10 +163,9 @@ extern int quadmath_snprintf (char *str, size_t size,
#define FLT128_MAX_10_EXP 4932
-#define HUGE_VALQ __builtin_huge_valq()
/* The following alternative is valid, but brings the warning:
(floating constant exceeds range of __float128) */
-/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */
+ #define HUGE_VALQ (__extension__ 0x1.0p32767Q)
#define M_Eq 2.718281828459045235360287471352662498Q /* e */
#define M_LOG2Eq 1.442695040888963407359924681001892137Q /* log_2 e */
--
2.21.0.windows.1

View File

@@ -1,318 +0,0 @@
From d1e1ec0cd539f96be5a86b369b8c20b36ce9567f Mon Sep 17 00:00:00 2001
From: yangyang <yangyang305@huawei.com>
Date: Thu, 8 Jul 2021 14:38:39 +0800
Subject: [PATCH 02/13] [Backport] cselim: Extend to check non-trapping for
more references
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=54ecfb182bc32140722022c1d9818dee4bdc0e45
If there is a dominating store, a store to the same reference can not be
trapped. But previously, it only supports such check on MEM_REFs.
So this patch extends it to support ARRAY_REFs and COMPONENT_REFs.
This patch also supports a special case: if there is a dominating load of a
local variable whose address does not escape, a store is not trapped, as the local
stack is always writable. Other loads are ignored for simplicity, as they
don't help to check if a store can be trapped (the memory may be read-only).
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
index ce242ba569b..8ee1850ac63 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
return a[0]+a[1];
}
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
index 90ae36bfce2..9b96875ac7a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
return a[0]+a[1];
}
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
index c633cbe947d..b2d04119381 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
@@ -13,4 +13,4 @@ int test(int b, int k) {
return a.data[0] + a.data[1];
}
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
index 7cad563128d..8d3c4f7cc6a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
@@ -16,4 +16,4 @@ int test(int b, int k) {
return a.data[0].x + a.data[1].x;
}
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
new file mode 100644
index 00000000000..c35a2afc70b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+typedef union {
+ int i;
+ float f;
+} U;
+
+int foo(U *u, int b, int i)
+{
+ u->i = 0;
+ if (b)
+ u->i = i;
+ return u->i;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
new file mode 100644
index 00000000000..f9e66aefb13
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+int *t;
+
+int f1 (int tt)
+{
+ int *t1 = t;
+ *t1 = -5;
+ if (*t1 < tt)
+ *((unsigned *) t1) = 5;
+ return *t1;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
index 09313716598..a06f339f0bb 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-pre-stats" } */
+/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */
typedef union {
int i;
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index b1e0dce93d8..3b5b6907679 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1986,26 +1986,33 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
??? We currently are very conservative and assume that a load might
trap even if a store doesn't (write-only memory). This probably is
- overly conservative. */
+ overly conservative.
-/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
- through it was seen, which would constitute a no-trap region for
- same accesses. */
-struct name_to_bb
+ We currently support a special case that for !TREE_ADDRESSABLE automatic
+ variables, it could ignore whether something is a load or store because the
+ local stack should be always writable. */
+
+/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
+ basic block an *_REF through it was seen, which would constitute a
+ no-trap region for same accesses.
+
+ Size is needed to support 2 MEM_REFs of different types, like
+ MEM<double>(s_1) and MEM<long>(s_1), which would compare equal with
+ OEP_ADDRESS_OF. */
+struct ref_to_bb
{
- unsigned int ssa_name_ver;
+ tree exp;
+ HOST_WIDE_INT size;
unsigned int phase;
- bool store;
- HOST_WIDE_INT offset, size;
basic_block bb;
};
/* Hashtable helpers. */
-struct ssa_names_hasher : free_ptr_hash <name_to_bb>
+struct refs_hasher : free_ptr_hash<ref_to_bb>
{
- static inline hashval_t hash (const name_to_bb *);
- static inline bool equal (const name_to_bb *, const name_to_bb *);
+ static inline hashval_t hash (const ref_to_bb *);
+ static inline bool equal (const ref_to_bb *, const ref_to_bb *);
};
/* Used for quick clearing of the hash-table when we see calls.
@@ -2015,28 +2022,29 @@ static unsigned int nt_call_phase;
/* The hash function. */
inline hashval_t
-ssa_names_hasher::hash (const name_to_bb *n)
+refs_hasher::hash (const ref_to_bb *n)
{
- return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
- ^ (n->offset << 6) ^ (n->size << 3);
+ inchash::hash hstate;
+ inchash::add_expr (n->exp, hstate, OEP_ADDRESS_OF);
+ hstate.add_hwi (n->size);
+ return hstate.end ();
}
/* The equality function of *P1 and *P2. */
inline bool
-ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
+refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
{
- return n1->ssa_name_ver == n2->ssa_name_ver
- && n1->store == n2->store
- && n1->offset == n2->offset
- && n1->size == n2->size;
+ return operand_equal_p (n1->exp, n2->exp, OEP_ADDRESS_OF)
+ && n1->size == n2->size;
}
class nontrapping_dom_walker : public dom_walker
{
public:
nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
- : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
+ : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)
+ {}
virtual edge before_dom_children (basic_block);
virtual void after_dom_children (basic_block);
@@ -2053,7 +2061,7 @@ private:
hash_set<tree> *m_nontrapping;
/* The hash table for remembering what we've seen. */
- hash_table<ssa_names_hasher> m_seen_ssa_names;
+ hash_table<refs_hasher> m_seen_refs;
};
/* Called by walk_dominator_tree, when entering the block BB. */
@@ -2102,65 +2110,68 @@ nontrapping_dom_walker::after_dom_children (basic_block bb)
}
/* We see the expression EXP in basic block BB. If it's an interesting
- expression (an MEM_REF through an SSA_NAME) possibly insert the
- expression into the set NONTRAP or the hash table of seen expressions.
- STORE is true if this expression is on the LHS, otherwise it's on
- the RHS. */
+ expression of:
+ 1) MEM_REF
+ 2) ARRAY_REF
+ 3) COMPONENT_REF
+ possibly insert the expression into the set NONTRAP or the hash table
+ of seen expressions. STORE is true if this expression is on the LHS,
+ otherwise it's on the RHS. */
void
nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
{
HOST_WIDE_INT size;
- if (TREE_CODE (exp) == MEM_REF
- && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
- && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
+ if ((TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
+ || TREE_CODE (exp) == COMPONENT_REF)
&& (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
{
- tree name = TREE_OPERAND (exp, 0);
- struct name_to_bb map;
- name_to_bb **slot;
- struct name_to_bb *n2bb;
+ struct ref_to_bb map;
+ ref_to_bb **slot;
+ struct ref_to_bb *r2bb;
basic_block found_bb = 0;
- /* Try to find the last seen MEM_REF through the same
- SSA_NAME, which can trap. */
- map.ssa_name_ver = SSA_NAME_VERSION (name);
- map.phase = 0;
- map.bb = 0;
- map.store = store;
- map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
- map.size = size;
+ if (!store)
+ {
+ tree base = get_base_address (exp);
+ /* Only record a LOAD of a local variable without address-taken, as
+ the local stack is always writable. This allows cselim on a STORE
+ with a dominating LOAD. */
+ if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
+ return;
+ }
- slot = m_seen_ssa_names.find_slot (&map, INSERT);
- n2bb = *slot;
- if (n2bb && n2bb->phase >= nt_call_phase)
- found_bb = n2bb->bb;
+ /* Try to find the last seen *_REF, which can trap. */
+ map.exp = exp;
+ map.size = size;
+ slot = m_seen_refs.find_slot (&map, INSERT);
+ r2bb = *slot;
+ if (r2bb && r2bb->phase >= nt_call_phase)
+ found_bb = r2bb->bb;
- /* If we've found a trapping MEM_REF, _and_ it dominates EXP
- (it's in a basic block on the path from us to the dominator root)
+ /* If we've found a trapping *_REF, _and_ it dominates EXP
+ (it's in a basic block on the path from us to the dominator root)
then we can't trap. */
if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
{
m_nontrapping->add (exp);
}
else
- {
+ {
/* EXP might trap, so insert it into the hash table. */
- if (n2bb)
+ if (r2bb)
{
- n2bb->phase = nt_call_phase;
- n2bb->bb = bb;
+ r2bb->phase = nt_call_phase;
+ r2bb->bb = bb;
}
else
{
- n2bb = XNEW (struct name_to_bb);
- n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
- n2bb->phase = nt_call_phase;
- n2bb->bb = bb;
- n2bb->store = store;
- n2bb->offset = map.offset;
- n2bb->size = size;
- *slot = n2bb;
+ r2bb = XNEW (struct ref_to_bb);
+ r2bb->phase = nt_call_phase;
+ r2bb->bb = bb;
+ r2bb->exp = exp;
+ r2bb->size = size;
+ *slot = r2bb;
}
}
}
--
2.21.0.windows.1

View File

@ -1,31 +0,0 @@
From 309f459021a3681d728e5cf644a288ecf2b95175 Mon Sep 17 00:00:00 2001
From: zhanghaijian <z.zhanghaijian@huawei.com>
Date: Mon, 12 Jul 2021 09:42:11 +0800
Subject: [PATCH 03/13] [version] Set version to 10.3.1
Set version to 10.3.1 and clear DATESTAMP_s.
diff --git a/gcc/BASE-VER b/gcc/BASE-VER
index 0719d810258..a9368325816 100644
--- a/gcc/BASE-VER
+++ b/gcc/BASE-VER
@@ -1 +1 @@
-10.3.0
+10.3.1
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 646db219460..fdc2857d44a 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -885,8 +885,7 @@ PATCHLEVEL_c := \
# significant - do not remove it.
BASEVER_s := "\"$(BASEVER_c)\""
DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\""
-DATESTAMP_s := \
- "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\""
+DATESTAMP_s := "\"\""
PKGVERSION_s:= "\"@PKGVERSION@\""
BUGURL_s := "\"@REPORT_BUGS_TO@\""
--
2.21.0.windows.1

View File

@ -1,138 +0,0 @@
From bdb0f40cea4aa1a92ead381b645363ae0571c065 Mon Sep 17 00:00:00 2001
From: zhanghaijian <z.zhanghaijian@huawei.com>
Date: Mon, 12 Jul 2021 10:36:15 +0800
Subject: [PATCH 04/13] [Backport]tree-optimization: Avoid issueing loads in SM
when possible
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9e1ea10e657af9fb02fafecf1a600740fd34409
Currently store-motion emits a load of the value in the loop
preheader even when the original loop does not contain any read
of the reference. This avoids doing this. In the conditional
store-motion case we need to mark the sunk stores with no-warning
since the control dependence is too tricky to figure out for
the uninit warning.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c
new file mode 100755
index 00000000000..884f905148f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-lim2-details -Wuninitialized" } */
+
+void foo(int *);
+void f2(int dst[3], int R)
+{
+ int i, inter[2];
+
+ for (i = 1; i < R; i++) {
+ if (i & 8)
+ {
+ inter[0] = 1;
+ inter[1] = 1;
+ }
+ }
+
+ foo(inter);
+}
+
+/* { dg-final { scan-tree-dump-times "Executing store motion" 2 "lim2" } } */
+/* { dg-final { scan-tree-dump-not " = inter\\\[\[0-1\]\\\];" "lim2" } } */
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index abd5f702b91..b3fd1647fbd 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -127,6 +127,8 @@ public:
bitmap stored; /* The set of loops in that this memory location
is stored to. */
+ bitmap loaded; /* The set of loops in that this memory location
+ is loaded from. */
vec<mem_ref_loc> accesses_in_loop;
/* The locations of the accesses. Vector
indexed by the loop number. */
@@ -1395,6 +1397,7 @@ mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
ref->ref_decomposed = false;
ref->hash = hash;
ref->stored = NULL;
+ ref->loaded = NULL;
bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
ref->accesses_in_loop.create (1);
@@ -1435,6 +1438,27 @@ mark_ref_stored (im_mem_ref *ref, class loop *loop)
loop = loop_outer (loop);
}
+/* Set the LOOP bit in REF loaded bitmap and allocate that if
+ necessary. Return whether a bit was changed. */
+
+static bool
+set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
+{
+ if (!ref->loaded)
+ ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack);
+ return bitmap_set_bit (ref->loaded, loop->num);
+}
+
+/* Marks reference REF as loaded in LOOP. */
+
+static void
+mark_ref_loaded (im_mem_ref *ref, class loop *loop)
+{
+ while (loop != current_loops->tree_root
+ && set_ref_loaded_in_loop (ref, loop))
+ loop = loop_outer (loop);
+}
+
/* Gathers memory references in statement STMT in LOOP, storing the
information about them in the memory_accesses structure. Marks
the vops accessed through unrecognized statements there as
@@ -1571,6 +1595,8 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
mark_ref_stored (ref, loop);
}
+ else
+ mark_ref_loaded (ref, loop);
init_lim_data (stmt)->ref = ref->id;
return;
}
@@ -1968,6 +1994,8 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
gsi = gsi_start_bb (then_bb);
/* Insert actual store. */
stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
+ /* Make sure to not warn about maybe-uninit uses of tmp_var here. */
+ gimple_set_no_warning (stmt, true);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
edge e1 = single_succ_edge (new_bb);
@@ -2115,14 +2143,17 @@ execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
by move_computations after all dependencies. */
gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
- /* FIXME/TODO: For the multi-threaded variant, we could avoid this
- load altogether, since the store is predicated by a flag. We
- could, do the load only if it was originally in the loop. */
- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
- lim_data = init_lim_data (load);
- lim_data->max_loop = loop;
- lim_data->tgt_loop = loop;
- gsi_insert_before (&gsi, load, GSI_SAME_STMT);
+ /* Avoid doing a load if there was no load of the ref in the loop.
+ Esp. when the ref is not always stored we cannot optimize it
+ away later. */
+ if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))
+ {
+ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
+ lim_data = init_lim_data (load);
+ lim_data->max_loop = loop;
+ lim_data->tgt_loop = loop;
+ gsi_insert_before (&gsi, load, GSI_SAME_STMT);
+ }
if (multi_threaded_model_p)
{
--
2.21.0.windows.1

View File

@ -1,66 +0,0 @@
From dc238e97a75835231939e77e8568ccd9bc5187d5 Mon Sep 17 00:00:00 2001
From: zhanghaijian <z.zhanghaijian@huawei.com>
Date: Mon, 12 Jul 2021 10:46:16 +0800
Subject: [PATCH 05/13] [Backport]tree-optimization: Fix load eliding in SM
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0424a5ece5307cc22bbc0fe97edf4707d7a798ed
This fixes the case of not using the multithreaded model when
only conditionally storing to the destination. We cannot elide
the load in this case.
diff --git a/gcc/testsuite/gcc.dg/torture/pr94949.c b/gcc/testsuite/gcc.dg/torture/pr94949.c
new file mode 100755
index 00000000000..6182d77b3cd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr94949.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fallow-store-data-races" } */
+
+static int x = 1;
+static volatile int y = -1;
+int
+main()
+{
+ for (int i = 0; i < 128; ++i)
+ {
+ if (i == y)
+ x = i;
+ }
+ if (x != 1)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index b3fd1647fbd..8c33735b1fa 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -2128,9 +2128,9 @@ execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
fmt_data.orig_loop = loop;
for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
+ bool always_stored = ref_always_accessed_p (loop, ref, true);
if (bb_in_transaction (loop_preheader_edge (loop)->src)
- || (! flag_store_data_races
- && ! ref_always_accessed_p (loop, ref, true)))
+ || (! flag_store_data_races && ! always_stored))
multi_threaded_model_p = true;
if (multi_threaded_model_p)
@@ -2145,8 +2145,10 @@ execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
/* Avoid doing a load if there was no load of the ref in the loop.
Esp. when the ref is not always stored we cannot optimize it
- away later. */
- if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))
+ away later. But when it is not always stored we must use a conditional
+ store then. */
+ if ((!always_stored && !multi_threaded_model_p)
+ || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
{
load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
lim_data = init_lim_data (load);
--
2.21.0.windows.1

View File

@ -1,289 +0,0 @@
From cfd6920125f7968f0c1f5cb225f9fbd5bc8988b9 Mon Sep 17 00:00:00 2001
From: bule <bule1@huawei.com>
Date: Tue, 13 Jul 2021 15:26:54 +0800
Subject: [PATCH 06/13] [simdmath] Enable simdmath on kunpeng
This enables the SIMD math functions supported by libmathlib for
Fortran/C/C++. Use -fsimdmath to turn on the generation of SIMD math
functions. The supported functions can be found in simdmath.h. Add more
SIMD declarations if you need more kinds of math functions. -msimdmath-64
is used to turn on 64-bit SIMD math functions, which are not supported by
libmathlib. Therefore, this option defaults to off.
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index c51d6d34726..dc1a8984871 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -780,6 +780,10 @@ c_common_post_options (const char **pfilename)
if (cpp_opts->deps.style == DEPS_NONE)
check_deps_environment_vars ();
+ if (flag_simdmath)
+ {
+ defer_opt (OPT_include, "simdmath.h");
+ }
handle_deferred_opts ();
sanitize_cpp_opts ();
diff --git a/gcc/common.opt b/gcc/common.opt
index ec5235c3a41..8eb05570418 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1977,6 +1977,10 @@ fmath-errno
Common Report Var(flag_errno_math) Init(1) Optimization SetByCombined
Set errno after built-in math functions.
+fsimdmath
+Common Report Var(flag_simdmath) Init(0) Optimization
+Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd.
+
fmax-errors=
Common Joined RejectNegative UInteger Var(flag_max_errors)
-fmax-errors=<number> Maximum number of errors to report.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9b400c49ac6..79dc8f186f4 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23077,8 +23077,12 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
if (clonei->simdlen == 0)
{
- count = 2;
- vec_bits = (num == 0 ? 64 : 128);
+ /* Currently mathlib or sleef hasn't provide function for V2SF mode
+ simdclone of single precision functions. (e.g._ZCVnN2v_expf)
+ Therefore this mode is disabled by default to avoid link error.
+ Use -msimdmath-64 option to enable this mode. */
+ count = flag_simdmath_64 ? 2 : 1;
+ vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128);
clonei->simdlen = vec_bits / elt_bits;
}
else
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 1b3d942e0f5..4539156d6f4 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -190,6 +190,12 @@ precision of square root results to about 16 bits for
single precision and to 32 bits for double precision.
If enabled, it implies -mlow-precision-recip-sqrt.
+msimdmath-64
+Target Var(flag_simdmath_64) Optimization
+Allow compiler to generate V2SF 64 bits simdclone of math functions,
+which is not currently supported in mathlib or sleef.
+Therefore this option is disabled by default.
+
mlow-precision-div
Target Var(flag_mlow_precision_div) Optimization
Enable the division approximation. Enabling this reduces
diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c
index 6f93508f934..42fd5a8be1e 100644
--- a/gcc/fortran/scanner.c
+++ b/gcc/fortran/scanner.c
@@ -2737,6 +2737,10 @@ gfc_new_file (void)
&& !load_file (flag_pre_include, NULL, false))
exit (FATAL_EXIT_CODE);
+ if (flag_simdmath
+ && !load_file ("simdmath_f.h", NULL, false))
+ exit (FATAL_EXIT_CODE);
+
if (gfc_cpp_enabled ())
{
result = gfc_cpp_preprocess (gfc_source_file);
diff --git a/gcc/opts.c b/gcc/opts.c
index 73162528938..e31aa560564 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -189,6 +189,7 @@ static const char use_diagnosed_msg[] = N_("Uses of this option are diagnosed.")
typedef char *char_p; /* For DEF_VEC_P. */
+static void set_simdmath_flags (struct gcc_options *opts, int set);
static void set_debug_level (enum debug_info_type type, int extended,
const char *arg, struct gcc_options *opts,
struct gcc_options *opts_set,
@@ -2469,6 +2470,10 @@ common_handle_option (struct gcc_options *opts,
dc->min_margin_width = value;
break;
+ case OPT_fsimdmath:
+ set_simdmath_flags (opts, value);
+ break;
+
case OPT_fdump_:
/* Deferred. */
break;
@@ -2847,6 +2852,18 @@ common_handle_option (struct gcc_options *opts,
return true;
}
+/* The following routines are used to set -fno-math-errno and -fopenmp-simd
+ to enable vector mathlib. */
+static void
+set_simdmath_flags (struct gcc_options *opts, int set)
+{
+ if (set)
+ {
+ opts->x_flag_errno_math = 0;
+ opts->x_flag_openmp_simd = 1;
+ }
+}
+
/* Used to set the level of strict aliasing warnings in OPTS,
when no level is specified (i.e., when -Wstrict-aliasing, and not
-Wstrict-aliasing=level was given).
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 669b9e4defd..0d9cc96481c 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -74,10 +74,10 @@ libgomp_la_SOURCES += openacc.f90
endif
nodist_noinst_HEADERS = libgomp_f.h
-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h
+nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h
if USE_FORTRAN
nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
- openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
+ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h
endif
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index ae5d9d54705..dd4b334895e 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -148,7 +148,7 @@ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
configure.lineno config.status.lineno
mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
CONFIG_HEADER = config.h
-CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \
+CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \
libgomp.spec
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
@@ -609,9 +609,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBADD = libgomp.la $(PLUGIN_GCN_LIBS)
@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBTOOLFLAGS = --tag=disable-static
nodist_noinst_HEADERS = libgomp_f.h
-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h
+nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h
@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
-@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
+@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -702,6 +702,10 @@ omp.h: $(top_builddir)/config.status $(srcdir)/omp.h.in
cd $(top_builddir) && $(SHELL) ./config.status $@
omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in
cd $(top_builddir) && $(SHELL) ./config.status $@
+simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in
+ cd $(top_builddir) && $(SHELL) ./config.status $@
+simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in
+ cd $(top_builddir) && $(SHELL) ./config.status $@
omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in
cd $(top_builddir) && $(SHELL) ./config.status $@
libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in
diff --git a/libgomp/configure b/libgomp/configure
index 5240f7e9d39..b03036c2738 100644
--- a/libgomp/configure
+++ b/libgomp/configure
@@ -17050,7 +17050,7 @@ fi
-ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h"
+ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h"
ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec"
@@ -18205,6 +18205,8 @@ do
"libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
"omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;;
"omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;;
+ "simdmath.h") CONFIG_FILES="$CONFIG_FILES simdmath.h" ;;
+ "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;;
"omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;;
"libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;;
"Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
diff --git a/libgomp/configure.ac b/libgomp/configure.ac
index ef5d293c31e..569c2065a66 100644
--- a/libgomp/configure.ac
+++ b/libgomp/configure.ac
@@ -433,7 +433,7 @@ CFLAGS="$save_CFLAGS"
# Determine what GCC version number to use in filesystem paths.
GCC_BASE_VER
-AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h)
+AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h)
AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec)
AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in])
AC_CONFIG_FILES([testsuite/libgomp-site-extra.exp])
diff --git a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in
new file mode 100644
index 00000000000..ab91a4ec317
--- /dev/null
+++ b/libgomp/simdmath.h.in
@@ -0,0 +1,40 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#pragma omp declare simd simdlen(2) notinbranch
+double cos (double x);
+
+#pragma omp declare simd simdlen(4) notinbranch
+float cosf (float x);
+
+#pragma omp declare simd simdlen(2) notinbranch
+double sin (double x);
+
+#pragma omp declare simd simdlen(4) notinbranch
+float sinf (float x);
+
+#pragma omp declare simd simdlen(2) notinbranch
+double exp (double x);
+
+#pragma omp declare simd simdlen(4) notinbranch
+float expf (float x);
+
+#pragma omp declare simd simdlen(2) notinbranch
+double log (double x);
+
+#pragma omp declare simd simdlen(4) notinbranch
+float logf (float x);
+
+#pragma omp declare simd simdlen(2) notinbranch
+double pow (double x, double y);
+
+#pragma omp declare simd simdlen(4) notinbranch
+float powf (float x, float y);
+
+#pragma omp declare simd simdlen(4) notinbranch
+float exp2f (float x);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in
new file mode 100644
index 00000000000..550595015db
--- /dev/null
+++ b/libgomp/simdmath_f.h.in
@@ -0,0 +1,11 @@
+!GCC$ builtin (cos) attributes simd (notinbranch)
+!GCC$ builtin (cosf) attributes simd (notinbranch)
+!GCC$ builtin (sin) attributes simd (notinbranch)
+!GCC$ builtin (sinf) attributes simd (notinbranch)
+!GCC$ builtin (exp) attributes simd (notinbranch)
+!GCC$ builtin (expf) attributes simd (notinbranch)
+!GCC$ builtin (exp2f) attributes simd (notinbranch)
+!GCC$ builtin (log) attributes simd (notinbranch)
+!GCC$ builtin (logf) attributes simd (notinbranch)
+!GCC$ builtin (pow) attributes simd (notinbranch)
+!GCC$ builtin (powf) attributes simd (notinbranch)
--
2.21.0.windows.1

View File

@ -1,68 +0,0 @@
From 07033bcc5b9e4c03846cd84b4587cd493fcf7d53 Mon Sep 17 00:00:00 2001
From: zhoukaipeng <zhoukaipeng3@huawei.com>
Date: Wed, 14 Jul 2021 11:24:06 +0800
Subject: [PATCH 07/13] [Vect] Enable skipping vectorization on reduction
chains
Sometimes either vectorization on reduction chains or on reductions is
possible, but the latter is better. The option
"-ftree-vect-analyze-slp-group" skips the former.
diff --git a/gcc/common.opt b/gcc/common.opt
index 8eb05570418..55d4eb5a351 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2968,6 +2968,10 @@ ftree-slp-vectorize
Common Report Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize)
Enable basic block vectorization (SLP) on trees.
+ftree-vect-analyze-slp-group
+Common Report Var(flag_tree_slp_group) Init(0)
+Disable SLP vectorization for reduction chain on tree.
+
fvect-cost-model=
Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
-fvect-cost-model=[unlimited|dynamic|cheap] Specifies the cost model for vectorization.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c
new file mode 100644
index 00000000000..913f1ef28df
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -funsafe-math-optimizations -fno-tree-reassoc -ftree-vect-analyze-slp-group" } */
+void f(double *a, double *res, double m) {
+ double res1, res0;
+ res1 = 0;
+ res0 = 0;
+ for (int i = 0; i < 1000; i+=8) {
+ res0 += a[i] * m;
+ res1 += a[i+1] * m;
+ res0 += a[i+2] * m;
+ res1 += a[i+3] * m;
+ res0 += a[i+4] * m;
+ res1 += a[i+5] * m;
+ res0 += a[i+6] * m;
+ res1 += a[i+7] * m;
+ }
+ res[0] += res0;
+ res[1] += res1;
+}
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index adc579ff544..476b3237054 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2480,7 +2480,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
/* Find SLP sequences starting from reduction chains. */
FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
- if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
+ if (flag_tree_slp_group
+ || ! vect_analyze_slp_instance (vinfo, bst_map, first_element,
max_tree_size))
{
/* Dissolve reduction chain group. */
--
2.21.0.windows.1

View File

@ -1,97 +0,0 @@
From 79d1ed2d7f166a498662f6111a4defc55f0061c7 Mon Sep 17 00:00:00 2001
From: yangyang <yangyang305@huawei.com>
Date: Thu, 15 Jul 2021 09:27:27 +0800
Subject: [PATCH 08/13] [Backport]tree-optimization: Add checks to avoid
spoiling if-conversion
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=33d114f570b4a3583421c700396fd5945acebc28
Add some checks in pass_split_paths, so that pass_split_paths can recognize
the missed if-conversion opportunity and does not duplicate the
corresponding block.
diff --git a/gcc/gimple-ssa-split-paths.c b/gcc/gimple-ssa-split-paths.c
index b3efd43c7ef..9c32da76369 100644
--- a/gcc/gimple-ssa-split-paths.c
+++ b/gcc/gimple-ssa-split-paths.c
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
+#include "fold-const.h"
/* Given LATCH, the latch block in a loop, see if the shape of the
path reaching LATCH is suitable for being split by duplication.
@@ -254,6 +255,44 @@ is_feasible_trace (basic_block bb)
}
}
+ /* Canonicalize the form. */
+ if (single_pred_p (pred1) && single_pred (pred1) == pred2
+ && num_stmts_in_pred1 == 0)
+ std::swap (pred1, pred2);
+
+ /* This is meant to catch another kind of cases that are likely opportunities
+ for if-conversion. After canonicalizing, PRED2 must be an empty block and
+ PRED1 must be the only predecessor of PRED2. Moreover, PRED1 is supposed
+ to end with a cond_stmt which has the same args with the PHI in BB. */
+ if (single_pred_p (pred2) && single_pred (pred2) == pred1
+ && num_stmts_in_pred2 == 0)
+ {
+ gimple *cond_stmt = last_stmt (pred1);
+ if (cond_stmt && gimple_code (cond_stmt) == GIMPLE_COND)
+ {
+ tree lhs = gimple_cond_lhs (cond_stmt);
+ tree rhs = gimple_cond_rhs (cond_stmt);
+
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *phi = gsi_stmt (gsi);
+ if ((operand_equal_p (gimple_phi_arg_def (phi, 0), lhs)
+ && operand_equal_p (gimple_phi_arg_def (phi, 1), rhs))
+ || (operand_equal_p (gimple_phi_arg_def (phi, 0), rhs)
+ && (operand_equal_p (gimple_phi_arg_def (phi, 1), lhs))))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Block %d appears to be optimized to a join "
+ "point for if-convertable half-diamond.\n",
+ bb->index);
+ return false;
+ }
+ }
+ }
+ }
+
/* If the joiner has no PHIs with useful uses there is zero chance
of CSE/DCE/jump-threading possibilities exposed by duplicating it. */
bool found_useful_phi = false;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c
new file mode 100644
index 00000000000..19a130d9bf1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsplit-paths -fdump-tree-split-paths-details " } */
+
+double
+foo(double *d1, double *d2, double *d3, int num, double *ip)
+{
+ double dmax[3];
+
+ for (int i = 0; i < num; i++) {
+ dmax[0] = d1[i] < dmax[0] ? dmax[0] : d1[i];
+ dmax[1] = d2[i] < dmax[1] ? dmax[1] : d2[i];
+ dmax[2] = d3[i] < dmax[2] ? dmax[2] : d3[i];
+ ip[i] = dmax[2];
+ }
+
+ return dmax[0] + dmax[1] + dmax[2];
+}
+
+/* { dg-final { scan-tree-dump "appears to be optimized to a join point for if-convertable half-diamond" "split-paths" } } */
--
2.21.0.windows.1

View File

@ -1,141 +0,0 @@
From 7bc78d0ab13c37e2b11adb385d9916181ec4cc20 Mon Sep 17 00:00:00 2001
From: zhanghaijian <z.zhanghaijian@huawei.com>
Date: Thu, 15 Jul 2021 09:04:55 +0800
Subject: [PATCH 09/13] [Backport]expand: Simplify removing subregs when
expanding a copy [PR95254]
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9a182ef9ee011935d827ab5c6c9a7cd8e22257d8
In rtl expand, if we have a copy that matches one of the following patterns:
(set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
(set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
(set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
(set (subreg:M1 (reg:M2 ...)) (constant C))
where mode M1 is equal in size to M2, try to detect whether the mode change
involves an implicit round trip through memory. If so, see if we can avoid
that by removing the subregs and doing the move in mode M2 instead.
diff --git a/gcc/expr.c b/gcc/expr.c
index 991b26f3341..d66fdd4e93d 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -3814,6 +3814,78 @@ emit_move_insn (rtx x, rtx y)
gcc_assert (mode != BLKmode
&& (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
+ /* If we have a copy that looks like one of the following patterns:
+ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
+ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
+ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
+ (set (subreg:M1 (reg:M2 ...)) (constant C))
+ where mode M1 is equal in size to M2, try to detect whether the
+ mode change involves an implicit round trip through memory.
+ If so, see if we can avoid that by removing the subregs and
+ doing the move in mode M2 instead. */
+
+ rtx x_inner = NULL_RTX;
+ rtx y_inner = NULL_RTX;
+
+#define CANDIDATE_SUBREG_P(subreg) \
+ (REG_P (SUBREG_REG (subreg)) \
+ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \
+ GET_MODE_SIZE (GET_MODE (subreg))) \
+ && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \
+ != CODE_FOR_nothing)
+
+#define CANDIDATE_MEM_P(innermode, mem) \
+ (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \
+ && !push_operand ((mem), GET_MODE (mem)) \
+ /* Not a candiate if innermode requires too much alignment. */ \
+ && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) \
+ || targetm.slow_unaligned_access (GET_MODE (mem), \
+ MEM_ALIGN (mem)) \
+ || !targetm.slow_unaligned_access ((innermode), \
+ MEM_ALIGN (mem))))
+
+ if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x))
+ x_inner = SUBREG_REG (x);
+
+ if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y))
+ y_inner = SUBREG_REG (y);
+
+ if (x_inner != NULL_RTX
+ && y_inner != NULL_RTX
+ && GET_MODE (x_inner) == GET_MODE (y_inner)
+ && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS))
+ {
+ x = x_inner;
+ y = y_inner;
+ mode = GET_MODE (x_inner);
+ }
+ else if (x_inner != NULL_RTX
+ && MEM_P (y)
+ && CANDIDATE_MEM_P (GET_MODE (x_inner), y))
+ {
+ x = x_inner;
+ y = adjust_address (y, GET_MODE (x_inner), 0);
+ mode = GET_MODE (x_inner);
+ }
+ else if (y_inner != NULL_RTX
+ && MEM_P (x)
+ && CANDIDATE_MEM_P (GET_MODE (y_inner), x))
+ {
+ x = adjust_address (x, GET_MODE (y_inner), 0);
+ y = y_inner;
+ mode = GET_MODE (y_inner);
+ }
+ else if (x_inner != NULL_RTX
+ && CONSTANT_P (y)
+ && !targetm.can_change_mode_class (GET_MODE (x_inner),
+ mode, ALL_REGS)
+ && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0)))
+ {
+ x = x_inner;
+ y = y_inner;
+ mode = GET_MODE (x_inner);
+ }
+
if (CONSTANT_P (y))
{
if (optimize
diff --git a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c
new file mode 100644
index 00000000000..10bfc868197
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */
+
+typedef short __attribute__((vector_size (8))) v4hi;
+
+typedef union U4HI { v4hi v; short a[4]; } u4hi;
+
+short b[4];
+
+void pass_v4hi (v4hi v)
+{
+ int i;
+ u4hi u;
+ u.v = v;
+ for (i = 0; i < 4; i++)
+ b[i] = u.a[i];
+};
+
+/* { dg-final { scan-assembler-not "ptrue" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c
index 518071bdd86..398cdba5d5f 100644
--- a/gcc/testsuite/gcc.target/i386/pr67609.c
+++ b/gcc/testsuite/gcc.target/i386/pr67609.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2" } */
/* { dg-require-effective-target lp64 } */
-/* { dg-final { scan-assembler "movdqa" } } */
+/* { dg-final { scan-assembler "movq\t%xmm0" } } */
#include <emmintrin.h>
__m128d reg;
--
2.21.0.windows.1

View File

@ -1,98 +0,0 @@
From b8b3e29e4cceae2bab6e0774b1af994dbe713d97 Mon Sep 17 00:00:00 2001
From: zhanghaijian <z.zhanghaijian@huawei.com>
Date: Thu, 15 Jul 2021 09:13:11 +0800
Subject: [PATCH 10/13] [Backport]tree-optimization/94963 - avoid bogus uninit
warning with store-motion
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=371905d12259c180efb9b1f1b5716e969feb60f9
Eliding the load for store-motion causes an uninitialized variable
flowing into the loop, conditionally initialized and used. The
uninit warning cannot relate the flag used to guard the initialization
and use with the actual initialization so the following robustifies
the previous approach of marking the conditional store as not to
be warned on by instead initializing the variable on loop entry
from an uninitialized variable we mark as not to be warned for.
diff --git a/gcc/testsuite/gcc.dg/pr94963.c b/gcc/testsuite/gcc.dg/pr94963.c
new file mode 100644
index 00000000000..09c0524fb3a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94963.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wall" } */
+
+typedef struct
+{
+ int p1;
+ int p2;
+ int p3;
+} P;
+struct S
+{
+ int field;
+};
+extern int v2;
+extern void foo (struct S *map);
+static struct S var;
+const P *pv;
+int ps;
+void
+f (void)
+{
+ if (pv != 0)
+ for (const P *ph = pv; ph < &pv[ps]; ++ph)
+ switch (ph->p1)
+ {
+ case 1:
+ v2 = ph->p2;
+ break;
+ case 2:
+ var.field = ph->p3;
+ break;
+ }
+ if (var.field != 0) /* { dg-bogus "uninitialized" } */
+ foo (&var);
+}
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 8c33735b1fa..d74a46ef352 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -1994,8 +1994,6 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
gsi = gsi_start_bb (then_bb);
/* Insert actual store. */
stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
- /* Make sure to not warn about maybe-uninit uses of tmp_var here. */
- gimple_set_no_warning (stmt, true);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
edge e1 = single_succ_edge (new_bb);
@@ -2149,13 +2147,19 @@ execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
store then. */
if ((!always_stored && !multi_threaded_model_p)
|| (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
+ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
+ else
{
- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
- lim_data = init_lim_data (load);
- lim_data->max_loop = loop;
- lim_data->tgt_loop = loop;
- gsi_insert_before (&gsi, load, GSI_SAME_STMT);
+ /* If not emitting a load mark the uninitialized state on the
+ loop entry as not to be warned for. */
+ tree uninit = create_tmp_reg (TREE_TYPE (tmp_var));
+ TREE_NO_WARNING (uninit) = 1;
+ load = gimple_build_assign (tmp_var, uninit);
}
+ lim_data = init_lim_data (load);
+ lim_data->max_loop = loop;
+ lim_data->tgt_loop = loop;
+ gsi_insert_before (&gsi, load, GSI_SAME_STMT);
if (multi_threaded_model_p)
{
--
2.21.0.windows.1

View File

@ -1,23 +0,0 @@
From 78cf3b95d7b895cfe8d6f1c2a48ebc08a662eef0 Mon Sep 17 00:00:00 2001
From: bule <bule1@huawei.com>
Date: Sat, 17 Jul 2021 16:38:10 +0800
Subject: [PATCH 11/13] [simdmath] Enable 64-bits simd when test
simd_pcs_attribute-3
Enable 64-bit SIMD when testing simd_pcs_attribute-3. 64-bit SIMD
is off by default unless -msimdmath-64 is specified.
diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
index 95f6a6803e8..e0e0efa9d7e 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-Ofast" } */
+/* { dg-options "-Ofast -msimdmath-64" } */
__attribute__ ((__simd__))
__attribute__ ((__nothrow__ , __leaf__ , __const__))
--
2.21.0.windows.1

View File

@ -1,397 +0,0 @@
From 26ea42402eede6a441c9d74ec6b6086e5bf0bf79 Mon Sep 17 00:00:00 2001
From: bule <bule1@huawei.com>
Date: Mon, 19 Jul 2021 12:04:08 +0800
Subject: [PATCH 12/13] [fp-model] Enable fp-model on kunpeng
Enable fp-model options on kunpeng for precision control.
diff --git a/gcc/common.opt b/gcc/common.opt
index 55d4eb5a351..79c9ef6615b 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1545,6 +1545,32 @@ ffp-int-builtin-inexact
Common Report Var(flag_fp_int_builtin_inexact) Init(1) Optimization
Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions.
+fftz
+Common Report Var(flag_ftz) Optimization
+Control fpcr register for flush to zero.
+
+fp-model=
+Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization
+-fp-model=[normal|fast|precise|except|strict] Perform floating-point precision control.
+
+Enum
+Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs)
+
+EnumValue
+Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL)
+
+EnumValue
+Enum(fp_model) String(fast) Value(FP_MODEL_FAST)
+
+EnumValue
+Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE)
+
+EnumValue
+Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT)
+
+EnumValue
+Enum(fp_model) String(strict) Value(FP_MODEL_STRICT)
+
; Nonzero means don't put addresses of constant functions in registers.
; Used for compiling the Unix kernel, where strange substitutions are
; done on the assembly output.
diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h
index e587e2e9ad6..331b12c8702 100644
--- a/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc/config/aarch64/aarch64-linux.h
@@ -50,7 +50,8 @@
#define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC
#define GNU_USER_TARGET_MATHFILE_SPEC \
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+ "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\
+ %{!fno-ftz:crtfastmath.o%s}}"
#undef ENDFILE_SPEC
#define ENDFILE_SPEC \
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 852ea76eaa2..5832298251e 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -223,6 +223,15 @@ enum fp_contract_mode {
FP_CONTRACT_FAST = 2
};
+/* Floating-point precision mode. */
+enum fp_model {
+ FP_MODEL_NORMAL = 0,
+ FP_MODEL_FAST = 1,
+ FP_MODEL_PRECISE = 2,
+ FP_MODEL_EXCEPT = 3,
+ FP_MODEL_STRICT = 4
+};
+
/* Scalar storage order kind. */
enum scalar_storage_order_kind {
SSO_NATIVE = 0,
diff --git a/gcc/fortran/options.c b/gcc/fortran/options.c
index 4cc8a908417..c59dcf63781 100644
--- a/gcc/fortran/options.c
+++ b/gcc/fortran/options.c
@@ -250,6 +250,7 @@ form_from_filename (const char *filename)
return f_form;
}
+static void gfc_handle_fpe_option (const char *arg, bool trap);
/* Finalize commandline options. */
@@ -277,6 +278,13 @@ gfc_post_options (const char **pfilename)
if (flag_protect_parens == -1)
flag_protect_parens = !optimize_fast;
+ /* If fp-model=precise/strict, turn on all ffpe-trap and ffpe-summary. */
+ if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT)
+ {
+ gfc_handle_fpe_option ("all", false);
+ gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true);
+ }
+
/* -Ofast sets implies -fstack-arrays unless an explicit size is set for
stack arrays. */
if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2)
diff --git a/gcc/opts-common.c b/gcc/opts-common.c
index de9510abd64..bf82b05c8a2 100644
--- a/gcc/opts-common.c
+++ b/gcc/opts-common.c
@@ -26,7 +26,8 @@ along with GCC; see the file COPYING3. If not see
#include "diagnostic.h"
#include "spellcheck.h"
-static void prune_options (struct cl_decoded_option **, unsigned int *);
+static void prune_options (struct cl_decoded_option **, unsigned int *,
+ unsigned int);
/* An option that is undocumented, that takes a joined argument, and
that doesn't fit any of the classes of uses (language/common,
@@ -988,7 +989,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv,
*decoded_options = opt_array;
*decoded_options_count = num_decoded_options;
- prune_options (decoded_options, decoded_options_count);
+ prune_options (decoded_options, decoded_options_count, lang_mask);
}
/* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the
@@ -1009,11 +1010,109 @@ cancel_option (int opt_idx, int next_opt_idx, int orig_next_opt_idx)
return false;
}
+/* Check whether opt_idx exists in decoded_options array bewteen index
+ start and end. If found, return its index in decoded_options,
+ else return end. */
+static unsigned int
+find_opt_idx (const struct cl_decoded_option *decoded_options,
+ unsigned int decoded_options_count,
+ unsigned int start, unsigned int end, unsigned int opt_idx)
+{
+ gcc_assert (end <= decoded_options_count);
+ gcc_assert (opt_idx < cl_options_count);
+ unsigned int k;
+ for (k = start; k < end; k++)
+ {
+ if (decoded_options[k].opt_index == opt_idx)
+ {
+ return k;
+ }
+ }
+ return k;
+}
+
+/* remove the opt_index element from decoded_options array. */
+static unsigned int
+remove_option (struct cl_decoded_option *decoded_options,
+ unsigned int decoded_options_count,
+ unsigned int opt_index)
+{
+ gcc_assert (opt_index < decoded_options_count);
+ unsigned int i;
+ for (i = opt_index; i < decoded_options_count - 1; i++)
+ {
+ decoded_options[i] = decoded_options[i + 1];
+ }
+ return decoded_options_count - 1;
+}
+
+/* Handle the priority between fp-model, Ofast, and
+ ffast-math. */
+static unsigned int
+handle_fp_model_driver (struct cl_decoded_option *decoded_options,
+ unsigned int decoded_options_count,
+ unsigned int fp_model_index,
+ unsigned int lang_mask)
+{
+ struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index];
+ enum fp_model model = (enum fp_model) fp_model_opt.value;
+ if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT)
+ {
+ /* If found Ofast, override Ofast with O3. */
+ unsigned int Ofast_index;
+ Ofast_index = find_opt_idx (decoded_options, decoded_options_count,
+ 0, decoded_options_count, OPT_Ofast);
+ while (Ofast_index != decoded_options_count)
+ {
+ const char *tmp_argv = "-O3";
+ decode_cmdline_option (&tmp_argv, lang_mask,
+ &decoded_options[Ofast_index]);
+ warning (0, "%<-Ofast%> is degraded to %<-O3%> due to %qs",
+ fp_model_opt.orig_option_with_args_text);
+ Ofast_index = find_opt_idx (decoded_options, decoded_options_count,
+ 0, decoded_options_count, OPT_Ofast);
+ }
+ /* If found ffast-math before fp-model=precise/strict
+ it, cancel it. */
+ unsigned int ffast_math_index;
+ ffast_math_index
+ = find_opt_idx (decoded_options, decoded_options_count, 0,
+ fp_model_index, OPT_ffast_math);
+ if (ffast_math_index != fp_model_index)
+ {
+ decoded_options_count
+ = remove_option (decoded_options, decoded_options_count,
+ ffast_math_index);
+ warning (0, "%<-ffast-math%> before %qs is canceled",
+ fp_model_opt.orig_option_with_args_text);
+ }
+ }
+ if (model == FP_MODEL_FAST)
+ {
+ /* If found -fno-fast-math after fp-model=fast, cancel this one. */
+ unsigned int fno_fast_math_index;
+ fno_fast_math_index
+ = find_opt_idx (decoded_options, decoded_options_count, fp_model_index,
+ decoded_options_count, OPT_ffast_math);
+ if (fno_fast_math_index != decoded_options_count
+ && decoded_options[fno_fast_math_index].value == 0)
+ {
+ decoded_options_count
+ = remove_option (decoded_options, decoded_options_count,
+ fp_model_index);
+ warning (0,
+ "%<-fp-model=fast%> before %<-fno-fast-math%> is canceled");
+ }
+ }
+ return decoded_options_count;
+}
+
/* Filter out options canceled by the ones after them. */
static void
prune_options (struct cl_decoded_option **decoded_options,
- unsigned int *decoded_options_count)
+ unsigned int *decoded_options_count,
+ unsigned int lang_mask)
{
unsigned int old_decoded_options_count = *decoded_options_count;
struct cl_decoded_option *old_decoded_options = *decoded_options;
@@ -1024,7 +1123,12 @@ prune_options (struct cl_decoded_option **decoded_options,
const struct cl_option *option;
unsigned int fdiagnostics_color_idx = 0;
+ if (!diagnostic_ready_p ())
+ diagnostic_initialize (global_dc, 0);
+
/* Remove arguments which are negated by others after them. */
+
+ unsigned int fp_model_index = old_decoded_options_count;
new_decoded_options_count = 0;
for (i = 0; i < old_decoded_options_count; i++)
{
@@ -1048,6 +1152,34 @@ prune_options (struct cl_decoded_option **decoded_options,
fdiagnostics_color_idx = i;
continue;
+ case OPT_fp_model_:
+ /* Only the last fp-model option will take effect. */
+ unsigned int next_fp_model_idx;
+ next_fp_model_idx = find_opt_idx (old_decoded_options,
+ old_decoded_options_count,
+ i + 1,
+ old_decoded_options_count,
+ OPT_fp_model_);
+ if (next_fp_model_idx != old_decoded_options_count)
+ {
+ /* Found more than one fp-model, cancel this one. */
+ if (old_decoded_options[i].value
+ != old_decoded_options[next_fp_model_idx].value)
+ {
+ warning (0, "%qs is overrided by %qs",
+ old_decoded_options[i].
+ orig_option_with_args_text,
+ old_decoded_options[next_fp_model_idx].
+ orig_option_with_args_text);
+ }
+ break;
+ }
+ else
+ {
+ /* Found the last fp-model option. */
+ fp_model_index = new_decoded_options_count;
+ }
+ /* FALLTHRU. */
default:
gcc_assert (opt_idx < cl_options_count);
option = &cl_options[opt_idx];
@@ -1087,6 +1219,14 @@ keep:
break;
}
}
+ if (fp_model_index < new_decoded_options_count)
+ {
+ new_decoded_options_count
+ = handle_fp_model_driver (new_decoded_options,
+ new_decoded_options_count,
+ fp_model_index,
+ lang_mask);
+ }
if (fdiagnostics_color_idx >= 1)
{
diff --git a/gcc/opts.c b/gcc/opts.c
index e31aa560564..6924a973a5b 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -195,6 +195,7 @@ static void set_debug_level (enum debug_info_type type, int extended,
struct gcc_options *opts_set,
location_t loc);
static void set_fast_math_flags (struct gcc_options *opts, int set);
+static void set_fp_model_flags (struct gcc_options *opts, int set);
static void decode_d_option (const char *arg, struct gcc_options *opts,
location_t loc, diagnostic_context *dc);
static void set_unsafe_math_optimizations_flags (struct gcc_options *opts,
@@ -2482,6 +2483,10 @@ common_handle_option (struct gcc_options *opts,
set_fast_math_flags (opts, value);
break;
+ case OPT_fp_model_:
+ set_fp_model_flags (opts, value);
+ break;
+
case OPT_funsafe_math_optimizations:
set_unsafe_math_optimizations_flags (opts, value);
break;
@@ -2908,6 +2913,69 @@ set_fast_math_flags (struct gcc_options *opts, int set)
}
}
+/* Handle fp-model options. */
+static void
+set_fp_model_flags (struct gcc_options *opts, int set)
+{
+ enum fp_model model = (enum fp_model) set;
+ switch (model)
+ {
+ case FP_MODEL_FAST:
+ /* Equivalent to open ffast-math. */
+ set_fast_math_flags (opts, 1);
+ break;
+
+ case FP_MODEL_PRECISE:
+ /* Equivalent to close ffast-math. */
+ set_fast_math_flags (opts, 0);
+ /* Turn on -frounding-math -fsignaling-nans. */
+ if (!opts->frontend_set_flag_signaling_nans)
+ opts->x_flag_signaling_nans = 1;
+ if (!opts->frontend_set_flag_rounding_math)
+ opts->x_flag_rounding_math = 1;
+ opts->x_flag_expensive_optimizations = 0;
+ opts->x_flag_code_hoisting = 0;
+ opts->x_flag_predictive_commoning = 0;
+ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF;
+ break;
+
+ case FP_MODEL_EXCEPT:
+ if (!opts->frontend_set_flag_signaling_nans)
+ opts->x_flag_signaling_nans = 1;
+ if (!opts->frontend_set_flag_errno_math)
+ opts->x_flag_errno_math = 1;
+ if (!opts->frontend_set_flag_trapping_math)
+ opts->x_flag_trapping_math = 1;
+ opts->x_flag_fp_int_builtin_inexact = 1;
+ /* Also turn on ffpe-trap in fortran. */
+ break;
+
+ case FP_MODEL_STRICT:
+ /* Turn on both precise and except. */
+ if (!opts->frontend_set_flag_signaling_nans)
+ opts->x_flag_signaling_nans = 1;
+ if (!opts->frontend_set_flag_rounding_math)
+ opts->x_flag_rounding_math = 1;
+ opts->x_flag_expensive_optimizations = 0;
+ opts->x_flag_code_hoisting = 0;
+ opts->x_flag_predictive_commoning = 0;
+ if (!opts->frontend_set_flag_errno_math)
+ opts->x_flag_errno_math = 1;
+ if (!opts->frontend_set_flag_trapping_math)
+ opts->x_flag_trapping_math = 1;
+ opts->x_flag_fp_int_builtin_inexact = 1;
+ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF;
+ break;
+
+ case FP_MODEL_NORMAL:
+ /* Do nothing. */
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* When -funsafe-math-optimizations is set the following
flags are set as well. */
static void
--
2.21.0.windows.1

View File

@ -1,499 +0,0 @@
From 0d14a2b7a3defc82ed16c99a18c2bc2e6be9f5b1 Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Fri, 16 Jul 2021 23:21:38 -0400
Subject: [PATCH 13/13] [LoopElim] Redundant loop elimination optimization
Introduce a redundant-loop-elimination optimization controlled
by -floop-elim. It is typically used together with -ffinite-loops.
diff --git a/gcc/common.opt b/gcc/common.opt
index 79c9ef6615b..b2b0aac7fdf 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1169,6 +1169,10 @@ fcompare-elim
Common Report Var(flag_compare_elim_after_reload) Optimization
Perform comparison elimination after register allocation has finished.
+floop-elim
+Common Report Var(flag_loop_elim) Init(0) Optimization
+Perform redundant loop elimination.
+
fconserve-stack
Common Var(flag_conserve_stack) Optimization
Do not perform optimizations increasing noticeably stack usage.
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 3b5b6907679..591b6435f78 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -69,6 +69,7 @@ static hash_set<tree> * get_non_trapping ();
static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
basic_block, basic_block);
+static bool do_phiopt_pattern (basic_block, basic_block, basic_block);
static bool gate_hoist_loads (void);
/* This pass tries to transform conditional stores into unconditional
@@ -257,6 +258,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
hoist_adjacent_loads (bb, bb1, bb2, bb3);
continue;
}
+ else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2))
+ {
+ continue;
+ }
else
continue;
@@ -2819,6 +2824,449 @@ hoist_adjacent_loads (basic_block bb0, basic_block bb1,
}
}
+static bool check_uses (tree, hash_set<tree> *);
+
+/* Check SSA_NAME is used in
+ if (SSA_NAME == 0)
+ ...
+ or
+ if (SSA_NAME != 0)
+ ...
+*/
+static bool
+check_uses_cond (const_tree ssa_name, gimple *stmt,
+ hash_set<tree> *hset ATTRIBUTE_UNUSED)
+{
+ tree_code code = gimple_cond_code (stmt);
+ if (code != EQ_EXPR && code != NE_EXPR)
+ {
+ return false;
+ }
+
+ tree lhs = gimple_cond_lhs (stmt);
+ tree rhs = gimple_cond_rhs (stmt);
+ if ((lhs == ssa_name && integer_zerop (rhs))
+ || (rhs == ssa_name && integer_zerop (lhs)))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/* Check SSA_NAME is used in
+ _tmp = SSA_NAME == 0;
+ or
+ _tmp = SSA_NAME != 0;
+ or
+ _tmp = SSA_NAME | _tmp2;
+*/
+static bool
+check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set<tree> *hset)
+{
+ tree_code code = gimple_assign_rhs_code (stmt);
+ tree lhs, rhs1, rhs2;
+
+ switch (code)
+ {
+ case EQ_EXPR:
+ case NE_EXPR:
+ rhs1 = gimple_assign_rhs1 (stmt);
+ rhs2 = gimple_assign_rhs2 (stmt);
+ if ((rhs1 == ssa_name && integer_zerop (rhs2))
+ || (rhs2 == ssa_name && integer_zerop (rhs1)))
+ {
+ return true;
+ }
+ break;
+
+ case BIT_IOR_EXPR:
+ lhs = gimple_assign_lhs (stmt);
+ if (hset->contains (lhs))
+ {
+ return false;
+ }
+ /* We should check the use of _tmp further. */
+ return check_uses (lhs, hset);
+
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Check SSA_NAME is used in
+ # result = PHI <SSA_NAME (bb1), 0 (bb2), 0 (bb3)>
+*/
+static bool
+check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set<tree> *hset)
+{
+ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++)
+ {
+ tree arg = gimple_phi_arg_def (stmt, i);
+ if (!integer_zerop (arg) && arg != ssa_name)
+ {
+ return false;
+ }
+ }
+
+ tree result = gimple_phi_result (stmt);
+
+ /* It is used to avoid infinite recursion,
+ <bb 1>
+ if (cond)
+ goto <bb 2>
+ else
+ goto <bb 3>
+
+ <bb 2>
+ # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)>
+ {BODY}
+ if (cond)
+ goto <bb 3>
+ else
+ goto <bb 4>
+
+ <bb 3>
+ # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)>
+ {BODY}
+ if (cond)
+ goto <bb 2>
+ else
+ goto <bb 4>
+
+ <bb 4>
+ ...
+ */
+ if (hset->contains (result))
+ {
+ return false;
+ }
+
+ return check_uses (result, hset);
+}
+
+/* Check the use of SSA_NAME, it should only be used in comparison
+ operation and PHI node. HSET is used to record the ssa_names
+ that have been already checked. */
+static bool
+check_uses (tree ssa_name, hash_set<tree> *hset)
+{
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+
+ if (TREE_CODE (ssa_name) != SSA_NAME)
+ {
+ return false;
+ }
+
+ if (SSA_NAME_VAR (ssa_name)
+ && is_global_var (SSA_NAME_VAR (ssa_name)))
+ {
+ return false;
+ }
+
+ hset->add (ssa_name);
+
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name)
+ {
+ gimple *stmt = USE_STMT (use_p);
+
+ /* Ignore debug gimple statements. */
+ if (is_gimple_debug (stmt))
+ {
+ continue;
+ }
+
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_COND:
+ if (!check_uses_cond (ssa_name, stmt, hset))
+ {
+ return false;
+ }
+ break;
+
+ case GIMPLE_ASSIGN:
+ if (!check_uses_assign (ssa_name, stmt, hset))
+ {
+ return false;
+ }
+ break;
+
+ case GIMPLE_PHI:
+ if (!check_uses_phi (ssa_name, stmt, hset))
+ {
+ return false;
+ }
+ break;
+
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool
+check_def_gimple (gimple *def1, gimple *def2, const_tree result)
+{
+ /* def1 and def2 should be POINTER_PLUS_EXPR. */
+ if (!is_gimple_assign (def1) || !is_gimple_assign (def2)
+ || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR
+ || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR)
+ {
+ return false;
+ }
+
+ tree rhs12 = gimple_assign_rhs2 (def1);
+
+ tree rhs21 = gimple_assign_rhs1 (def2);
+ tree rhs22 = gimple_assign_rhs2 (def2);
+
+ if (rhs21 != result)
+ {
+ return false;
+ }
+
+ /* We should have a positive pointer-plus constant to ensure
+ that the pointer value is continuously increasing. */
+ if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST
+ || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+check_loop_body (basic_block bb0, basic_block bb2, const_tree result)
+{
+ gimple *g01 = first_stmt (bb0);
+ if (!g01 || !is_gimple_assign (g01)
+ || gimple_assign_rhs_code (g01) != MEM_REF
+ || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result)
+ {
+ return false;
+ }
+
+ gimple *g02 = g01->next;
+ /* GIMPLE_COND would be the last gimple in a basic block,
+ and have no other side effects on RESULT. */
+ if (!g02 || gimple_code (g02) != GIMPLE_COND)
+ {
+ return false;
+ }
+
+ if (first_stmt (bb2) != last_stmt (bb2))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+/* Pattern is like
+ <pre bb>
+ arg1 = base (rhs11) + cst (rhs12); [def1]
+ goto <bb 0>
+
+ <bb 2>
+ arg2 = result (rhs21) + cst (rhs22); [def2]
+
+ <bb 0>
+ # result = PHI <arg1 (pre bb), arg2 (bb 2)>
+ _v = *result; [g01]
+ if (_v == 0) [g02]
+ goto <bb 1>
+ else
+ goto <bb 2>
+
+ <bb 1>
+ _1 = result - base; [g1]
+ _2 = _1 /[ex] cst; [g2]
+ _3 = (unsigned int) _2; [g3]
+ if (_3 == 0)
+ ...
+*/
+static bool
+check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
+ gphi *phi_stmt, gimple *&output)
+{
+ /* Start check from PHI node in BB0. */
+ if (gimple_phi_num_args (phi_stmt) != 2
+ || virtual_operand_p (gimple_phi_result (phi_stmt)))
+ {
+ return false;
+ }
+
+ tree result = gimple_phi_result (phi_stmt);
+ tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
+ tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
+
+ if (TREE_CODE (arg1) != SSA_NAME
+ || TREE_CODE (arg2) != SSA_NAME
+ || SSA_NAME_IS_DEFAULT_DEF (arg1)
+ || SSA_NAME_IS_DEFAULT_DEF (arg2))
+ {
+ return false;
+ }
+
+ gimple *def1 = SSA_NAME_DEF_STMT (arg1);
+ gimple *def2 = SSA_NAME_DEF_STMT (arg2);
+
+ /* Swap bb1 and bb2 if pattern is like
+ if (_v != 0)
+ goto <bb 2>
+ else
+ goto <bb 1>
+ */
+ if (gimple_bb (def2) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
+ {
+ std::swap (bb1, bb2);
+ }
+
+ /* prebb[def1] --> bb0 <-- bb2[def2] */
+ if (!gimple_bb (def1)
+ || EDGE_SUCC (gimple_bb (def1), 0)->dest != bb0
+ || gimple_bb (def2) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
+ {
+ return false;
+ }
+
+ /* Check whether define gimple meets the pattern requirements. */
+ if (!check_def_gimple (def1, def2, result))
+ {
+ return false;
+ }
+
+ if (!check_loop_body (bb0, bb2, result))
+ {
+ return false;
+ }
+
+ output = def1;
+ return true;
+}
+
+/* Check pattern
+ <bb 1>
+ _1 = result - base; [g1]
+ _2 = _1 /[ex] cst; [g2]
+ _3 = (unsigned int) _2; [g3]
+ if (_3 == 0)
+ ...
+*/
+static bool
+check_gimple_order (basic_block bb1, const_tree base, const_tree cst,
+ const_tree result, gimple *&output)
+{
+ gimple *g1 = first_stmt (bb1);
+ if (!g1 || !is_gimple_assign (g1)
+ || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
+ || gimple_assign_rhs1 (g1) != result
+ || gimple_assign_rhs2 (g1) != base)
+ {
+ return false;
+ }
+
+ gimple *g2 = g1->next;
+ if (!g2 || !is_gimple_assign (g2)
+ || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
+ || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
+ || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
+ {
+ return false;
+ }
+
+ /* INTEGER_CST cst in gimple def1. */
+ HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
+ /* INTEGER_CST cst in gimple g2. */
+ HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
+ /* _2 must be at least a positive number. */
+ if (num2 == 0 || num1 / num2 <= 0)
+ {
+ return false;
+ }
+
+ gimple *g3 = g2->next;
+ if (!g3 || !is_gimple_assign (g3)
+ || gimple_assign_rhs_code (g3) != NOP_EXPR
+ || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
+ || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
+ {
+ return false;
+ }
+
+ /* _3 should only be used in comparison operation or PHI node. */
+ hash_set<tree> *hset = new hash_set<tree>;
+ if (!check_uses (gimple_assign_lhs (g3), hset))
+ {
+ delete hset;
+ return false;
+ }
+ delete hset;
+
+ output = g3;
+ return true;
+}
+
+static bool
+do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
+{
+ gphi_iterator gsi;
+
+ for (gsi = gsi_start_phis (bb0); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gphi *phi_stmt = gsi.phi ();
+ gimple *def1 = NULL;
+ tree base, cst, result;
+
+ if (!check_bb_order (bb0, bb1, bb2, phi_stmt, def1))
+ {
+ continue;
+ }
+
+ base = gimple_assign_rhs1 (def1);
+ cst = gimple_assign_rhs2 (def1);
+ result = gimple_phi_result (phi_stmt);
+
+ gimple *stmt = NULL;
+ if (!check_gimple_order (bb1, base, cst, result, stmt))
+ {
+ continue;
+ }
+
+ gcc_assert (stmt);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
+ print_gimple_stmt (dump_file, stmt, 0);
+ fprintf (dump_file, "to\n");
+ }
+
+ /* Rewrite statement
+ _3 = (unsigned int) _2;
+ to
+ _3 = (unsigned int) 1;
+ */
+ tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
+ gimple_assign_set_rhs1 (stmt, build_int_cst (type, 1));
+ update_stmt (stmt);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ print_gimple_stmt (dump_file, stmt, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ return true;
+ }
+ return false;
+}
+
/* Determine whether we should attempt to hoist adjacent loads out of
diamond patterns in pass_phiopt. Always hoist loads if
-fhoist-adjacent-loads is specified and the target machine has
--
2.21.0.windows.1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,486 +0,0 @@
From 2194d59a20be1ab627089d2f0c082b5a0a217f52 Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Tue, 3 Aug 2021 03:49:52 -0400
Subject: [PATCH 16/22] [StructReorg] Bugfix in certain scenarios
Several bug fixes for certain scenarios:
1. disable type simplification in LTO within the optimizations
2. only enable the optimizations for the C language
3. use new to zero-initialize allocated memory in symbol-summary.h
4. cover escape scenarios not previously considered
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index 5a19ea0bb40..1cb544ec3b0 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -97,6 +97,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-eh.h"
#include "bitmap.h"
#include "cfgloop.h"
+#include "langhooks.h"
#include "ipa-param-manipulation.h"
#include "tree-ssa-live.h" /* For remove_unused_locals. */
@@ -161,6 +162,44 @@ handled_type (tree type)
return false;
}
+/* Check whether in C language or LTO with only C language. */
+bool
+lang_c_p (void)
+{
+ const char *language_string = lang_hooks.name;
+
+ if (!language_string)
+ {
+ return false;
+ }
+
+ if (strcmp (language_string, "GNU GIMPLE") == 0)
+ {
+ unsigned i = 0;
+ tree t = NULL;
+ const char *unit_string = NULL;
+
+ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
+ {
+ unit_string = TRANSLATION_UNIT_LANGUAGE (t);
+ if (!unit_string
+ || (strncmp (unit_string, "GNU C", 5) != 0)
+ || (!ISDIGIT (unit_string[5])))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+ else if (strncmp (language_string, "GNU C", 5) == 0
+ && ISDIGIT (language_string[5]))
+ {
+ return true;
+ }
+
+ return false;
+}
+
enum srmode
{
NORMAL = 0,
@@ -999,7 +1038,6 @@ public:
void analyze_types (void);
void clear_visited (void);
bool create_new_types (void);
- void restore_field_type (void);
void create_new_decls (void);
srdecl *find_decl (tree);
void create_new_functions (void);
@@ -2127,7 +2165,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
srtype *t = find_type (inner_type (TREE_TYPE (rhs)));
srdecl *d = find_decl (lhs);
if (!d && t)
- current_function->record_decl (t, lhs, -1);
+ {
+ current_function->record_decl (t, lhs, -1);
+ tree var = SSA_NAME_VAR (lhs);
+ if (var && VOID_POINTER_P (TREE_TYPE (var)))
+ current_function->record_decl (t, var, -1);
+ }
}
if (TREE_CODE (rhs) == SSA_NAME
&& VOID_POINTER_P (TREE_TYPE (rhs))
@@ -2136,7 +2179,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
srtype *t = find_type (inner_type (TREE_TYPE (lhs)));
srdecl *d = find_decl (rhs);
if (!d && t)
- current_function->record_decl (t, rhs, -1);
+ {
+ current_function->record_decl (t, rhs, -1);
+ tree var = SSA_NAME_VAR (rhs);
+ if (var && VOID_POINTER_P (TREE_TYPE (var)))
+ current_function->record_decl (t, var, -1);
+ }
}
}
else
@@ -2816,8 +2864,14 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt)
if (escapes != does_not_escape)
{
for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
- mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
- escapes);
+ {
+ mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
+ escapes);
+ srdecl *d = current_function->find_decl (
+ gimple_call_arg (stmt, i));
+ if (d)
+ d->type->mark_escape (escapes, stmt);
+ }
return;
}
@@ -3753,49 +3807,6 @@ ipa_struct_reorg::analyze_types (void)
}
}
-/* When struct A has a struct B member, B's type info
- is not stored in
- TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA)))
- Try to restore B's type information. */
-void
-ipa_struct_reorg::restore_field_type (void)
-{
- for (unsigned i = 0; i < types.length (); i++)
- {
- for (unsigned j = 0; j < types[i]->fields.length (); j++)
- {
- srfield *field = types[i]->fields[j];
- if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE)
- {
- /* If field type has TYPE_FIELDS information,
- we do not need to do this. */
- if (TYPE_FIELDS (field->type->type) != NULL)
- {
- continue;
- }
- for (unsigned k = 0; k < types.length (); k++)
- {
- if (i == k)
- {
- continue;
- }
- const char *type1 = get_type_name (field->type->type);
- const char *type2 = get_type_name (types[k]->type);
- if (type1 == NULL || type2 == NULL)
- {
- continue;
- }
- if (type1 == type2
- && TYPE_FIELDS (types[k]->type))
- {
- field->type = types[k];
- }
- }
- }
- }
- }
-}
-
/* Create all new types we want to create. */
bool
@@ -4652,7 +4663,6 @@ ipa_struct_reorg::rewrite_functions (void)
{
unsigned retval = 0;
- restore_field_type ();
/* Create new types, if we did not create any new types,
then don't rewrite any accesses. */
if (!create_new_types ())
@@ -4887,7 +4897,10 @@ pass_ipa_struct_reorg::gate (function *)
&& flag_ipa_struct_reorg
/* Don't bother doing anything if the program has errors. */
&& !seen_error ()
- && flag_lto_partition == LTO_PARTITION_ONE);
+ && flag_lto_partition == LTO_PARTITION_ONE
+ /* Only enable struct optimizations in C since other
+ languages' grammar forbid. */
+ && lang_c_p ());
}
} // anon namespace
diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
index fa1df5c8015..a223b4dadea 100644
--- a/gcc/symbol-summary.h
+++ b/gcc/symbol-summary.h
@@ -59,6 +59,12 @@ protected:
/* Allocates new data that are stored within map. */
T* allocate_new ()
{
+ /* In structure optimizatons, we call new to ensure that
+ the allocated memory is initialized to 0. */
+ if (flag_ipa_struct_reorg)
+ return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
+ : new T ();
+
/* Call gcc_internal_because we do not want to call finalizer for
a type T. We call dtor explicitly. */
return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
@@ -71,7 +77,12 @@ protected:
if (is_ggc ())
ggc_delete (item);
else
- m_allocator.remove (item);
+ {
+ if (flag_ipa_struct_reorg)
+ delete item;
+ else
+ m_allocator.remove (item);
+ }
}
/* Unregister all call-graph hooks. */
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
new file mode 100644
index 00000000000..273baa9a368
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-flto -fno-use-linker-plugin" } */
+
+struct D
+{
+ int n;
+ int c [8];
+};
+
+struct A
+{
+ int i;
+ char *p;
+};
+
+struct B
+{
+ struct A *a;
+ struct D *d;
+};
+
+int dtInsert1 (struct B *b)
+{
+ struct A a = { 0, 0 };
+ struct D *d;
+ b->a = &a;
+ d = b->d;
+ &d->c [d->n];
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
new file mode 100644
index 00000000000..455f9b501d6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-flto -fno-use-linker-plugin" } */
+
+typedef struct basic_block_def *basic_block;
+typedef struct gimple_seq_node_d *gimple_seq_node;
+typedef struct gimple_seq_d *gimple_seq;
+typedef struct
+{
+ gimple_seq_node ptr;
+ gimple_seq seq;
+ basic_block bb;
+} gimple_stmt_iterator;
+typedef void *gimple;
+extern void exit(int);
+struct gimple_seq_node_d
+{
+ gimple stmt;
+ struct gimple_seq_node_d *next;
+};
+struct gimple_seq_d
+{
+};
+static __inline__ gimple_stmt_iterator
+gsi_start (gimple_seq seq)
+{
+ gimple_stmt_iterator i;
+ i.seq = seq;
+ return i;
+}
+static __inline__ unsigned char
+gsi_end_p (gimple_stmt_iterator i)
+{
+ return i.ptr == ((void *)0);
+}
+static __inline__ void
+gsi_next (gimple_stmt_iterator *i)
+{
+ i->ptr = i->ptr->next;
+}
+static __inline__ gimple
+gsi_stmt (gimple_stmt_iterator i)
+{
+ return i.ptr->stmt;
+}
+void
+c_warn_unused_result (gimple_seq seq)
+{
+ gimple_stmt_iterator i;
+ for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
+ {
+ gimple g = gsi_stmt (i);
+ if (!g) exit(0);
+ }
+}
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
new file mode 100644
index 00000000000..afc0bd86ca5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+struct gki_elem {
+ char *key;
+ int idx;
+};
+
+typedef struct {
+ struct gki_elem *table;
+
+ int primelevel;
+ int nhash;
+ int nkeys;
+} GKI;
+
+void *
+sre_malloc(size_t size)
+{
+ void *ptr = malloc (size);
+ return ptr;
+}
+
+__attribute__((noinline)) int
+GKIStoreKey(GKI *hash)
+{
+ hash->table = sre_malloc(sizeof(struct gki_elem));
+}
+
+int
+main ()
+{
+ GKI *hash = malloc (sizeof(GKI));
+ GKIStoreKey(hash);
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
new file mode 100644
index 00000000000..9bcfaf3681b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct {
+ unsigned char blue;
+ unsigned char green;
+} Pixel;
+
+typedef struct {
+ unsigned short colormaplength;
+ Pixel *colormapdata;
+} TargaImage;
+
+TargaImage *img;
+
+int main() {
+ img = (TargaImage *) malloc( sizeof(TargaImage) );
+ if (img->colormaplength > 0) {
+ img->colormapdata = (Pixel *) malloc(sizeof(Pixel) * img->colormaplength);
+ memset(img->colormapdata, 0, (sizeof(Pixel) * img->colormaplength) );
+ }
+}
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
new file mode 100644
index 00000000000..052f4e3bdc1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+
+extern void abort(void);
+
+struct packed_ushort {
+ unsigned short ucs;
+} __attribute__((packed));
+
+struct source {
+ int pos, length;
+};
+
+static int flag;
+
+static void __attribute__((noinline)) fetch(struct source *p)
+{
+ p->length = 128;
+}
+
+static struct packed_ushort __attribute__((noinline)) next(struct source *p)
+{
+ struct packed_ushort rv;
+
+ if (p->pos >= p->length) {
+ if (flag) {
+ flag = 0;
+ fetch(p);
+ return next(p);
+ }
+ flag = 1;
+ rv.ucs = 0xffff;
+ return rv;
+ }
+ rv.ucs = 0;
+ return rv;
+}
+
+int main(void)
+{
+ struct source s;
+ int i;
+
+ s.pos = 0;
+ s.length = 0;
+ flag = 0;
+
+ for (i = 0; i < 16; i++) {
+ struct packed_ushort rv = next(&s);
+ if ((i == 0 && rv.ucs != 0xffff)
+ || (i > 0 && rv.ucs != 0))
+ abort();
+ }
+ return 0;
+}
diff --git a/gcc/tree.c b/gcc/tree.c
index 3c17694c703..5c1374d6fb1 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -5216,6 +5216,12 @@ fld_worklist_push (tree t, class free_lang_data_d *fld)
static tree
fld_simplified_type_name (tree type)
{
+ /* Simplify type will cause that struct A and struct A within
+ struct B are different type pointers, so skip it in structure
+ optimizations. */
+ if (flag_ipa_struct_reorg)
+ return TYPE_NAME (type);
+
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
return TYPE_NAME (type);
/* Drop TYPE_DECLs in TYPE_NAME in favor of the identifier in the
@@ -5454,6 +5460,11 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
{
if (!t)
return t;
+ /* Simplify type will cause that struct A and struct A within
+ struct B are different type pointers, so skip it in structure
+ optimizations. */
+ if (flag_ipa_struct_reorg)
+ return t;
if (POINTER_TYPE_P (t))
return fld_incomplete_type_of (t, fld);
/* FIXME: This triggers verification error, see PR88140. */
--
2.21.0.windows.1

View File

@ -1,622 +0,0 @@
From 4d76b521d9bb539556011304b8a76dea1e2657a1 Mon Sep 17 00:00:00 2001
From: bule <bule1@huawei.com>
Date: Fri, 6 Aug 2021 10:20:54 +0800
Subject: [PATCH 17/22] [mcmodel] Enable mcmodel=medium on kunpeng
Enable mcmodel=medium on kunpeng
diff --git a/gcc/combine.c b/gcc/combine.c
index 35505cc5311..497e53289ca 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -1923,6 +1923,12 @@ can_combine_p (rtx_insn *insn, rtx_insn *i3, rtx_insn *pred ATTRIBUTE_UNUSED,
break;
case SET:
+ /* If the set is a symbol loaded by medium code model unspec
+ escape this combine. */
+ if (GET_CODE (SET_SRC (elt)) == UNSPEC
+ && XVECLEN (SET_SRC (elt), 0) != 0
+ && targetm.medium_symbol_p (SET_SRC (elt)))
+ return 0;
/* Ignore SETs whose result isn't used but not those that
have side-effects. */
if (find_reg_note (insn, REG_UNUSED, SET_DEST (elt))
diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h
index ee7bed34924..21828803480 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -66,6 +66,10 @@ enum aarch64_code_model {
/* -fpic for small memory model.
GOT size to 28KiB (4K*8-4K) or 3580 entries. */
AARCH64_CMODEL_SMALL_SPIC,
+ /* Using movk insn sequence to do 64bit PC relative relocation. */
+ AARCH64_CMODEL_MEDIUM,
+ /* Using movk insn sequence to do 64bit PC relative got relocation. */
+ AARCH64_CMODEL_MEDIUM_PIC,
/* No assumptions about addresses of code and data.
The PIC variant is not yet implemented. */
AARCH64_CMODEL_LARGE
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index bebd1b36228..226f3a8ff01 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -95,9 +95,11 @@
*/
enum aarch64_symbol_type
{
+ SYMBOL_MEDIUM_ABSOLUTE,
SYMBOL_SMALL_ABSOLUTE,
SYMBOL_SMALL_GOT_28K,
SYMBOL_SMALL_GOT_4G,
+ SYMBOL_MEDIUM_GOT_4G,
SYMBOL_SMALL_TLSGD,
SYMBOL_SMALL_TLSDESC,
SYMBOL_SMALL_TLSIE,
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 79dc8f186f4..f78942b04c6 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3127,6 +3127,29 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
emit_insn (gen_add_losym (dest, tmp_reg, imm));
return;
}
+ case SYMBOL_MEDIUM_ABSOLUTE:
+ {
+ rtx tmp_reg = dest;
+ machine_mode mode = GET_MODE (dest);
+
+ gcc_assert (mode == Pmode || mode == ptr_mode);
+ if (can_create_pseudo_p ())
+ tmp_reg = gen_reg_rtx (mode);
+
+ if (mode == DImode)
+ {
+ emit_insn (gen_load_symbol_medium_di (dest, tmp_reg, imm));
+ }
+ else
+ {
+ emit_insn (gen_load_symbol_medium_si (dest, tmp_reg, imm));
+ }
+ if (REG_P (dest))
+ {
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (imm));
+ }
+ return;
+ }
case SYMBOL_TINY_ABSOLUTE:
emit_insn (gen_rtx_SET (dest, imm));
@@ -3249,6 +3272,60 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
return;
}
+ case SYMBOL_MEDIUM_GOT_4G:
+ {
+ rtx tmp_reg = dest;
+ machine_mode mode = GET_MODE (dest);
+ if (can_create_pseudo_p ())
+ {
+ tmp_reg = gen_reg_rtx (mode);
+ }
+ rtx insn;
+ rtx mem;
+ rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+
+ if (mode == DImode)
+ {
+ emit_insn (gen_load_symbol_medium_di (tmp_reg, dest, s));
+ }
+ else
+ {
+ emit_insn (gen_load_symbol_medium_si (tmp_reg, dest, s));
+ }
+ if (REG_P (dest))
+ {
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (s));
+ }
+
+ if (mode == ptr_mode)
+ {
+ if (mode == DImode)
+ {
+ emit_insn (gen_get_gotoff_di (dest, imm));
+ insn = gen_ldr_got_medium_di (dest, tmp_reg, dest);
+ }
+ else
+ {
+ emit_insn (gen_get_gotoff_si (dest, imm));
+ insn = gen_ldr_got_medium_si (dest, tmp_reg, dest);
+ }
+ mem = XVECEXP (SET_SRC (insn), 0, 0);
+ }
+ else
+ {
+ gcc_assert (mode == Pmode);
+ emit_insn (gen_get_gotoff_di (dest, imm));
+ insn = gen_ldr_got_medium_sidi (dest, tmp_reg, dest);
+ mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
+ }
+
+ gcc_assert (GET_CODE (mem) == MEM);
+ MEM_READONLY_P (mem) = 1;
+ MEM_NOTRAP_P (mem) = 1;
+ emit_insn (insn);
+ return;
+ }
+
case SYMBOL_SMALL_TLSGD:
{
rtx_insn *insns;
@@ -5256,11 +5333,12 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
return;
- case SYMBOL_SMALL_TLSGD:
- case SYMBOL_SMALL_TLSDESC:
+ case SYMBOL_SMALL_TLSGD:
+ case SYMBOL_SMALL_TLSDESC:
case SYMBOL_SMALL_TLSIE:
case SYMBOL_SMALL_GOT_28K:
case SYMBOL_SMALL_GOT_4G:
+ case SYMBOL_MEDIUM_GOT_4G:
case SYMBOL_TINY_GOT:
case SYMBOL_TINY_TLSIE:
if (const_offset != 0)
@@ -5279,6 +5357,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
case SYMBOL_TLSLE24:
case SYMBOL_TLSLE32:
case SYMBOL_TLSLE48:
+ case SYMBOL_MEDIUM_ABSOLUTE:
aarch64_load_symref_appropriately (dest, imm, sty);
return;
@@ -9389,7 +9468,14 @@ aarch64_classify_address (struct aarch64_address_info *info,
if (GET_CODE (sym) == SYMBOL_REF
&& offset.is_constant (&const_offset)
&& (aarch64_classify_symbol (sym, const_offset)
- == SYMBOL_SMALL_ABSOLUTE))
+ == SYMBOL_SMALL_ABSOLUTE
+ /* Fix fail on dbl_mov_immediate_1.c. If end up here with
+ MEDIUM_ABSOLUTE, the symbol is a constant number that is
+ forced to memory in reload pass, which is ok to go on with
+ the original design that subtitude the mov to
+ 'adrp and ldr :losum'. */
+ || aarch64_classify_symbol (sym, const_offset)
+ == SYMBOL_MEDIUM_ABSOLUTE))
{
/* The symbol and offset must be aligned to the access size. */
unsigned int align;
@@ -11346,7 +11432,13 @@ static inline bool
aarch64_can_use_per_function_literal_pools_p (void)
{
return (aarch64_pcrelative_literal_loads
- || aarch64_cmodel == AARCH64_CMODEL_LARGE);
+ || aarch64_cmodel == AARCH64_CMODEL_LARGE
+ /* Fix const9.C so that constants goes to function_literal_pools.
+ According to the orignal design of aarch64 mcmodel=medium, we
+ don't care where this symbol is put. For the benefit of code size
+ and behaviour consistent with other mcmodel, put it into
+ function_literal_pools. */
+ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM);
}
static bool
@@ -13003,6 +13095,13 @@ cost_plus:
if (speed)
*cost += extra_cost->alu.arith;
}
+ else if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM
+ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC)
+ {
+ /* 4 movs adr sub add 2movs ldr. */
+ if (speed)
+ *cost += 7*extra_cost->alu.arith;
+ }
if (flag_pic)
{
@@ -13010,6 +13109,8 @@ cost_plus:
*cost += COSTS_N_INSNS (1);
if (speed)
*cost += extra_cost->ldst.load;
+ if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC)
+ *cost += 2*extra_cost->alu.arith;
}
return true;
@@ -14373,6 +14474,7 @@ initialize_aarch64_tls_size (struct gcc_options *opts)
if (aarch64_tls_size > 32)
aarch64_tls_size = 32;
break;
+ case AARCH64_CMODEL_MEDIUM:
case AARCH64_CMODEL_LARGE:
/* The maximum TLS size allowed under large is 16E.
FIXME: 16E should be 64bit, we only support 48bit offset now. */
@@ -15266,6 +15368,12 @@ initialize_aarch64_code_model (struct gcc_options *opts)
#endif
}
break;
+ case AARCH64_CMODEL_MEDIUM:
+ if (opts->x_flag_pic)
+ {
+ aarch64_cmodel = AARCH64_CMODEL_MEDIUM_PIC;
+ }
+ break;
case AARCH64_CMODEL_LARGE:
if (opts->x_flag_pic)
sorry ("code model %qs with %<-f%s%>", "large",
@@ -15276,6 +15384,7 @@ initialize_aarch64_code_model (struct gcc_options *opts)
case AARCH64_CMODEL_TINY_PIC:
case AARCH64_CMODEL_SMALL_PIC:
case AARCH64_CMODEL_SMALL_SPIC:
+ case AARCH64_CMODEL_MEDIUM_PIC:
gcc_unreachable ();
}
}
@@ -15286,6 +15395,7 @@ static void
aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
+ ptr->x_aarch64_data_threshold = opts->x_aarch64_data_threshold;
ptr->x_aarch64_branch_protection_string
= opts->x_aarch64_branch_protection_string;
}
@@ -15301,6 +15411,7 @@ aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
opts->x_explicit_arch = ptr->x_explicit_arch;
selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
+ opts->x_aarch64_data_threshold = ptr->x_aarch64_data_threshold;
opts->x_aarch64_branch_protection_string
= ptr->x_aarch64_branch_protection_string;
if (opts->x_aarch64_branch_protection_string)
@@ -16169,6 +16280,8 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
case AARCH64_CMODEL_SMALL_SPIC:
case AARCH64_CMODEL_SMALL_PIC:
+ case AARCH64_CMODEL_MEDIUM_PIC:
+ case AARCH64_CMODEL_MEDIUM:
case AARCH64_CMODEL_SMALL:
return SYMBOL_SMALL_ABSOLUTE;
@@ -16205,6 +16318,7 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
return SYMBOL_TINY_ABSOLUTE;
case AARCH64_CMODEL_SMALL:
+ AARCH64_SMALL_ROUTINE:
/* Same reasoning as the tiny code model, but the offset cap here is
1MB, allowing +/-3.9GB for the offset to the symbol. */
@@ -16228,7 +16342,50 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
return SYMBOL_SMALL_ABSOLUTE;
+ case AARCH64_CMODEL_MEDIUM:
+ {
+ tree decl_local = SYMBOL_REF_DECL (x);
+ if (decl_local != NULL
+ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local)))
+ {
+ HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local));
+ /* If the data is smaller than the threshold, goto
+ the small code model. Else goto the large code
+ model. */
+ if (size >= HOST_WIDE_INT (aarch64_data_threshold))
+ goto AARCH64_LARGE_ROUTINE;
+ }
+ goto AARCH64_SMALL_ROUTINE;
+ }
+
+ case AARCH64_CMODEL_MEDIUM_PIC:
+ {
+ tree decl_local = SYMBOL_REF_DECL (x);
+ if (decl_local != NULL
+ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local)))
+ {
+ HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local));
+ if (size < HOST_WIDE_INT (aarch64_data_threshold))
+ {
+ if (!aarch64_symbol_binds_local_p (x))
+ {
+ /* flag_pic is 2 only when -fPIC is on, when we should
+ use 4G GOT. */
+ return flag_pic == 2 ? SYMBOL_SMALL_GOT_4G
+ : SYMBOL_SMALL_GOT_28K ;
+ }
+ return SYMBOL_SMALL_ABSOLUTE;
+ }
+ }
+ if (!aarch64_symbol_binds_local_p (x))
+ {
+ return SYMBOL_MEDIUM_GOT_4G;
+ }
+ return SYMBOL_MEDIUM_ABSOLUTE;
+ }
+
case AARCH64_CMODEL_LARGE:
+ AARCH64_LARGE_ROUTINE:
/* This is alright even in PIC code as the constant
pool reference is always PC relative and within
the same translation unit. */
@@ -19352,6 +19509,8 @@ aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
case AARCH64_CMODEL_SMALL:
case AARCH64_CMODEL_SMALL_PIC:
case AARCH64_CMODEL_SMALL_SPIC:
+ case AARCH64_CMODEL_MEDIUM:
+ case AARCH64_CMODEL_MEDIUM_PIC:
/* text+got+data < 4Gb. 4-byte signed relocs are sufficient
for everything. */
type = DW_EH_PE_sdata4;
@@ -22605,7 +22764,14 @@ aarch64_empty_mask_is_expensive (unsigned)
bool
aarch64_use_pseudo_pic_reg (void)
{
- return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
+ /* flag_pic is 2 when -fPIC is on, where we do not need the pseudo
+ pic reg. In medium code mode, when combine with -fpie/-fpic, there are
+ possibility that some symbol size smaller than the -mlarge-data-threshold
+ will still use SMALL_SPIC relocation, which need the pseudo pic reg.
+ Fix spill_1.c fail. */
+ return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
+ || (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC
+ && flag_pic != 2);
}
/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
@@ -22615,6 +22781,7 @@ aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
{
switch (XINT (x, 1))
{
+ case UNSPEC_GOTMEDIUMPIC4G:
case UNSPEC_GOTSMALLPIC:
case UNSPEC_GOTSMALLPIC28K:
case UNSPEC_GOTTINYPIC:
@@ -22976,6 +23143,18 @@ aarch64_estimated_poly_value (poly_int64 val)
return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
}
+/* Implement TARGET_MEDIUM_SYMBOL_P.
+ Return true if x is a symbol loaded by UNSPEC_LOAD_SYMBOL_MEDIUM. */
+bool
+aarch64_medium_symbol_p (rtx x)
+{
+ if (GET_CODE (x) != UNSPEC)
+ {
+ return false;
+ }
+ return XINT (x, 1) == UNSPEC_LOAD_SYMBOL_MEDIUM;
+}
+
/* Return true for types that could be supported as SIMD return or
argument types. */
@@ -24015,6 +24194,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_ESTIMATED_POLY_VALUE
#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
+#undef TARGET_MEDIUM_SYMBOL_P
+#define TARGET_MEDIUM_SYMBOL_P aarch64_medium_symbol_p
+
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 51148846345..8fc92d13dcb 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -33,6 +33,10 @@
#define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas ()
+/* Default threshold 64-bit relocation data
+ with aarch64 medium memory model. */
+#define AARCH64_DEFAULT_LARGE_DATA_THRESHOLD 65536
+
/* Target machine storage layout. */
#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 58445dea941..ee80261f1ac 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -224,6 +224,9 @@
UNSPEC_RSQRTS
UNSPEC_NZCV
UNSPEC_XPACLRI
+ UNSPEC_GOTMEDIUMPIC4G
+ UNSPEC_GET_GOTOFF
+ UNSPEC_LOAD_SYMBOL_MEDIUM
UNSPEC_LD1_SVE
UNSPEC_ST1_SVE
UNSPEC_LDNT1_SVE
@@ -6792,6 +6795,39 @@
[(set_attr "type" "load_4")]
)
+(define_insn "get_gotoff_<mode>"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (unspec:GPI [(match_operand 1 "aarch64_valid_symref" "S")]
+ UNSPEC_GET_GOTOFF))]
+ ""
+ "movz\\t%x0, :gotoff_g1:%A1\;movk\\t%x0, :gotoff_g0_nc:%A1"
+ [(set_attr "type" "multiple")
+ (set_attr "length" "8")]
+)
+
+(define_insn "ldr_got_medium_<mode>"
+ [(set (match_operand:PTR 0 "register_operand" "=r")
+ (unspec:PTR [(mem:PTR (lo_sum:PTR
+ (match_operand:PTR 1 "register_operand" "r")
+ (match_operand:PTR 2 "register_operand" "r")))]
+ UNSPEC_GOTMEDIUMPIC4G))]
+ ""
+ "ldr\\t%0, [%1, %2]"
+ [(set_attr "type" "load_4")]
+)
+
+(define_insn "ldr_got_medium_sidi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (unspec:SI [(mem:SI (lo_sum:DI
+ (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")))]
+ UNSPEC_GOTMEDIUMPIC4G)))]
+ "TARGET_ILP32"
+ "ldr\\t%0, [%1, %2]"
+ [(set_attr "type" "load_4")]
+)
+
(define_insn "ldr_got_small_28k_<mode>"
[(set (match_operand:PTR 0 "register_operand" "=r")
(unspec:PTR [(mem:PTR (lo_sum:PTR
@@ -6955,6 +6991,23 @@
(set_attr "length" "12")]
)
+(define_insn "load_symbol_medium_<mode>"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (unspec:GPI [(match_operand 2 "aarch64_valid_symref" "S")]
+ UNSPEC_LOAD_SYMBOL_MEDIUM))
+ (clobber (match_operand:GPI 1 "register_operand" "=r"))]
+ ""
+ "movz\\t%x0, :prel_g3:%A2\;\\
+movk\\t%x0, :prel_g2_nc:%A2\;\\
+movk\\t%x0, :prel_g1_nc:%A2\;\\
+movk\\t%x0, :prel_g0_nc:%A2\;\\
+adr\\t%x1, .\;\\
+sub\\t%x1, %x1, 0x4\;\\
+add\\t%x0, %x0, %x1"
+ [(set_attr "type" "multiple")
+ (set_attr "length" "28")]
+)
+
(define_expand "tlsdesc_small_<mode>"
[(unspec:PTR [(match_operand 0 "aarch64_valid_symref")] UNSPEC_TLSDESC)]
"TARGET_TLS_DESC"
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 4539156d6f4..bb888461ab0 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -27,6 +27,10 @@ enum aarch64_processor explicit_tune_core = aarch64_none
TargetVariable
enum aarch64_arch explicit_arch = aarch64_no_arch
+;; -mlarge-data-threshold=
+TargetSave
+int x_aarch64_data_threshold
+
TargetSave
const char *x_aarch64_override_tune_string
@@ -60,9 +64,16 @@ Enum(cmodel) String(tiny) Value(AARCH64_CMODEL_TINY)
EnumValue
Enum(cmodel) String(small) Value(AARCH64_CMODEL_SMALL)
+EnumValue
+Enum(cmodel) String(medium) Value(AARCH64_CMODEL_MEDIUM)
+
EnumValue
Enum(cmodel) String(large) Value(AARCH64_CMODEL_LARGE)
+mlarge-data-threshold=
+Target RejectNegative Joined UInteger Var(aarch64_data_threshold) Init(AARCH64_DEFAULT_LARGE_DATA_THRESHOLD)
+-mlarge-data-threshold=<number> Data greater than given threshold will be assume that it should be relocated using 64-bit relocation.
+
mbig-endian
Target Report RejectNegative Mask(BIG_END)
Assume target CPU is configured as big endian.
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index fcb7245e95c..0508fce57a7 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6983,6 +6983,11 @@ things like cost calculations or profiling frequencies. The default
implementation returns the lowest possible value of @var{val}.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_MEDIUM_SYMBOL_P (rtx @var{x})
+Return true if the input rtx is a symbol loaded by kunpeng medium code
+model.
+@end deftypefn
+
@node Scheduling
@section Adjusting the Instruction Scheduler
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index c17209daa51..3b70ea4841a 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4701,6 +4701,8 @@ Define this macro if a non-short-circuit operation produced by
@hook TARGET_ESTIMATED_POLY_VALUE
+@hook TARGET_MEDIUM_SYMBOL_P
+
@node Scheduling
@section Adjusting the Instruction Scheduler
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index d61cadb5208..bad8208cd22 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -14501,14 +14501,17 @@ const_ok_for_output_1 (rtx rtl)
/* If delegitimize_address couldn't do anything with the UNSPEC, and
the target hook doesn't explicitly allow it in debug info, assume
we can't express it in the debug info. */
- /* Don't complain about TLS UNSPECs, those are just too hard to
- delegitimize. Note this could be a non-decl SYMBOL_REF such as
- one in a constant pool entry, so testing SYMBOL_REF_TLS_MODEL
- rather than DECL_THREAD_LOCAL_P is not just an optimization. */
+ /* Don't complain about TLS UNSPECs and aarch64 medium code model
+ related UNSPECs, those are just too hard to delegitimize. Note
+ this could be a non-decl SYMBOL_REF such as one in a constant
+ pool entry, so testing SYMBOL_REF_TLS_MODEL rather than
+ DECL_THREAD_LOCAL_P is not just an optimization. */
if (flag_checking
&& (XVECLEN (rtl, 0) == 0
|| GET_CODE (XVECEXP (rtl, 0, 0)) != SYMBOL_REF
- || SYMBOL_REF_TLS_MODEL (XVECEXP (rtl, 0, 0)) == TLS_MODEL_NONE))
+ || (!targetm.medium_symbol_p (rtl)
+ && SYMBOL_REF_TLS_MODEL (XVECEXP (rtl, 0, 0))
+ == TLS_MODEL_NONE)))
inform (current_function_decl
? DECL_SOURCE_LOCATION (current_function_decl)
: UNKNOWN_LOCATION,
diff --git a/gcc/target.def b/gcc/target.def
index f5a6d507e91..2020564118b 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3869,6 +3869,13 @@ implementation returns the lowest possible value of @var{val}.",
HOST_WIDE_INT, (poly_int64 val),
default_estimated_poly_value)
+DEFHOOK
+(medium_symbol_p,
+ "Return true if the input rtx is a symbol loaded by kunpeng medium code\n\
+model.",
+ bool, (rtx x),
+ default_medium_symbol_p)
+
/* Permit speculative instructions in delay slots during delayed-branch
scheduling. */
DEFHOOK
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 7cb04f30bdb..43a9f0cdf5b 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1708,6 +1708,13 @@ default_estimated_poly_value (poly_int64 x)
return x.coeffs[0];
}
+/* The default implementation of TARGET_MEDIUM_SYMBOL_P. */
+bool
+default_medium_symbol_p (rtx x ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
/* For hooks which use the MOVE_RATIO macro, this gives the legacy default
behavior. SPEED_P is true if we are compiling for speed. */
--
2.21.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -1,296 +0,0 @@
From 5392e41dcb7d58a80f2864b3c3f600c538fba799 Mon Sep 17 00:00:00 2001
From: huangxiaoquan <huangxiaoquan1@huawei.com>
Date: Wed, 4 Aug 2021 14:21:08 +0800
Subject: [PATCH 19/22] [StructReorderFields] Fix bugs and improve mechanism
Fix bugs and improve mechanism:
1. Fixed a bug in multi-layer pointer recording.
2. Use new to initialize allocated memory in symbol-summary.h.
3. Only enable optimizations in C language.
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index 384aa81583c..fe364f742d8 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -173,31 +173,30 @@ lang_c_p (void)
return false;
}
- if (strcmp (language_string, "GNU GIMPLE") == 0)
+ if (lang_GNU_C ())
+ {
+ return true;
+ }
+ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check
{
unsigned i = 0;
- tree t = NULL;
- const char *unit_string = NULL;
+ tree t = NULL_TREE;
FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
{
- unit_string = TRANSLATION_UNIT_LANGUAGE (t);
- if (!unit_string
- || (strncmp (unit_string, "GNU C", 5) != 0)
- || (!ISDIGIT (unit_string[5])))
+ language_string = TRANSLATION_UNIT_LANGUAGE (t);
+ if (language_string == NULL
+ || strncmp (language_string, "GNU C", 5)
+ || (language_string[5] != '\0'
+ && !(ISDIGIT (language_string[5]))))
{
return false;
}
}
return true;
}
- else if (strncmp (language_string, "GNU C", 5) == 0
- && ISDIGIT (language_string[5]))
- {
- return true;
- }
-
return false;
+}
/* Get the number of pointer layers. */
@@ -1262,7 +1261,7 @@ public:
void check_uses (srdecl *decl, vec<srdecl*>&);
void check_use (srdecl *decl, gimple *stmt, vec<srdecl*>&);
void check_type_and_push (tree newdecl, srdecl *decl,
- vec<srdecl*> &worklist, gimple *stmt);
+ vec<srdecl*> &worklist, gimple *stmt);
void check_other_side (srdecl *decl, tree other, gimple *stmt, vec<srdecl*> &worklist);
void check_ptr_layers (tree a_expr, tree b_expr, gimple* stmt);
@@ -3010,11 +3009,9 @@ ipa_struct_reorg::find_var (tree expr, gimple *stmt)
{
tree r = TREE_OPERAND (expr, 0);
tree orig_type = TREE_TYPE (expr);
- if (handled_component_p (r)
- || TREE_CODE (r) == MEM_REF)
+ if (handled_component_p (r) || TREE_CODE (r) == MEM_REF)
{
- while (handled_component_p (r)
- || TREE_CODE (r) == MEM_REF)
+ while (handled_component_p (r) || TREE_CODE (r) == MEM_REF)
{
if (TREE_CODE (r) == VIEW_CONVERT_EXPR)
{
@@ -3092,10 +3089,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
srdecl *d = find_decl (lhs);
if (!d && t)
{
- current_function->record_decl (t, lhs, -1);
+ current_function->record_decl (t, lhs, -1,
+ isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL);
tree var = SSA_NAME_VAR (lhs);
if (var && VOID_POINTER_P (TREE_TYPE (var)))
- current_function->record_decl (t, var, -1);
+ current_function->record_decl (t, var, -1,
+ isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL);
}
}
/* void * _1; struct arc * _2;
@@ -3108,10 +3107,12 @@ ipa_struct_reorg::find_vars (gimple *stmt)
srdecl *d = find_decl (rhs);
if (!d && t)
{
- current_function->record_decl (t, rhs, -1);
+ current_function->record_decl (t, rhs, -1,
+ isptrptr (TREE_TYPE (lhs)) ? TREE_TYPE (lhs) : NULL);
tree var = SSA_NAME_VAR (rhs);
if (var && VOID_POINTER_P (TREE_TYPE (var)))
- current_function->record_decl (t, var, -1);
+ current_function->record_decl (t, var, -1,
+ isptrptr (TREE_TYPE (lhs)) ? TREE_TYPE (lhs) : NULL);
}
}
}
@@ -3529,7 +3530,7 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple
{
/* The type is other, the declaration is side. */
current_function->record_decl (type, side, -1,
- find_decl (other) ? find_decl (other)->orig_type : NULL);
+ isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL);
}
else
{
@@ -5111,31 +5112,23 @@ ipa_struct_reorg::propagate_escape_via_original (void)
{
for (unsigned i = 0; i < types.length (); i++)
{
- for (unsigned j = 0; j < types[i]->fields.length (); j++)
- {
- srfield *field = types[i]->fields[j];
- if (handled_type (field->fieldtype) && field->type)
- {
- for (unsigned k = 0; k < types.length (); k++)
- {
- const char *type1 = get_type_name (field->type->type);
- const char *type2 = get_type_name (types[k]->type);
- if (type1 == NULL || type2 == NULL)
- {
- continue;
- }
- if (type1 == type2 && types[k]->has_escaped ())
- {
- if (!field->type->has_escaped ())
- {
- field->type->mark_escape (
- escape_via_orig_escape, NULL);
- }
- break;
- }
- }
- }
- }
+ for (unsigned j = 0; j < types.length (); j++)
+ {
+ const char *type1 = get_type_name (types[i]->type);
+ const char *type2 = get_type_name (types[j]->type);
+ if (type1 == NULL || type2 == NULL)
+ {
+ continue;
+ }
+ if (type1 == type2 && types[j]->has_escaped ())
+ {
+ if (!types[i]->has_escaped ())
+ {
+ types[i]->mark_escape (escape_via_orig_escape, NULL);
+ }
+ break;
+ }
+ }
}
}
@@ -6683,7 +6676,10 @@ pass_ipa_reorder_fields::gate (function *)
&& flag_ipa_reorder_fields
/* Don't bother doing anything if the program has errors. */
&& !seen_error ()
- && flag_lto_partition == LTO_PARTITION_ONE);
+ && flag_lto_partition == LTO_PARTITION_ONE
+ /* Only enable struct optimizations in C since other
+ languages' grammar forbid. */
+ && lang_c_p ());
}
} // anon namespace
diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
index a223b4dadea..ddf5e35776e 100644
--- a/gcc/symbol-summary.h
+++ b/gcc/symbol-summary.h
@@ -61,10 +61,9 @@ protected:
{
/* In structure optimizatons, we call new to ensure that
the allocated memory is initialized to 0. */
- if (flag_ipa_struct_reorg)
+ if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
: new T ();
-
/* Call gcc_internal_because we do not want to call finalizer for
a type T. We call dtor explicitly. */
return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
@@ -78,7 +77,7 @@ protected:
ggc_delete (item);
else
{
- if (flag_ipa_struct_reorg)
+ if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
delete item;
else
m_allocator.remove (item);
diff --git a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
new file mode 100644
index 00000000000..23765fc5615
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct T_HASH_ENTRY
+{
+ unsigned int hash;
+ unsigned int klen;
+ char *key;
+} iHashEntry;
+
+typedef struct T_HASH
+{
+ unsigned int size;
+ unsigned int fill;
+ unsigned int keys;
+
+ iHashEntry **array;
+} uHash;
+
+uHash *retval;
+
+int
+main() {
+ retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size);
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
index 8d687c58b30..54e737ee856 100644
--- a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
+++ b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
@@ -1,6 +1,6 @@
-// 针对
+// For testing:
/*
-Compile options: /home/hxq/hcc_gcc9.3.0_org_debug/bin/gcc -O3 -g
+Compile options: gcc -O3 -g
-flto -flto-partition=one -fipa-reorder-fields -fipa-struct-reorg
-v -save-temps -fdump-ipa-all-details test.c -o test
@@ -94,12 +94,11 @@ switch_arcs(arc_t** deleted_arcs, arc_t* arcnew)
copy = *test_arc;
count++;
*test_arc = arcnew[0];
- replace_weaker_arc(arcnew, copy.tail, copy.head);
+ replace_weaker_arc(arcnew, NULL, NULL);
}
return count;
}
-
int
main ()
{
diff --git a/gcc/tree.c b/gcc/tree.c
index 5c1374d6fb1..89fa469c359 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -5219,7 +5219,7 @@ fld_simplified_type_name (tree type)
/* Simplify type will cause that struct A and struct A within
struct B are different type pointers, so skip it in structure
optimizations. */
- if (flag_ipa_struct_reorg)
+ if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
return TYPE_NAME (type);
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
@@ -5463,7 +5463,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
/* Simplify type will cause that struct A and struct A within
struct B are different type pointers, so skip it in structure
optimizations. */
- if (flag_ipa_struct_reorg)
+ if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
return t;
if (POINTER_TYPE_P (t))
return fld_incomplete_type_of (t, fld);
--
2.21.0.windows.1

View File

@ -1,128 +0,0 @@
From 633dd654347b6146d6e94d6434e7028617019134 Mon Sep 17 00:00:00 2001
From: zhanghaijian <z.zhanghaijian@huawei.com>
Date: Mon, 9 Aug 2021 20:18:26 +0800
Subject: [PATCH 20/22] [Backport]vect: Fix an ICE in
vect_recog_mask_conversion_pattern
Reference:https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=91d80cf4bd2827dd9c40fe6a7c719c909d79083d
When processing the cond expression, vect_recog_mask_conversion_pattern
doesn't consider the situation that two operands of rhs1 are different
vectypes, leading to a vect ICE. This patch adds the identification and
handling of the situation to fix the problem.
diff --git a/gcc/testsuite/gcc.target/aarch64/pr96757.c b/gcc/testsuite/gcc.target/aarch64/pr96757.c
new file mode 100644
index 00000000000..122e39dca0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr96757.c
@@ -0,0 +1,23 @@
+/* PR target/96757 */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+short
+fun1(short i, short j)
+{
+ return i * j;
+}
+
+int
+fun(int a, int b, int c)
+{
+ int *v, z, k, m;
+ short f, d;
+ for (int i=0; i<c; i++)
+ {
+ f= 4 <= d;
+ k= a > m;
+ z = f > k;
+ *v += fun1(z,b);
+ }
+}
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 310165084a3..84d7ddb170f 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -4237,6 +4237,8 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
tree vectype1, vectype2;
stmt_vec_info pattern_stmt_info;
vec_info *vinfo = stmt_vinfo->vinfo;
+ tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
+ tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
/* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
if (is_gimple_call (last_stmt)
@@ -4336,9 +4338,37 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
it is better for b1 and b2 to use the mask type associated
with int elements rather bool (byte) elements. */
- rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
- if (!rhs1_type)
- rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
+ rhs1_op0 = TREE_OPERAND (rhs1, 0);
+ rhs1_op1 = TREE_OPERAND (rhs1, 1);
+ if (!rhs1_op0 || !rhs1_op1)
+ return NULL;
+ rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
+ rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
+
+ if (!rhs1_op0_type)
+ rhs1_type = TREE_TYPE (rhs1_op0);
+ else if (!rhs1_op1_type)
+ rhs1_type = TREE_TYPE (rhs1_op1);
+ else if (TYPE_PRECISION (rhs1_op0_type)
+ != TYPE_PRECISION (rhs1_op1_type))
+ {
+ int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
+ - (int) TYPE_PRECISION (TREE_TYPE (lhs));
+ int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
+ - (int) TYPE_PRECISION (TREE_TYPE (lhs));
+ if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
+ {
+ if (abs (tmp0) > abs (tmp1))
+ rhs1_type = rhs1_op1_type;
+ else
+ rhs1_type = rhs1_op0_type;
+ }
+ else
+ rhs1_type = build_nonstandard_integer_type
+ (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
+ }
+ else
+ rhs1_type = rhs1_op0_type;
}
else
return NULL;
@@ -4356,8 +4386,8 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
name from the outset. */
if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
TYPE_VECTOR_SUBPARTS (vectype2))
- && (TREE_CODE (rhs1) == SSA_NAME
- || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
+ && !rhs1_op0_type
+ && !rhs1_op1_type)
return NULL;
/* If rhs1 is invariant and we can promote it leave the COND_EXPR
@@ -4390,7 +4420,16 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
if (TREE_CODE (rhs1) != SSA_NAME)
{
tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
- pattern_stmt = gimple_build_assign (tmp, rhs1);
+ if (rhs1_op0_type
+ && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
+ rhs1_op0 = build_mask_conversion (rhs1_op0,
+ vectype2, stmt_vinfo);
+ if (rhs1_op1_type
+ && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
+ rhs1_op1 = build_mask_conversion (rhs1_op1,
+ vectype2, stmt_vinfo);
+ pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
+ rhs1_op0, rhs1_op1);
rhs1 = tmp;
append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2,
rhs1_type);
--
2.21.0.windows.1

View File

@ -1,23 +0,0 @@
From 023c92ac45b727768599a95f7da748158a270753 Mon Sep 17 00:00:00 2001
From: bule <bule1@huawei.com>
Date: Mon, 16 Aug 2021 11:20:35 +0800
Subject: [PATCH 21/22] [mcmodel] Bugfix for mcmodel=medium on x86
Declare default_medium_symbol_p in targhooks.h which otherwise
cause the build failure on x86 platform.
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 72f3064e8f8..95c136edc79 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -218,6 +218,7 @@ extern int default_register_move_cost (machine_mode, reg_class_t,
reg_class_t);
extern bool default_slow_unaligned_access (machine_mode, unsigned int);
extern HOST_WIDE_INT default_estimated_poly_value (poly_int64);
+extern bool default_medium_symbol_p (rtx);
extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
unsigned int,
--
2.21.0.windows.1

View File

@ -1,167 +0,0 @@
From 1c69390a01d3bf7226fce2a670a0f71731744b04 Mon Sep 17 00:00:00 2001
From: huangxiaoquan <huangxiaoquan1@huawei.com>
Date: Tue, 17 Aug 2021 15:50:31 +0800
Subject: [PATCH 22/22] [StructReorderFields] Fix pointer layer check bug
In the pointer layer check, the NULL pointer check is added
for the escape type mark.
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index fe364f742d8..85986ce5803 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -2235,9 +2235,9 @@ check_record_ptr_usage (gimple *use_stmt, tree &current_node,
}
bool res = true;
- /* MEM[(long int *)a_1] = _57; (record).
+ /* MEM[(long int *)a_1] = _1; (record).
If lhs is ssa_name, lhs cannot be the current node.
- _283 = _282->flow; (No record). */
+ _2 = _1->flow; (No record). */
if (TREE_CODE (rhs1) == SSA_NAME)
{
tree tmp = (rhs1 != current_node) ? rhs1 : lhs;
@@ -2285,13 +2285,13 @@ check_record_single_node (gimple *use_stmt, tree &current_node,
bool res = true;
if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == MEM_REF)
{
- /* _257 = MEM[(struct arc_t * *)_17]. */
+ /* add such as: _2 = MEM[(struct arc_t * *)_1]. */
res = add_node (lhs, *ptr_layers.get (current_node) - 1,
ptr_layers, ssa_name_stack);
}
else if (TREE_CODE (lhs) == MEM_REF && TREE_CODE (rhs1) == SSA_NAME)
{
- /* MEM[(long int *)a_1] = _57. */
+ /* add such as: MEM[(long int *)a_1] = _1. */
if (rhs1 == current_node)
{
res = add_node (TREE_OPERAND (lhs, 0),
@@ -3097,7 +3097,8 @@ ipa_struct_reorg::find_vars (gimple *stmt)
isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL);
}
}
- /* void * _1; struct arc * _2;
+ /* find void ssa_name such as:
+ void * _1; struct arc * _2;
_2 = _1 + _3; _1 = calloc (100, 40). */
if (TREE_CODE (rhs) == SSA_NAME
&& VOID_POINTER_P (TREE_TYPE (rhs))
@@ -3126,7 +3127,7 @@ ipa_struct_reorg::find_vars (gimple *stmt)
find_var (gimple_assign_rhs1 (stmt), stmt);
find_var (gimple_assign_rhs2 (stmt), stmt);
}
- /* _23 = _21 - old_arcs_12. */
+ /* find void ssa_name from stmt such as: _2 = _1 - old_arcs_1. */
else if ((current_mode == STRUCT_REORDER_FIELDS)
&& gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR
&& types_compatible_p (
@@ -3310,7 +3311,7 @@ trace_calculate_negate (gimple *size_def_stmt, tree *num, tree struct_size)
{
gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NEGATE_EXPR);
- /* _480 = -_479; _479 = _478 * 72. */
+ /* support NEGATE_EXPR trace: _3 = -_2; _2 = _1 * 72. */
tree num1 = NULL_TREE;
tree arg0 = gimple_assign_rhs1 (size_def_stmt);
if (!is_result_of_mult (arg0, &num1, struct_size) || num1 == NULL_TREE)
@@ -3329,7 +3330,8 @@ trace_calculate_diff (gimple *size_def_stmt, tree *num)
{
gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NOP_EXPR);
- /* _25 = (long unsigned int) _23; _23 = _21 - old_arcs_12. */
+ /* support POINTER_DIFF_EXPR trace:
+ _3 = (long unsigned int) _2; _2 = _1 - old_arcs_1. */
tree arg = gimple_assign_rhs1 (size_def_stmt);
size_def_stmt = SSA_NAME_DEF_STMT (arg);
if (size_def_stmt && is_gimple_assign (size_def_stmt)
@@ -3811,8 +3813,8 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect,
release INTEGER_TYPE cast to struct pointer.
(If t has escpaed above, then directly returns
and doesn't mark escape follow.). */
- /* _607 = MEM[(struct arc_t * *)pl_100].
- then base pl_100ssa_name - pointer_type - integer_type. */
+ /* _1 = MEM[(struct arc_t * *)a_1].
+ then base a_1: ssa_name - pointer_type - integer_type. */
if (current_mode == STRUCT_REORDER_FIELDS)
{
bool is_int_ptr = POINTER_TYPE_P (TREE_TYPE (base))
@@ -4520,8 +4522,15 @@ ipa_struct_reorg::check_ptr_layers (tree a_expr, tree b_expr, gimple* stmt)
{
return;
}
- a->type->mark_escape (escape_cast_another_ptr, stmt);
- b->type->mark_escape (escape_cast_another_ptr, stmt);
+
+ if (a)
+ {
+ a->type->mark_escape (escape_cast_another_ptr, stmt);
+ }
+ if (b)
+ {
+ b->type->mark_escape (escape_cast_another_ptr, stmt);
+ }
}
void
@@ -5649,9 +5658,9 @@ ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_
if (current_mode == STRUCT_REORDER_FIELDS)
{
/* Supports the MEM_REF offset.
- _1 = MEM[(struct arc *)ap_4 + 72B].flow;
- Old rewrite_1 = ap.reorder.0_8->flow;
- New rewrite_1
+ _1 = MEM[(struct arc *)ap_1 + 72B].flow;
+ Old rewrite: _1 = ap.reorder.0_8->flow;
+ New rewrite: _1
= MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow;
*/
HOST_WIDE_INT offset_tmp = 0;
@@ -6150,10 +6159,10 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi)
return false;
}
- /* Old rewriteif (x_1 != 0B)
+ /* Old rewrite: if (x_1 != 0B)
-> _1 = x.reorder.0_1 != 0B; if (_1 != 1)
The logic is incorrect.
- New rewriteif (x_1 != 0B)
+ New rewrite: if (x_1 != 0B)
-> if (x.reorder.0_1 != 0B)*/
for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++)
{
diff --git a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c
new file mode 100644
index 00000000000..a5477dcc9be
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c
@@ -0,0 +1,24 @@
+/* check_ptr_layers bugfix.*/
+/* { dg-do compile } */
+struct {
+ char a;
+} **b = 0, *e = 0;
+long c;
+char d = 9;
+int f;
+
+void g()
+{
+ for (; f;)
+ if (c)
+ (*e).a++;
+ if (!d)
+ for (;;)
+ b &&c;
+}
+int
+main()
+{
+ g();
+}
+/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */
\ No newline at end of file
--
2.21.0.windows.1

View File

@ -1,87 +0,0 @@
From 83a35da4910fc7d8f29ced3e0ff8adddeb537731 Mon Sep 17 00:00:00 2001
From: huangxiaoquan <huangxiaoquan1@huawei.com>
Date: Fri, 27 Aug 2021 14:53:18 +0800
Subject: [PATCH 23/24] [StructReorderFields] Add pointer offset check
The pointer offset check is added for the expr that is dereferenced
in the memory, and escapes struct pointer offset operations involving
field order.
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index 85986ce5803..b0d4fe80797 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -3876,6 +3876,17 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect,
return false;
}
+ /* Escape the operation of fetching field with pointer offset such as:
+ *(&(t->right)) = malloc (0); -> MEM[(struct node * *)_1 + 8B] = malloc (0);
+ */
+ if (current_mode != NORMAL
+ && (TREE_CODE (expr) == MEM_REF) && (offset != 0))
+ {
+ gcc_assert (can_escape);
+ t->mark_escape (escape_non_multiply_size, NULL);
+ return false;
+ }
+
if (wholeaccess (expr, base, accesstype, t))
{
field = NULL;
diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
index 190b9418275..2ae46fb3112 100644
--- a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
+++ b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
@@ -84,4 +84,4 @@ main ()
return cnt;
}
-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */
\ No newline at end of file
+/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
new file mode 100644
index 00000000000..317aafa5f72
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+struct node
+{
+ struct node *left, *right;
+ double a, b, c, d, e, f;
+}
+*a;
+int b, c;
+void
+CreateNode (struct node **p1)
+{
+ *p1 = calloc (10, sizeof (struct node));
+}
+
+int
+main ()
+{
+ a->left = 0;
+ struct node *t = a;
+ CreateNode (&t->right);
+
+ struct node p = *a;
+ b = 1;
+ if (p.left)
+ b = 0;
+ if (b)
+ printf (" Tree.\n");
+}
+
+/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */
\ No newline at end of file
--
2.21.0.windows.1

View File

@ -1,90 +0,0 @@
From 0ee0f0ebeb098787cb9698887c237606b6ab10c6 Mon Sep 17 00:00:00 2001
From: huangxiaoquan <huangxiaoquan1@huawei.com>
Date: Wed, 1 Sep 2021 17:07:22 +0800
Subject: [PATCH 24/24] [StructReorderFields] Add lto and whole-program gate
Only enable struct reorder fields optimizations in lto or whole-program.
This prevents some .c files from being struct reorder fields optimized
while some of them are not optimized during project compilation.
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index b0d4fe80797..2bf41e0d83b 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -6655,7 +6655,9 @@ pass_ipa_struct_reorg::gate (function *)
&& flag_lto_partition == LTO_PARTITION_ONE
/* Only enable struct optimizations in C since other
languages' grammar forbid. */
- && lang_c_p ());
+ && lang_c_p ()
+ /* Only enable struct optimizations in lto or whole_program. */
+ && (in_lto_p || flag_whole_program));
}
const pass_data pass_data_ipa_reorder_fields =
@@ -6699,7 +6701,9 @@ pass_ipa_reorder_fields::gate (function *)
&& flag_lto_partition == LTO_PARTITION_ONE
/* Only enable struct optimizations in C since other
languages' grammar forbid. */
- && lang_c_p ());
+ && lang_c_p ()
+ /* Only enable struct optimizations in lto or whole_program. */
+ && (in_lto_p || flag_whole_program));
}
} // anon namespace
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c
index 6565fe8dd63..23444fe8b0d 100644
--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c
@@ -1,5 +1,5 @@
// { dg-do compile }
-// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" }
+// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all -fwhole-program" }
struct a
{
@@ -21,4 +21,10 @@ int g(void)
return b->t;
}
+int main()
+{
+ f ();
+ return g ();
+}
+
/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c
index 5864ad46fd3..2d1f95c9935 100644
--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c
@@ -1,5 +1,5 @@
// { dg-do compile }
-// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" }
+// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all -fwhole-program" }
#include <stdlib.h>
typedef struct {
@@ -10,7 +10,7 @@ typedef struct {
compile_stack_elt_t *stack;
unsigned size;
} compile_stack_type;
-void f (const char *p, const char *pend, int c)
+__attribute__((noinline)) void f (const char *p, const char *pend, int c)
{
compile_stack_type compile_stack;
while (p != pend)
@@ -20,4 +20,9 @@ void f (const char *p, const char *pend, int c)
* sizeof (compile_stack_elt_t));
}
+int main()
+{
+ f (NULL, NULL, 1);
+}
+
/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */
--
2.21.0.windows.1

View File

@ -1,669 +0,0 @@
From 26e4ba63112f55c27b7dd3d5f8c4497ef9a2f459 Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Thu, 6 Jan 2022 15:33:29 +0800
Subject: [PATCH 25/28] [AutoPrefetch] Support cache misses profile
Add pass ex-afdo after pass afdo in auto-profile.c.
Add flag -fcache-misses-profile.
Read profile of different types of perf events and build maps for
function and gimple location to its count of each perf event.
Currently, instruction execution and cahce misses are supported.
---
gcc/auto-profile.c | 415 +++++++++++++++++++++++++++++++++++++++++++++
gcc/auto-profile.h | 28 +++
gcc/common.opt | 14 ++
gcc/opts.c | 26 +++
gcc/passes.def | 1 +
gcc/timevar.def | 1 +
gcc/toplev.c | 6 +
gcc/tree-pass.h | 2 +
8 files changed, 493 insertions(+)
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
index 7d09887c9..aced8fca5 100644
--- a/gcc/auto-profile.c
+++ b/gcc/auto-profile.c
@@ -49,6 +49,9 @@ along with GCC; see the file COPYING3. If not see
#include "auto-profile.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
+#include <map>
+#include <vector>
+#include <algorithm>
/* The following routines implements AutoFDO optimization.
@@ -95,6 +98,7 @@ along with GCC; see the file COPYING3. If not see
*/
#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
+#define DEFAULT_CACHE_MISSES_PROFILE_FILE "cmsdata.gcov"
#define AUTO_PROFILE_VERSION 1
namespace autofdo
@@ -117,6 +121,14 @@ private:
bool annotated_;
};
+/* pair <func_decl, count> */
+static bool
+event_count_cmp (std::pair<unsigned, gcov_type> &a,
+ std::pair<unsigned, gcov_type> &b)
+{
+ return a.second > b.second;
+}
+
/* Represent a source location: (function_decl, lineno). */
typedef std::pair<tree, unsigned> decl_lineno;
@@ -338,6 +350,206 @@ static autofdo_source_profile *afdo_source_profile;
/* gcov_summary structure to store the profile_info. */
static gcov_summary *afdo_profile_info;
+/* Check opts->x_flags and put file name into EVENT_FILES. */
+
+static bool
+get_all_profile_names (const char **event_files)
+{
+ if (!(flag_auto_profile || flag_cache_misses_profile))
+ {
+ return false;
+ }
+
+ event_files[INST_EXEC] = auto_profile_file;
+
+ if (cache_misses_profile_file == NULL)
+ {
+ cache_misses_profile_file = DEFAULT_CACHE_MISSES_PROFILE_FILE;
+ }
+ event_files[CACHE_MISSES] = cache_misses_profile_file;
+
+ return true;
+}
+
+static void read_profile (void);
+
+/* Maintain multiple profile data of different events with event_loc_count_map
+ and event_func_count_map. */
+
+class extend_auto_profile
+{
+public:
+ bool auto_profile_exist (enum event_type type);
+ gcov_type get_loc_count (location_t, event_type);
+ gcov_type get_func_count (unsigned, event_type);
+ struct rank_info get_func_rank (unsigned, enum event_type);
+ /* There should be only one instance of class EXTEND_AUTO_PROFILE. */
+ static extend_auto_profile *create ()
+ {
+ extend_auto_profile *map = new extend_auto_profile ();
+ if (map->read ())
+ {
+ return map;
+ }
+ delete map;
+ return NULL;
+ }
+private:
+ /* Basic maps of extend_auto_profile. */
+ typedef std::map<location_t, gcov_type> loc_count_map;
+ typedef std::map<unsigned, gcov_type> func_count_map;
+
+ /* Map of function_uid to its descending order rank of counts. */
+ typedef std::map<unsigned, unsigned> rank_map;
+
+ /* Mapping hardware events to corresponding basic maps. */
+ typedef std::map<event_type, loc_count_map> event_loc_count_map;
+ typedef std::map<event_type, func_count_map> event_func_count_map;
+ typedef std::map<event_type, rank_map> event_rank_map;
+
+ extend_auto_profile () {}
+ bool read ();
+ void set_loc_count ();
+ void process_extend_source_profile ();
+ void read_extend_afdo_file (const char*, event_type);
+ void rank_all_func ();
+ void dump_event ();
+ event_loc_count_map event_loc_map;
+ event_func_count_map event_func_map;
+ event_rank_map func_rank;
+ event_type profile_type;
+};
+
+/* Member functions for extend_auto_profile. */
+
+bool
+extend_auto_profile::auto_profile_exist (enum event_type type)
+{
+ switch (type)
+ {
+ case INST_EXEC:
+ return event_func_map.count (INST_EXEC) != 0
+ || event_loc_map.count (INST_EXEC) != 0;
+ case CACHE_MISSES:
+ return event_func_map.count (CACHE_MISSES) != 0
+ || event_loc_map.count (CACHE_MISSES) != 0;
+ default:
+ return false;
+ }
+}
+
+void
+extend_auto_profile::dump_event ()
+{
+ if (dump_file)
+ {
+ switch (profile_type)
+ {
+ case INST_EXEC:
+ fprintf (dump_file, "Processing event instruction execution.\n");
+ break;
+ case CACHE_MISSES:
+ fprintf (dump_file, "Processing event cache misses.\n");
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/* Return true if any profile data was read. */
+
+bool
+extend_auto_profile::read ()
+{
+ const char *event_files[EVENT_NUMBER] = {NULL};
+ if (!get_all_profile_names (event_files))
+ {
+ return false;
+ }
+
+ /* Backup AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE since we will create
+ new ones for each event_type. */
+ autofdo::string_table *string_table_afdo = afdo_string_table;
+ autofdo::autofdo_source_profile *source_profile_afdo = afdo_source_profile;
+
+ for (unsigned i = 0; i < EVENT_NUMBER; i++)
+ {
+ if (event_files[i] == NULL)
+ {
+ continue;
+ }
+ profile_type = (enum event_type) i;
+ dump_event ();
+ gcov_close ();
+ auto_profile_file = event_files[i];
+ read_profile ();
+ gcov_close ();
+
+ process_extend_source_profile ();
+
+ delete afdo_source_profile;
+ delete afdo_string_table;
+ }
+
+ /* Restore AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE. Function
+ END_AUTO_PROFILE will free them at the end of compilation. */
+ afdo_string_table = string_table_afdo;
+ afdo_source_profile = source_profile_afdo;
+ return true;
+}
+
+/* Helper functions. */
+
+gcov_type
+extend_auto_profile::get_loc_count (location_t loc, event_type type)
+{
+ event_loc_count_map::iterator event_iter = event_loc_map.find (type);
+ if (event_iter != event_loc_map.end ())
+ {
+ loc_count_map::iterator loc_iter = event_iter->second.find (loc);
+ if (loc_iter != event_iter->second.end ())
+ {
+ return loc_iter->second;
+ }
+ }
+ return 0;
+}
+
+struct rank_info
+extend_auto_profile::get_func_rank (unsigned decl_uid, enum event_type type)
+{
+ struct rank_info info = {0, 0};
+ event_rank_map::iterator event_iter = func_rank.find (type);
+ if (event_iter != func_rank.end ())
+ {
+ rank_map::iterator func_iter = event_iter->second.find (decl_uid);
+ if (func_iter != event_iter->second.end ())
+ {
+ info.rank = func_iter->second;
+ info.total = event_iter->second.size ();
+ }
+ }
+ return info;
+}
+
+gcov_type
+extend_auto_profile::get_func_count (unsigned decl_uid, event_type type)
+{
+ event_func_count_map::iterator event_iter = event_func_map.find (type);
+ if (event_iter != event_func_map.end ())
+ {
+ func_count_map::iterator func_iter = event_iter->second.find (decl_uid);
+ if (func_iter != event_iter->second.end ())
+ {
+ return func_iter->second;
+ }
+ }
+ return 0;
+}
+
+static extend_auto_profile *extend_profile;
+
/* Helper functions. */
/* Return the original name of NAME: strip the suffix that starts
@@ -1654,6 +1866,131 @@ auto_profile (void)
return TODO_rebuild_cgraph_edges;
}
+
+void
+extend_auto_profile::rank_all_func ()
+{
+ std::vector<std::pair<unsigned, gcov_type> > func_sorted;
+ event_func_count_map::iterator event_iter
+ = event_func_map.find (profile_type);
+ if (event_iter != event_func_map.end ())
+ {
+ func_count_map::iterator func_iter;
+ for (func_iter = event_iter->second.begin ();
+ func_iter != event_iter->second.end (); func_iter++)
+ {
+ func_sorted.push_back (std::make_pair (func_iter->first,
+ func_iter->second));
+ }
+
+ std::sort (func_sorted.begin (), func_sorted.end (), event_count_cmp);
+
+ for (unsigned i = 0; i < func_sorted.size (); ++i)
+ {
+ func_rank[profile_type][func_sorted[i].first] = i + 1;
+ }
+ }
+}
+
+/* Iterate stmts in cfun and maintain its count to EVENT_LOC_MAP. */
+
+void
+extend_auto_profile::set_loc_count ()
+{
+ basic_block bb;
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ count_info info;
+ gimple *stmt = gsi_stmt (gsi);
+ if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
+ {
+ continue;
+ }
+ if (afdo_source_profile->get_count_info (stmt, &info))
+ {
+ location_t loc = gimple_location (stmt);
+ event_loc_map[profile_type][loc] += info.count;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "stmt ");
+ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+ fprintf (dump_file, "counts %ld\n",
+ event_loc_map[profile_type][loc]);
+ }
+ }
+ }
+ }
+}
+
+/* Process data in extend_auto_source_profile, save them into two maps.
+ 1. gimple_location to count.
+ 2. function_index to count. */
+void
+extend_auto_profile::process_extend_source_profile ()
+{
+ struct cgraph_node *node;
+ if (symtab->state == FINISHED)
+ {
+ return;
+ }
+ FOR_EACH_FUNCTION (node)
+ {
+ if (!gimple_has_body_p (node->decl) || node->inlined_to)
+ {
+ continue;
+ }
+
+ /* Don't profile functions produced for builtin stuff. */
+ if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
+ {
+ continue;
+ }
+
+ function *fn = DECL_STRUCT_FUNCTION (node->decl);
+ push_cfun (fn);
+
+ const function_instance *s
+ = afdo_source_profile->get_function_instance_by_decl (
+ current_function_decl);
+
+ if (s == NULL)
+ {
+ pop_cfun ();
+ continue;
+ }
+ unsigned int decl_uid = DECL_UID (current_function_decl);
+ gcov_type count = s->total_count ();
+ if (dump_file)
+ {
+ fprintf (dump_file, "Extend auto-profile for function %s.\n",
+ node->dump_name ());
+ }
+ event_func_map[profile_type][decl_uid] += count;
+ set_loc_count ();
+ pop_cfun ();
+ }
+ rank_all_func ();
+}
+
+/* Main entry of extend_auto_profile. */
+
+static void
+extend_source_profile ()
+{
+ extend_profile = autofdo::extend_auto_profile::create ();
+ if (dump_file)
+ {
+ if (extend_profile == NULL)
+ {
+ fprintf (dump_file, "No profile file is found.\n");
+ return;
+ }
+ fprintf (dump_file, "Extend profile info generated.\n");
+ }
+}
} /* namespace autofdo. */
/* Read the profile from the profile data file. */
@@ -1682,6 +2019,42 @@ end_auto_profile (void)
profile_info = NULL;
}
+/* Extern function to get profile info in other passes. */
+
+bool
+profile_exist (enum event_type type)
+{
+ return autofdo::extend_profile != NULL
+ && autofdo::extend_profile->auto_profile_exist (type);
+}
+
+gcov_type
+event_get_loc_count (location_t loc, event_type type)
+{
+ return autofdo::extend_profile->get_loc_count (loc, type);
+}
+
+gcov_type
+event_get_func_count (unsigned decl_uid, event_type type)
+{
+ return autofdo::extend_profile->get_func_count (decl_uid, type);
+}
+
+struct rank_info
+event_get_func_rank (unsigned decl_uid, enum event_type type)
+{
+ return autofdo::extend_profile->get_func_rank (decl_uid, type);
+}
+
+void
+free_extend_profile_info ()
+{
+ if (autofdo::extend_profile != NULL)
+ {
+ delete autofdo::extend_profile;
+ }
+}
+
/* Returns TRUE if EDGE is hot enough to be inlined early. */
bool
@@ -1743,8 +2116,50 @@ public:
} // anon namespace
+namespace
+{
+const pass_data pass_data_ipa_extend_auto_profile =
+{
+ SIMPLE_IPA_PASS, /* type */
+ "ex-afdo", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_IPA_EXTEND_AUTO_PROFILE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_ipa_extend_auto_profile : public simple_ipa_opt_pass
+{
+public:
+ pass_ipa_extend_auto_profile (gcc::context *ctxt)
+ : simple_ipa_opt_pass (pass_data_ipa_extend_auto_profile, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *) {return (flag_ipa_extend_auto_profile > 0);}
+ virtual unsigned int execute (function *);
+
+};
+
+unsigned int
+pass_ipa_extend_auto_profile::execute (function *fun)
+{
+ autofdo::extend_source_profile ();
+ return 0;
+}
+} // anon namespace
+
simple_ipa_opt_pass *
make_pass_ipa_auto_profile (gcc::context *ctxt)
{
return new pass_ipa_auto_profile (ctxt);
}
+
+simple_ipa_opt_pass *
+make_pass_ipa_extend_auto_profile (gcc::context *ctxt)
+{
+ return new pass_ipa_extend_auto_profile (ctxt);
+}
\ No newline at end of file
diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h
index f5cff091d..230d7e68a 100644
--- a/gcc/auto-profile.h
+++ b/gcc/auto-profile.h
@@ -21,6 +21,13 @@ along with GCC; see the file COPYING3. If not see
#ifndef AUTO_PROFILE_H
#define AUTO_PROFILE_H
+enum event_type
+{
+ INST_EXEC = 0,
+ CACHE_MISSES,
+ EVENT_NUMBER
+};
+
/* Read, process, finalize AutoFDO data structures. */
extern void read_autofdo_file (void);
extern void end_auto_profile (void);
@@ -28,4 +35,25 @@ extern void end_auto_profile (void);
/* Returns TRUE if EDGE is hot enough to be inlined early. */
extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
+/* Check if profile exists before using this profile.  */
+extern bool profile_exist (enum event_type);
+
+/* Given func decl_uid or gimple location and event_type, return count.
+ Count is 0 if function or gimple is not sampled. */
+extern gcov_type event_get_func_count (unsigned, enum event_type);
+extern gcov_type event_get_loc_count (location_t, enum event_type);
+
+struct rank_info
+{
+ unsigned total;
+ unsigned rank;
+};
+
+/* Given function decl_uid and event type, return rank_info. Rank_info
+ is {0, 0} if function was not sampled. */
+extern struct rank_info event_get_func_rank (unsigned, enum event_type);
+
+/* Free memory allocated by autofdo::extend_profile.  */
+extern void free_extend_profile_info ();
+
#endif /* AUTO_PROFILE_H */
diff --git a/gcc/common.opt b/gcc/common.opt
index 73c24f28d..37cbbd8c0 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1074,6 +1074,16 @@ Common Joined RejectNegative Var(auto_profile_file)
Use sample profile information for call graph node weights. The profile
file is specified in the argument.
+fcache-misses-profile
+Common Report Var(flag_cache_misses_profile)
+Use sample profile information for source code cache miss count. The default
+profile file is cmsdata.gcov in `pwd`.
+
+fcache-misses-profile=
+Common Joined RejectNegative Var(cache_misses_profile_file)
+Use sample profile information for source code cache miss count. The profile
+file is specified in the argument.
+
; -fcheck-bounds causes gcc to generate array bounds checks.
; For C, C++ and ObjC: defaults off.
; For Java: defaults to on.
@@ -1873,6 +1883,10 @@ fipa-struct-reorg
Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization
Perform structure layout optimizations.
+fipa-extend-auto-profile
+Common Report Var(flag_ipa_extend_auto_profile)
+Use sample profile information for source code.
+
fipa-vrp
Common Report Var(flag_ipa_vrp) Optimization
Perform IPA Value Range Propagation.
diff --git a/gcc/opts.c b/gcc/opts.c
index 6924a973a..642327296 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1742,6 +1742,13 @@ enable_fdo_optimizations (struct gcc_options *opts,
SET_OPTION_IF_UNSET (opts, opts_set, flag_tree_loop_distribution, value);
}
+static void
+set_cache_misses_profile_params (struct gcc_options *opts,
+ struct gcc_options *opts_set)
+{
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_prefetch_loop_arrays, 1);
+}
+
/* -f{,no-}sanitize{,-recover}= suboptions. */
const struct sanitizer_opts_s sanitizer_opts[] =
{
@@ -2604,6 +2611,25 @@ common_handle_option (struct gcc_options *opts,
param_early_inliner_max_iterations, 10);
break;
+ case OPT_fipa_extend_auto_profile:
+ opts->x_flag_ipa_extend_auto_profile = opts->x_flag_cache_misses_profile
+ ? true : value;
+ break;
+
+ case OPT_fcache_misses_profile_:
+ opts->x_cache_misses_profile_file = xstrdup (arg);
+ opts->x_flag_cache_misses_profile = true;
+ value = true;
+ /* No break here - do -fcache-misses-profile processing. */
+ /* FALLTHRU */
+ case OPT_fcache_misses_profile:
+ opts->x_flag_ipa_extend_auto_profile = value;
+ if (value)
+ {
+ set_cache_misses_profile_params (opts, opts_set);
+ }
+ break;
+
case OPT_fprofile_generate_:
opts->x_profile_data_prefix = xstrdup (arg);
value = true;
diff --git a/gcc/passes.def b/gcc/passes.def
index 63303ab65..e9c91d26e 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -133,6 +133,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_target_clone);
NEXT_PASS (pass_ipa_auto_profile);
+ NEXT_PASS (pass_ipa_extend_auto_profile);
NEXT_PASS (pass_ipa_tree_profile);
PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile)
NEXT_PASS (pass_feedback_split_functions);
diff --git a/gcc/timevar.def b/gcc/timevar.def
index ee25eccbb..e873747a8 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -82,6 +82,7 @@ DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting")
DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats")
DEFTIMEVAR (TV_IPA_REORDER_FIELDS , "ipa struct reorder fields optimization")
DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization")
+DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile")
DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations")
DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression")
DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression")
diff --git a/gcc/toplev.c b/gcc/toplev.c
index eaed6f6c7..51e6bd400 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -577,6 +577,12 @@ compile_file (void)
targetm.asm_out.output_ident (ident_str);
}
+ /* Extend auto profile finalization. */
+ if (flag_ipa_extend_auto_profile)
+ {
+ free_extend_profile_info ();
+ }
+
/* Auto profile finalization. */
if (flag_auto_profile)
end_auto_profile ();
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index eb32c5d44..be6387768 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -511,6 +511,8 @@ extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
+extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context
+ *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt);
--
2.27.0.windows.1

View File

@ -1,353 +0,0 @@
From eb58d920a95696d8d5a7db9a6d640d4494fb023f Mon Sep 17 00:00:00 2001
From: liyancheng <412998149@qq.com>
Date: Tue, 25 Jan 2022 16:57:28 +0800
Subject: [PATCH 26/28] [AutoFDO] Enable discriminator and MCF algorithm on
AutoFDO
1. Support discriminator for distinguishing among several
basic blocks that share a common locus, allowing for
more accurate autofdo.
2. Using option -fprofile-correction for calling MCF algorithm
to smooth non conservative BB counts.
---
gcc/auto-profile.c | 172 ++++++++++++++++++++++++++++++++++++++++++++-
gcc/cfghooks.c | 7 ++
gcc/ipa-cp.c | 21 ++++++
gcc/opts.c | 5 +-
gcc/tree-inline.c | 14 ++++
5 files changed, 215 insertions(+), 4 deletions(-)
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
index aced8fca5..e6164b91b 100644
--- a/gcc/auto-profile.c
+++ b/gcc/auto-profile.c
@@ -678,6 +678,17 @@ string_table::get_index (const char *name) const
if (name == NULL)
return -1;
string_index_map::const_iterator iter = map_.find (name);
+  /* Function names may be duplicated.  Try to distinguish by the
+ #file_name#function_name defined by the autofdo tool chain. */
+ if (iter == map_.end ())
+ {
+ char* file_name = get_original_name (lbasename (dump_base_name));
+ char* file_func_name
+ = concat ("#", file_name, "#", name, NULL);
+ iter = map_.find (file_func_name);
+ free (file_name);
+ free (file_func_name);
+ }
if (iter == map_.end ())
return -1;
@@ -866,7 +877,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
for (unsigned i = 0; i < num_pos_counts; i++)
{
- unsigned offset = gcov_read_unsigned () & 0xffff0000;
+ unsigned offset = gcov_read_unsigned ();
unsigned num_targets = gcov_read_unsigned ();
gcov_type count = gcov_read_counter ();
s->pos_counts[offset].count = count;
@@ -945,6 +956,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
function_instance *s = get_function_instance_by_inline_stack (stack);
if (s == NULL)
return false;
+ if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
+ {
+ return true;
+ }
return s->get_count_info (stack[0].second, info);
}
@@ -1583,6 +1598,68 @@ afdo_propagate (bb_set *annotated_bb)
}
}
+/* Process the following scene when the branch probability
+ inversion when do function afdo_propagate (). E.g.
+ BB_NUM (sample count)
+ BB1 (1000)
+ / \
+ BB2 (10) BB3 (0)
+ \ /
+ BB4
+ In afdo_propagate(), count of BB3 is calculated by
+ COUNT (BB3) = 990 (990 = COUNT (BB1) - COUNT (BB2) = 1000 - 10)
+
+ In fact, BB3 may be colder than BB2 by sample count.
+
+ This function allocate source BB count to each succ BB by sample
+ rate, E.g.
+ BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT)) */
+
+static void
+afdo_preprocess_bb_count ()
+{
+ basic_block bb;
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
+ && bb->count > profile_count::zero ().afdo ())
+ {
+ basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
+ basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
+ if (single_succ_p (bb1) && single_succ_p (bb2)
+ && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
+ {
+ gcov_type max_count = 0;
+ gcov_type total_count = 0;
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (!e->dest->count.ipa_p ())
+ {
+ continue;
+ }
+ max_count = MAX(max_count, e->dest->count.to_gcov_type ());
+ total_count += e->dest->count.to_gcov_type ();
+ }
+ /* Only bb_count > max_count * 2, branch probability will
+ inversion. */
+ if (max_count > 0
+ && bb->count.to_gcov_type () > max_count * 2)
+ {
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ gcov_type target_count = bb->count.to_gcov_type ()
+ * e->dest->count.to_gcov_type () / total_count;
+ e->dest->count
+ = profile_count::from_gcov_type (target_count).afdo ();
+ }
+ }
+ }
+ }
+ }
+}
+
/* Propagate counts on control flow graph and calculate branch
probabilities. */
@@ -1608,6 +1685,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
}
afdo_find_equiv_class (annotated_bb);
+ afdo_preprocess_bb_count ();
afdo_propagate (annotated_bb);
FOR_EACH_BB_FN (bb, cfun)
@@ -1711,6 +1789,82 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
return false;
}
+/* Preparation before executing MCF algorithm. */
+
+static void
+afdo_init_mcf ()
+{
+ basic_block bb;
+ edge e;
+ edge_iterator ei;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
+ }
+
+  /* Step1: when using mcf, BB ids must be continuous,
+ so we need compact_blocks (). */
+ compact_blocks ();
+
+ /* Step2: allocate memory for MCF input data. */
+ bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
+ edge_gcov_counts = new hash_map<edge, gcov_type>;
+
+ /* Step3: init MCF input data from cfg. */
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ /* Init BB count for MCF. */
+ bb_gcov_count (bb) = bb->count.to_gcov_type ();
+
+ gcov_type total_count = 0;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ total_count += e->dest->count.to_gcov_type ();
+ }
+
+ /* If there is no sample in each successor blocks, source
+ BB samples are allocated to each edge by branch static prob. */
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (total_count == 0)
+ {
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
+ * e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
+ }
+ else
+ {
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
+ * e->dest->count.to_gcov_type () / total_count;
+ }
+ }
+ }
+}
+
+/* Free the resources used by MCF and reset BB count from MCF result,
+ branch probability has been updated in mcf_smooth_cfg (). */
+
+static void
+afdo_process_after_mcf ()
+{
+ basic_block bb;
+ /* Reset BB count from MCF result. */
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ if (bb_gcov_count (bb))
+ {
+ bb->count
+ = profile_count::from_gcov_type (bb_gcov_count (bb)).afdo ();
+ }
+ }
+
+ /* Clean up MCF resource. */
+ bb_gcov_counts.release ();
+ delete edge_gcov_counts;
+ edge_gcov_counts = NULL;
+}
+
/* Annotate auto profile to the control flow graph. Do not annotate value
profile for stmts in PROMOTED_STMTS. */
@@ -1762,8 +1916,20 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
afdo_source_profile->mark_annotated (cfun->function_end_locus);
if (max_count > profile_count::zero ())
{
- /* Calculate, propagate count and probability information on CFG. */
- afdo_calculate_branch_prob (&annotated_bb);
+ /* 1 means -fprofile-correction is enabled manually, and MCF
+ algorithm will be used to calculate count and probability.
+ Otherwise, use the default calculate algorithm. */
+ if (flag_profile_correction == 1)
+ {
+ afdo_init_mcf ();
+ mcf_smooth_cfg ();
+ afdo_process_after_mcf ();
+ }
+ else
+ {
+ /* Calculate, propagate count and probability information on CFG. */
+ afdo_calculate_branch_prob (&annotated_bb);
+ }
}
update_max_bb_count ();
profile_status_for_fn (cfun) = PROFILE_READ;
diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
index ea558b469..4ea490a8a 100644
--- a/gcc/cfghooks.c
+++ b/gcc/cfghooks.c
@@ -526,6 +526,9 @@ split_block_1 (basic_block bb, void *i)
return NULL;
new_bb->count = bb->count;
+  /* Copy discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
new_bb->discriminator = bb->discriminator;
if (dom_info_available_p (CDI_DOMINATORS))
@@ -1091,6 +1094,10 @@ duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id)
move_block_after (new_bb, after);
new_bb->flags = (bb->flags & ~BB_DUPLICATED);
+  /* Copy discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
+ new_bb->discriminator = bb->discriminator;
FOR_EACH_EDGE (s, ei, bb->succs)
{
/* Since we are creating edges from a new block to successors
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index b1f0881bd..c208070c9 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -4365,6 +4365,27 @@ update_profiling_info (struct cgraph_node *orig_node,
orig_node_count.dump (dump_file);
fprintf (dump_file, "\n");
}
+
+ /* When autofdo uses PMU as the sampling unit, the count of
+ cgraph_node->count cannot be obtained directly and will
+     be zero. Using it for apply_scale will cause the node
+     count to be incorrectly overestimated. So set orig_new_node_count
+ equal to orig_node_count, which is same as known error
+ handling. */
+ if (orig_node->count == profile_count::zero ().afdo ()
+ && new_node->count == profile_count::zero ().global0adjusted ())
+ {
+ orig_new_node_count = (orig_sum + new_sum).apply_scale (12, 10);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, " node %s with zero count from afdo ",
+ new_node->dump_name ());
+ fprintf (dump_file, " proceeding by pretending it was ");
+ orig_new_node_count.dump (dump_file);
+ fprintf (dump_file, "\n");
+ }
+ }
}
remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
diff --git a/gcc/opts.c b/gcc/opts.c
index 642327296..7a39f618b 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -2606,7 +2606,10 @@ common_handle_option (struct gcc_options *opts,
/* FALLTHRU */
case OPT_fauto_profile:
enable_fdo_optimizations (opts, opts_set, value);
- SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
+      /* 2 is special and means flag_profile_correction is turned on by
+ -fauto-profile. */
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction,
+ (value ? 2 : 0));
SET_OPTION_IF_UNSET (opts, opts_set,
param_early_inliner_max_iterations, 10);
break;
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index efde5d158..8405a959c 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -2015,6 +2015,10 @@ copy_bb (copy_body_data *id, basic_block bb,
basic_block_info automatically. */
copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux);
copy_basic_block->count = bb->count.apply_scale (num, den);
+  /* Copy discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
+ copy_basic_block->discriminator = bb->discriminator;
copy_gsi = gsi_start_bb (copy_basic_block);
@@ -3028,6 +3032,16 @@ copy_cfg_body (copy_body_data * id,
den += e->count ();
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den;
}
+ /* When autofdo uses PMU as the sampling unit, the number of
+ ENTRY_BLOCK_PTR_FOR_FN cannot be obtained directly and will
+     be zero. Using it for adjust_for_ipa_scaling will cause the
+ inlined BB count incorrectly overestimated. So set den equal
+ to num, which is the source inline BB count to avoid
+ overestimated. */
+ if (den == profile_count::zero ().afdo ())
+ {
+ den = num;
+ }
profile_count::adjust_for_ipa_scaling (&num, &den);
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -1,151 +0,0 @@
From 3d20b13bc2e5af8d52e221a33881423e38c3dfdd Mon Sep 17 00:00:00 2001
From: dingguangya <dingguangya1@huawei.com>
Date: Thu, 17 Feb 2022 21:53:31 +0800
Subject: [PATCH 28/28] [AutoPrefetch] Handle the case that the basic block
branch probability is invalid
When the node branch probability value is not initialized,
the branch probability must be set to 0 to ensure that
the calculation of the basic block execution probability
must be less than or equal to 100%.
---
.../gcc.dg/autoprefetch/autoprefetch.exp | 27 +++++++++++++++++++
.../autoprefetch/branch-weighted-prefetch.c | 22 +++++++++++++++
.../autoprefetch/get-edge-prob-non-init.c | 24 +++++++++++++++++
gcc/tree-ssa-loop-prefetch.c | 17 +++++++++++-
4 files changed, 89 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
new file mode 100644
index 000000000..a7408e338
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
@@ -0,0 +1,27 @@
+# Copyright (C) 1997-2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+load_lib gcc-dg.exp
+load_lib target-supports.exp
+
+# Initialize `dg'.
+dg-init
+
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
+ "" "-fprefetch-loop-arrays"
+
+# All done.
+dg-finish
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
new file mode 100644
index 000000000..c63c5e5cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fprefetch-loop-arrays=2 --param min-insn-to-prefetch-ratio=5 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
+#define N 10000000
+
+long long a[N];
+
+long long func ()
+{
+ long long i;
+ long long sum = 0;
+
+ for (i = 0; i < N; i+=1) {
+ if (i < 100000)
+ sum += a[i];
+ else
+ continue;
+ }
+
+ return sum;
+}
+/* { dg-final { scan-tree-dump-times "Ahead 40" 1 "aprefetch" } } */
+/* { dg-final { scan-tree-dump-times "builtin_prefetch" 1 "optimized" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c b/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
new file mode 100644
index 000000000..f55481008
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fprefetch-loop-arrays=2 -fdump-tree-aprefetch-details" } */
+
+int a, c, f;
+static int *b = &a;
+int *d;
+int e[0];
+void g() {
+ int h;
+ for (;;) {
+ h = 1;
+ for (; h >= 0; h--) {
+ c = 2;
+ for (; c; c--)
+ if (e[0])
+ if (e[c])
+ *b = 0;
+ f || (*d = 0);
+ }
+ }
+}
+int main() {}
+
+/* { dg-final } */
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 3a5aef0fc..673f453a4 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -2132,7 +2132,7 @@ get_edge_prob (edge e)
{
/* Limit the minimum probability value. */
const float MINNUM_PROB = 0.00001f;
- float fvalue = 1;
+ float fvalue = 0;
profile_probability probability = e->probability;
if (probability.initialized_p ())
@@ -2143,6 +2143,21 @@ get_edge_prob (edge e)
fvalue = MINNUM_PROB;
}
}
+ else
+ {
+ /* When the node branch probability value is not initialized, the branch
+ probability must be set to 0 to ensure that the calculation of the
+ basic block execution probability must be less than or equal to 100%.
+ i.e,
+ ...
+ <bb 3> [local count: 20000]
+ if (f_2 != 0)
+ goto <bb 6>; [INV]
+ else
+ goto <bb 7>; [100.00%]
+ ... */
+ fvalue = 0;
+ }
return fvalue;
}
--
2.27.0.windows.1

View File

@ -1,548 +0,0 @@
From c34a02199b1dfd362e81e78cb90fbd11e02eb93e Mon Sep 17 00:00:00 2001
From: liyancheng <412998149@qq.com>
Date: Mon, 14 Feb 2022 14:34:41 +0800
Subject: [PATCH 29/32] [AutoBOLT] Support saving feedback count info to ELF
segment 1/3
Add flag -fauto-bolt to save the feedback count info from PGO or
AutoFDO to segment .text.fdo. The bolt plugin will read and parse
it into the profile of llvm-bolt.
---
gcc/common.opt | 8 +
gcc/final.c | 400 +++++++++++++++++++++++++++++++++++++++++++++++++
gcc/opts.c | 61 ++++++++
3 files changed, 469 insertions(+)
diff --git a/gcc/common.opt b/gcc/common.opt
index 9488bd90f..5eaa667b3 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2403,6 +2403,14 @@ freorder-functions
Common Report Var(flag_reorder_functions) Optimization
Reorder functions to improve code placement.
+fauto-bolt
+Common Report Var(flag_auto_bolt)
+Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
+
+fauto-bolt=
+Common Joined RejectNegative
+Specify the feedback data directory required by BOLT-plugin. The default is the current directory.
+
frerun-cse-after-loop
Common Report Var(flag_rerun_cse_after_loop) Optimization
Add a common subexpression elimination pass after loop optimizations.
diff --git a/gcc/final.c b/gcc/final.c
index a3601964a..b9affd3a7 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
#include "rtl-iter.h"
#include "print-rtl.h"
#include "function-abi.h"
+#include "insn-codes.h"
#ifdef XCOFF_DEBUGGING_INFO
#include "xcoffout.h" /* Needed for external data declarations. */
@@ -4640,6 +4641,399 @@ leaf_renumber_regs_insn (rtx in_rtx)
}
#endif
+
+#define ASM_FDO_SECTION_PREFIX ".text.fdo."
+
+#define ASM_FDO_CALLER_FLAG ".fdo.caller "
+#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind "
+
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee "
+
+/* Return the relative offset address of the start instruction of BB,
+ return -1 if it is empty instruction. */
+
+static int
+get_bb_start_addr (basic_block bb)
+{
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+
+ int insn_code = recog_memoized (insn);
+
+ /* The instruction NOP in llvm-bolt belongs to the previous
+ BB, so it needs to be skipped. */
+ if (insn_code != CODE_FOR_nop)
+ {
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+ }
+ return -1;
+}
+
+/* Return the relative offset address of the end instruction of BB,
+ return -1 if it is empty or call instruction. */
+
+static int
+get_bb_end_addr (basic_block bb)
+{
+ rtx_insn *insn;
+ int num_succs = EDGE_COUNT (bb->succs);
+ FOR_BB_INSNS_REVERSE (bb, insn)
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+ /* The jump target of call is not in this function, so
+ it should be excluded. */
+ if (CALL_P (insn))
+ {
+ return -1;
+ }
+ if ((num_succs == 1)
+ || ((num_succs == 2) && any_condjump_p (insn)))
+ {
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+ else
+ {
+ return -1;
+ }
+ }
+ return -1;
+}
+
+/* Return the end address of cfun. */
+
+static int
+get_function_end_addr ()
+{
+ rtx_insn *insn = get_last_insn ();
+ for (; insn != get_insns (); insn = PREV_INSN (insn))
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+
+ return -1;
+}
+
+/* Return the function profile status string. */
+
+static const char *
+get_function_profile_status ()
+{
+ const char *profile_status[] = {
+ "PROFILE_ABSENT",
+ "PROFILE_GUESSED",
+ "PROFILE_READ",
+ "PROFILE_LAST" /* Last value, used by profile streaming. */
+ };
+
+ return profile_status[profile_status_for_fn (cfun)];
+}
+
+/* Return the count from the feedback data, such as PGO or AFDO. */
+
+inline static gcov_type
+get_fdo_count (profile_count count)
+{
+ return count.quality () >= GUESSED
+ ? count.to_gcov_type () : 0;
+}
+
+/* Return the profile quality string. */
+
+static const char *
+get_fdo_count_quality (profile_count count)
+{
+ const char *profile_quality[] = {
+ "UNINITIALIZED_PROFILE",
+ "GUESSED_LOCAL",
+ "GUESSED_GLOBAL0",
+ "GUESSED_GLOBAL0_ADJUSTED",
+ "GUESSED",
+ "AFDO",
+ "ADJUSTED",
+ "PRECISE"
+ };
+
+ return profile_quality[count.quality ()];
+}
+
+static const char *
+alias_local_functions (const char *fnname)
+{
+ if (TREE_PUBLIC (cfun->decl))
+ {
+ return fnname;
+ }
+
+ return concat (fnname, "/", lbasename (dump_base_name), NULL);
+}
+
+/* Return function bind type string. */
+
+static const char *
+simple_get_function_bind ()
+{
+ const char *function_bind[] = {
+ "GLOBAL",
+ "WEAK",
+ "LOCAL",
+ "UNKNOWN"
+ };
+
+ if (TREE_PUBLIC (cfun->decl))
+ {
+ if (!(DECL_WEAK (cfun->decl)))
+ {
+ return function_bind[0];
+ }
+ else
+ {
+ return function_bind[1];
+ }
+ }
+ else
+ {
+ return function_bind[2];
+ }
+
+ return function_bind[3];
+}
+
+/* Dump the callee functions insn in bb by CALL_P (insn). */
+
+static void
+dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
+{
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (insn && CALL_P (insn))
+ {
+ tree callee = get_call_fndecl (insn);
+
+ if (callee)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n",
+ INSN_ADDRESSES (INSN_UID (insn)));
+
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLEE_FLAG,
+ alias_local_functions (get_fnname_from_decl (callee)));
+
+ fprintf (asm_out_file,
+ "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ call_count);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "call: %x --> %s\n",
+ INSN_ADDRESSES (INSN_UID (insn)),
+ alias_local_functions
+ (get_fnname_from_decl (callee)));
+ }
+ }
+ }
+ }
+}
+
+/* Dump the edge info into asm. */
+
+static void
+dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+ edge e;
+ edge_iterator ei;
+ gcov_type edge_total_count = 0;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ gcov_type edge_count = get_fdo_count (e->count ());
+ edge_total_count += edge_count;
+
+ int edge_start_addr = get_bb_end_addr (e->src);
+ int edge_end_addr = get_bb_start_addr (e->dest);
+
+ if (edge_start_addr == -1 || edge_end_addr == -1)
+ {
+ continue;
+ }
+
+ /* This is a reserved assert for the original design. If this
+ assert is found, use the address of the previous instruction
+ as edge_start_addr. */
+ gcc_assert (edge_start_addr != edge_end_addr);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "edge: %x --> %x = (%ld)\n",
+ edge_start_addr, edge_end_addr, edge_count);
+ }
+
+ if (edge_count > 0)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ edge_count);
+ }
+ }
+
+ gcov_type call_count = MAX (edge_total_count, bb_count);
+ if (call_count > 0)
+ {
+ dump_direct_callee_info_to_asm (bb, call_count);
+ }
+}
+
+/* Dump the bb info into asm. */
+
+static void
+dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+ int bb_start_addr = get_bb_start_addr (bb);
+ if (bb_start_addr != -1)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n", bb_start_addr);
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ bb_count);
+ }
+}
+
+/* Dump the function info into asm. */
+
+static void
+dump_function_info_to_asm (const char *fnname)
+{
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
+ fprintf (asm_out_file, "\t.string \"%s%d\"\n",
+ ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLER_BIND_FLAG, simple_get_function_bind ());
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n FUNC_NAME: %s\n",
+ alias_local_functions (fnname));
+ fprintf (dump_file, " file: %s\n",
+ dump_base_name);
+ fprintf (dump_file, " profile_status: %s\n",
+ get_function_profile_status ());
+ fprintf (dump_file, " size: %x\n",
+ get_function_end_addr ());
+ fprintf (dump_file, " function_bind: %s\n",
+ simple_get_function_bind ());
+ }
+}
+
+/* Dump function profile info form AutoFDO or PGO to asm. */
+
+static void
+dump_fdo_info_to_asm (const char *fnname)
+{
+ basic_block bb;
+
+ dump_function_info_to_asm (fnname);
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ gcov_type bb_count = get_fdo_count (bb->count);
+ if (bb_count == 0)
+ {
+ continue;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "BB: %x --> %x = (%ld) [%s]\n",
+ get_bb_start_addr (bb), get_bb_end_addr (bb),
+ bb_count, get_fdo_count_quality (bb->count));
+ }
+
+ if (flag_profile_use)
+ {
+ dump_edge_jump_info_to_asm (bb, bb_count);
+ }
+ else if (flag_auto_profile)
+ {
+ dump_bb_info_to_asm (bb, bb_count);
+ }
+ }
+}
+
+/* When -fauto-bolt option is turned on, the .text.fdo. section
+ will be generated in the *.s file if there is feedback information
+   from PGO or AutoFDO. This section will be parsed by the BOLT-plugin.  */
+
+static void
+dump_profile_to_elf_sections ()
+{
+ if (!flag_function_sections)
+ {
+ error ("-fauto-bolt should work with -ffunction-sections");
+ return;
+ }
+ if (!flag_ipa_ra)
+ {
+ error ("-fauto-bolt should work with -fipa-ra");
+ return;
+ }
+ if (flag_align_jumps)
+ {
+ error ("-fauto-bolt is not supported with -falign-jumps");
+ return;
+ }
+ if (flag_align_labels)
+ {
+ error ("-fauto-bolt is not supported with -falign-labels");
+ return;
+ }
+ if (flag_align_loops)
+ {
+ error ("-fauto-bolt is not supported with -falign-loops");
+ return;
+ }
+
+ /* Return if no feedback data. */
+ if (!flag_profile_use && !flag_auto_profile)
+ {
+ error ("-fauto-bolt should use with -fprofile-use or -fauto-profile");
+ return;
+ }
+
+ /* Avoid empty functions. */
+ if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
+ {
+ return;
+ }
+ int flags = SECTION_DEBUG | SECTION_EXCLUDE;
+ const char *fnname = get_fnname_from_decl (current_function_decl);
+ char *profile_fnname = NULL;
+
+ asprintf (&profile_fnname,"%s%s", ASM_FDO_SECTION_PREFIX, fnname);
+ switch_to_section (get_section (profile_fnname, flags , NULL));
+ dump_fdo_info_to_asm (fnname);
+
+ if (profile_fnname)
+ {
+ free (profile_fnname);
+ profile_fnname = NULL;
+ }
+}
+
/* Turn the RTL into assembly. */
static unsigned int
rest_of_handle_final (void)
@@ -4707,6 +5101,12 @@ rest_of_handle_final (void)
targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0),
decl_fini_priority_lookup
(current_function_decl));
+
+ if (flag_auto_bolt)
+ {
+ dump_profile_to_elf_sections ();
+ }
+
return 0;
}
diff --git a/gcc/opts.c b/gcc/opts.c
index f49f5ee58..0b389ae1d 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1166,6 +1166,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
if (opts->x_flag_vtable_verify && opts->x_flag_lto)
sorry ("vtable verification is not supported with LTO");
+ /* Currently -fauto-bolt is not supported for LTO. */
+ if (opts->x_flag_auto_bolt && opts->x_flag_lto)
+ sorry ("%<-fauto-bolt%> is not supported with LTO");
+
/* Control IPA optimizations based on different -flive-patching level. */
if (opts->x_flag_live_patching)
control_options_for_live_patching (opts, opts_set,
@@ -1183,6 +1187,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
= (opts->x_flag_unroll_loops
|| opts->x_flag_peel_loops
|| opts->x_optimize >= 3);
+
+ if (opts->x_flag_auto_bolt)
+ {
+ /* Record the function section to facilitate the feedback
+ data storage. */
+ if (!opts->x_flag_function_sections)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should work with %<-ffunction-sections%>,"
+ " enabling %<-ffunction-sections%>");
+ opts->x_flag_function_sections = true;
+ }
+
+ /* Cancel the internal alignment of the function. The binary
+ optimizer bolt will cancel the internal alignment optimization
+ of the function, so the alignment is meaningless at this time,
+ and if not, it will bring trouble to the calculation of the
+ offset address of the instruction. */
+ if (opts->x_flag_align_jumps)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-jumps%>,"
+ " disabling %<-falign-jumps%>");
+ opts->x_flag_align_jumps = false;
+ }
+
+ if (opts->x_flag_align_labels)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-labels%>,"
+ " disabling %<-falign-labels%>");
+ opts->x_flag_align_labels = false;
+ }
+
+ if (opts->x_flag_align_loops)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-loops%>,"
+ " disabling %<-falign-loops%>");
+ opts->x_flag_align_loops = false;
+ }
+
+ /* When parsing instructions in RTL phase, we need to know
+ the call information of instructions to avoid being optimized. */
+ if (!opts->x_flag_ipa_ra)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should work with %<-fipa-ra%>,"
+ " enabling %<-fipa-ra%>");
+ opts->x_flag_ipa_ra = true;
+ }
+ }
}
#define LEFT_COLUMN 27
@@ -2881,6 +2937,11 @@ common_handle_option (struct gcc_options *opts,
check_alignment_argument (loc, arg, "functions");
break;
+ case OPT_fauto_bolt_:
+ case OPT_fauto_bolt:
+ /* Deferred. */
+ break;
+
default:
/* If the flag was handled in a standard way, assume the lack of
processing here is intentional. */
--
2.27.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,82 +0,0 @@
From 071d19832d788422034a3b052ff7ce91e1010344 Mon Sep 17 00:00:00 2001
From: dingguangya <dingguangya1@huawei.com>
Date: Mon, 28 Feb 2022 16:52:58 +0800
Subject: [PATCH 32/32] [Autoprefetch] Prune invalid loops containing edges whose
probability exceeds 1
Skip auto prefetch analysis if the loop contains the bb in which the sum
of its outgoing edge probabilities is greater than 1.
---
gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp | 2 +-
.../gcc.dg/autoprefetch/branch-weighted-prefetch.c | 8 ++++----
gcc/tree-ssa-loop-prefetch.c | 12 ++++++++++++
3 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
index a7408e338..7cae630a2 100644
--- a/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
+++ b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
@@ -20,7 +20,7 @@ load_lib target-supports.exp
# Initialize `dg'.
dg-init
-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
"" "-fprefetch-loop-arrays"
# All done.
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
index c63c5e5cb..ab537cb29 100644
--- a/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
+++ b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -fprefetch-loop-arrays=2 --param min-insn-to-prefetch-ratio=5 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O2 -fprefetch-loop-arrays=2 --param min-insn-to-prefetch-ratio=5 --param simultaneous-prefetches=100 --param l1-cache-size=64 --param l1-cache-line-size=32 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
#define N 10000000
long long a[N];
@@ -18,5 +18,5 @@ long long func ()
return sum;
}
-/* { dg-final { scan-tree-dump-times "Ahead 40" 1 "aprefetch" } } */
-/* { dg-final { scan-tree-dump-times "builtin_prefetch" 1 "optimized" } } */
\ No newline at end of file
+/* { dg-final { scan-tree-dump "Calculating prefetch distance using bb branch weighting method" "aprefetch" } } */
+/* { dg-final { scan-tree-dump "builtin_prefetch" "optimized" } } */
\ No newline at end of file
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 673f453a4..0d992d8f6 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -2267,6 +2267,15 @@ traverse_prune_bb_branch (hash_map <basic_block, bb_bp> &bb_branch_prob,
&& bb_bp_node->false_edge_bb == NULL))
return false;
+ /* Do not process the loop with a bb branch probability of an abnormal
+ value. */
+ if (bb_bp_node->true_edge_prob + bb_bp_node->false_edge_prob > 1)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "bb branch probability is abnormal\n");
+ return false;
+ }
+
if (current_bb == latch_bb)
{
max_path--;
@@ -2409,6 +2418,9 @@ estimate_num_loop_insns (struct loop *loop, eni_weights *weights)
dump_loop_bb (loop);
return 0;
}
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Calculating prefetch distance using bb branch "
+ "weighting method\n");
}
for (unsigned i = 0; i < loop->num_nodes; i++)
--
2.27.0

View File

@ -1,130 +0,0 @@
From adfcca263996bf174f7108b477e81e7ec58f19c4 Mon Sep 17 00:00:00 2001
From: dingguangya <dingguangya1@huawei.com>
Date: Mon, 14 Mar 2022 10:42:07 +0800
Subject: [PATCH] [AutoFdo] Fix memory leaks in autofdo and autoprefetch
Fix memory leaks in autofdo and autoprefetch.
---
gcc/final.c | 23 +++++++++++++++--------
gcc/tree-ssa-loop-prefetch.c | 4 ++++
2 files changed, 19 insertions(+), 8 deletions(-)
diff --git a/gcc/final.c b/gcc/final.c
index b9affd3a7..da8d20958 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -4770,12 +4770,16 @@ get_fdo_count_quality (profile_count count)
return profile_quality[count.quality ()];
}
-static const char *
+/* If the function is not public, return the function_name/file_name for
+ disambiguation of local symbols since there could be identical function
+ names coming from identical file names. The caller needs to free memory. */
+
+static char *
alias_local_functions (const char *fnname)
{
if (TREE_PUBLIC (cfun->decl))
{
- return fnname;
+ return concat (fnname, NULL);
}
return concat (fnname, "/", lbasename (dump_base_name), NULL);
@@ -4826,12 +4830,14 @@ dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
if (callee)
{
+ char *func_name =
+ alias_local_functions (get_fnname_from_decl (callee));
fprintf (asm_out_file, "\t.string \"%x\"\n",
INSN_ADDRESSES (INSN_UID (insn)));
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
ASM_FDO_CALLEE_FLAG,
- alias_local_functions (get_fnname_from_decl (callee)));
+ func_name);
fprintf (asm_out_file,
"\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
@@ -4841,9 +4847,9 @@ dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
{
fprintf (dump_file, "call: %x --> %s\n",
INSN_ADDRESSES (INSN_UID (insn)),
- alias_local_functions
- (get_fnname_from_decl (callee)));
+ func_name);
}
+ free (func_name);
}
}
}
@@ -4917,8 +4923,9 @@ dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
static void
dump_function_info_to_asm (const char *fnname)
{
+ char *func_name = alias_local_functions (fnname);
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
- ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
+ ASM_FDO_CALLER_FLAG, func_name);
fprintf (asm_out_file, "\t.string \"%s%d\"\n",
ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
@@ -4926,8 +4933,7 @@ dump_function_info_to_asm (const char *fnname)
if (dump_file)
{
- fprintf (dump_file, "\n FUNC_NAME: %s\n",
- alias_local_functions (fnname));
+ fprintf (dump_file, "\n FUNC_NAME: %s\n", func_name);
fprintf (dump_file, " file: %s\n",
dump_base_name);
fprintf (dump_file, " profile_status: %s\n",
@@ -4937,6 +4943,7 @@ dump_function_info_to_asm (const char *fnname)
fprintf (dump_file, " function_bind: %s\n",
simple_get_function_bind ());
}
+ free (func_name);
}
/* Dump function profile info form AutoFDO or PGO to asm. */
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 0d992d8f6..781831c39 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -2248,6 +2248,7 @@ get_bb_branch_prob (hash_map <basic_block, bb_bp> &bb_branch_prob,
branch_prob.true_edge_prob = get_edge_prob (e);
}
}
+ free (body);
}
/* Traverse each bb in the loop and prune fake loops. */
@@ -2416,6 +2417,7 @@ estimate_num_loop_insns (struct loop *loop, eni_weights *weights)
if (get_bb_prob (bb_branch_prob, loop) == false)
{
dump_loop_bb (loop);
+ free (body);
return 0;
}
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -2596,6 +2598,7 @@ is_high_exec_rate_loop (struct loop *loop)
if (loop_exec_rate < (float) LOOP_EXECUTION_RATE / 100.0)
{
+ exit_edges.release ();
return false;
}
}
@@ -2606,6 +2609,7 @@ is_high_exec_rate_loop (struct loop *loop)
loop_exec_rate, (float) LOOP_EXECUTION_RATE / 100.0);
dump_loop_bb (loop);
}
+ exit_edges.release ();
return true;
}
--
2.27.0.windows.1

View File

@ -1,72 +0,0 @@
From 2969f5190561e26a8ce42d5dcda43ef59e0b6d32 Mon Sep 17 00:00:00 2001
From: liyancheng <412998149@qq.com>
Date: Tue, 26 Apr 2022 19:59:09 +0800
Subject: [PATCH] [Backport] sanitizer: Fix asan against glibc 2.34 [PR100114]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d9f462fb372fb02da032cefd6b091d7582c425ae
sanitizer: Fix asan against glibc 2.34 [PR100114]
As mentioned in the PR, SIGSTKSZ is no longer a compile time constant in
glibc 2.34 and later, so
static const uptr kAltStackSize = SIGSTKSZ * 4;
needs dynamic initialization, but is used by a function called indirectly
from .preinit_array and therefore before the variable is constructed.
This results in using 0 size instead and all asan instrumented programs
die with:
==91==ERROR: AddressSanitizer failed to allocate 0x0 (0) bytes of SetAlternateSignalStack (error code: 22)
Here is a cherry-pick from upstream to fix this.
2021-04-17 Jakub Jelinek <jakub@redhat.com>
PR sanitizer/100114
* sanitizer_common/sanitizer_posix_libcdep.cpp: Cherry-pick
llvm-project revisions 82150606fb11d28813ae6da1101f5bda638165fe
and b93629dd335ffee2fc4b9b619bf86c3f9e6b0023.
---
.../sanitizer_common/sanitizer_posix_libcdep.cpp | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/libsanitizer/sanitizer_common/sanitizer_posix_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_posix_libcdep.cpp
index 304b3a01a..ac88fbe07 100644
--- a/libsanitizer/sanitizer_common/sanitizer_posix_libcdep.cpp
+++ b/libsanitizer/sanitizer_common/sanitizer_posix_libcdep.cpp
@@ -169,7 +169,11 @@ bool SupportsColoredOutput(fd_t fd) {
#if !SANITIZER_GO
// TODO(glider): different tools may require different altstack size.
-static const uptr kAltStackSize = SIGSTKSZ * 4; // SIGSTKSZ is not enough.
+static uptr GetAltStackSize() {
+ // SIGSTKSZ is not enough.
+ static const uptr kAltStackSize = SIGSTKSZ * 4;
+ return kAltStackSize;
+}
void SetAlternateSignalStack() {
stack_t altstack, oldstack;
@@ -180,10 +184,9 @@ void SetAlternateSignalStack() {
// TODO(glider): the mapped stack should have the MAP_STACK flag in the
// future. It is not required by man 2 sigaltstack now (they're using
// malloc()).
- void* base = MmapOrDie(kAltStackSize, __func__);
- altstack.ss_sp = (char*) base;
+ altstack.ss_size = GetAltStackSize();
+ altstack.ss_sp = (char *)MmapOrDie(altstack.ss_size, __func__);
altstack.ss_flags = 0;
- altstack.ss_size = kAltStackSize;
CHECK_EQ(0, sigaltstack(&altstack, nullptr));
}
@@ -191,7 +194,7 @@ void UnsetAlternateSignalStack() {
stack_t altstack, oldstack;
altstack.ss_sp = nullptr;
altstack.ss_flags = SS_DISABLE;
- altstack.ss_size = kAltStackSize; // Some sane value required on Darwin.
+ altstack.ss_size = GetAltStackSize(); // Some sane value required on Darwin.
CHECK_EQ(0, sigaltstack(&altstack, &oldstack));
UnmapOrDie(oldstack.ss_sp, oldstack.ss_size);
}
--
2.25.1

View File

@ -1,342 +0,0 @@
From cf0f086ec274d794a2a180047123920bf8a5224b Mon Sep 17 00:00:00 2001
From: dingguangya <dingguangya1@huawei.com>
Date: Mon, 17 Jan 2022 21:03:47 +0800
Subject: [PATCH 01/12] [ccmp] Add another optimization opportunity for ccmp
instruction
Add flag -fccmp2.
Enables the use of the ccmp instruction by creating a new conflict
relationship for instances where temporary expressions replacement
cannot be effectively created.
---
gcc/ccmp.c | 33 ++++
gcc/ccmp.h | 1 +
gcc/common.opt | 4 +
gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 15 ++
gcc/tree-ssa-coalesce.c | 197 ++++++++++++++++++++++
5 files changed, 250 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
diff --git a/gcc/ccmp.c b/gcc/ccmp.c
index ca77375a9..8d2d73e52 100644
--- a/gcc/ccmp.c
+++ b/gcc/ccmp.c
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see
#include "cfgexpand.h"
#include "ccmp.h"
#include "predict.h"
+#include "gimple-iterator.h"
/* Check whether T is a simple boolean variable or a SSA name
set by a comparison operator in the same basic block. */
@@ -129,6 +130,38 @@ ccmp_candidate_p (gimple *g)
return false;
}
+/* Check whether bb is a potential conditional compare candidate. */
+bool
+check_ccmp_candidate (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+ gimple *bb_last_stmt, *stmt;
+ tree op0, op1;
+
+ gsi = gsi_last_bb (bb);
+ bb_last_stmt = gsi_stmt (gsi);
+
+ if (bb_last_stmt && gimple_code (bb_last_stmt) == GIMPLE_COND)
+ {
+ op0 = gimple_cond_lhs (bb_last_stmt);
+ op1 = gimple_cond_rhs (bb_last_stmt);
+
+ if (TREE_CODE (op0) == SSA_NAME
+ && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
+ && TREE_CODE (op1) == INTEGER_CST
+ && ((gimple_cond_code (bb_last_stmt) == NE_EXPR)
+ || (gimple_cond_code (bb_last_stmt) == EQ_EXPR)))
+ {
+ stmt = SSA_NAME_DEF_STMT (op0);
+ if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN)
+ {
+ return ccmp_candidate_p (stmt);
+ }
+ }
+ }
+ return false;
+}
+
/* Extract the comparison we want to do from the tree. */
void
get_compare_parts (tree t, int *up, rtx_code *rcode,
diff --git a/gcc/ccmp.h b/gcc/ccmp.h
index 199dd581d..ac862f0f6 100644
--- a/gcc/ccmp.h
+++ b/gcc/ccmp.h
@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3. If not see
#define GCC_CCMP_H
extern rtx expand_ccmp_expr (gimple *, machine_mode);
+extern bool check_ccmp_candidate (basic_block bb);
#endif /* GCC_CCMP_H */
diff --git a/gcc/common.opt b/gcc/common.opt
index 24834cf60..4dd566def 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1942,6 +1942,10 @@ fira-verbose=
Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5)
-fira-verbose=<number> Control IRA's level of diagnostic messages.
+fccmp2
+Common Report Var(flag_ccmp2) Init(0) Optimization
+Optimize potential ccmp instruction in complex scenarios.
+
fivopts
Common Report Var(flag_ivopts) Init(1) Optimization
Optimize induction variables on trees.
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
new file mode 100644
index 000000000..b509ba810
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O -fdump-rtl-expand-details -fccmp2" } */
+
+int func (int a, int b, int c)
+{
+ while(1)
+ {
+ if(a-- == 0 || b >= c)
+ {
+ return 1;
+ }
+ }
+}
+
+/* { dg-final { scan-assembler-times "\tccmp\t" 1} } */
diff --git a/gcc/tree-ssa-coalesce.c b/gcc/tree-ssa-coalesce.c
index 0b0b1b18d..e0120a4a4 100644
--- a/gcc/tree-ssa-coalesce.c
+++ b/gcc/tree-ssa-coalesce.c
@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3. If not see
#include "explow.h"
#include "tree-dfa.h"
#include "stor-layout.h"
+#include "ccmp.h"
+#include "target.h"
+#include "tree-outof-ssa.h"
/* This set of routines implements a coalesce_list. This is an object which
is used to track pairs of ssa_names which are desirable to coalesce
@@ -854,6 +857,198 @@ live_track_clear_base_vars (live_track *ptr)
bitmap_clear (&ptr->live_base_var);
}
+/* Return true if gimple is a copy assignment. */
+
+static inline bool
+gimple_is_assign_copy_p (gimple *gs)
+{
+ return (is_gimple_assign (gs) && gimple_assign_copy_p (gs)
+ && TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME
+ && TREE_CODE (gimple_assign_rhs1 (gs)) == SSA_NAME);
+}
+
+#define MAX_CCMP_CONFLICT_NUM 5
+
+/* Clear high-cost conflict graphs. */
+
+static void
+remove_high_cost_graph_for_ccmp (ssa_conflicts *conflict_graph)
+{
+ unsigned x = 0;
+ int add_conflict_num = 0;
+ bitmap b;
+ FOR_EACH_VEC_ELT (conflict_graph->conflicts, x, b)
+ {
+ if (b)
+ {
+ add_conflict_num++;
+ }
+ }
+ if (add_conflict_num >= MAX_CCMP_CONFLICT_NUM)
+ {
+ conflict_graph->conflicts.release ();
+ }
+}
+
+/* Adding a new conflict graph to the original graph. */
+
+static void
+process_add_graph (live_track *live, basic_block bb,
+ ssa_conflicts *conflict_graph)
+{
+ tree use, def;
+ ssa_op_iter iter;
+ gimple *first_visit_stmt = NULL;
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ if (gimple_visited_p (gsi_stmt (gsi)))
+ {
+ first_visit_stmt = gsi_stmt (gsi);
+ break;
+ }
+ }
+ if (!first_visit_stmt)
+ return;
+
+ for (gimple_stmt_iterator gsi = gsi_last_bb (bb);
+ gsi_stmt (gsi) != first_visit_stmt; gsi_prev (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (gimple_visited_p (gsi_stmt (gsi)) && is_gimple_debug (stmt))
+ {
+ continue;
+ }
+ if (gimple_is_assign_copy_p (stmt))
+ {
+ live_track_clear_var (live, gimple_assign_rhs1 (stmt));
+ }
+ FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
+ {
+ live_track_process_def (live, def, conflict_graph);
+ }
+ FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
+ {
+ live_track_process_use (live, use);
+ }
+ }
+}
+
+/* Build a conflict graph based on ccmp candidate. */
+
+static void
+add_ccmp_conflict_graph (ssa_conflicts *conflict_graph,
+ tree_live_info_p liveinfo, var_map map, basic_block bb)
+{
+ live_track *live;
+ tree use, def;
+ ssa_op_iter iter;
+ live = new_live_track (map);
+ live_track_init (live, live_on_exit (liveinfo, bb));
+
+ gimple *last_stmt = gsi_stmt (gsi_last_bb (bb));
+ gcc_assert (gimple_cond_lhs (last_stmt));
+
+ auto_vec<tree> stack;
+ stack.safe_push (gimple_cond_lhs (last_stmt));
+ while (!stack.is_empty ())
+ {
+ tree op = stack.pop ();
+ gimple *op_stmt = SSA_NAME_DEF_STMT (op);
+ if (!op_stmt || gimple_bb (op_stmt) != bb
+ || !is_gimple_assign (op_stmt)
+ || !ssa_is_replaceable_p (op_stmt))
+ {
+ continue;
+ }
+ if (gimple_is_assign_copy_p (op_stmt))
+ {
+ live_track_clear_var (live, gimple_assign_rhs1 (op_stmt));
+ }
+ gimple_set_visited (op_stmt, true);
+ FOR_EACH_SSA_TREE_OPERAND (def, op_stmt, iter, SSA_OP_DEF)
+ {
+ live_track_process_def (live, def, conflict_graph);
+ }
+ FOR_EACH_SSA_TREE_OPERAND (use, op_stmt, iter, SSA_OP_USE)
+ {
+ stack.safe_push (use);
+ live_track_process_use (live, use);
+ }
+ }
+
+ process_add_graph (live, bb, conflict_graph);
+ delete_live_track (live);
+ remove_high_cost_graph_for_ccmp (conflict_graph);
+}
+
+/* Determine whether the ccmp conflict graph can be added.
+ i.e,
+
+ ;; basic block 3, loop depth 1
+ ;; pred: 2
+ ;; 3
+ # ivtmp.5_10 = PHI <ivtmp.5_12 (2), ivtmp.5_11 (3)>
+ _7 = b_4 (D) >= c_5 (D);
+ _8 = ivtmp.5_10 == 0;
+ _9 = _7 | _8;
+ ivtmp.5_11 = ivtmp.5_10 - 1;
+ if (_9 != 0)
+ goto <bb 4>; [10.70%]
+ else
+ goto <bb 3>; [89.30%]
+
+ In the above loop, the expression will be replaced:
+
+ _7 replaced by b_4 (D) >= c_5 (D)
+ _8 replaced by ivtmp.5_10 == 0
+
+ If the current case want use the ccmp instruction, then
+
+ _9 can replaced by _7 | _8
+
+ So this requires that ivtmp.5_11 and ivtmp.5_10 be divided into different
+ partitions.
+
+ Now this function can achieve this ability. */
+
+static void
+determine_add_ccmp_conflict_graph (basic_block bb, tree_live_info_p liveinfo,
+ var_map map, ssa_conflicts *graph)
+{
+ if (!flag_ccmp2 || !targetm.gen_ccmp_first || !check_ccmp_candidate (bb))
+ return;
+ for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
+ gsi_next (&bsi))
+ {
+ gimple_set_visited (gsi_stmt (bsi), false);
+ }
+ ssa_conflicts *ccmp_conflict_graph;
+ ccmp_conflict_graph = ssa_conflicts_new (num_var_partitions (map));
+ add_ccmp_conflict_graph (ccmp_conflict_graph, liveinfo, map, bb);
+ unsigned x;
+ bitmap b;
+ if (ccmp_conflict_graph)
+ {
+ FOR_EACH_VEC_ELT (ccmp_conflict_graph->conflicts, x, b)
+ {
+ if (!b)
+ continue;
+ unsigned y = bitmap_first_set_bit (b);
+ if (!graph->conflicts[x] || !bitmap_bit_p (graph->conflicts[x], y))
+ {
+ ssa_conflicts_add (graph, x, y);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "potential ccmp: add additional "
+ "conflict-ssa : bb[%d] %d:%d\n",
+ bb->index, x, y);
+ }
+ }
+ }
+ }
+ ssa_conflicts_delete (ccmp_conflict_graph);
+}
/* Build a conflict graph based on LIVEINFO. Any partitions which are in the
partition view of the var_map liveinfo is based on get entries in the
@@ -938,6 +1133,8 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo)
live_track_process_use (live, var);
}
+ determine_add_ccmp_conflict_graph (bb, liveinfo, map, graph);
+
/* If result of a PHI is unused, looping over the statements will not
record any conflicts since the def was never live. Since the PHI node
is going to be translated out of SSA form, it will insert a copy.
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -1,83 +0,0 @@
From 897d637aec3b077eb9ef95b2f4a5f7656e36ebd6 Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Wed, 15 Jun 2022 11:33:03 +0800
Subject: [PATCH 03/12] [Backport] loop-invariant: Don't move cold bb
instructions to preheader in RTL
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dc1969dab392661cdac1170bbb8c9f83f388580d
When inner loop is unlikely to execute, loop invariant motion would move
cold instructions to a hotter loop. This patch adds profile count checking
to fix the problem.
---
gcc/loop-invariant.c | 17 ++++++++++++++---
gcc/testsuite/gcc.dg/loop-invariant-2.c | 20 ++++++++++++++++++++
2 files changed, 34 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/loop-invariant-2.c
diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
index 37ae6549e..24b9bcb11 100644
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@@ -1184,9 +1184,21 @@ find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
call. */
static void
-find_invariants_bb (basic_block bb, bool always_reached, bool always_executed)
+find_invariants_bb (class loop *loop, basic_block bb, bool always_reached,
+ bool always_executed)
{
rtx_insn *insn;
+ basic_block preheader = loop_preheader_edge (loop)->src;
+
+ /* Don't move insn of cold BB out of loop to preheader to reduce calculations
+ and register live range in hot loop with cold BB. */
+ if (!always_executed && preheader->count > bb->count)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Don't move invariant from bb: %d out of loop %d\n",
+ bb->index, loop->num);
+ return;
+ }
FOR_BB_INSNS (bb, insn)
{
@@ -1215,8 +1227,7 @@ find_invariants_body (class loop *loop, basic_block *body,
unsigned i;
for (i = 0; i < loop->num_nodes; i++)
- find_invariants_bb (body[i],
- bitmap_bit_p (always_reached, i),
+ find_invariants_bb (loop, body[i], bitmap_bit_p (always_reached, i),
bitmap_bit_p (always_executed, i));
}
diff --git a/gcc/testsuite/gcc.dg/loop-invariant-2.c b/gcc/testsuite/gcc.dg/loop-invariant-2.c
new file mode 100644
index 000000000..df3d84585
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-invariant-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_invariant" } */
+
+volatile int x;
+void
+bar (int, char *, char *);
+void
+foo (int *a, int n, int k)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ {
+ if (__builtin_expect (x, 0))
+ bar (k / 5, "one", "two");
+ a[i] = k;
+ }
+}
+
+/* { dg-final { scan-rtl-dump "Don't move invariant from bb: .*out of loop" "loop2_invariant" } } */
--
2.27.0.windows.1

View File

@ -1,902 +0,0 @@
From edd4200e2b3e94d5c124900657b91c22dfe9c557 Mon Sep 17 00:00:00 2001
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
Date: Wed, 15 Jun 2022 16:00:25 +0800
Subject: [PATCH 04/12] [DFE] Add Dead Field Elimination in Struct-Reorg.
We can transform GIMPLE to eliminate fields that are never read
and remove their redundant stmts.
Also we adapted the partial escape_cast_another_ptr for struct relayout.
Add flag -fipa-struct-reorg=3 to enable dead field elimination.
---
gcc/common.opt | 4 +-
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 209 ++++++++++++++++--
gcc/ipa-struct-reorg/ipa-struct-reorg.h | 9 +-
gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c | 86 +++++++
.../gcc.dg/struct/dfe_ele_minus_verify.c | 60 +++++
.../gcc.dg/struct/dfe_mem_ref_offset.c | 58 +++++
.../struct/dfe_mul_layer_ptr_record_bug.c | 30 +++
gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c | 71 ++++++
.../gcc.dg/struct/dfe_ptr_negate_expr.c | 55 +++++
gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c | 55 +++++
gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 21 +-
11 files changed, 639 insertions(+), 19 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
diff --git a/gcc/common.opt b/gcc/common.opt
index 7fc075d35..b5ea3c7a1 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1884,8 +1884,8 @@ Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization
Perform structure layout optimizations.
fipa-struct-reorg=
-Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 2)
--fipa-struct-reorg=[0,1,2] adding none, struct-reorg, reorder-fields optimizations.
+Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 3)
+-fipa-struct-reorg=[0,1,2,3] adding none, struct-reorg, reorder-fields, dfe optimizations.
fipa-extend-auto-profile
Common Report Var(flag_ipa_extend_auto_profile)
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index 9214ee74a..2fa560239 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "gimple-iterator.h"
+#include "gimple-walk.h"
#include "cfg.h"
#include "ssa.h"
#include "tree-dfa.h"
@@ -238,11 +239,44 @@ enum srmode
STRUCT_LAYOUT_OPTIMIZE
};
+/* Enum the struct layout optimize level,
+ which should be the same as the option -fstruct-reorg=. */
+
+enum struct_layout_opt_level
+{
+ NONE = 0,
+ STRUCT_REORG,
+ STRUCT_REORDER_FIELDS,
+ DEAD_FIELD_ELIMINATION
+};
+
static bool is_result_of_mult (tree arg, tree *num, tree struct_size);
bool isptrptr (tree type);
srmode current_mode;
+hash_map<tree, tree> replace_type_map;
+
+/* Return true if one of these types is created by struct-reorg. */
+
+static bool
+is_replace_type (tree type1, tree type2)
+{
+ if (replace_type_map.is_empty ())
+ return false;
+ if (type1 == NULL_TREE || type2 == NULL_TREE)
+ return false;
+ tree *type_value = replace_type_map.get (type1);
+ if (type_value)
+ if (types_compatible_p (*type_value, type2))
+ return true;
+ type_value = replace_type_map.get (type2);
+ if (type_value)
+ if (types_compatible_p (*type_value, type1))
+ return true;
+ return false;
+}
+
} // anon namespace
namespace struct_reorg {
@@ -318,12 +352,13 @@ srfunction::simple_dump (FILE *file)
/* Constructor of FIELD. */
srfield::srfield (tree field, srtype *base)
- : offset(int_byte_position (field)),
+ : offset (int_byte_position (field)),
fieldtype (TREE_TYPE (field)),
fielddecl (field),
- base(base),
- type(NULL),
- clusternum(0)
+ base (base),
+ type (NULL),
+ clusternum (0),
+ field_access (EMPTY_FIELD)
{
for(int i = 0;i < max_split; i++)
newfield[i] = NULL_TREE;
@@ -362,6 +397,25 @@ srtype::srtype (tree type)
}
}
+/* Check it if all fields in the RECORD_TYPE are referenced. */
+
+bool
+srtype::has_dead_field (void)
+{
+ bool may_dfe = false;
+ srfield *this_field;
+ unsigned i;
+ FOR_EACH_VEC_ELT (fields, i, this_field)
+ {
+ if (!(this_field->field_access & READ_FIELD))
+ {
+ may_dfe = true;
+ break;
+ }
+ }
+ return may_dfe;
+}
+
/* Mark the type as escaping type E at statement STMT. */
void
@@ -833,6 +887,10 @@ srtype::create_new_type (void)
for (unsigned i = 0; i < fields.length (); i++)
{
srfield *f = fields[i];
+ if (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION
+ && !(f->field_access & READ_FIELD))
+ continue;
f->create_new_fields (newtype, newfields, newlast);
}
@@ -854,6 +912,16 @@ srtype::create_new_type (void)
warn_padded = save_warn_padded;
+ if (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ && replace_type_map.get (this->newtype[0]) == NULL)
+ replace_type_map.put (this->newtype[0], this->type);
+ if (dump_file)
+ {
+ if (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION
+ && has_dead_field ())
+ fprintf (dump_file, "Dead field elimination.\n");
+ }
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Created %d types:\n", maxclusters);
@@ -1128,12 +1196,12 @@ csrtype::init_type_info (void)
/* Close enough to pad to improve performance.
33~63 should pad to 64 but 33~48 (first half) are too far away, and
- 65~127 should pad to 128 but 65~96 (first half) are too far away. */
+ 65~127 should pad to 128 but 65~80 (first half) are too far away. */
if (old_size > 48 && old_size < 64)
{
new_size = 64;
}
- if (old_size > 96 && old_size < 128)
+ if (old_size > 80 && old_size < 128)
{
new_size = 128;
}
@@ -1272,6 +1340,7 @@ public:
bool has_rewritten_type (srfunction*);
void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt);
unsigned execute_struct_relayout (void);
+ bool remove_dead_field_stmt (tree lhs);
};
struct ipa_struct_relayout
@@ -3206,6 +3275,90 @@ ipa_struct_reorg::find_vars (gimple *stmt)
}
}
+/* Update field_access in srfield. */
+
+static void
+update_field_access (tree record, tree field, unsigned access, void *data)
+{
+ srtype *this_srtype = ((ipa_struct_reorg *)data)->find_type (record);
+ if (this_srtype == NULL)
+ return;
+ srfield *this_srfield = this_srtype->find_field (int_byte_position (field));
+ if (this_srfield == NULL)
+ return;
+
+ this_srfield->field_access |= access;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "record field access %d:", access);
+ print_generic_expr (dump_file, record);
+ fprintf (dump_file, " field:");
+ print_generic_expr (dump_file, field);
+ fprintf (dump_file, "\n");
+ }
+ return;
+}
+
+/* A callback for walk_stmt_load_store_ops to visit store. */
+
+static bool
+find_field_p_store (gimple *, tree node, tree op, void *data)
+{
+ if (TREE_CODE (op) != COMPONENT_REF)
+ return false;
+ tree node_type = TREE_TYPE (node);
+ if (!handled_type (node_type))
+ return false;
+
+ update_field_access (node_type, TREE_OPERAND (op, 1), WRITE_FIELD, data);
+
+ return false;
+}
+
+/* A callback for walk_stmt_load_store_ops to visit load. */
+
+static bool
+find_field_p_load (gimple *, tree node, tree op, void *data)
+{
+ if (TREE_CODE (op) != COMPONENT_REF)
+ return false;
+ tree node_type = TREE_TYPE (node);
+ if (!handled_type (node_type))
+ return false;
+
+ update_field_access (node_type, TREE_OPERAND (op, 1), READ_FIELD, data);
+
+ return false;
+}
+
+/* Determine whether the stmt should be deleted. */
+
+bool
+ipa_struct_reorg::remove_dead_field_stmt (tree lhs)
+{
+ tree base = NULL_TREE;
+ bool indirect = false;
+ srtype *t = NULL;
+ srfield *f = NULL;
+ bool realpart = false;
+ bool imagpart = false;
+ bool address = false;
+ bool escape_from_base = false;
+ if (!get_type_field (lhs, base, indirect, t, f, realpart, imagpart,
+ address, escape_from_base))
+ return false;
+ if (t ==NULL)
+ return false;
+ if (t->newtype[0] == t->type)
+ return false;
+ if (f == NULL)
+ return false;
+ if (f->newfield[0] == NULL
+ && (f->field_access & WRITE_FIELD))
+ return true;
+ return false;
+}
+
/* Maybe record access of statement for further analaysis. */
void
@@ -3227,6 +3380,13 @@ ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt)
default:
break;
}
+ if (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION)
+ {
+ /* Look for loads and stores. */
+ walk_stmt_load_store_ops (stmt, this, find_field_p_load,
+ find_field_p_store);
+ }
}
/* Calculate the multiplier. */
@@ -3543,8 +3703,11 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple
}
else if (type != d->type)
{
- type->mark_escape (escape_cast_another_ptr, stmt);
- d->type->mark_escape (escape_cast_another_ptr, stmt);
+ if (!is_replace_type (d->type->type, type->type))
+ {
+ type->mark_escape (escape_cast_another_ptr, stmt);
+ d->type->mark_escape (escape_cast_another_ptr, stmt);
+ }
}
/* x_1 = y.x_nodes; void *x;
Directly mark the structure pointer type assigned
@@ -4131,8 +4294,9 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl,
}
/* If we have a non void* or a decl (which is hard to track),
then mark the type as escaping. */
- if (!VOID_POINTER_P (TREE_TYPE (newdecl))
- || DECL_P (newdecl))
+ if (replace_type_map.get (type->type) == NULL
+ && (!VOID_POINTER_P (TREE_TYPE (newdecl))
+ || DECL_P (newdecl)))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -4142,7 +4306,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl,
print_generic_expr (dump_file, TREE_TYPE (newdecl));
fprintf (dump_file, "\n");
}
- type->mark_escape (escape_cast_another_ptr, stmt);
+ type->mark_escape (escape_cast_another_ptr, stmt);
return;
}
/* At this point there should only be unkown void* ssa names. */
@@ -4465,11 +4629,13 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec<
return;
}
+ if (!is_replace_type (t1->type, type->type))
+ {
+ if (t1)
+ t1->mark_escape (escape_cast_another_ptr, stmt);
- if (t1)
- t1->mark_escape (escape_cast_another_ptr, stmt);
-
- type->mark_escape (escape_cast_another_ptr, stmt);
+ type->mark_escape (escape_cast_another_ptr, stmt);
+ }
}
@@ -5722,6 +5888,19 @@ bool
ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
{
bool remove = false;
+
+ if (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION
+ && remove_dead_field_stmt (gimple_assign_lhs (stmt)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\n rewriting statement (remove): \n");
+ print_gimple_stmt (dump_file, stmt, 0);
+ }
+ return true;
+ }
+
if (gimple_clobber_p (stmt))
{
tree lhs = gimple_assign_lhs (stmt);
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h
index 54b0dc655..936c0fa6f 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h
@@ -142,6 +142,7 @@ public:
bool create_new_type (void);
void analyze (void);
+ bool has_dead_field (void);
void mark_escape (escape_type, gimple *stmt);
bool has_escaped (void)
{
@@ -163,6 +164,12 @@ public:
}
};
+/* Bitflags used for determining if a field
+ is never accessed, read or written. */
+const unsigned EMPTY_FIELD = 0x0u;
+const unsigned READ_FIELD = 0x01u;
+const unsigned WRITE_FIELD = 0x02u;
+
struct srfield
{
unsigned HOST_WIDE_INT offset;
@@ -174,7 +181,7 @@ struct srfield
unsigned clusternum;
tree newfield[max_split];
-
+ unsigned field_access; /* FIELD_DECL -> bitflag (use for dfe). */
// Constructors
srfield (tree field, srtype *base);
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
new file mode 100644
index 000000000..4261d2352
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
@@ -0,0 +1,86 @@
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct node node_t;
+typedef struct node *node_p;
+
+typedef struct arc arc_t;
+typedef struct arc *arc_p;
+
+typedef struct network
+{
+ arc_p arcs;
+ arc_p sorted_arcs;
+ int x;
+ node_p nodes;
+ node_p stop_nodes;
+} network_t;
+
+struct node
+{
+ int64_t potential;
+ int orientation;
+ node_p child;
+ node_p pred;
+ node_p sibling;
+ node_p sibling_prev;
+ arc_p basic_arc;
+ arc_p firstout;
+ arc_p firstin;
+ arc_p arc_tmp;
+ int64_t flow;
+ int64_t depth;
+ int number;
+ int time;
+};
+
+struct arc
+{
+ int id;
+ int64_t cost;
+ node_p tail;
+ node_p head;
+ short ident;
+ arc_p nextout;
+ arc_p nextin;
+ int64_t flow;
+ int64_t org_cost;
+ network_t* net_add;
+};
+
+
+const int MAX = 100;
+
+/* let it escape_array, "Type is used in an array [not handled yet]". */
+network_t* net[2];
+arc_p stop_arcs = NULL;
+
+int
+main ()
+{
+ net[0] = (network_t*) calloc (1, sizeof(network_t));
+ net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t));
+ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
+
+ net[0]->arcs->id = 100;
+
+ for (unsigned i = 0; i < 3; i++)
+ {
+ net[0]->arcs->id = net[0]->arcs->id + 2;
+ stop_arcs->cost = net[0]->arcs->id / 2;
+ stop_arcs->net_add = net[0];
+ printf("stop_arcs->cost = %ld\n", stop_arcs->cost);
+ net[0]->arcs++;
+ stop_arcs++;
+ }
+
+ if( net[1] != 0 && stop_arcs != 0)
+ {
+ return -1;
+ }
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
new file mode 100644
index 000000000..42d38c63a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
@@ -0,0 +1,60 @@
+// verify newarc[cmp-1].flow
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct node node_t;
+typedef struct node *node_p;
+
+typedef struct arc arc_t;
+typedef struct arc *arc_p;
+
+struct node
+{
+ int64_t potential;
+ int orientation;
+ node_p child;
+ node_p pred;
+ node_p sibling;
+ node_p sibling_prev;
+ arc_p basic_arc;
+ arc_p firstout;
+ arc_p firstin;
+ arc_p arc_tmp;
+ int64_t flow;
+ int64_t depth;
+ int number;
+ int time;
+};
+
+struct arc
+{
+ int id;
+ int64_t cost;
+ node_p tail;
+ node_p head;
+ short ident;
+ arc_p nextout;
+ arc_p nextin;
+ int64_t flow;
+ int64_t org_cost;
+};
+
+const int MAX = 100;
+arc_p ap = NULL;
+
+int
+main ()
+{
+ ap = (arc_p) calloc(MAX, sizeof(arc_t));
+ printf("%d\n", ap[0].id);
+ for (int i = 1; i < MAX; i++)
+ {
+ ap[i-1].id = 500;
+ }
+ printf("%d\n", ap[0].id);
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
new file mode 100644
index 000000000..53583fe82
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
@@ -0,0 +1,58 @@
+/* Supports the MEM_REF offset.
+ _1 = MEM[(struct arc *)ap_4 + 72B].flow;
+ Old rewrite_1 = ap.reorder.0_8->flow;
+ New rewrite_1 = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow. */
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct node node_t;
+typedef struct node *node_p;
+
+typedef struct arc arc_t;
+typedef struct arc *arc_p;
+
+struct node
+{
+ int64_t potential;
+ int orientation;
+ node_p child;
+ node_p pred;
+ node_p sibling;
+ node_p sibling_prev;
+ arc_p basic_arc;
+ arc_p firstout;
+ arc_p firstin;
+ arc_p arc_tmp;
+ int64_t flow;
+ int64_t depth;
+ int number;
+ int time;
+};
+
+struct arc
+{
+ int id;
+ int64_t cost;
+ node_p tail;
+ node_p head;
+ short ident;
+ arc_p nextout;
+ arc_p nextin;
+ int64_t flow;
+ int64_t org_cost;
+};
+
+int
+main ()
+{
+ const int MAX = 100;
+ /* A similar scenario can be reproduced only by using local variables. */
+ arc_p ap = NULL;
+ ap = (arc_p) calloc(MAX, sizeof(arc_t));
+ printf("%d\n", ap[1].flow);
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
new file mode 100644
index 000000000..fd675ec2e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct T_HASH_ENTRY
+{
+ unsigned int hash;
+ unsigned int klen;
+ char *key;
+} iHashEntry;
+
+typedef struct T_HASH
+{
+ unsigned int size;
+ unsigned int fill;
+ unsigned int keys;
+
+ iHashEntry **array;
+} uHash;
+
+uHash *retval;
+
+int
+main() {
+ retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size);
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
new file mode 100644
index 000000000..600e7908b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
@@ -0,0 +1,71 @@
+// support POINTER_DIFF_EXPR & NOP_EXPR to avoid
+// escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt"
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct node node_t;
+typedef struct node *node_p;
+
+typedef struct arc arc_t;
+typedef struct arc *arc_p;
+
+typedef struct network
+{
+ arc_p arcs;
+ arc_p sorted_arcs;
+ int x;
+ node_p nodes;
+ node_p stop_nodes;
+} network_t;
+
+struct node
+{
+ int64_t potential;
+ int orientation;
+ node_p child;
+ node_p pred;
+ node_p sibling;
+ node_p sibling_prev;
+ arc_p basic_arc;
+ arc_p firstout;
+ arc_p firstin;
+ arc_p arc_tmp;
+ int64_t flow;
+ int64_t depth;
+ int number;
+ int time;
+};
+
+struct arc
+{
+ int id;
+ int64_t cost;
+ node_p tail;
+ node_p head;
+ short ident;
+ arc_p nextout;
+ arc_p nextin;
+ int64_t flow;
+ int64_t org_cost;
+};
+
+int
+main ()
+{
+ arc_t *old_arcs;
+ node_t *node;
+ node_t *stop;
+ size_t off;
+ network_t* net;
+
+ for( ; node->number < stop->number; node++ )
+ {
+ off = node->basic_arc - old_arcs;
+ node->basic_arc = (arc_t *)(net->arcs + off);
+ }
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
new file mode 100644
index 000000000..f411364a7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
@@ -0,0 +1,55 @@
+// support NEGATE_EXPR rewriting
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct node node_t;
+typedef struct node *node_p;
+
+typedef struct arc arc_t;
+typedef struct arc *arc_p;
+
+struct node
+{
+ int64_t potential;
+ int orientation;
+ node_p child;
+ node_p pred;
+ node_p sibling;
+ node_p sibling_prev;
+ arc_p basic_arc;
+ arc_p firstout;
+ arc_p firstin;
+ arc_p arc_tmp;
+ int64_t flow;
+ int64_t depth;
+ int number;
+ int time;
+};
+
+struct arc
+{
+ int id;
+ int64_t cost;
+ node_p tail;
+ node_p head;
+ short ident;
+ arc_p nextout;
+ arc_p nextin;
+ int64_t flow;
+ int64_t org_cost;
+};
+
+int
+main ()
+{
+ int64_t susp = 0;
+ const int MAX = 100;
+ arc_p ap = (arc_p) calloc(MAX, sizeof(arc_t));
+ ap -= susp;
+ printf("%d\n", ap[1].flow);
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
new file mode 100644
index 000000000..a4e723763
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
@@ -0,0 +1,55 @@
+// release escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]";
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct node node_t;
+typedef struct node *node_p;
+
+typedef struct arc arc_t;
+typedef struct arc *arc_p;
+
+struct node
+{
+ int64_t potential;
+ int orientation;
+ node_p child;
+ node_p pred;
+ node_p sibling;
+ node_p sibling_prev;
+ arc_p basic_arc;
+ arc_p firstout;
+ arc_p firstin;
+ arc_p arc_tmp;
+ int64_t flow;
+ int64_t depth;
+ int number;
+ int time;
+};
+
+struct arc
+{
+ int id;
+ int64_t cost;
+ node_p tail;
+ node_p head;
+ short ident;
+ arc_p nextout;
+ arc_p nextin;
+ int64_t flow;
+ int64_t org_cost;
+};
+
+const int MAX = 100;
+arc_t **ap = NULL;
+
+int
+main ()
+{
+ ap = (arc_t**) malloc(MAX * sizeof(arc_t*));
+ (*ap)[0].id = 300;
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
index 67b3ac2d5..ac5585813 100644
--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
+++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
@@ -64,8 +64,27 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout
"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
# -fipa-struct-reorg=2
-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] \
"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] \
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] \
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/struct_reorg*.c]] \
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/sr_*.c]] \
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout.c]] \
+ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
+
+# -fipa-struct-reorg=3
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \
+ "" "-fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program"
+
# All done.
torture-finish
dg-finish
--
2.27.0.windows.1

View File

@ -1,143 +0,0 @@
From d8753de2129d230afc9a887d5804747c69824a68 Mon Sep 17 00:00:00 2001
From: zhaowenyu <804544223@qq.com>
Date: Mon, 20 Jun 2022 11:24:45 +0800
Subject: [PATCH 05/12] [Backport] ipa-sra: Fix thinko when overriding
safe_to_import_accesses (PR 101066)
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=5aa28c8cf15cd254cc5a3a12278133b93b8b017f
ipa-sra: Fix thinko when overriding safe_to_import_accesses (PR 101066)
The "new" IPA-SRA has a more difficult job than the previous
not-truly-IPA version when identifying situations in which a parameter
passed by reference can be passed into a third function and only there
converted to one passed by value (and possibly "split" at the same
time).
In order to allow this, two conditions must be fulfilled. First the
call to the third function must happen before any modifications of
memory, because it could change the value passed by reference.
Second, in order to make sure we do not introduce new (invalid)
dereferences, the call must postdominate the entry BB.
The second condition is actually not necessary if the caller function
is also certain to dereference the pointer but the first one must
still hold. Unfortunately, the code making this overriding decision
also happens to trigger when the first condition is not fulfilled.
This is fixed in the following patch.
gcc/ChangeLog:
2021-06-16 Martin Jambor <mjambor@suse.cz>
(cherry picked from commit 763121ccd908f52bc666f277ea2cf42110b3aad9)
---
gcc/ipa-sra.c | 15 +++++++++++++--
gcc/testsuite/gcc.dg/ipa/pr101066.c | 20 ++++++++++++++++++++
2 files changed, 33 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/ipa/pr101066.c
diff --git a/gcc/ipa-sra.c b/gcc/ipa-sra.c
index b706fceff..1cb30afc3 100644
--- a/gcc/ipa-sra.c
+++ b/gcc/ipa-sra.c
@@ -340,7 +340,7 @@ class isra_call_summary
public:
isra_call_summary ()
: m_arg_flow (), m_return_ignored (false), m_return_returned (false),
- m_bit_aligned_arg (false)
+ m_bit_aligned_arg (false), m_before_any_store (false)
{}
void init_inputs (unsigned arg_count);
@@ -359,6 +359,10 @@ public:
/* Set when any of the call arguments are not byte-aligned. */
unsigned m_bit_aligned_arg : 1;
+
+ /* Set to true if the call happened before any (other) store to memory in the
+ caller. */
+ unsigned m_before_any_store : 1;
};
/* Class to manage function summaries. */
@@ -472,6 +476,8 @@ isra_call_summary::dump (FILE *f)
fprintf (f, " return value ignored\n");
if (m_return_returned)
fprintf (f, " return value used only to compute caller return value\n");
+ if (m_before_any_store)
+ fprintf (f, " happens before any store to memory\n");
for (unsigned i = 0; i < m_arg_flow.length (); i++)
{
fprintf (f, " Parameter %u:\n", i);
@@ -516,6 +522,7 @@ ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
new_sum->m_return_ignored = old_sum->m_return_ignored;
new_sum->m_return_returned = old_sum->m_return_returned;
new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
+ new_sum->m_before_any_store = old_sum->m_before_any_store;
}
@@ -2355,6 +2362,7 @@ process_scan_results (cgraph_node *node, struct function *fun,
unsigned count = gimple_call_num_args (call_stmt);
isra_call_summary *csum = call_sums->get_create (cs);
csum->init_inputs (count);
+ csum->m_before_any_store = uses_memory_as_obtained;
for (unsigned argidx = 0; argidx < count; argidx++)
{
if (!csum->m_arg_flow[argidx].pointer_pass_through)
@@ -2601,6 +2609,7 @@ isra_write_edge_summary (output_block *ob, cgraph_edge *e)
bp_pack_value (&bp, csum->m_return_ignored, 1);
bp_pack_value (&bp, csum->m_return_returned, 1);
bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
+ bp_pack_value (&bp, csum->m_before_any_store, 1);
streamer_write_bitpack (&bp);
}
@@ -2719,6 +2728,7 @@ isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
csum->m_return_ignored = bp_unpack_value (&bp, 1);
csum->m_return_returned = bp_unpack_value (&bp, 1);
csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
+ csum->m_before_any_store = bp_unpack_value (&bp, 1);
}
/* Read intraprocedural analysis information about NODE and all of its outgoing
@@ -3475,7 +3485,8 @@ param_splitting_across_edge (cgraph_edge *cs)
}
else if (!ipf->safe_to_import_accesses)
{
- if (!all_callee_accesses_present_p (param_desc, arg_desc))
+ if (!csum->m_before_any_store
+ || !all_callee_accesses_present_p (param_desc, arg_desc))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " %u->%u: cannot import accesses.\n",
diff --git a/gcc/testsuite/gcc.dg/ipa/pr101066.c b/gcc/testsuite/gcc.dg/ipa/pr101066.c
new file mode 100644
index 000000000..1ceb6e431
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr101066.c
@@ -0,0 +1,20 @@
+/* { dg-do run } */
+/* { dg-options "-Os -fno-ipa-cp -fno-inline" } */
+
+int a = 1, c, d, e;
+int *b = &a;
+static int g(int *h) {
+ c = *h;
+ return d;
+}
+static void f(int *h) {
+ e = *h;
+ *b = 0;
+ g(h);
+}
+int main() {
+ f(b);
+ if (c)
+ __builtin_abort();
+ return 0;
+}
--
2.27.0.windows.1

View File

@ -1,59 +0,0 @@
From ea059ab02ac79eba1c05d6e05cbb2590c47d7c1f Mon Sep 17 00:00:00 2001
From: zhaowenyu <804544223@qq.com>
Date: Thu, 23 Jun 2022 10:16:08 +0800
Subject: [PATCH 06/12] [Backport] ifcvt: Allow constants for
noce_convert_multiple.
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=9b8eaa282250ad260e01d164093b597579db00d9
This lifts the restriction of not allowing constants for noce_convert_multiple.
The code later checks if a valid sequence is produced anyway.
gcc/ChangeLog:
* ifcvt.cc (noce_convert_multiple_sets): Allow constants.
(bb_ok_for_noce_convert_multiple_sets): Likewise.
---
gcc/ifcvt.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 977dd1bd4..2452f231c 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -3252,7 +3252,9 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
we'll end up trying to emit r4:HI = cond ? (r1:SI) : (r3:HI).
Wrap the two cmove operands into subregs if appropriate to prevent
that. */
- if (GET_MODE (new_val) != GET_MODE (temp))
+
+ if (!CONSTANT_P (new_val)
+ && GET_MODE (new_val) != GET_MODE (temp))
{
machine_mode src_mode = GET_MODE (new_val);
machine_mode dst_mode = GET_MODE (temp);
@@ -3263,7 +3265,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
}
new_val = lowpart_subreg (dst_mode, new_val, src_mode);
}
- if (GET_MODE (old_val) != GET_MODE (temp))
+ if (!CONSTANT_P (old_val)
+ && GET_MODE (old_val) != GET_MODE (temp))
{
machine_mode src_mode = GET_MODE (old_val);
machine_mode dst_mode = GET_MODE (temp);
@@ -3392,9 +3395,9 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb)
if (!REG_P (dest))
return false;
- if (!(REG_P (src)
- || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
- && subreg_lowpart_p (src))))
+ if (!((REG_P (src) || CONSTANT_P (src))
+ || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
+ && subreg_lowpart_p (src))))
return false;
/* Destination must be appropriate for a conditional write. */
--
2.27.0.windows.1

View File

@ -1,40 +0,0 @@
From beeb0fb50c7e40ee3d79044abc6408f760d6584a Mon Sep 17 00:00:00 2001
From: zhaowenyu <804544223@qq.com>
Date: Thu, 23 Jun 2022 10:40:46 +0800
Subject: [PATCH 07/12] [Backport] Register --sysroot in the driver switches
table
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=48e2d9b7b88dabed336cd098cd212d0e53c5125f
This change adjusts the processing of --sysroot to save the option in the internal "switches"
array, which lets self-specs test for it and provide a default value possibly dependent on
environment variables, as in
--with-specs=%{!-sysroot*:--sysroot=%:getenv("WIND_BASE" /target)}
2021-12-20 Olivier Hainque <hainque@adacore.com>
gcc/
* gcc.c (driver_handle_option): do_save --sysroot.
---
gcc/gcc.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/gcc/gcc.c b/gcc/gcc.c
index b55075b14..655beffcc 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -4190,7 +4190,9 @@ driver_handle_option (struct gcc_options *opts,
case OPT__sysroot_:
target_system_root = arg;
target_system_root_changed = 1;
- do_save = false;
+ /* Saving this option is useful to let self-specs decide to
+ provide a default one. */
+ do_save = true;
break;
case OPT_time_:
--
2.27.0.windows.1

View File

@ -1,665 +0,0 @@
From f8308a2b440efe124cd6ff59924f135e85e53888 Mon Sep 17 00:00:00 2001
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
Date: Sat, 18 Jun 2022 17:51:04 +0800
Subject: [PATCH 08/12] [DFE] Fix bugs
Fix bugs:
1. Fixed a bug in check replace type.
2. Use new to update field access for ref.
3. We now replace the dead fields in stmt by creating a new ssa.
4. The replaced type is no longer optimized in NORMAL mode.
Also we added 5 dejaGNU test cases.
---
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 77 ++++++---
gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c | 56 ++++++
gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c | 162 ++++++++++++++++++
gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c | 126 ++++++++++++++
.../gcc.dg/struct/dfe_extr_tcp_usrreq.c | 58 +++++++
.../gcc.dg/struct/dfe_extr_ui_main.c | 61 +++++++
6 files changed, 516 insertions(+), 24 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index 2fa560239..00dc4bf1d 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -252,6 +252,7 @@ enum struct_layout_opt_level
static bool is_result_of_mult (tree arg, tree *num, tree struct_size);
bool isptrptr (tree type);
+void get_base (tree &base, tree expr);
srmode current_mode;
@@ -631,7 +632,15 @@ srtype::analyze (void)
into 2 different structures. In future we intend to add profile
info and/or static heuristics to differentiate splitting process. */
if (fields.length () == 2)
- fields[1]->clusternum = 1;
+ {
+ for (hash_map<tree, tree>::iterator it = replace_type_map.begin ();
+ it != replace_type_map.end (); ++it)
+ {
+ if (types_compatible_p ((*it).second, this->type))
+ return;
+ }
+ fields[1]->clusternum = 1;
+ }
/* Otherwise we do nothing. */
if (fields.length () >= 3)
@@ -3278,12 +3287,33 @@ ipa_struct_reorg::find_vars (gimple *stmt)
/* Update field_access in srfield. */
static void
-update_field_access (tree record, tree field, unsigned access, void *data)
+update_field_access (tree node, tree op, unsigned access, void *data)
{
- srtype *this_srtype = ((ipa_struct_reorg *)data)->find_type (record);
+ HOST_WIDE_INT offset = 0;
+ switch (TREE_CODE (op))
+ {
+ case COMPONENT_REF:
+ {
+ offset = int_byte_position (TREE_OPERAND (op, 1));
+ break;
+ }
+ case MEM_REF:
+ {
+ offset = tree_to_uhwi (TREE_OPERAND (op, 1));
+ break;
+ }
+ default:
+ return;
+ }
+ tree base = node;
+ get_base (base, node);
+ srdecl *this_srdecl = ((ipa_struct_reorg *)data)->find_decl (base);
+ if (this_srdecl == NULL)
+ return;
+ srtype *this_srtype = this_srdecl->type;
if (this_srtype == NULL)
return;
- srfield *this_srfield = this_srtype->find_field (int_byte_position (field));
+ srfield *this_srfield = this_srtype->find_field (offset);
if (this_srfield == NULL)
return;
@@ -3291,9 +3321,9 @@ update_field_access (tree record, tree field, unsigned access, void *data)
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "record field access %d:", access);
- print_generic_expr (dump_file, record);
+ print_generic_expr (dump_file, this_srtype->type);
fprintf (dump_file, " field:");
- print_generic_expr (dump_file, field);
+ print_generic_expr (dump_file, this_srfield->fielddecl);
fprintf (dump_file, "\n");
}
return;
@@ -3302,15 +3332,10 @@ update_field_access (tree record, tree field, unsigned access, void *data)
/* A callback for walk_stmt_load_store_ops to visit store. */
static bool
-find_field_p_store (gimple *, tree node, tree op, void *data)
+find_field_p_store (gimple *stmt ATTRIBUTE_UNUSED,
+ tree node, tree op, void *data)
{
- if (TREE_CODE (op) != COMPONENT_REF)
- return false;
- tree node_type = TREE_TYPE (node);
- if (!handled_type (node_type))
- return false;
-
- update_field_access (node_type, TREE_OPERAND (op, 1), WRITE_FIELD, data);
+ update_field_access (node, op, WRITE_FIELD, data);
return false;
}
@@ -3318,15 +3343,10 @@ find_field_p_store (gimple *, tree node, tree op, void *data)
/* A callback for walk_stmt_load_store_ops to visit load. */
static bool
-find_field_p_load (gimple *, tree node, tree op, void *data)
+find_field_p_load (gimple *stmt ATTRIBUTE_UNUSED,
+ tree node, tree op, void *data)
{
- if (TREE_CODE (op) != COMPONENT_REF)
- return false;
- tree node_type = TREE_TYPE (node);
- if (!handled_type (node_type))
- return false;
-
- update_field_access (node_type, TREE_OPERAND (op, 1), READ_FIELD, data);
+ update_field_access (node, op, READ_FIELD, data);
return false;
}
@@ -4629,7 +4649,7 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec<
return;
}
- if (!is_replace_type (t1->type, type->type))
+ if (!is_replace_type (inner_type (t), type->type))
{
if (t1)
t1->mark_escape (escape_cast_another_ptr, stmt);
@@ -5898,7 +5918,16 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
fprintf (dump_file, "\n rewriting statement (remove): \n");
print_gimple_stmt (dump_file, stmt, 0);
}
- return true;
+ /* Replace the dead field in stmt by creating a dummy ssa. */
+ tree dummy_ssa = make_ssa_name (TREE_TYPE (gimple_assign_lhs (stmt)));
+ gimple_assign_set_lhs (stmt, dummy_ssa);
+ update_stmt (stmt);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "To: \n");
+ print_gimple_stmt (dump_file, stmt, 0);
+ }
+ return false;
}
if (gimple_clobber_p (stmt))
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
new file mode 100644
index 000000000..13a226ee8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
@@ -0,0 +1,56 @@
+/* { dg-do compile} */
+
+#define NULL ((void*)0)
+typedef unsigned long size_t;
+typedef long intptr_t;
+typedef unsigned long uintptr_t;
+typedef long scalar_t__;
+typedef int bool;
+#define false 0
+#define true 1
+
+typedef struct TYPE_4__ TYPE_2__;
+typedef struct TYPE_3__ TYPE_1__;
+
+typedef int uint8_t;
+typedef int uint16_t;
+
+struct TYPE_4__
+{
+ size_t cpu_id;
+};
+
+struct TYPE_3__
+{
+ int cpuc_dtrace_flags;
+};
+
+TYPE_2__ *CPU;
+volatile int CPU_DTRACE_FAULT;
+TYPE_1__ *cpu_core;
+scalar_t__ dtrace_load8 (uintptr_t);
+
+__attribute__((used)) static int
+dtrace_bcmp (const void *s1, const void *s2, size_t len)
+{
+ volatile uint16_t *flags;
+ flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
+ if (s1 == s2)
+ return (0);
+ if (s1 == NULL || s2 == NULL)
+ return (1);
+ if (s1 != s2 && len != 0)
+ {
+ const uint8_t *ps1 = s1;
+ const uint8_t *ps2 = s2;
+ do
+ {
+ if (dtrace_load8 ((uintptr_t)ps1++) != *ps2++)
+ return (1);
+ }
+ while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
+ }
+ return (0);
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
new file mode 100644
index 000000000..1fff2cb9d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
@@ -0,0 +1,162 @@
+/* { dg-do compile} */
+
+#define NULL ((void*)0)
+typedef unsigned long size_t;
+typedef long intptr_t;
+typedef unsigned long uintptr_t;
+typedef long scalar_t__;
+typedef int bool;
+#define false 0
+#define true 1
+
+struct mrb_context
+{
+ size_t stack;
+ size_t stbase;
+ size_t stend;
+ size_t eidx;
+ int *ci;
+ int *cibase;
+ int status;
+};
+
+struct RObject
+{
+ int dummy;
+};
+
+struct RHash
+{
+ int dummy;
+};
+
+struct RFiber
+{
+ struct mrb_context *cxt;
+};
+
+struct RClass
+{
+ int dummy;
+};
+
+struct RBasic
+{
+ int tt;
+};
+
+struct RArray
+{
+ int dummy;
+};
+
+typedef int mrb_state;
+typedef int mrb_gc;
+typedef int mrb_callinfo;
+size_t ARY_LEN (struct RArray *);
+size_t MRB_ENV_STACK_LEN (struct RBasic *);
+int MRB_FIBER_TERMINATED;
+
+#define MRB_TT_ARRAY 140
+#define MRB_TT_CLASS 139
+#define MRB_TT_DATA 138
+#define MRB_TT_ENV 137
+#define MRB_TT_EXCEPTION 136
+#define MRB_TT_FIBER 135
+#define MRB_TT_HASH 134
+#define MRB_TT_ICLASS 133
+#define MRB_TT_MODULE 132
+#define MRB_TT_OBJECT 131
+#define MRB_TT_PROC 130
+#define MRB_TT_RANGE 129
+#define MRB_TT_SCLASS 128
+
+size_t ci_nregs (int *);
+int gc_mark_children (int *, int *, struct RBasic *);
+size_t mrb_gc_mark_hash_size (int *, struct RHash *);
+size_t mrb_gc_mark_iv_size (int *, struct RObject *);
+size_t mrb_gc_mark_mt_size (int *, struct RClass *);
+
+__attribute__((used)) static size_t
+gc_gray_mark (mrb_state *mrb, mrb_gc *gc, struct RBasic *obj)
+{
+ size_t children = 0;
+ gc_mark_children (mrb, gc, obj);
+ switch (obj->tt)
+ {
+ case MRB_TT_ICLASS:
+ children++;
+ break;
+
+ case MRB_TT_CLASS:
+ case MRB_TT_SCLASS:
+ case MRB_TT_MODULE:
+ {
+ struct RClass *c = (struct RClass *)obj;
+ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj);
+ children += mrb_gc_mark_mt_size (mrb, c);
+ children ++;
+ }
+ break;
+
+ case MRB_TT_OBJECT:
+ case MRB_TT_DATA:
+ case MRB_TT_EXCEPTION:
+ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj);
+ break;
+
+ case MRB_TT_ENV:
+ children += MRB_ENV_STACK_LEN (obj);
+ break;
+
+ case MRB_TT_FIBER:
+ {
+ struct mrb_context *c = ((struct RFiber *)obj)->cxt;
+ size_t i;
+ mrb_callinfo *ci;
+ if (!c || c->status == MRB_FIBER_TERMINATED)
+ break;
+
+ i = c->stack - c->stbase;
+ if (c->ci)
+ {
+ i += ci_nregs (c->ci);
+ }
+ if (c->stbase + i > c->stend)
+ i = c->stend - c->stbase;
+
+ children += i;
+ children += c->eidx;
+ if (c->cibase)
+ {
+ for (i = 0, ci = c->cibase; ci <= c->ci; i++, ci++)
+ ;
+ }
+ children += i;
+ }
+ break;
+
+ case MRB_TT_ARRAY:
+ {
+ struct RArray *a = (struct RArray *)obj;
+ children += ARY_LEN (a);
+ }
+ break;
+
+ case MRB_TT_HASH:
+ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj);
+ children += mrb_gc_mark_hash_size (mrb, (struct RHash *)obj);
+ break;
+
+ case MRB_TT_PROC:
+ case MRB_TT_RANGE:
+ children += 2;
+ break;
+ default:
+ break;
+ }
+
+ return children;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
new file mode 100644
index 000000000..0f577667c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
@@ -0,0 +1,126 @@
+/* { dg-do compile} */
+
+#define NULL ((void*)0)
+typedef unsigned long size_t;
+typedef long intptr_t;
+typedef unsigned long uintptr_t;
+typedef long scalar_t__;
+typedef int bool;
+#define false 0
+#define true 1
+
+typedef struct TYPE_6__ TYPE_3__;
+typedef struct TYPE_5__ TYPE_2__;
+typedef struct TYPE_4__ TYPE_1__;
+
+struct io_accel2_cmd
+{
+ int dummy;
+};
+
+struct hpsa_tmf_struct
+{
+ int it_nexus;
+};
+
+struct hpsa_scsi_dev_t
+{
+ int nphysical_disks;
+ int ioaccel_handle;
+ struct hpsa_scsi_dev_t **phys_disk;
+};
+
+struct ctlr_info
+{
+ TYPE_3__ *pdev;
+ struct io_accel2_cmd *ioaccel2_cmd_pool;
+};
+struct TYPE_4__
+{
+ int LunAddrBytes;
+};
+
+struct TYPE_5__
+{
+ TYPE_1__ LUN;
+};
+
+struct CommandList
+{
+ size_t cmdindex;
+ int cmd_type;
+ struct hpsa_scsi_dev_t *phys_disk;
+ TYPE_2__ Header;
+};
+
+struct TYPE_6__
+{
+ int dev;
+};
+
+int BUG ();
+#define CMD_IOACCEL1 132
+#define CMD_IOACCEL2 131
+#define CMD_IOCTL_PEND 130
+#define CMD_SCSI 129
+#define IOACCEL2_TMF 128
+int dev_err (int *, char *, int);
+scalar_t__ hpsa_is_cmd_idle (struct CommandList *);
+int le32_to_cpu (int);
+int test_memcmp (unsigned char *, int *, int);
+
+__attribute__((used)) static bool
+hpsa_cmd_dev_match (struct ctlr_info *h, struct CommandList *c,
+ struct hpsa_scsi_dev_t *dev, unsigned char *scsi3addr)
+{
+ int i;
+ bool match = false;
+ struct io_accel2_cmd * c2 = &h->ioaccel2_cmd_pool[c->cmdindex];
+ struct hpsa_tmf_struct *ac = (struct hpsa_tmf_struct *)c2;
+
+ if (hpsa_is_cmd_idle (c))
+ return false;
+
+ switch (c->cmd_type)
+ {
+ case CMD_SCSI:
+ case CMD_IOCTL_PEND:
+ match = !test_memcmp (scsi3addr, &c->Header.LUN.LunAddrBytes,
+ sizeof (c->Header.LUN.LunAddrBytes));
+ break;
+
+ case CMD_IOACCEL1:
+ case CMD_IOACCEL2:
+ if (c->phys_disk == dev)
+ {
+ match = true;
+ }
+ else
+ {
+ for (i = 0; i < dev->nphysical_disks && !match; i++)
+ {
+ match = dev->phys_disk[i] == c->phys_disk;
+ }
+ }
+ break;
+
+ case IOACCEL2_TMF:
+ for (i = 0; i < dev->nphysical_disks && !match; i++)
+ {
+ match = dev->phys_disk[i]->ioaccel_handle ==
+ le32_to_cpu (ac->it_nexus);
+ }
+ break;
+
+ case 0:
+ match = false;
+ break;
+ default:
+ dev_err (&h->pdev->dev, "unexpected cmd_type: %d\n", c->cmd_type);
+ BUG ();
+ }
+
+ return match;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
new file mode 100644
index 000000000..5570c762e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
@@ -0,0 +1,58 @@
+/* { dg-do compile} */
+
+#define NULL ((void*)0)
+typedef unsigned long size_t;
+typedef long intptr_t;
+typedef unsigned long uintptr_t;
+typedef long scalar_t__;
+typedef int bool;
+#define false 0
+#define true 1
+
+struct tcpcb
+{
+ int t_state;
+};
+
+struct socket
+{
+ int dummy;
+};
+
+struct proc
+{
+ int dummy;
+};
+
+struct inpcb
+{
+ scalar_t__ inp_lport;
+};
+
+int COMMON_END (int);
+int COMMON_START ();
+int PRU_LISTEN;
+int TCPS_LISTEN;
+int in_pcbbind (struct inpcb *, int *, struct proc *);
+struct inpcb* sotoinpcb (struct socket *);
+
+__attribute__((used)) static void
+tcp_usr_listen (struct socket *so, struct proc *p)
+{
+ int error = 0;
+ struct inpcb *inp = sotoinpcb (so);
+ struct tcpcb *tp;
+
+ COMMON_START ();
+ if (inp->inp_lport == 0)
+ {
+ error = in_pcbbind (inp, NULL, p);
+ }
+ if (error == 0)
+ {
+ tp->t_state = TCPS_LISTEN;
+ }
+ COMMON_END (PRU_LISTEN);
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
new file mode 100644
index 000000000..50ab9cc24
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
@@ -0,0 +1,61 @@
+/* { dg-do compile} */
+
+#define NULL ((void*)0)
+typedef unsigned long size_t;
+typedef long intptr_t;
+typedef unsigned long uintptr_t;
+typedef long scalar_t__;
+typedef int bool;
+#define false 0
+#define true 1
+
+typedef struct TYPE_4__ TYPE_2__;
+typedef struct TYPE_3__ TYPE_1__;
+
+struct TYPE_4__
+{
+ size_t modCount;
+ TYPE_1__ *modList;
+};
+
+struct TYPE_3__
+{
+ void *modDescr;
+ void *modName;
+};
+
+size_t MAX_MODS;
+void *String_Alloc (char *);
+int test_strlen (char *);
+int trap_FD_GetFileList (char *, char *, char *, int);
+TYPE_2__ uiInfo;
+
+__attribute__((used)) static void
+UI_LoadMods ()
+{
+ int numdirs;
+ char dirlist[2048];
+ char *dirptr;
+ char *descptr;
+ int i;
+ int dirlen;
+
+ uiInfo.modCount = 0;
+ numdirs = trap_FD_GetFileList ("$modelist", "", dirlist, sizeof (dirlist));
+ dirptr = dirlist;
+ for (i = 0; i < numdirs; i++)
+ {
+ dirlen = test_strlen (dirptr) + 1;
+ descptr = dirptr + dirlen;
+ uiInfo.modList[uiInfo.modCount].modName = String_Alloc (dirptr);
+ uiInfo.modList[uiInfo.modCount].modDescr = String_Alloc (descptr);
+ dirptr += dirlen + test_strlen (descptr) + 1;
+ uiInfo.modCount++;
+ if (uiInfo.modCount >= MAX_MODS)
+ {
+ break;
+ }
+ }
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,379 +0,0 @@
From 8f51c8c83355cb1b69553e582fb512c6e37b71f5 Mon Sep 17 00:00:00 2001
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
Date: Thu, 18 Aug 2022 17:15:08 +0800
Subject: [PATCH] [DFE] Fix the bug caused by inconsistent types: 1. Update
some functions to fix the bug caused by inconsistent base and node types.
Also we added 3 dejaGNU test cases.
---
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 57 ++++++++-----
.../gcc.dg/struct/dfe_extr_board_init.c | 77 +++++++++++++++++
gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c | 77 +++++++++++++++++
.../gcc.dg/struct/dfe_extr_mv_udc_core.c | 82 +++++++++++++++++++
4 files changed, 273 insertions(+), 20 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index 00dc4bf1d..8d3da3540 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -3284,33 +3284,31 @@ ipa_struct_reorg::find_vars (gimple *stmt)
}
}
-/* Update field_access in srfield. */
-
-static void
-update_field_access (tree node, tree op, unsigned access, void *data)
+static HOST_WIDE_INT
+get_offset (tree op, HOST_WIDE_INT offset)
{
- HOST_WIDE_INT offset = 0;
switch (TREE_CODE (op))
{
case COMPONENT_REF:
{
- offset = int_byte_position (TREE_OPERAND (op, 1));
- break;
+ return int_byte_position (TREE_OPERAND (op, 1));
}
case MEM_REF:
{
- offset = tree_to_uhwi (TREE_OPERAND (op, 1));
- break;
+ return tree_to_uhwi (TREE_OPERAND (op, 1));
}
default:
- return;
+ return offset;
}
- tree base = node;
- get_base (base, node);
- srdecl *this_srdecl = ((ipa_struct_reorg *)data)->find_decl (base);
- if (this_srdecl == NULL)
- return;
- srtype *this_srtype = this_srdecl->type;
+ return offset;
+}
+
+/* Record field access. */
+static void
+record_field_access (tree type, HOST_WIDE_INT offset,
+ unsigned access, void *data)
+{
+ srtype *this_srtype = ((ipa_struct_reorg *)data)->find_type (type);
if (this_srtype == NULL)
return;
srfield *this_srfield = this_srtype->find_field (offset);
@@ -3321,12 +3319,33 @@ update_field_access (tree node, tree op, unsigned access, void *data)
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "record field access %d:", access);
- print_generic_expr (dump_file, this_srtype->type);
+ print_generic_expr (dump_file, type);
fprintf (dump_file, " field:");
print_generic_expr (dump_file, this_srfield->fielddecl);
fprintf (dump_file, "\n");
}
return;
+
+}
+
+/* Update field_access in srfield. */
+
+static void
+update_field_access (tree node, tree op, unsigned access, void *data)
+{
+ HOST_WIDE_INT offset = 0;
+ offset = get_offset (op, offset);
+ tree node_type = inner_type (TREE_TYPE (node));
+ record_field_access (node_type, offset, access, data);
+ tree base = node;
+ get_base (base, node);
+ tree base_type = inner_type (TREE_TYPE (base));
+ if (!types_compatible_p (base_type, node_type))
+ {
+ record_field_access (base_type, get_offset (node, offset),
+ access, data);
+ }
+ return;
}
/* A callback for walk_stmt_load_store_ops to visit store. */
@@ -3373,8 +3392,7 @@ ipa_struct_reorg::remove_dead_field_stmt (tree lhs)
return false;
if (f == NULL)
return false;
- if (f->newfield[0] == NULL
- && (f->field_access & WRITE_FIELD))
+ if (f->newfield[0] == NULL)
return true;
return false;
}
@@ -5927,7 +5945,6 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
fprintf (dump_file, "To: \n");
print_gimple_stmt (dump_file, stmt, 0);
}
- return false;
}
if (gimple_clobber_p (stmt))
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c
new file mode 100644
index 000000000..4e52564b6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c
@@ -0,0 +1,77 @@
+/* { dg-do compile} */
+
+#define NULL ((void*)0)
+typedef unsigned long size_t;
+typedef long intptr_t;
+typedef unsigned long uintptr_t;
+typedef long scalar_t__;
+typedef int bool;
+#define false 0
+#define true 1
+
+typedef struct TYPE_5__ TYPE_2__;
+typedef struct TYPE_4__ TYPE_1__;
+
+struct TYPE_4__
+{
+ int Pin;
+ int Pull;
+ int Mode;
+ int Speed;
+};
+
+struct TYPE_5__
+{
+ int MEMRMP;
+};
+typedef TYPE_1__ GPIO_InitTypeDef;
+
+int BT_RST_PIN;
+int BT_RST_PORT;
+int CONN_POS10_PIN;
+int CONN_POS10_PORT;
+int GPIO_HIGH (int, int);
+int GPIO_MODE_INPUT;
+int GPIO_MODE_OUTPUT_PP;
+int GPIO_NOPULL;
+int GPIO_PULLUP;
+int GPIO_SPEED_FREQ_LOW;
+int HAL_GPIO_Init (int, TYPE_1__ *);
+scalar_t__ IS_GPIO_RESET (int, int);
+TYPE_2__ *SYSCFG;
+int __HAL_RCC_GPIOB_CLK_ENABLE ();
+int __HAL_RCC_GPIOC_CLK_ENABLE ();
+
+__attribute__((used)) static void
+LBF_DFU_If_Needed (void)
+{
+ GPIO_InitTypeDef GPIO_InitStruct;
+ __HAL_RCC_GPIOC_CLK_ENABLE ();
+ GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP;
+ GPIO_InitStruct.Pull = GPIO_NOPULL;
+ GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
+ GPIO_InitStruct.Pin = BT_RST_PIN;
+ HAL_GPIO_Init (BT_RST_PORT, &GPIO_InitStruct);
+
+ GPIO_HIGH (BT_RST_PORT, BT_RST_PIN);
+ __HAL_RCC_GPIOB_CLK_ENABLE ();
+ GPIO_InitStruct.Mode = GPIO_MODE_INPUT;
+ GPIO_InitStruct.Pull = GPIO_PULLUP;
+ GPIO_InitStruct.Pin = CONN_POS10_PIN;
+ HAL_GPIO_Init (CONN_POS10_PORT, &GPIO_InitStruct);
+
+ if (IS_GPIO_RESET (CONN_POS10_PORT, CONN_POS10_PIN))
+ {
+ SYSCFG->MEMRMP = 0x00000001;
+ asm (
+ "LDR R0, =0x000000\n\t"
+ "LDR SP, [R0, #0]\n\t"
+ );
+ asm (
+ "LDR R0, [R0, #0]\n\t"
+ "BX R0\n\t"
+ );
+ }
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c
new file mode 100644
index 000000000..894e9f460
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c
@@ -0,0 +1,77 @@
+/* { dg-do compile} */
+
+#define NULL ((void*)0)
+typedef unsigned long size_t;
+typedef long intptr_t;
+typedef unsigned long uintptr_t;
+typedef long scalar_t__;
+typedef int bool;
+#define false 0
+#define true 1
+
+typedef struct TYPE_2__ TYPE_1__;
+
+struct net_device
+{
+ struct claw_privbk* ml_priv;
+};
+struct clawctl
+{
+ int linkid;
+};
+struct claw_privbk
+{
+ int system_validate_comp;
+ TYPE_1__* p_env;
+ int ctl_bk;
+};
+typedef int __u8;
+struct TYPE_2__
+{
+ scalar_t__ packing;
+ int api_type;
+};
+
+int CLAW_DBF_TEXT (int, int, char*);
+int CONNECTION_REQUEST;
+int HOST_APPL_NAME;
+scalar_t__ PACKING_ASK;
+scalar_t__ PACK_SEND;
+int WS_APPL_NAME_IP_NAME;
+int WS_APPL_NAME_PACKED;
+int claw_send_control (struct net_device*, int, int, int, int, int, int);
+int setup;
+
+__attribute__((used)) static int
+claw_snd_conn_req (struct net_device *dev, __u8 link)
+{
+ int rc;
+ struct claw_privbk *privptr = dev->ml_priv;
+ struct clawctl *p_ctl;
+ CLAW_DBF_TEXT (2, setup, "snd_conn");
+ rc = 1;
+ p_ctl = (struct clawctl *)&privptr->ctl_bk;
+ p_ctl->linkid = link;
+ if (privptr->system_validate_comp == 0x00)
+ {
+ return rc;
+ }
+ if (privptr->p_env->packing == PACKING_ASK)
+ {
+ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0,
+ WS_APPL_NAME_PACKED, WS_APPL_NAME_PACKED);
+ }
+ if (privptr->p_env->packing == PACK_SEND)
+ {
+ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0,
+ WS_APPL_NAME_IP_NAME, WS_APPL_NAME_IP_NAME);
+ }
+ if (privptr->p_env->packing == 0)
+ {
+ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0,
+ HOST_APPL_NAME, privptr->p_env->api_type);
+ }
+ return rc;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c
new file mode 100644
index 000000000..9801f87f1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c
@@ -0,0 +1,82 @@
+/* { dg-do compile} */
+
+#define NULL ((void*)0)
+typedef unsigned long size_t;
+typedef long intptr_t;
+typedef unsigned long uintptr_t;
+typedef long scalar_t__;
+typedef int bool;
+#define false 0
+#define true 1
+
+typedef struct TYPE_4__ TYPE_2__;
+typedef struct TYPE_3__ TYPE_1__;
+typedef int u32;
+
+struct mv_udc
+{
+ TYPE_2__ *op_regs;
+ TYPE_1__ *ep_dqh;
+ struct mv_ep *eps;
+};
+
+struct mv_ep
+{
+ TYPE_1__ *dqh;
+ struct mv_udc *udc;
+};
+
+struct TYPE_4__
+{
+ int *epctrlx;
+};
+
+struct TYPE_3__
+{
+ int max_packet_length;
+ int next_dtd_ptr;
+};
+
+int EP0_MAX_PKT_SIZE;
+int EPCTRL_RX_ENABLE;
+int EPCTRL_RX_EP_TYPE_SHIFT;
+int EPCTRL_TX_ENABLE;
+int EPCTRL_TX_EP_TYPE_SHIFT;
+int EP_QUEUE_HEAD_IOS;
+int EP_QUEUE_HEAD_MAX_PKT_LEN_POS;
+int EP_QUEUE_HEAD_NEXT_TERMINATE;
+int USB_ENDPOINT_XFER_CONTROL;
+int readl (int *);
+int writel (int, int *);
+
+__attribute__((used)) static void
+ep0_reset (struct mv_udc *udc)
+{
+ struct mv_ep *ep;
+ u32 epctrlx;
+ int i = 0;
+ for (i = 0; i < 2; i++)
+ {
+ ep = &udc->eps[i];
+ ep->udc = udc;
+ ep->dqh = &udc->ep_dqh[i];
+ ep->dqh->max_packet_length =
+ (EP0_MAX_PKT_SIZE << EP_QUEUE_HEAD_MAX_PKT_LEN_POS)
+ | EP_QUEUE_HEAD_IOS;
+ ep->dqh->next_dtd_ptr = EP_QUEUE_HEAD_NEXT_TERMINATE;
+ epctrlx = readl (&udc->op_regs->epctrlx[0]);
+ if (i)
+ {
+ epctrlx |= EPCTRL_TX_ENABLE
+ | (USB_ENDPOINT_XFER_CONTROL << EPCTRL_TX_EP_TYPE_SHIFT);
+ }
+ else
+ {
+ epctrlx |= EPCTRL_RX_ENABLE
+ | (USB_ENDPOINT_XFER_CONTROL << EPCTRL_RX_EP_TYPE_SHIFT);
+ }
+ writel (epctrlx, &udc->op_regs->epctrlx[0]);
+ }
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */
--
2.33.0

View File

@ -1,146 +0,0 @@
From b66a843505f32685f428c502f1a88e0f681b4acd Mon Sep 17 00:00:00 2001
From: eastb233 <xiezhiheng@huawei.com>
Date: Thu, 15 Sep 2022 17:57:00 +0800
Subject: [PATCH] [Struct Reorg] Type simplify limitation when in structure
optimizaiton
When enable structure optimization, we should not simplify
TYPE NODE. But now we unconditionally skip the simplification
under structure optimization regardless of whether it takes
effect. So add the same limitation as the optimization has.
---
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 72 ++++++++++++-------------
gcc/tree.c | 13 ++++-
2 files changed, 47 insertions(+), 38 deletions(-)
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index 8d3da35400d..54c20ca3f33 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -104,6 +104,42 @@ along with GCC; see the file COPYING3. If not see
#define VOID_POINTER_P(type) (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type)))
+/* Check whether in C language or LTO with only C language. */
+bool
+lang_c_p (void)
+{
+ const char *language_string = lang_hooks.name;
+
+ if (!language_string)
+ {
+ return false;
+ }
+
+ if (lang_GNU_C ())
+ {
+ return true;
+ }
+ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check
+ {
+ unsigned i = 0;
+ tree t = NULL_TREE;
+
+ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
+ {
+ language_string = TRANSLATION_UNIT_LANGUAGE (t);
+ if (language_string == NULL
+ || strncmp (language_string, "GNU C", 5)
+ || (language_string[5] != '\0'
+ && !(ISDIGIT (language_string[5]))))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
namespace {
using namespace struct_reorg;
@@ -163,42 +199,6 @@ handled_type (tree type)
return false;
}
-/* Check whether in C language or LTO with only C language. */
-bool
-lang_c_p (void)
-{
- const char *language_string = lang_hooks.name;
-
- if (!language_string)
- {
- return false;
- }
-
- if (lang_GNU_C ())
- {
- return true;
- }
- else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check
- {
- unsigned i = 0;
- tree t = NULL_TREE;
-
- FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
- {
- language_string = TRANSLATION_UNIT_LANGUAGE (t);
- if (language_string == NULL
- || strncmp (language_string, "GNU C", 5)
- || (language_string[5] != '\0'
- && !(ISDIGIT (language_string[5]))))
- {
- return false;
- }
- }
- return true;
- }
- return false;
-}
-
/* Get the number of pointer layers. */
int
diff --git a/gcc/tree.c b/gcc/tree.c
index c2075d73586..84a440b3576 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -128,6 +128,9 @@ const char *const tree_code_class_strings[] =
/* obstack.[ch] explicitly declined to prototype this. */
extern int _obstack_allocated_p (struct obstack *h, void *obj);
+/* Check whether in C language or LTO with only C language. */
+extern bool lang_c_p (void);
+
/* Statistics-gathering stuff. */
static uint64_t tree_code_counts[MAX_TREE_CODES];
@@ -5219,7 +5222,10 @@ fld_simplified_type_name (tree type)
/* Simplify type will cause that struct A and struct A within
struct B are different type pointers, so skip it in structure
optimizations. */
- if (flag_ipa_struct_layout || flag_ipa_struct_reorg)
+ if ((flag_ipa_struct_layout || flag_ipa_struct_reorg)
+ && lang_c_p ()
+ && flag_lto_partition == LTO_PARTITION_ONE
+ && (in_lto_p || flag_whole_program))
return TYPE_NAME (type);
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
@@ -5463,7 +5469,10 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
/* Simplify type will cause that struct A and struct A within
struct B are different type pointers, so skip it in structure
optimizations. */
- if (flag_ipa_struct_layout || flag_ipa_struct_reorg)
+ if ((flag_ipa_struct_layout || flag_ipa_struct_reorg)
+ && lang_c_p ()
+ && flag_lto_partition == LTO_PARTITION_ONE
+ && (in_lto_p || flag_whole_program))
return t;
if (POINTER_TYPE_P (t))
return fld_incomplete_type_of (t, fld);
--
2.21.0.windows.1

View File

@ -1,21 +0,0 @@
From 7dffda64fcbbd522616d7dc9c70530d146f4fed6 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Tue, 1 Nov 2022 16:38:38 +0800
Subject: [PATCH 01/35] [build] Add some file right to executable
---
libgcc/mkheader.sh | 0
move-if-change | 0
2 files changed, 0 insertions(+), 0 deletions(-)
mode change 100644 => 100755 libgcc/mkheader.sh
mode change 100644 => 100755 move-if-change
diff --git a/libgcc/mkheader.sh b/libgcc/mkheader.sh
old mode 100644
new mode 100755
diff --git a/move-if-change b/move-if-change
old mode 100644
new mode 100755
--
2.27.0.windows.1

View File

@ -1,186 +0,0 @@
From c690da762e873d0f5c66ea084e420ba4842354a6 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Wed, 4 Nov 2020 11:55:29 +0100
Subject: [PATCH 02/35] [Backport] phiopt: Optimize x ? 1024 : 0 to (int) x <<
10 [PR97690]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3e190757fa332d327bee27495f37beb01155cfab
The following patch generalizes the x ? 1 : 0 -> (int) x optimization
to handle also left shifts by constant.
During x86_64-linux and i686-linux bootstraps + regtests it triggered
in 1514 unique non-LTO -m64 cases (sort -u on log mentioning
filename, function name and shift count) and 1866 -m32 cases.
Unfortunately, the patch regresses (before the tests have been adjusted):
+FAIL: gcc.dg/tree-ssa/ssa-ccp-11.c scan-tree-dump-times optimized "if " 0
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c -flto -ffat-lto-objects scan-tree-dump-times slp1 "optimized: basic block" 1
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c scan-tree-dump-times slp1 "optimized: basic block" 1
and in both cases it actually results in worse code.
> > We'd need some optimization that would go through all PHI edges and
> > compute if some use of the phi results don't actually compute a constant
> > across all the PHI edges - 1 & 0 and 0 & 1 is always 0.
> PRE should do this, IMHO only optimizing it at -O2 is fine.
> > Similarly, in the slp vectorization test there is:
> > a[0] = b[0] ? 1 : 7;
> note this, carefully avoiding the already "optimized" b[0] ? 1 : 0 ...
> So the option is to put : 7 in the 2, 4 an 8 case as well. The testcase
> wasn't added for any real-world case but is artificial I guess for
> COND_EXPR handling of invariants.
> But yeah, for things like SLP it means we eventually have to
> implement reverse transforms for all of this to make the lanes
> matching. But that's true anyway for things like x + 1 vs. x + 0
> or x / 3 vs. x / 2 or other simplifications we do.
2020-11-04 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/97690
* tree-ssa-phiopt.c (conditional_replacement): Also optimize
cond ? pow2p_cst : 0 as ((type) cond) << cst.
* gcc.dg/tree-ssa/phi-opt-22.c: New test.
* gcc.dg/tree-ssa/ssa-ccp-11.c: Use -O2 instead of -O1.
* gcc.dg/vect/bb-slp-pattern-2.c (foo): Use ? 2 : 7, ? 4 : 7 and
? 8 : 7 instead of ? 2 : 0, ? 4 : 0, ? 8 : 0.
---
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c | 11 ++++++
gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c | 2 +-
gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c | 6 ++--
gcc/tree-ssa-phiopt.c | 38 ++++++++++++++------
4 files changed, 43 insertions(+), 14 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
new file mode 100644
index 000000000..fd3706666
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
@@ -0,0 +1,11 @@
+/* PR tree-optimization/97690 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+
+int foo (_Bool d) { return d ? 2 : 0; }
+int bar (_Bool d) { return d ? 1 : 0; }
+int baz (_Bool d) { return d ? -__INT_MAX__ - 1 : 0; }
+int qux (_Bool d) { return d ? 1024 : 0; }
+
+/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " << " 3 "phiopt2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
index 36b8e7fc8..d70ea5a01 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
/* Test for CPROP across a DAG. */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
index d32cb7585..e64f0115a 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
@@ -13,13 +13,13 @@ foo (short * __restrict__ a, int * __restrict__ b, int stride)
for (i = 0; i < N/stride; i++, a += stride, b += stride)
{
a[0] = b[0] ? 1 : 7;
- a[1] = b[1] ? 2 : 0;
+ a[1] = b[1] ? 2 : 7;
a[2] = b[2] ? 3 : 0;
- a[3] = b[3] ? 4 : 0;
+ a[3] = b[3] ? 4 : 7;
a[4] = b[4] ? 5 : 0;
a[5] = b[5] ? 6 : 0;
a[6] = b[6] ? 7 : 0;
- a[7] = b[7] ? 8 : 0;
+ a[7] = b[7] ? 8 : 7;
}
}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 591b6435f..85587e8d1 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -753,7 +753,9 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
gimple_stmt_iterator gsi;
edge true_edge, false_edge;
tree new_var, new_var2;
- bool neg;
+ bool neg = false;
+ int shift = 0;
+ tree nonzero_arg;
/* FIXME: Gimplification of complex type is too hard for now. */
/* We aren't prepared to handle vectors either (and it is a question
@@ -764,14 +766,22 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|| POINTER_TYPE_P (TREE_TYPE (arg1))))
return false;
- /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
- convert it to the conditional. */
- if ((integer_zerop (arg0) && integer_onep (arg1))
- || (integer_zerop (arg1) && integer_onep (arg0)))
- neg = false;
- else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
- || (integer_zerop (arg1) && integer_all_onesp (arg0)))
+ /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
+ 0 and (1 << cst), then convert it to the conditional. */
+ if (integer_zerop (arg0))
+ nonzero_arg = arg1;
+ else if (integer_zerop (arg1))
+ nonzero_arg = arg0;
+ else
+ return false;
+ if (integer_all_onesp (nonzero_arg))
neg = true;
+ else if (integer_pow2p (nonzero_arg))
+ {
+ shift = tree_log2 (nonzero_arg);
+ if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
+ return false;
+ }
else
return false;
@@ -783,12 +793,12 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
falls through into BB.
There is a single PHI node at the join point (BB) and its arguments
- are constants (0, 1) or (0, -1).
+ are constants (0, 1) or (0, -1) or (0, (1 << shift)).
So, given the condition COND, and the two PHI arguments, we can
rewrite this PHI into non-branching code:
- dest = (COND) or dest = COND'
+ dest = (COND) or dest = COND' or dest = (COND) << shift
We use the condition as-is if the argument associated with the
true edge has the value one or the argument associated with the
@@ -823,6 +833,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
cond = fold_build1_loc (gimple_location (stmt),
NEGATE_EXPR, TREE_TYPE (cond), cond);
}
+ else if (shift)
+ {
+ cond = fold_convert_loc (gimple_location (stmt),
+ TREE_TYPE (result), cond);
+ cond = fold_build2_loc (gimple_location (stmt),
+ LSHIFT_EXPR, TREE_TYPE (cond), cond,
+ build_int_cst (integer_type_node, shift));
+ }
/* Insert our new statements at the end of conditional block before the
COND_STMT. */
--
2.27.0.windows.1

View File

@ -1,92 +0,0 @@
From 79a974bc7bb67cf425a7839f3c1f5689e41c7ee8 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 9 Mar 2021 19:13:11 +0100
Subject: [PATCH 03/35] [Backport] phiopt: Fix up conditional_replacement
[PR99305]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b610c30453d8e4cc88693d85a5a100d089640be5
Before my PR97690 changes, conditional_replacement would not set neg
when the nonzero arg was boolean true.
I've simplified the testing, so that it first finds the zero argument
and then checks the other argument for all the handled cases
(1, -1 and 1 << X, where the last case is what the patch added support for).
But, unfortunately I've placed the integer_all_onesp test first.
For unsigned precision 1 types such as bool integer_all_onesp, integer_onep
and integer_pow2p can all be true and the code set neg to true in that case,
which is undesirable.
The following patch tests integer_pow2p first (which is trivially true
for integer_onep too and tree_log2 in that case gives shift == 0)
and only if that isn't the case, integer_all_onesp.
2021-03-09 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/99305
* tree-ssa-phiopt.c (conditional_replacement): Test integer_pow2p
before integer_all_onesp instead of vice versa.
* g++.dg/opt/pr99305.C: New test.
---
gcc/testsuite/g++.dg/opt/pr99305.C | 26 ++++++++++++++++++++++++++
gcc/tree-ssa-phiopt.c | 6 +++---
2 files changed, 29 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/opt/pr99305.C
diff --git a/gcc/testsuite/g++.dg/opt/pr99305.C b/gcc/testsuite/g++.dg/opt/pr99305.C
new file mode 100644
index 000000000..8a91277e7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr99305.C
@@ -0,0 +1,26 @@
+// PR tree-optimization/99305
+// { dg-do compile }
+// { dg-options "-O3 -fno-ipa-icf -fdump-tree-optimized" }
+// { dg-final { scan-tree-dump-times " = \\\(unsigned char\\\) c_\[0-9]*\\\(D\\\);" 3 "optimized" } }
+// { dg-final { scan-tree-dump-times " = \[^\n\r]* \\+ \[0-9]*;" 3 "optimized" } }
+// { dg-final { scan-tree-dump-times " = \[^\n\r]* <= 9;" 3 "optimized" } }
+// { dg-final { scan-tree-dump-not "if \\\(c_\[0-9]*\\\(D\\\) \[!=]= 0\\\)" "optimized" } }
+// { dg-final { scan-tree-dump-not " = PHI <" "optimized" } }
+
+bool
+foo (char c)
+{
+ return c >= 48 && c <= 57;
+}
+
+bool
+bar (char c)
+{
+ return c != 0 && foo (c);
+}
+
+bool
+baz (char c)
+{
+ return c != 0 && c >= 48 && c <= 57;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 85587e8d1..b9be28474 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -774,14 +774,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
nonzero_arg = arg0;
else
return false;
- if (integer_all_onesp (nonzero_arg))
- neg = true;
- else if (integer_pow2p (nonzero_arg))
+ if (integer_pow2p (nonzero_arg))
{
shift = tree_log2 (nonzero_arg);
if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
return false;
}
+ else if (integer_all_onesp (nonzero_arg))
+ neg = true;
else
return false;
--
2.27.0.windows.1

View File

@ -1,122 +0,0 @@
From 09263d5ed4d81a008ca8ffcc2883dc766e7874d5 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Sun, 6 Dec 2020 10:58:10 +0100
Subject: [PATCH 04/35] [Backport] phiopt: Handle bool in two_value_replacement
[PR796232]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=8c23434fdadcf4caa1f0e966294c5f67ccf4bcf9
The following patch improves code generation on the included testcase by
enabling two_value_replacement on booleans. It does that only for arg0/arg1
values that conditional_replacement doesn't handle. Additionally
it limits two_value_replacement optimization to the late phiopt like
conditional_replacement.
2020-12-06 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/96232
* tree-ssa-phiopt.c (two_value_replacement): Optimize even boolean lhs
cases as long as arg0 has wider precision and conditional_replacement
doesn't handle that case.
(tree_ssa_phiopt_worker): Don't call two_value_replacement during
early phiopt.
* gcc.dg/tree-ssa/pr96232-2.c: New test.
* gcc.dg/tree-ssa/pr88676-2.c: Check phiopt2 dump rather than phiopt1.
---
gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c | 4 ++--
gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c | 18 ++++++++++++++++++
gcc/tree-ssa-phiopt.c | 23 +++++++++++++++++++----
3 files changed, 39 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
index 0e616365b..ea88407b6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
@@ -1,7 +1,7 @@
/* PR tree-optimization/88676 */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-phiopt1" } */
-/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt1" { target le } } } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt2" { target le } } } */
struct foo1 {
int i:1;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
new file mode 100644
index 000000000..9f51820ed
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
@@ -0,0 +1,18 @@
+/* PR tree-optimization/96232 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump " 38 - " "optimized" } } */
+/* { dg-final { scan-tree-dump " \\+ 97;" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */
+
+int
+foo (_Bool x)
+{
+ return x ? 37 : 38;
+}
+
+int
+bar (_Bool x)
+{
+ return x ? 98 : 97;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index b9be28474..0623d740d 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -339,7 +339,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
}
/* Do the replacement of conditional if it can be done. */
- if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
+ if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
&& conditional_replacement (bb, bb1, e1, e2, phi,
@@ -636,7 +636,6 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
if (TREE_CODE (lhs) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
- || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
|| TREE_CODE (rhs) != INTEGER_CST)
return false;
@@ -649,9 +648,25 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
return false;
}
+ /* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to
+ conditional_replacement. */
+ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
+ && (integer_zerop (arg0)
+ || integer_zerop (arg1)
+ || TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE
+ || (TYPE_PRECISION (TREE_TYPE (arg0))
+ <= TYPE_PRECISION (TREE_TYPE (lhs)))))
+ return false;
+
wide_int min, max;
- if (get_range_info (lhs, &min, &max) != VR_RANGE
- || min + 1 != max
+ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
+ {
+ min = wi::to_wide (boolean_false_node);
+ max = wi::to_wide (boolean_true_node);
+ }
+ else if (get_range_info (lhs, &min, &max) != VR_RANGE)
+ return false;
+ if (min + 1 != max
|| (wi::to_wide (rhs) != min
&& wi::to_wide (rhs) != max))
return false;
--
2.27.0.windows.1

View File

@ -1,256 +0,0 @@
From a92cf465f10585350f7cd5739457c3f2852cfc86 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Wed, 21 Oct 2020 10:51:33 +0200
Subject: [PATCH 05/35] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32
in GIMPLE [PR97503]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=5244b4af5e47bc98a2a9cf36f048981583a1b163
While we have at the RTL level noce_try_ifelse_collapse combined with
simplify_cond_clz_ctz, that optimization doesn't always trigger because
e.g. on powerpc there is an define_insn to compare a reg against zero and
copy that register to another one and so we end up with a different pseudo
in the simplify_cond_clz_ctz test and punt.
For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes,
we can optimize it already in phiopt though, just need to ensure that
we transform the __builtin_c?z* calls into .C?Z ifns because my recent
VRP changes codified that the builtin calls are always undefined at zero,
while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2.
And, in phiopt we already have popcount handling that does pretty much the
same thing, except for always using a zero value rather than the one set
by C?Z_DEFINED_VALUE_AT_ZERO.
So, this patch extends that function to handle not just popcount, but also
clz and ctz.
2020-10-21 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/97503
* tree-ssa-phiopt.c: Include internal-fn.h.
(cond_removal_in_popcount_pattern): Rename to ...
(cond_removal_in_popcount_clz_ctz_pattern): ... this. Handle not just
popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2.
* gcc.dg/tree-ssa/pr97503.c: New test.
---
gcc/testsuite/gcc.dg/tree-ssa/pr97503.c | 19 +++++
gcc/tree-ssa-phiopt.c | 100 ++++++++++++++++++------
2 files changed, 95 insertions(+), 24 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
new file mode 100644
index 000000000..3a3dae6c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
@@ -0,0 +1,19 @@
+/* PR tree-optimization/97503 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } */
+/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */
+/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
+
+int
+foo (int x)
+{
+ return x ? __builtin_clz (x) : 32;
+}
+
+int
+bar (unsigned long long x)
+{
+ return x ? __builtin_clzll (x) : 64;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 0623d740d..c1e11916e 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "case-cfn-macros.h"
#include "tree-eh.h"
+#include "internal-fn.h"
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
@@ -60,8 +61,9 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
-static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
+static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
+ edge, edge, gimple *,
+ tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -348,8 +350,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
- && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
- phi, arg0, arg1))
+ && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
+ e2, phi, arg0,
+ arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -1771,16 +1774,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
<bb 4>
c_12 = PHI <_9(2)>
-*/
+
+ Similarly for __builtin_clz or __builtin_ctz if
+ C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and
+ instead of 0 above it uses the value from that macro. */
static bool
-cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
- edge e1, edge e2,
- gimple *phi, tree arg0, tree arg1)
+cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
+ basic_block middle_bb,
+ edge e1, edge e2, gimple *phi,
+ tree arg0, tree arg1)
{
gimple *cond;
gimple_stmt_iterator gsi, gsi_from;
- gimple *popcount;
+ gimple *call;
gimple *cast = NULL;
tree lhs, arg;
@@ -1798,35 +1805,67 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
gsi_next_nondebug (&gsi);
if (!gsi_end_p (gsi))
{
- popcount = gsi_stmt (gsi);
+ call = gsi_stmt (gsi);
gsi_next_nondebug (&gsi);
if (!gsi_end_p (gsi))
return false;
}
else
{
- popcount = cast;
+ call = cast;
cast = NULL;
}
- /* Check that we have a popcount builtin. */
- if (!is_gimple_call (popcount))
+ /* Check that we have a popcount/clz/ctz builtin. */
+ if (!is_gimple_call (call) || gimple_call_num_args (call) != 1)
+ return false;
+
+ arg = gimple_call_arg (call, 0);
+ lhs = gimple_get_lhs (call);
+
+ if (lhs == NULL_TREE)
return false;
- combined_fn cfn = gimple_call_combined_fn (popcount);
+
+ combined_fn cfn = gimple_call_combined_fn (call);
+ internal_fn ifn = IFN_LAST;
+ int val = 0;
switch (cfn)
{
CASE_CFN_POPCOUNT:
break;
+ CASE_CFN_CLZ:
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+ {
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
+ if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
+ OPTIMIZE_FOR_BOTH)
+ && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ {
+ ifn = IFN_CLZ;
+ break;
+ }
+ }
+ return false;
+ CASE_CFN_CTZ:
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+ {
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
+ if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg),
+ OPTIMIZE_FOR_BOTH)
+ && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ {
+ ifn = IFN_CTZ;
+ break;
+ }
+ }
+ return false;
default:
return false;
}
- arg = gimple_call_arg (popcount, 0);
- lhs = gimple_get_lhs (popcount);
-
if (cast)
{
- /* We have a cast stmt feeding popcount builtin. */
+ /* We have a cast stmt feeding popcount/clz/ctz builtin. */
/* Check that we have a cast prior to that. */
if (gimple_code (cast) != GIMPLE_ASSIGN
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
@@ -1839,7 +1878,7 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
cond = last_stmt (cond_bb);
- /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
+ /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz
builtin. */
if (gimple_code (cond) != GIMPLE_COND
|| (gimple_cond_code (cond) != NE_EXPR
@@ -1859,10 +1898,13 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
}
/* Check PHI arguments. */
- if (lhs != arg0 || !integer_zerop (arg1))
+ if (lhs != arg0
+ || TREE_CODE (arg1) != INTEGER_CST
+ || wi::to_wide (arg1) != val)
return false;
- /* And insert the popcount builtin and cast stmt before the cond_bb. */
+ /* And insert the popcount/clz/ctz builtin and cast stmt before the
+ cond_bb. */
gsi = gsi_last_bb (cond_bb);
if (cast)
{
@@ -1870,9 +1912,19 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
gsi_move_before (&gsi_from, &gsi);
reset_flow_sensitive_info (gimple_get_lhs (cast));
}
- gsi_from = gsi_for_stmt (popcount);
- gsi_move_before (&gsi_from, &gsi);
- reset_flow_sensitive_info (gimple_get_lhs (popcount));
+ gsi_from = gsi_for_stmt (call);
+ if (ifn == IFN_LAST || gimple_call_internal_p (call))
+ gsi_move_before (&gsi_from, &gsi);
+ else
+ {
+ /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only
+ the latter is well defined at zero. */
+ call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0));
+ gimple_call_set_lhs (call, lhs);
+ gsi_insert_before (&gsi, call, GSI_SAME_STMT);
+ gsi_remove (&gsi_from, true);
+ }
+ reset_flow_sensitive_info (lhs);
/* Now update the PHI and remove unneeded bbs. */
replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
--
2.27.0.windows.1

View File

@ -1,69 +0,0 @@
From 7d5d2ab082ce9986db4f3313013b44faa46bc412 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 22 Oct 2020 09:34:28 +0200
Subject: [PATCH 06/35] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32
in GIMPLE fallout [PR97503]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=ef2d3ec325b1b720df5da20784eba46249af2294
> this broke sparc-sun-solaris2.11 bootstrap
>
> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c: In function 'bool cond_removal_in_popcount_clz_ctz_pattern(basic_block, basic_block, edge, edge, gimple*, tree, tree)':
> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c:1858:27: error: variable 'mode' set but not used [-Werror=unused-but-set-variable]
> 1858 | scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
> | ^~~~
>
>
> and doubtlessly several other targets that use the defaults.h definition of
>
> #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) 0
Ugh, seems many of those macros do not evaluate the first argument.
This got broken by the change to direct_internal_fn_supported_p, previously
it used mode also in the optab test.
2020-10-22 Jakub Jelinek <jakub@redhat.com>
* tree-ssa-phiopt.c (cond_removal_in_popcount_clz_ctz_pattern):
For CLZ and CTZ tests, use type temporary instead of mode.
---
gcc/tree-ssa-phiopt.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index c1e11916e..707a5882e 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1836,10 +1836,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
CASE_CFN_CLZ:
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
{
- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
- if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
- OPTIMIZE_FOR_BOTH)
- && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ tree type = TREE_TYPE (arg);
+ if (direct_internal_fn_supported_p (IFN_CLZ, type, OPTIMIZE_FOR_BOTH)
+ && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
+ val) == 2)
{
ifn = IFN_CLZ;
break;
@@ -1849,10 +1849,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
CASE_CFN_CTZ:
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
{
- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
- if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg),
- OPTIMIZE_FOR_BOTH)
- && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ tree type = TREE_TYPE (arg);
+ if (direct_internal_fn_supported_p (IFN_CTZ, type, OPTIMIZE_FOR_BOTH)
+ && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
+ val) == 2)
{
ifn = IFN_CTZ;
break;
--
2.27.0.windows.1

View File

@ -1,218 +0,0 @@
From 018523df11698dd0e2d42326c57bdf724a7a1aa5 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 5 Jan 2021 16:35:22 +0100
Subject: [PATCH 07/35] [Backport] phiopt: Optimize x < 0 ? ~y : y to (x >> 31)
^ y [PR96928]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=576714b309b330df0e80e34114bcdf0bba35e146
As requested in the PR, the one's complement abs can be done more
efficiently without cmov or branching.
Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize
it in ifcvt, on x86_64 with -m32 we generate in the end the exact same
code, but with -m64:
movl %edi, %eax
- notl %eax
- cmpl %edi, %eax
- cmovl %edi, %eax
+ sarl $31, %eax
+ xorl %edi, %eax
ret
2021-01-05 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/96928
* tree-ssa-phiopt.c (xor_replacement): New function.
(tree_ssa_phiopt_worker): Call it.
* gcc.dg/tree-ssa/pr96928.c: New test.
* gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1,
instead of scanning rtl dump for ifcvt message check assembly
for xor instruction.
---
gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 38 +++++++++
gcc/tree-ssa-phiopt.c | 108 ++++++++++++++++++++++++
2 files changed, 146 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
new file mode 100644
index 000000000..209135726
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
@@ -0,0 +1,38 @@
+/* PR tree-optimization/96928 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
+
+int
+foo (int a)
+{
+ return a < 0 ? ~a : a;
+}
+
+int
+bar (int a, int b)
+{
+ return a < 0 ? ~b : b;
+}
+
+unsigned
+baz (int a, unsigned int b)
+{
+ return a < 0 ? ~b : b;
+}
+
+unsigned
+qux (int a, unsigned int c)
+{
+ return a >= 0 ? ~c : c;
+}
+
+int
+corge (int a, int b)
+{
+ return a >= 0 ? b : ~b;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 707a5882e..b9cd07a60 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -61,6 +61,8 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
+static bool xor_replacement (basic_block, basic_block,
+ edge, edge, gimple *, tree, tree);
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
edge, edge, gimple *,
tree, tree);
@@ -349,6 +351,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
+ else if (!early_p
+ && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ cfgchanged = true;
else if (!early_p
&& cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
e2, phi, arg0,
@@ -2059,6 +2064,109 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
+/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
+
+static bool
+xor_replacement (basic_block cond_bb, basic_block middle_bb,
+ edge e0 ATTRIBUTE_UNUSED, edge e1,
+ gimple *phi, tree arg0, tree arg1)
+{
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
+ return false;
+
+ /* OTHER_BLOCK must have only one executable statement which must have the
+ form arg0 = ~arg1 or arg1 = ~arg0. */
+
+ gimple *assign = last_and_only_stmt (middle_bb);
+ /* If we did not find the proper one's complement assignment, then we cannot
+ optimize. */
+ if (assign == NULL)
+ return false;
+
+ /* If we got here, then we have found the only executable statement
+ in OTHER_BLOCK. If it is anything other than arg = ~arg1 or
+ arg1 = ~arg0, then we cannot optimize. */
+ if (!is_gimple_assign (assign))
+ return false;
+
+ if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
+ return false;
+
+ tree lhs = gimple_assign_lhs (assign);
+ tree rhs = gimple_assign_rhs1 (assign);
+
+ /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
+ if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
+ return false;
+
+ gimple *cond = last_stmt (cond_bb);
+ tree result = PHI_RESULT (phi);
+
+ /* Only relationals comparing arg[01] against zero are interesting. */
+ enum tree_code cond_code = gimple_cond_code (cond);
+ if (cond_code != LT_EXPR && cond_code != GE_EXPR)
+ return false;
+
+ /* Make sure the conditional is x OP 0. */
+ tree clhs = gimple_cond_lhs (cond);
+ if (TREE_CODE (clhs) != SSA_NAME
+ || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
+ || TYPE_UNSIGNED (TREE_TYPE (clhs))
+ || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
+ || !integer_zerop (gimple_cond_rhs (cond)))
+ return false;
+
+ /* We need to know which is the true edge and which is the false
+ edge so that we know if have xor or inverted xor. */
+ edge true_edge, false_edge;
+ extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
+
+ /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
+ will need to invert the result. Similarly for LT_EXPR if
+ the false edge goes to OTHER_BLOCK. */
+ edge e;
+ if (cond_code == GE_EXPR)
+ e = true_edge;
+ else
+ e = false_edge;
+
+ bool invert = e->dest == middle_bb;
+
+ result = duplicate_ssa_name (result, NULL);
+
+ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+
+ int prec = TYPE_PRECISION (TREE_TYPE (clhs));
+ gimple *new_stmt
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
+ build_int_cst (integer_type_node, prec - 1));
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+
+ if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs)))
+ {
+ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
+ NOP_EXPR, gimple_assign_lhs (new_stmt));
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+ }
+ lhs = gimple_assign_lhs (new_stmt);
+
+ if (invert)
+ {
+ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
+ BIT_NOT_EXPR, rhs);
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+ rhs = gimple_assign_lhs (new_stmt);
+ }
+
+ new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs);
+ gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
+
+ replace_phi_edge_with_variable (cond_bb, e1, phi, result);
+
+ /* Note that we optimized this PHI. */
+ return true;
+}
+
/* Auxiliary functions to determine the set of memory accesses which
can't trap because they are preceded by accesses to the same memory
portion. We do that for MEM_REFs, so we only need to track
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -1,253 +0,0 @@
From 96afd5b761a74e9eef40a2e843810c503c669de8 Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@gcc.gnu.org>
Date: Thu, 28 May 2020 00:31:15 +0200
Subject: [PATCH 09/35] [Backport] Add support for __builtin_bswap128
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=fe7ebef7fe4f9acb79658ed9db0749b07efc3105
This patch introduces a new builtin named __builtin_bswap128 on targets
where TImode is supported, i.e. 64-bit targets only in practice. The
implementation simply reuses the existing double word path in optab, so
no routine is added to libgcc (which means that you get two calls to
_bswapdi2 in the worst case).
gcc/ChangeLog:
* builtin-types.def (BT_UINT128): New primitive type.
(BT_FN_UINT128_UINT128): New function type.
* builtins.def (BUILT_IN_BSWAP128): New GCC builtin.
* doc/extend.texi (__builtin_bswap128): Document it.
* builtins.c (expand_builtin): Deal with BUILT_IN_BSWAP128.
(is_inexpensive_builtin): Likewise.
* fold-const-call.c (fold_const_call_ss): Likewise.
* fold-const.c (tree_call_nonnegative_warnv_p): Likewise.
* tree-ssa-ccp.c (evaluate_stmt): Likewise.
* tree-vect-stmts.c (vect_get_data_ptr_increment): Likewise.
(vectorizable_call): Likewise.
* optabs.c (expand_unop): Always use the double word path for it.
* tree-core.h (enum tree_index): Add TI_UINT128_TYPE.
* tree.h (uint128_type_node): New global type.
* tree.c (build_common_tree_nodes): Build it if TImode is supported.
gcc/testsuite/ChangeLog:
* gcc.dg/builtin-bswap-10.c: New test.
* gcc.dg/builtin-bswap-11.c: Likewise.
* gcc.dg/builtin-bswap-12.c: Likewise.
* gcc.target/i386/builtin-bswap-5.c: Likewise.
---
gcc/builtin-types.def | 4 ++++
gcc/builtins.c | 2 ++
gcc/builtins.def | 2 ++
gcc/doc/extend.texi | 10 ++++++++--
gcc/fold-const-call.c | 1 +
gcc/fold-const.c | 2 ++
gcc/optabs.c | 5 ++++-
gcc/tree-core.h | 1 +
gcc/tree-ssa-ccp.c | 1 +
gcc/tree-vect-stmts.c | 5 +++--
gcc/tree.c | 2 ++
gcc/tree.h | 1 +
12 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index c7aa691b2..c46b1bc5c 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -73,6 +73,9 @@ DEF_PRIMITIVE_TYPE (BT_UINT8, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (BT_UINT16, uint16_type_node)
DEF_PRIMITIVE_TYPE (BT_UINT32, uint32_type_node)
DEF_PRIMITIVE_TYPE (BT_UINT64, uint64_type_node)
+DEF_PRIMITIVE_TYPE (BT_UINT128, uint128_type_node
+ ? uint128_type_node
+ : error_mark_node)
DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1))
DEF_PRIMITIVE_TYPE (BT_UNWINDWORD, (*lang_hooks.types.type_for_mode)
(targetm.unwind_word_mode (), 1))
@@ -300,6 +303,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_UINT8_FLOAT, BT_UINT8, BT_FLOAT)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT16_UINT16, BT_UINT16, BT_UINT16)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_UINT32, BT_UINT32, BT_UINT32)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_UINT64, BT_UINT64, BT_UINT64)
+DEF_FUNCTION_TYPE_1 (BT_FN_UINT128_UINT128, BT_UINT128, BT_UINT128)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_FLOAT, BT_UINT64, BT_FLOAT)
DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT)
DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_PTR, BT_BOOL, BT_PTR)
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 10b6fd3bb..1b1c75cc1 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -8015,6 +8015,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
case BUILT_IN_BSWAP16:
case BUILT_IN_BSWAP32:
case BUILT_IN_BSWAP64:
+ case BUILT_IN_BSWAP128:
target = expand_builtin_bswap (target_mode, exp, target, subtarget);
if (target)
return target;
@@ -11732,6 +11733,7 @@ is_inexpensive_builtin (tree decl)
case BUILT_IN_BSWAP16:
case BUILT_IN_BSWAP32:
case BUILT_IN_BSWAP64:
+ case BUILT_IN_BSWAP128:
case BUILT_IN_CLZ:
case BUILT_IN_CLZIMAX:
case BUILT_IN_CLZL:
diff --git a/gcc/builtins.def b/gcc/builtins.def
index fa8b0641a..ee67ac15d 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -834,6 +834,8 @@ DEF_GCC_BUILTIN (BUILT_IN_APPLY_ARGS, "apply_args", BT_FN_PTR_VAR, ATTR_L
DEF_GCC_BUILTIN (BUILT_IN_BSWAP16, "bswap16", BT_FN_UINT16_UINT16, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_BSWAP128, "bswap128", BT_FN_UINT128_UINT128, ATTR_CONST_NOTHROW_LEAF_LIST)
+
DEF_EXT_LIB_BUILTIN (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST)
/* [trans-mem]: Adjust BUILT_IN_TM_CALLOC if BUILT_IN_CALLOC is changed. */
DEF_LIB_BUILTIN (BUILT_IN_CALLOC, "calloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_WARN_UNUSED_RESULT_SIZE_1_2_NOTHROW_LEAF_LIST)
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 9c7345959..a7bd772de 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -13727,14 +13727,20 @@ exactly 8 bits.
@deftypefn {Built-in Function} uint32_t __builtin_bswap32 (uint32_t x)
Similar to @code{__builtin_bswap16}, except the argument and return types
-are 32 bit.
+are 32-bit.
@end deftypefn
@deftypefn {Built-in Function} uint64_t __builtin_bswap64 (uint64_t x)
Similar to @code{__builtin_bswap32}, except the argument and return types
-are 64 bit.
+are 64-bit.
@end deftypefn
+@deftypefn {Built-in Function} uint128_t __builtin_bswap128 (uint128_t x)
+Similar to @code{__builtin_bswap64}, except the argument and return types
+are 128-bit. Only supported on targets when 128-bit types are supported.
+@end deftypefn
+
+
@deftypefn {Built-in Function} Pmode __builtin_extend_pointer (void * x)
On targets where the user visible pointer size is smaller than the size
of an actual hardware address this function returns the extended user
diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
index 6150d7ada..da01759d9 100644
--- a/gcc/fold-const-call.c
+++ b/gcc/fold-const-call.c
@@ -1032,6 +1032,7 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg,
case CFN_BUILT_IN_BSWAP16:
case CFN_BUILT_IN_BSWAP32:
case CFN_BUILT_IN_BSWAP64:
+ case CFN_BUILT_IN_BSWAP128:
*result = wide_int::from (arg, precision, TYPE_SIGN (arg_type)).bswap ();
return true;
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 6e635382f..78227a83d 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -13889,8 +13889,10 @@ tree_call_nonnegative_warnv_p (tree type, combined_fn fn, tree arg0, tree arg1,
CASE_CFN_POPCOUNT:
CASE_CFN_CLZ:
CASE_CFN_CLRSB:
+ case CFN_BUILT_IN_BSWAP16:
case CFN_BUILT_IN_BSWAP32:
case CFN_BUILT_IN_BSWAP64:
+ case CFN_BUILT_IN_BSWAP128:
/* Always true. */
return true;
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 049a18ceb..c3751fdf7 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -2896,8 +2896,11 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
if (temp)
return temp;
+ /* We do not provide a 128-bit bswap in libgcc so force the use of
+ a double bswap for 64-bit targets. */
if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
- && optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
+ && (UNITS_PER_WORD == 64
+ || optab_handler (unoptab, word_mode) != CODE_FOR_nothing))
{
temp = expand_doubleword_bswap (mode, op0, target);
if (temp)
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index eb01c2434..058e046aa 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -600,6 +600,7 @@ enum tree_index {
TI_UINT16_TYPE,
TI_UINT32_TYPE,
TI_UINT64_TYPE,
+ TI_UINT128_TYPE,
TI_VOID,
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 952fd9cd4..dcdf10369 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -2005,6 +2005,7 @@ evaluate_stmt (gimple *stmt)
case BUILT_IN_BSWAP16:
case BUILT_IN_BSWAP32:
case BUILT_IN_BSWAP64:
+ case BUILT_IN_BSWAP128:
val = get_value_for_expr (gimple_call_arg (stmt, 0), true);
if (val.lattice_val == UNDEFINED)
break;
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index b872cfc8d..4636b7ba2 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3085,7 +3085,7 @@ vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
return iv_step;
}
-/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
+/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
static bool
vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
@@ -3454,7 +3454,8 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
else if (modifier == NONE
&& (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
|| gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
- || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
+ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
+ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
vectype_in, cost_vec);
else
diff --git a/gcc/tree.c b/gcc/tree.c
index 84a440b35..3e6647ae0 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -10394,6 +10394,8 @@ build_common_tree_nodes (bool signed_char)
uint16_type_node = make_or_reuse_type (16, 1);
uint32_type_node = make_or_reuse_type (32, 1);
uint64_type_node = make_or_reuse_type (64, 1);
+ if (targetm.scalar_mode_supported_p (TImode))
+ uint128_type_node = make_or_reuse_type (128, 1);
/* Decimal float types. */
if (targetm.decimal_float_supported_p ())
diff --git a/gcc/tree.h b/gcc/tree.h
index 328a2d5d2..bddc6e528 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4035,6 +4035,7 @@ tree_strip_any_location_wrapper (tree exp)
#define uint16_type_node global_trees[TI_UINT16_TYPE]
#define uint32_type_node global_trees[TI_UINT32_TYPE]
#define uint64_type_node global_trees[TI_UINT64_TYPE]
+#define uint128_type_node global_trees[TI_UINT128_TYPE]
#define void_node global_trees[TI_VOID]
--
2.27.0.windows.1

View File

@ -1,113 +0,0 @@
From b9ac0cc69aab3c8d662d5b0a9ed43d971c13ac70 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Fri, 29 May 2020 09:25:53 +0200
Subject: [PATCH 10/35] [Backport] tree-optimization/95393 - fold MIN/MAX_EXPR
generated by phiopt
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=07852a81f58532c63a57631d7c3757fc6bcea17d
This makes sure to fold generated stmts so they do not survive
until RTL expansion and cause awkward code generation.
2020-05-29 Richard Biener <rguenther@suse.de>
PR tree-optimization/95393
* tree-ssa-phiopt.c (minmax_replacement): Use gimple_build
to build the min/max expression so we simplify cases like
MAX(0, s) immediately.
* gcc.dg/tree-ssa/phi-opt-21.c: New testcase.
* g++.dg/vect/slp-pr87105.cc: Adjust.
---
gcc/testsuite/g++.dg/vect/slp-pr87105.cc | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c | 15 +++++++++++++
gcc/tree-ssa-phiopt.c | 25 +++++++++++-----------
3 files changed, 29 insertions(+), 13 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
index 5518f319b..d07b1cd46 100644
--- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
+++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
@@ -102,4 +102,4 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept {
// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
// It's a bit awkward to detect that all stores were vectorized but the
// following more or less does the trick
-// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
+// { dg-final { scan-tree-dump "vect_\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
new file mode 100644
index 000000000..9f3d56957
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt4-details" } */
+
+int f(unsigned s)
+{
+ int i;
+ for (i = 0; i < s; ++i)
+ ;
+
+ return i;
+}
+
+/* { dg-final { scan-tree-dump "converted to straightline code" "phiopt4" } } */
+/* Make sure we fold the detected MAX<s, 0>. */
+/* { dg-final { scan-tree-dump-not "MAX" "phiopt4" } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index fca32222f..269eda21c 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "case-cfn-macros.h"
#include "tree-eh.h"
+#include "gimple-fold.h"
#include "internal-fn.h"
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
@@ -1414,7 +1415,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
{
tree result, type, rhs;
gcond *cond;
- gassign *new_stmt;
edge true_edge, false_edge;
enum tree_code cmp, minmax, ass_code;
tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
@@ -1738,19 +1738,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
gsi_move_before (&gsi_from, &gsi);
}
- /* Create an SSA var to hold the min/max result. If we're the only
- things setting the target PHI, then we can clone the PHI
- variable. Otherwise we must create a new one. */
- result = PHI_RESULT (phi);
- if (EDGE_COUNT (gimple_bb (phi)->preds) == 2)
- result = duplicate_ssa_name (result, NULL);
- else
- result = make_ssa_name (TREE_TYPE (result));
-
/* Emit the statement to compute min/max. */
- new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
+ gimple_seq stmts = NULL;
+ tree phi_result = PHI_RESULT (phi);
+ result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1);
+ /* Duplicate range info if we're the only things setting the target PHI. */
+ if (!gimple_seq_empty_p (stmts)
+ && EDGE_COUNT (gimple_bb (phi)->preds) == 2
+ && !POINTER_TYPE_P (TREE_TYPE (phi_result))
+ && SSA_NAME_RANGE_INFO (phi_result))
+ duplicate_ssa_name_range_info (result, SSA_NAME_RANGE_TYPE (phi_result),
+ SSA_NAME_RANGE_INFO (phi_result));
+
gsi = gsi_last_bb (cond_bb);
- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
+ gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
replace_phi_edge_with_variable (cond_bb, e1, phi, result);
--
2.27.0.windows.1

View File

@ -1,91 +0,0 @@
From 9f3a8c600abe16f172b36d8113862e8f7aea940c Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Sun, 16 May 2021 13:07:06 -0700
Subject: [PATCH 11/35] [Backport] Add a couple of A?CST1:CST2 match and
simplify optimizations
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b6bdd7a4cb41ee057f2d064fffcb00f23ce6b497
Instead of some of the more manual optimizations inside phi-opt,
it would be good idea to do a lot of the heavy lifting inside match
and simplify instead. In the process, this moves the three simple
A?CST1:CST2 (where CST1 or CST2 is zero) simplifications.
OK? Boostrapped and tested on x86_64-linux-gnu with no regressions.
Differences from V1:
* Use bit_xor 1 instead of bit_not to fix the problem with boolean types
which are not 1 bit precision.
Thanks,
Andrew Pinski
gcc:
* match.pd (A?CST1:CST2): Add simplifcations for A?0:+-1, A?+-1:0,
A?POW2:0 and A?0:POW2.
---
gcc/match.pd | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/gcc/match.pd b/gcc/match.pd
index 660d5c268..032830b0d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3334,6 +3334,54 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (cst1 && cst2)
(vec_cond @0 { cst1; } { cst2; })))))
+/* A few simplifications of "a ? CST1 : CST2". */
+/* NOTE: Only do this on gimple as the if-chain-to-switch
+ optimization depends on the gimple to have if statements in it. */
+#if GIMPLE
+(simplify
+ (cond @0 INTEGER_CST@1 INTEGER_CST@2)
+ (switch
+ (if (integer_zerop (@2))
+ (switch
+ /* a ? 1 : 0 -> a if 0 and 1 are integral types. */
+ (if (integer_onep (@1))
+ (convert (convert:boolean_type_node @0)))
+ /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
+ (with {
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
+ }
+ (lshift (convert (convert:boolean_type_node @0)) { shift; })))
+ /* a ? -1 : 0 -> -a. No need to check the TYPE_PRECISION not being 1
+ here as the powerof2cst case above will handle that case correctly. */
+ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1))
+ (negate (convert (convert:boolean_type_node @0))))))
+ (if (integer_zerop (@1))
+ (with {
+ tree booltrue = constant_boolean_node (true, boolean_type_node);
+ }
+ (switch
+ /* a ? 0 : 1 -> !a. */
+ (if (integer_onep (@2))
+ (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))
+ /* a ? powerof2cst : 0 -> (!a) << (log2(powerof2cst)) */
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2))
+ (with {
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
+ }
+ (lshift (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))
+ { shift; })))
+ /* a ? -1 : 0 -> -(!a). No need to check the TYPE_PRECISION not being 1
+ here as the powerof2cst case above will handle that case correctly. */
+ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2))
+ (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))))
+ )
+ )
+ )
+ )
+)
+#endif
+
/* Simplification moved from fold_cond_expr_with_comparison. It may also
be extended. */
/* This pattern implements two kinds simplification:
--
2.27.0.windows.1

View File

@ -1,155 +0,0 @@
From 4352b952ba24c413697fcfc191d06165a8a31ced Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Sat, 22 May 2021 19:49:50 +0000
Subject: [PATCH 12/35] [Backport] Optimize x < 0 ? ~y : y to (x >> 31) ^ y in
match.pd
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=1fd76b24306ed4df4cf9e797d900699ed59ce7f7
This copies the optimization that is done in phiopt for
"x < 0 ? ~y : y to (x >> 31) ^ y" into match.pd. The code
for phiopt is kept around until phiopt uses match.pd (which
I am working towards).
Note the original testcase is now optimized early on and I added a
new testcase to optimize during phiopt.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
Thanks,
Andrew Pinski
Differences from v1:
V2: Add check for integeral type to make sure vector types are not done.
gcc:
* match.pd (x < 0 ? ~y : y): New patterns.
gcc/testsuite:
* gcc.dg/tree-ssa/pr96928.c: Update test for slightly different IR.
* gcc.dg/tree-ssa/pr96928-1.c: New testcase.
---
gcc/match.pd | 32 +++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c | 48 +++++++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 7 +++-
3 files changed, 85 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 032830b0d..5899eea95 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4390,6 +4390,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(cmp (bit_and@2 @0 integer_pow2p@1) @1)
(icmp @2 { build_zero_cst (TREE_TYPE (@0)); })))
+(for cmp (ge lt)
+/* x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
+/* x >= 0 ? ~y : y into ~((x >> (prec-1)) ^ y). */
+ (simplify
+ (cond (cmp @0 integer_zerop) (bit_not @1) @1)
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
+ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type))
+ (with
+ {
+ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1);
+ }
+ (if (cmp == LT_EXPR)
+ (bit_xor (convert (rshift @0 {shifter;})) @1)
+ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1))))))
+/* x < 0 ? y : ~y into ~((x >> (prec-1)) ^ y). */
+/* x >= 0 ? y : ~y into (x >> (prec-1)) ^ y. */
+ (simplify
+ (cond (cmp @0 integer_zerop) @1 (bit_not @1))
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
+ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type))
+ (with
+ {
+ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1);
+ }
+ (if (cmp == GE_EXPR)
+ (bit_xor (convert (rshift @0 {shifter;})) @1)
+ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1)))))))
+
/* If we have (A & C) != 0 ? D : 0 where C and D are powers of 2,
convert this into a shift followed by ANDing with D. */
(simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
new file mode 100644
index 000000000..a2770e5e8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
@@ -0,0 +1,48 @@
+/* PR tree-optimization/96928 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
+
+int
+foo (int a)
+{
+ if (a < 0)
+ return ~a;
+ return a;
+}
+
+int
+bar (int a, int b)
+{
+ if (a < 0)
+ return ~b;
+ return b;
+}
+
+unsigned
+baz (int a, unsigned int b)
+{
+ if (a < 0)
+ return ~b;
+ return b;
+}
+
+unsigned
+qux (int a, unsigned int c)
+{
+ if (a >= 0)
+ return ~c;
+ return c;
+}
+
+int
+corge (int a, int b)
+{
+ if (a >= 0)
+ return b;
+ return ~b;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
index 209135726..e8fd82fc2 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
@@ -1,8 +1,11 @@
/* PR tree-optimization/96928 */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-options "-O2 -fdump-tree-phiopt2 -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
-/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
+/* The following check is done at optimized because a ^ (~b) is rewritten as ~(a^b)
+ and in the case of match.pd optimizing these ?:, the ~ is moved out already
+ by the time we get to phiopt2. */
+/* { dg-final { scan-tree-dump-times "\\\^ c_\[0-9]*\\\(D\\\);" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
--
2.27.0.windows.1

View File

@ -1,249 +0,0 @@
From 406071e8c1838c824f06c35ef3cf9419aa543e6e Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Tue, 1 Jun 2021 01:05:09 +0000
Subject: [PATCH 13/35] [Backport] Replace conditional_replacement with match
and simplify
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=9f55df63154a39d67ef5b24def7044bf87300831
This is the first of series of patches to simplify phi-opt
to use match and simplify in many cases. This simplification
will more things to optimize.
This is what Richard requested in
https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571197.html
and I think it is the right thing to do too.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
gcc/ChangeLog:
PR tree-optimization/25290
* tree-ssa-phiopt.c (match_simplify_replacement):
New function.
(tree_ssa_phiopt_worker): Use match_simplify_replacement.
(two_value_replacement): Change the comment about
conditional_replacement.
(conditional_replacement): Delete.
---
gcc/tree-ssa-phiopt.c | 144 ++++++++++++------------------------------
1 file changed, 39 insertions(+), 105 deletions(-)
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 269eda21c..9fa6363b6 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -52,8 +52,8 @@ along with GCC; see the file COPYING3. If not see
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
tree, tree);
-static bool conditional_replacement (basic_block, basic_block,
- edge, edge, gphi *, tree, tree);
+static bool match_simplify_replacement (basic_block, basic_block,
+ edge, edge, gphi *, tree, tree);
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
gimple *);
static int value_replacement (basic_block, basic_block,
@@ -349,8 +349,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
- && conditional_replacement (bb, bb1, e1, e2, phi,
- arg0, arg1))
+ && match_simplify_replacement (bb, bb1, e1, e2, phi,
+ arg0, arg1))
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -662,7 +662,7 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
}
/* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to
- conditional_replacement. */
+ match_simplify_replacement. */
if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
&& (integer_zerop (arg0)
|| integer_zerop (arg1)
@@ -763,137 +763,71 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
-/* The function conditional_replacement does the main work of doing the
- conditional replacement. Return true if the replacement is done.
+/* The function match_simplify_replacement does the main work of doing the
+ replacement using match and simplify. Return true if the replacement is done.
Otherwise return false.
BB is the basic block where the replacement is going to be done on. ARG0
is argument 0 from PHI. Likewise for ARG1. */
static bool
-conditional_replacement (basic_block cond_bb, basic_block middle_bb,
- edge e0, edge e1, gphi *phi,
- tree arg0, tree arg1)
+match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
+ edge e0, edge e1, gphi *phi,
+ tree arg0, tree arg1)
{
- tree result;
gimple *stmt;
- gassign *new_stmt;
tree cond;
gimple_stmt_iterator gsi;
edge true_edge, false_edge;
- tree new_var, new_var2;
- bool neg = false;
- int shift = 0;
- tree nonzero_arg;
-
- /* FIXME: Gimplification of complex type is too hard for now. */
- /* We aren't prepared to handle vectors either (and it is a question
- if it would be worthwhile anyway). */
- if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
- || POINTER_TYPE_P (TREE_TYPE (arg0)))
- || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
- || POINTER_TYPE_P (TREE_TYPE (arg1))))
- return false;
+ gimple_seq seq = NULL;
+ tree result;
- /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
- 0 and (1 << cst), then convert it to the conditional. */
- if (integer_zerop (arg0))
- nonzero_arg = arg1;
- else if (integer_zerop (arg1))
- nonzero_arg = arg0;
- else
- return false;
- if (integer_pow2p (nonzero_arg))
- {
- shift = tree_log2 (nonzero_arg);
- if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
- return false;
- }
- else if (integer_all_onesp (nonzero_arg))
- neg = true;
- else
+ if (!empty_block_p (middle_bb))
return false;
- if (!empty_block_p (middle_bb))
+ /* Special case A ? B : B as this will always simplify to B. */
+ if (operand_equal_for_phi_arg_p (arg0, arg1))
return false;
- /* At this point we know we have a GIMPLE_COND with two successors.
+ /* At this point we know we have a GIMPLE_COND with two successors.
One successor is BB, the other successor is an empty block which
falls through into BB.
- There is a single PHI node at the join point (BB) and its arguments
- are constants (0, 1) or (0, -1) or (0, (1 << shift)).
-
- So, given the condition COND, and the two PHI arguments, we can
- rewrite this PHI into non-branching code:
+ There is a single PHI node at the join point (BB).
- dest = (COND) or dest = COND' or dest = (COND) << shift
-
- We use the condition as-is if the argument associated with the
- true edge has the value one or the argument associated with the
- false edge as the value zero. Note that those conditions are not
- the same since only one of the outgoing edges from the GIMPLE_COND
- will directly reach BB and thus be associated with an argument. */
+ So, given the condition COND, and the two PHI arguments, match and simplify
+ can happen on (COND) ? arg0 : arg1. */
stmt = last_stmt (cond_bb);
- result = PHI_RESULT (phi);
/* To handle special cases like floating point comparison, it is easier and
less error-prone to build a tree and gimplify it on the fly though it is
- less efficient. */
- cond = fold_build2_loc (gimple_location (stmt),
- gimple_cond_code (stmt), boolean_type_node,
- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
+ less efficient.
+ Don't use fold_build2 here as that might create (bool)a instead of just
+ "a != 0". */
+ cond = build2_loc (gimple_location (stmt),
+ gimple_cond_code (stmt), boolean_type_node,
+ gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
/* We need to know which is the true edge and which is the false
edge so that we know when to invert the condition below. */
extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
- if ((e0 == true_edge && integer_zerop (arg0))
- || (e0 == false_edge && !integer_zerop (arg0))
- || (e1 == true_edge && integer_zerop (arg1))
- || (e1 == false_edge && !integer_zerop (arg1)))
- cond = fold_build1_loc (gimple_location (stmt),
- TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
-
- if (neg)
- {
- cond = fold_convert_loc (gimple_location (stmt),
- TREE_TYPE (result), cond);
- cond = fold_build1_loc (gimple_location (stmt),
- NEGATE_EXPR, TREE_TYPE (cond), cond);
- }
- else if (shift)
- {
- cond = fold_convert_loc (gimple_location (stmt),
- TREE_TYPE (result), cond);
- cond = fold_build2_loc (gimple_location (stmt),
- LSHIFT_EXPR, TREE_TYPE (cond), cond,
- build_int_cst (integer_type_node, shift));
- }
+ if (e1 == true_edge || e0 == false_edge)
+ std::swap (arg0, arg1);
- /* Insert our new statements at the end of conditional block before the
- COND_STMT. */
- gsi = gsi_for_stmt (stmt);
- new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
- GSI_SAME_STMT);
+ tree type = TREE_TYPE (gimple_phi_result (phi));
+ result = gimple_simplify (COND_EXPR, type,
+ cond,
+ arg0, arg1,
+ &seq, NULL);
+ if (!result)
+ return false;
- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
- {
- location_t locus_0, locus_1;
+ gsi = gsi_last_bb (cond_bb);
- new_var2 = make_ssa_name (TREE_TYPE (result));
- new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
- new_var = new_var2;
-
- /* Set the locus to the first argument, unless is doesn't have one. */
- locus_0 = gimple_phi_arg_location (phi, 0);
- locus_1 = gimple_phi_arg_location (phi, 1);
- if (locus_0 == UNKNOWN_LOCATION)
- locus_0 = locus_1;
- gimple_set_location (new_stmt, locus_0);
- }
+ if (seq)
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
- replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);
+ replace_phi_edge_with_variable (cond_bb, e1, phi, result);
/* Note that we optimized this PHI. */
return true;
@@ -3905,7 +3839,7 @@ gate_hoist_loads (void)
Conditional Replacement
-----------------------
- This transformation, implemented in conditional_replacement,
+ This transformation, implemented in match_simplify_replacement,
replaces
bb0:
--
2.27.0.windows.1

View File

@ -1,174 +0,0 @@
From fabbe6ccc798d3cb097c6371b4d53cd6dfde6c7c Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Fri, 11 Jun 2021 13:21:34 -0700
Subject: [PATCH 14/35] [Backport] Allow match-and-simplified phiopt to run in
early phiopt
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cd48e550d1dc58307ab1c0ab490745673f748ccc
To move a few things more to match-and-simplify from phiopt,
we need to allow match_simplify_replacement to run in early
phiopt. To do this we add a replacement for gimple_simplify
that is explictly for phiopt.
OK? Bootstrapped and tested on x86_64-linux-gnu with no
regressions.
gcc/ChangeLog:
* tree-ssa-phiopt.c (match_simplify_replacement):
Add early_p argument. Call gimple_simplify_phiopt
instead of gimple_simplify.
(tree_ssa_phiopt_worker): Update call to
match_simplify_replacement and allow unconditionally.
(phiopt_early_allow): New function.
(gimple_simplify_phiopt): New function.
---
gcc/tree-ssa-phiopt.c | 89 ++++++++++++++++++++++++++++++++++---------
1 file changed, 70 insertions(+), 19 deletions(-)
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 9fa6363b6..92aeb8415 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -48,12 +48,13 @@ along with GCC; see the file COPYING3. If not see
#include "tree-eh.h"
#include "gimple-fold.h"
#include "internal-fn.h"
+#include "gimple-match.h"
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
tree, tree);
static bool match_simplify_replacement (basic_block, basic_block,
- edge, edge, gphi *, tree, tree);
+ edge, edge, gphi *, tree, tree, bool);
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
gimple *);
static int value_replacement (basic_block, basic_block,
@@ -348,9 +349,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
/* Do the replacement of conditional if it can be done. */
if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (!early_p
- && match_simplify_replacement (bb, bb1, e1, e2, phi,
- arg0, arg1))
+ else if (match_simplify_replacement (bb, bb1, e1, e2, phi,
+ arg0, arg1,
+ early_p))
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -763,6 +764,67 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
+/* Return TRUE if CODE should be allowed during early phiopt.
+ Currently this is to allow MIN/MAX and ABS/NEGATE. */
+static bool
+phiopt_early_allow (enum tree_code code)
+{
+ switch (code)
+ {
+ case MIN_EXPR:
+ case MAX_EXPR:
+ case ABS_EXPR:
+ case ABSU_EXPR:
+ case NEGATE_EXPR:
+ case SSA_NAME:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* gimple_simplify_phiopt is like gimple_simplify but designed for PHIOPT.
+ Return NULL if nothing can be simplified or the resulting simplified value
+ with parts pushed if EARLY_P was true. Also rejects non allowed tree code
+ if EARLY_P is set.
+ Takes the comparison from COMP_STMT and two args, ARG0 and ARG1 and tries
+ to simplify CMP ? ARG0 : ARG1. */
+static tree
+gimple_simplify_phiopt (bool early_p, tree type, gimple *comp_stmt,
+ tree arg0, tree arg1,
+ gimple_seq *seq)
+{
+ tree result;
+ enum tree_code comp_code = gimple_cond_code (comp_stmt);
+ location_t loc = gimple_location (comp_stmt);
+ tree cmp0 = gimple_cond_lhs (comp_stmt);
+ tree cmp1 = gimple_cond_rhs (comp_stmt);
+ /* To handle special cases like floating point comparison, it is easier and
+ less error-prone to build a tree and gimplify it on the fly though it is
+ less efficient.
+ Don't use fold_build2 here as that might create (bool)a instead of just
+ "a != 0". */
+ tree cond = build2_loc (loc, comp_code, boolean_type_node,
+ cmp0, cmp1);
+ gimple_match_op op (gimple_match_cond::UNCOND,
+ COND_EXPR, type, cond, arg0, arg1);
+
+ if (op.resimplify (early_p ? NULL : seq, follow_all_ssa_edges))
+ {
+ /* Early we want only to allow some generated tree codes. */
+ if (!early_p
+ || op.code.is_tree_code ()
+ || phiopt_early_allow ((tree_code)op.code))
+ {
+ result = maybe_push_res_to_seq (&op, seq);
+ if (result)
+ return result;
+ }
+ }
+
+ return NULL;
+}
+
/* The function match_simplify_replacement does the main work of doing the
replacement using match and simplify. Return true if the replacement is done.
Otherwise return false.
@@ -772,10 +834,9 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
static bool
match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
edge e0, edge e1, gphi *phi,
- tree arg0, tree arg1)
+ tree arg0, tree arg1, bool early_p)
{
gimple *stmt;
- tree cond;
gimple_stmt_iterator gsi;
edge true_edge, false_edge;
gimple_seq seq = NULL;
@@ -799,15 +860,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
stmt = last_stmt (cond_bb);
- /* To handle special cases like floating point comparison, it is easier and
- less error-prone to build a tree and gimplify it on the fly though it is
- less efficient.
- Don't use fold_build2 here as that might create (bool)a instead of just
- "a != 0". */
- cond = build2_loc (gimple_location (stmt),
- gimple_cond_code (stmt), boolean_type_node,
- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
-
/* We need to know which is the true edge and which is the false
edge so that we know when to invert the condition below. */
extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
@@ -815,10 +867,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
std::swap (arg0, arg1);
tree type = TREE_TYPE (gimple_phi_result (phi));
- result = gimple_simplify (COND_EXPR, type,
- cond,
- arg0, arg1,
- &seq, NULL);
+ result = gimple_simplify_phiopt (early_p, type, stmt,
+ arg0, arg1,
+ &seq);
if (!result)
return false;
--
2.27.0.windows.1

View File

@ -1,259 +0,0 @@
From d212d216be0752370dbe7bc63bd75b3a9249e0b5 Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Tue, 1 Jun 2021 06:48:05 +0000
Subject: [PATCH 15/35] [Backport] Improve match_simplify_replacement in
phi-opt
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=c4574d23cb07340918793a5a98ae7bb2988b3791
This improves match_simplify_replace in phi-opt to handle the
case where there is one cheap (non-call) preparation statement in the
middle basic block similar to xor_replacement and others.
This allows to remove xor_replacement which it does too.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
Thanks,
Andrew Pinski
Changes since v1:
v3 - Just minor changes to using gimple_assign_lhs
instead of gimple_lhs and fixing a comment.
v2 - change the check on the preparation statement to
allow only assignments and no calls and only assignments
that feed into the phi.
gcc/ChangeLog:
PR tree-optimization/25290
* tree-ssa-phiopt.c (xor_replacement): Delete.
(tree_ssa_phiopt_worker): Delete use of xor_replacement.
(match_simplify_replacement): Allow one cheap preparation
statement that can be moved to before the if.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr96928-1.c: Fix testcase for now that ~
happens on the outside of the bit_xor.
---
gcc/tree-ssa-phiopt.c | 164 ++++++++++++++----------------------------
1 file changed, 52 insertions(+), 112 deletions(-)
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 92aeb8415..51a2d3684 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
+#include "tree-ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "gimple-pretty-print.h"
@@ -63,8 +64,6 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
-static bool xor_replacement (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
static bool spaceship_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
@@ -355,9 +354,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (!early_p
- && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
- cfgchanged = true;
else if (!early_p
&& cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
e2, phi, arg0,
@@ -841,14 +837,51 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
edge true_edge, false_edge;
gimple_seq seq = NULL;
tree result;
-
- if (!empty_block_p (middle_bb))
- return false;
+ gimple *stmt_to_move = NULL;
/* Special case A ? B : B as this will always simplify to B. */
if (operand_equal_for_phi_arg_p (arg0, arg1))
return false;
+ /* If the basic block only has a cheap preparation statement,
+ allow it and move it once the transformation is done. */
+ if (!empty_block_p (middle_bb))
+ {
+ stmt_to_move = last_and_only_stmt (middle_bb);
+ if (!stmt_to_move)
+ return false;
+
+ if (gimple_vuse (stmt_to_move))
+ return false;
+
+ if (gimple_could_trap_p (stmt_to_move)
+ || gimple_has_side_effects (stmt_to_move))
+ return false;
+
+ if (gimple_uses_undefined_value_p (stmt_to_move))
+ return false;
+
+ /* Allow assignments and not no calls.
+ As const calls don't match any of the above, yet they could
+ still have some side-effects - they could contain
+ gimple_could_trap_p statements, like floating point
+ exceptions or integer division by zero. See PR70586.
+ FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p
+ should handle this. */
+ if (!is_gimple_assign (stmt_to_move))
+ return false;
+
+ tree lhs = gimple_assign_lhs (stmt_to_move);
+ gimple *use_stmt;
+ use_operand_p use_p;
+
+ /* Allow only a statement which feeds into the phi. */
+ if (!lhs || TREE_CODE (lhs) != SSA_NAME
+ || !single_imm_use (lhs, &use_p, &use_stmt)
+ || use_stmt != phi)
+ return false;
+ }
+
/* At this point we know we have a GIMPLE_COND with two successors.
One successor is BB, the other successor is an empty block which
falls through into BB.
@@ -874,7 +907,17 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
return false;
gsi = gsi_last_bb (cond_bb);
-
+ if (stmt_to_move)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "statement un-sinked:\n");
+ print_gimple_stmt (dump_file, stmt_to_move, 0,
+ TDF_VOPS|TDF_MEMSYMS);
+ }
+ gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt_to_move);
+ gsi_move_before (&gsi1, &gsi);
+ }
if (seq)
gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
@@ -2474,109 +2517,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
-/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
-
-static bool
-xor_replacement (basic_block cond_bb, basic_block middle_bb,
- edge e0 ATTRIBUTE_UNUSED, edge e1,
- gimple *phi, tree arg0, tree arg1)
-{
- if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
- return false;
-
- /* OTHER_BLOCK must have only one executable statement which must have the
- form arg0 = ~arg1 or arg1 = ~arg0. */
-
- gimple *assign = last_and_only_stmt (middle_bb);
- /* If we did not find the proper one's complement assignment, then we cannot
- optimize. */
- if (assign == NULL)
- return false;
-
- /* If we got here, then we have found the only executable statement
- in OTHER_BLOCK. If it is anything other than arg = ~arg1 or
- arg1 = ~arg0, then we cannot optimize. */
- if (!is_gimple_assign (assign))
- return false;
-
- if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
- return false;
-
- tree lhs = gimple_assign_lhs (assign);
- tree rhs = gimple_assign_rhs1 (assign);
-
- /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
- if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
- return false;
-
- gimple *cond = last_stmt (cond_bb);
- tree result = PHI_RESULT (phi);
-
- /* Only relationals comparing arg[01] against zero are interesting. */
- enum tree_code cond_code = gimple_cond_code (cond);
- if (cond_code != LT_EXPR && cond_code != GE_EXPR)
- return false;
-
- /* Make sure the conditional is x OP 0. */
- tree clhs = gimple_cond_lhs (cond);
- if (TREE_CODE (clhs) != SSA_NAME
- || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
- || TYPE_UNSIGNED (TREE_TYPE (clhs))
- || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
- || !integer_zerop (gimple_cond_rhs (cond)))
- return false;
-
- /* We need to know which is the true edge and which is the false
- edge so that we know if have xor or inverted xor. */
- edge true_edge, false_edge;
- extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
-
- /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
- will need to invert the result. Similarly for LT_EXPR if
- the false edge goes to OTHER_BLOCK. */
- edge e;
- if (cond_code == GE_EXPR)
- e = true_edge;
- else
- e = false_edge;
-
- bool invert = e->dest == middle_bb;
-
- result = duplicate_ssa_name (result, NULL);
-
- gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
-
- int prec = TYPE_PRECISION (TREE_TYPE (clhs));
- gimple *new_stmt
- = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
- build_int_cst (integer_type_node, prec - 1));
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
-
- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs)))
- {
- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
- NOP_EXPR, gimple_assign_lhs (new_stmt));
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
- }
- lhs = gimple_assign_lhs (new_stmt);
-
- if (invert)
- {
- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
- BIT_NOT_EXPR, rhs);
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
- rhs = gimple_assign_lhs (new_stmt);
- }
-
- new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs);
- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
-
- replace_phi_edge_with_variable (cond_bb, e1, phi, result);
-
- /* Note that we optimized this PHI. */
- return true;
-}
-
/* Auxiliary functions to determine the set of memory accesses which
can't trap because they are preceded by accesses to the same memory
portion. We do that for MEM_REFs, so we only need to track
--
2.27.0.windows.1

View File

@ -1,103 +0,0 @@
From 0d55d24aa4e47c40f74e0281d023089cfaafcf74 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 6 May 2021 14:05:06 +0200
Subject: [PATCH 16/35] [Backport] phiopt: Use gphi *phi instead of gimple *phi
some more
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cfd65e8d5299a7cf7d2ecd92b0e24ea4cfb697d9
Various functions in phiopt are also called with a gphi * but use
gimple * argument for it.
2021-05-06 Jakub Jelinek <jakub@redhat.com>
* tree-ssa-phiopt.c (value_replacement, minmax_replacement,
abs_replacement, xor_replacement,
cond_removal_in_popcount_clz_ctz_pattern,
replace_phi_edge_with_variable): Change type of phi argument from
gimple * to gphi *.
---
gcc/tree-ssa-phiopt.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 51a2d3684..045a7b1b8 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -59,21 +59,21 @@ static bool match_simplify_replacement (basic_block, basic_block,
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
gimple *);
static int value_replacement (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
+ edge, edge, gphi *, tree, tree);
static bool minmax_replacement (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
+ edge, edge, gphi *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
+ edge, edge, gphi *, tree, tree);
static bool spaceship_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
- edge, edge, gimple *,
+ edge, edge, gphi *,
tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
static hash_set<tree> * get_non_trapping ();
-static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
+static void replace_phi_edge_with_variable (basic_block, edge, gphi *, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
basic_block, basic_block);
static bool do_phiopt_pattern (basic_block, basic_block, basic_block);
@@ -389,7 +389,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
static void
replace_phi_edge_with_variable (basic_block cond_block,
- edge e, gimple *phi, tree new_tree)
+ edge e, gphi *phi, tree new_tree)
{
basic_block bb = gimple_bb (phi);
basic_block block_to_remove;
@@ -1129,8 +1129,7 @@ absorbing_element_p (tree_code code, tree arg, bool right, tree rval)
static int
value_replacement (basic_block cond_bb, basic_block middle_bb,
- edge e0, edge e1, gimple *phi,
- tree arg0, tree arg1)
+ edge e0, edge e1, gphi *phi, tree arg0, tree arg1)
{
gimple_stmt_iterator gsi;
gimple *cond;
@@ -1438,8 +1437,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
static bool
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
- edge e0, edge e1, gimple *phi,
- tree arg0, tree arg1)
+ edge e0, edge e1, gphi *phi, tree arg0, tree arg1)
{
tree result, type, rhs;
gcond *cond;
@@ -2240,7 +2238,7 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
static bool
cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
basic_block middle_bb,
- edge e1, edge e2, gimple *phi,
+ edge e1, edge e2, gphi *phi,
tree arg0, tree arg1)
{
gimple *cond;
@@ -2398,7 +2396,7 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
edge e0 ATTRIBUTE_UNUSED, edge e1,
- gimple *phi, tree arg0, tree arg1)
+ gphi *phi, tree arg0, tree arg1)
{
tree result;
gassign *new_stmt;
--
2.27.0.windows.1

View File

@ -1,212 +0,0 @@
From 33dc778a34d7b93978efe922bb1b4583d8e6c4bb Mon Sep 17 00:00:00 2001
From: Roger Sayle <roger@nextmovesoftware.com>
Date: Mon, 2 Aug 2021 13:27:53 +0100
Subject: [PATCH 17/35] [Backport] Optimize x ? bswap(x) : 0 in tree-ssa-phiopt
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f9fcf754825a1e01033336f84c18690aaa971a6f
Many thanks again to Jakub Jelinek for a speedy fix for PR 101642.
Interestingly, that test case "bswap16(x) ? : x" also reveals a
missed optimization opportunity. The resulting "x ? bswap(x) : 0"
can be further simplified to just bswap(x).
Conveniently, tree-ssa-phiopt.c already recognizes/optimizes the
related "x ? popcount(x) : 0", so this patch simply makes that
transformation make general, additionally handling bswap, parity,
ffs and clrsb. All of the required infrastructure is already
present thanks to Jakub previously adding support for clz/ctz.
To reflect this generalization, the name of the function is changed
from cond_removal_in_popcount_clz_ctz_pattern to the hopefully
equally descriptive cond_removal_in_builtin_zero_pattern.
2021-08-02 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
* tree-ssa-phiopt.c (cond_removal_in_builtin_zero_pattern):
Renamed from cond_removal_in_popcount_clz_ctz_pattern.
Add support for BSWAP, FFS, PARITY and CLRSB builtins.
(tree_ssa_phiop_worker): Update call to function above.
gcc/testsuite/ChangeLog
* gcc.dg/tree-ssa/phi-opt-25.c: New test case.
---
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c | 83 ++++++++++++++++++++++
gcc/tree-ssa-phiopt.c | 37 +++++++---
2 files changed, 109 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
new file mode 100644
index 000000000..c52c92e1d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
@@ -0,0 +1,83 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned short test_bswap16(unsigned short x)
+{
+ return x ? __builtin_bswap16(x) : 0;
+}
+
+unsigned int test_bswap32(unsigned int x)
+{
+ return x ? __builtin_bswap32(x) : 0;
+}
+
+unsigned long long test_bswap64(unsigned long long x)
+{
+ return x ? __builtin_bswap64(x) : 0;
+}
+
+int test_clrsb(int x)
+{
+ return x ? __builtin_clrsb(x) : (__SIZEOF_INT__*8-1);
+}
+
+int test_clrsbl(long x)
+{
+ return x ? __builtin_clrsbl(x) : (__SIZEOF_LONG__*8-1);
+}
+
+int test_clrsbll(long long x)
+{
+ return x ? __builtin_clrsbll(x) : (__SIZEOF_LONG_LONG__*8-1);
+}
+
+#if 0
+/* BUILT_IN_FFS is transformed by match.pd */
+int test_ffs(unsigned int x)
+{
+ return x ? __builtin_ffs(x) : 0;
+}
+
+int test_ffsl(unsigned long x)
+{
+ return x ? __builtin_ffsl(x) : 0;
+}
+
+int test_ffsll(unsigned long long x)
+{
+ return x ? __builtin_ffsll(x) : 0;
+}
+#endif
+
+int test_parity(int x)
+{
+ return x ? __builtin_parity(x) : 0;
+}
+
+int test_parityl(long x)
+{
+ return x ? __builtin_parityl(x) : 0;
+}
+
+int test_parityll(long long x)
+{
+ return x ? __builtin_parityll(x) : 0;
+}
+
+int test_popcount(int x)
+{
+ return x ? __builtin_popcount(x) : 0;
+}
+
+int test_popcountl(long x)
+{
+ return x ? __builtin_popcountl(x) : 0;
+}
+
+int test_popcountll(long long x)
+{
+ return x ? __builtin_popcountll(x) : 0;
+}
+
+/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
+
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 045a7b1b8..21ac08145 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -66,9 +66,9 @@ static bool abs_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
static bool spaceship_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
-static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
- edge, edge, gphi *,
- tree, tree);
+static bool cond_removal_in_builtin_zero_pattern (basic_block, basic_block,
+ edge, edge, gphi *,
+ tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -355,9 +355,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
- && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
- e2, phi, arg0,
- arg1))
+ && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
+ phi, arg0, arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -2204,7 +2203,8 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
-/* Convert
+/* Optimize x ? __builtin_fun (x) : C, where C is __builtin_fun (0).
+ Convert
<bb 2>
if (b_4(D) != 0)
@@ -2236,10 +2236,10 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
instead of 0 above it uses the value from that macro. */
static bool
-cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
- basic_block middle_bb,
- edge e1, edge e2, gphi *phi,
- tree arg0, tree arg1)
+cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
+ basic_block middle_bb,
+ edge e1, edge e2, gphi *phi,
+ tree arg0, tree arg1)
{
gimple *cond;
gimple_stmt_iterator gsi, gsi_from;
@@ -2287,6 +2287,12 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
int val = 0;
switch (cfn)
{
+ case CFN_BUILT_IN_BSWAP16:
+ case CFN_BUILT_IN_BSWAP32:
+ case CFN_BUILT_IN_BSWAP64:
+ case CFN_BUILT_IN_BSWAP128:
+ CASE_CFN_FFS:
+ CASE_CFN_PARITY:
CASE_CFN_POPCOUNT:
break;
CASE_CFN_CLZ:
@@ -2315,6 +2321,15 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
}
}
return false;
+ case BUILT_IN_CLRSB:
+ val = TYPE_PRECISION (integer_type_node) - 1;
+ break;
+ case BUILT_IN_CLRSBL:
+ val = TYPE_PRECISION (long_integer_type_node) - 1;
+ break;
+ case BUILT_IN_CLRSBLL:
+ val = TYPE_PRECISION (long_long_integer_type_node) - 1;
+ break;
default:
return false;
}
--
2.27.0.windows.1

View File

@ -1,251 +0,0 @@
From 77398954ce517aa011b7a254c7aa2858521b2093 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Mon, 15 Nov 2021 15:19:36 +0100
Subject: [PATCH 18/35] [Backport] tree-optimization/102880 - make PHI-OPT
recognize more CFGs
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f98f373dd822b35c52356b753d528924e9f89678
This allows extra edges into the middle BB for the PHI-OPT
transforms using replace_phi_edge_with_variable that do not
end up moving stmts from that middle BB. This avoids regressing
gcc.dg/tree-ssa/ssa-hoist-4.c with the actual fix for PR102880
where CFG cleanup has the choice to remove two forwarders and
picks "the wrong" leading to
if (a > b) /
/\ /
/ <BB>
/ |
# PHI <a, b>
rather than
if (a > b) |
/\ |
<BB> \ |
/ \ |
# PHI <a, b, b>
but it's relatively straight-forward to support extra edges
into the middle-BB in paths ending in replace_phi_edge_with_variable
and that do not require moving stmts. That's because we really
only want to remove the edge from the condition to the middle BB.
Of course actually doing that means updating dominators in non-trival
ways which is why I kept the original code for the single edge
case and simply defer to CFG cleanup by adjusting the condition for
the complicated case.
The testcase needs to be a GIMPLE one since it's quite unreliable
to produce the desired CFG.
2021-11-15 Richard Biener <rguenther@suse.de>
PR tree-optimization/102880
* tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Push
single_pred (bb1) condition to places that really need it.
(match_simplify_replacement): Likewise.
(value_replacement): Likewise.
(replace_phi_edge_with_variable): Deal with extra edges
into the middle BB.
* gcc.dg/tree-ssa/phi-opt-26.c: New testcase.
---
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 +++++++++
gcc/tree-ssa-phiopt.c | 73 +++++++++++++---------
2 files changed, 75 insertions(+), 29 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
new file mode 100644
index 000000000..21aa66e38
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */
+
+int __GIMPLE (ssa,startwith("phiopt"))
+foo (int a, int b, int flag)
+{
+ int res;
+
+ __BB(2):
+ if (flag_2(D) != 0)
+ goto __BB6;
+ else
+ goto __BB4;
+
+ __BB(4):
+ if (a_3(D) > b_4(D))
+ goto __BB7;
+ else
+ goto __BB6;
+
+ __BB(6):
+ goto __BB7;
+
+ __BB(7):
+ res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D));
+ return res_1;
+}
+
+/* We should be able to detect MAX despite the extra edge into
+ the middle BB. */
+/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 21ac08145..079d29e74 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -219,7 +219,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
/* If either bb1's succ or bb2 or bb2's succ is non NULL. */
if (EDGE_COUNT (bb1->succs) == 0
- || bb2 == NULL
|| EDGE_COUNT (bb2->succs) == 0)
continue;
@@ -279,14 +278,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|| (e1->flags & EDGE_FALLTHRU) == 0)
continue;
- /* Also make sure that bb1 only have one predecessor and that it
- is bb. */
- if (!single_pred_p (bb1)
- || single_pred (bb1) != bb)
- continue;
-
if (do_store_elim)
{
+ /* Also make sure that bb1 only have one predecessor and that it
+ is bb. */
+ if (!single_pred_p (bb1)
+ || single_pred (bb1) != bb)
+ continue;
+
/* bb1 is the middle block, bb2 the join block, bb the split block,
e1 the fallthrough edge from bb1 to bb2. We can't do the
optimization if the join block has more than two predecessors. */
@@ -331,10 +330,11 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
node. */
gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
- gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
- arg0, arg1,
- cond_stmt);
- if (newphi != NULL)
+ gphi *newphi;
+ if (single_pred_p (bb1)
+ && (newphi = factor_out_conditional_conversion (e1, e2, phi,
+ arg0, arg1,
+ cond_stmt)))
{
phi = newphi;
/* factor_out_conditional_conversion may create a new PHI in
@@ -355,12 +355,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
+ && single_pred_p (bb1)
&& cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
phi, arg0, arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ else if (single_pred_p (bb1)
+ && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
}
}
@@ -391,35 +393,41 @@ replace_phi_edge_with_variable (basic_block cond_block,
edge e, gphi *phi, tree new_tree)
{
basic_block bb = gimple_bb (phi);
- basic_block block_to_remove;
gimple_stmt_iterator gsi;
/* Change the PHI argument to new. */
SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
/* Remove the empty basic block. */
+ edge edge_to_remove;
if (EDGE_SUCC (cond_block, 0)->dest == bb)
+ edge_to_remove = EDGE_SUCC (cond_block, 1);
+ else
+ edge_to_remove = EDGE_SUCC (cond_block, 0);
+ if (EDGE_COUNT (edge_to_remove->dest->preds) == 1)
{
- EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
- EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
- EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();
-
- block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
+ e->flags |= EDGE_FALLTHRU;
+ e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
+ e->probability = profile_probability::always ();
+ delete_basic_block (edge_to_remove->dest);
+
+ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
+ gsi = gsi_last_bb (cond_block);
+ gsi_remove (&gsi, true);
}
else
{
- EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
- EDGE_SUCC (cond_block, 1)->flags
- &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
- EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();
-
- block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
+ /* If there are other edges into the middle block make
+ CFG cleanup deal with the edge removal to avoid
+ updating dominators here in a non-trivial way. */
+ gcond *cond = as_a <gcond *> (last_stmt (cond_block));
+ if (edge_to_remove->flags & EDGE_TRUE_VALUE)
+ gimple_cond_make_false (cond);
+ else
+ gimple_cond_make_true (cond);
}
- delete_basic_block (block_to_remove);
- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
- gsi = gsi_last_bb (cond_block);
- gsi_remove (&gsi, true);
+ statistics_counter_event (cfun, "Replace PHI with variable", 1);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
@@ -846,6 +854,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
allow it and move it once the transformation is done. */
if (!empty_block_p (middle_bb))
{
+ if (!single_pred_p (middle_bb))
+ return false;
+
stmt_to_move = last_and_only_stmt (middle_bb);
if (!stmt_to_move)
return false;
@@ -1225,6 +1236,11 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
}
else
{
+ if (!single_pred_p (middle_bb))
+ return 0;
+ statistics_counter_event (cfun, "Replace PHI with "
+ "variable/value_replacement", 1);
+
/* Replace the PHI arguments with arg. */
SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
@@ -1239,7 +1255,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
}
return 1;
}
-
}
/* Now optimize (x != 0) ? x + y : y to just x + y. */
--
2.27.0.windows.1

View File

@ -1,250 +0,0 @@
From a2f5e6f38fe7b5b32a252643b00dd2d7ab0e3fac Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 May 2020 09:01:10 +0100
Subject: [PATCH 19/35] [Backport] tree: Add vector_element_bits(_tree)
[PR94980 1/3]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d17a896da1e898928d337596d029f0ece0039d55
A lot of code that wants to know the number of bits in a vector
element gets that information from the element's TYPE_SIZE,
which is always equal to TYPE_SIZE_UNIT * BITS_PER_UNIT.
This doesn't work for SVE and AVX512-style packed boolean vectors,
where several elements can occupy a single byte.
This patch introduces a new pair of helpers for getting the true
(possibly sub-byte) size. I made a token attempt to convert obvious
element size calculations, but I'm sure I missed some.
2020-05-12 Richard Sandiford <richard.sandiford@arm.com>
gcc/
PR tree-optimization/94980
* tree.h (vector_element_bits, vector_element_bits_tree): Declare.
* tree.c (vector_element_bits, vector_element_bits_tree): New.
* match.pd: Use the new functions instead of determining the
vector element size directly from TYPE_SIZE(_UNIT).
* tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise.
* tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise.
* tree-vect-stmts.c (vect_is_simple_cond): Likewise.
* tree-vect-generic.c (expand_vector_piecewise): Likewise.
(expand_vector_conversion): Likewise.
(expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as
a divisor. Convert the dividend to bits to compensate.
* tree-vect-loop.c (vectorizable_live_operation): Call
vector_element_bits instead of open-coding it.
---
gcc/ChangeLog | 17 +++++++++++++++++
gcc/match.pd | 2 +-
gcc/tree-vect-data-refs.c | 2 +-
gcc/tree-vect-generic.c | 19 +++++++------------
gcc/tree-vect-loop.c | 4 +---
gcc/tree-vect-patterns.c | 3 +--
gcc/tree-vect-stmts.c | 3 +--
gcc/tree.c | 24 ++++++++++++++++++++++++
gcc/tree.h | 2 ++
9 files changed, 55 insertions(+), 21 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3b1384e70..07aea9b86 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2020-05-12 Richard Sandiford <richard.sandiford@arm.com>
+
+ PR tree-optimization/94980
+ * tree.h (vector_element_bits, vector_element_bits_tree): Declare.
+ * tree.c (vector_element_bits, vector_element_bits_tree): New.
+ * match.pd: Use the new functions instead of determining the
+ vector element size directly from TYPE_SIZE(_UNIT).
+ * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise.
+ * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise.
+ * tree-vect-stmts.c (vect_is_simple_cond): Likewise.
+ * tree-vect-generic.c (expand_vector_piecewise): Likewise.
+ (expand_vector_conversion): Likewise.
+ (expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as
+ a divisor. Convert the dividend to bits to compensate.
+ * tree-vect-loop.c (vectorizable_live_operation): Call
+ vector_element_bits instead of open-coding it.
+
2021-04-08 Release Manager
* GCC 10.3.0 released.
diff --git a/gcc/match.pd b/gcc/match.pd
index 5899eea95..79a0228d2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6236,7 +6236,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
}
(if (ins)
(bit_insert { op0; } { ins; }
- { bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); })
+ { bitsize_int (at * vector_element_bits (type)); })
(if (changed)
(vec_perm { op0; } { op1; } { op2; }))))))))))
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index d78b06455..e4466a4f3 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -3709,7 +3709,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
tree *offset_vectype_out)
{
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
- unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
+ unsigned int element_bits = vector_element_bits (vectype);
if (element_bits != memory_bits)
/* For now the vector elements must be the same width as the
memory elements. */
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index c10492034..37c3956a4 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -276,8 +276,7 @@ expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
tree part_width = TYPE_SIZE (inner_type);
tree index = bitsize_int (0);
int nunits = nunits_for_known_piecewise_op (type);
- int delta = tree_to_uhwi (part_width)
- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
+ int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
int i;
location_t loc = gimple_location (gsi_stmt (*gsi));
@@ -357,8 +356,7 @@ expand_vector_addition (gimple_stmt_iterator *gsi,
elem_op_func f, elem_op_func f_parallel,
tree type, tree a, tree b, enum tree_code code)
{
- int parts_per_word = UNITS_PER_WORD
- / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
+ int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
if (INTEGRAL_TYPE_P (TREE_TYPE (type))
&& parts_per_word >= 4
@@ -1733,19 +1731,17 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
optab optab1 = unknown_optab;
gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (ret_type))));
- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (arg_type))));
if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
code = FIX_TRUNC_EXPR;
else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
code = FLOAT_EXPR;
- if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
- < tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
+ unsigned int ret_elt_bits = vector_element_bits (ret_type);
+ unsigned int arg_elt_bits = vector_element_bits (arg_type);
+ if (ret_elt_bits < arg_elt_bits)
modifier = NARROW;
- else if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
- > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
+ else if (ret_elt_bits > arg_elt_bits)
modifier = WIDEN;
if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
@@ -1908,8 +1904,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
tree part_width = TYPE_SIZE (compute_type);
tree index = bitsize_int (0);
int nunits = nunits_for_known_piecewise_op (arg_type);
- int delta = tree_to_uhwi (part_width)
- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)));
+ int delta = tree_to_uhwi (part_width) / arg_elt_bits;
int i;
location_t loc = gimple_location (gsi_stmt (*gsi));
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 899b56087..7990e31de 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -8059,9 +8059,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
: gimple_get_lhs (stmt);
lhs_type = TREE_TYPE (lhs);
- bitsize = (VECTOR_BOOLEAN_TYPE_P (vectype)
- ? bitsize_int (TYPE_PRECISION (TREE_TYPE (vectype)))
- : TYPE_SIZE (TREE_TYPE (vectype)));
+ bitsize = vector_element_bits_tree (vectype);
vec_bitsize = TYPE_SIZE (vectype);
/* Get the vectorized lhs of STMT and the lane to use (counted in bits). */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 84d7ddb17..b076740ef 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -4406,8 +4406,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|| dt == vect_constant_def))
{
tree wide_scalar_type = build_nonstandard_integer_type
- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
- TYPE_UNSIGNED (rhs1_type));
+ (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
tree vectype3 = get_vectype_for_scalar_type (vinfo,
wide_scalar_type);
if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 4636b7ba2..0bdf9a547 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10717,8 +10717,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
&& tree_int_cst_lt (TYPE_SIZE (scalar_type),
TYPE_SIZE (TREE_TYPE (vectype))))
scalar_type = build_nonstandard_integer_type
- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
- TYPE_UNSIGNED (scalar_type));
+ (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
*comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
slp_node);
}
diff --git a/gcc/tree.c b/gcc/tree.c
index 3e6647ae0..9a0cedf10 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -13892,6 +13892,30 @@ vector_type_mode (const_tree t)
return mode;
}
+/* Return the size in bits of each element of vector type TYPE. */
+
+unsigned int
+vector_element_bits (const_tree type)
+{
+ gcc_checking_assert (VECTOR_TYPE_P (type));
+ if (VECTOR_BOOLEAN_TYPE_P (type))
+ return vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (type)),
+ TYPE_VECTOR_SUBPARTS (type));
+ return tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
+}
+
+/* Calculate the size in bits of each element of vector type TYPE
+ and return the result as a tree of type bitsizetype. */
+
+tree
+vector_element_bits_tree (const_tree type)
+{
+ gcc_checking_assert (VECTOR_TYPE_P (type));
+ if (VECTOR_BOOLEAN_TYPE_P (type))
+ return bitsize_int (vector_element_bits (type));
+ return TYPE_SIZE (TREE_TYPE (type));
+}
+
/* Verify that basic properties of T match TV and thus T can be a variant of
TV. TV should be the more specified variant (i.e. the main variant). */
diff --git a/gcc/tree.h b/gcc/tree.h
index bddc6e528..c66207fa0 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1996,6 +1996,8 @@ class auto_suppress_location_wrappers
extern machine_mode element_mode (const_tree);
extern machine_mode vector_type_mode (const_tree);
+extern unsigned int vector_element_bits (const_tree);
+extern tree vector_element_bits_tree (const_tree);
/* The "canonical" type for this type node, which is used by frontends to
compare the type for equality with another type. If two types are
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -1,379 +0,0 @@
From 21d265af074726b166e08301a2f847c474fcb680 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 30 Nov 2021 09:52:24 +0000
Subject: [PATCH 21/35] [Backport] gimple-match: Add a gimple_extract_op
function
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=33973fa754de1f95d459bfca66c0d80deec36537
code_helper and gimple_match_op seem like generally useful ways
of summing up a gimple_assign or gimple_call (or gimple_cond).
This patch adds a gimple_extract_op function that can be used
for that.
gcc/
* gimple-match.h (code_helper): Add functions for querying whether
the code represents an internal_fn or a built_in_function.
Provide explicit conversion operators for both cases.
(gimple_extract_op): Declare.
* gimple-match-head.c (gimple_extract): New function, extracted from...
(gimple_simplify): ...here.
(gimple_extract_op): New function.
---
gcc/gimple-match-head.c | 219 ++++++++++++++++++++--------------------
gcc/gimple-match.h | 27 +++++
2 files changed, 135 insertions(+), 111 deletions(-)
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 9b3e7298d..c1dea1734 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -884,12 +884,20 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
return true;
}
-/* The main STMT based simplification entry. It is used by the fold_stmt
- and the fold_stmt_to_constant APIs. */
+/* Common subroutine of gimple_extract_op and gimple_simplify. Try to
+ describe STMT in RES_OP, returning true on success. Before recording
+ an operand, call:
-bool
-gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
- tree (*valueize)(tree), tree (*top_valueize)(tree))
+ - VALUEIZE_CONDITION for a COND_EXPR condition
+ - VALUEIZE_OP for every other top-level operand
+
+ Both routines take a tree argument and returns a tree. */
+
+template<typename ValueizeOp, typename ValueizeCondition>
+inline bool
+gimple_extract (gimple *stmt, gimple_match_op *res_op,
+ ValueizeOp valueize_op,
+ ValueizeCondition valueize_condition)
{
switch (gimple_code (stmt))
{
@@ -905,101 +913,50 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|| code == VIEW_CONVERT_EXPR)
{
tree op0 = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
- bool valueized = false;
- op0 = do_valueize (op0, top_valueize, valueized);
- res_op->set_op (code, type, op0);
- return (gimple_resimplify1 (seq, res_op, valueize)
- || valueized);
+ res_op->set_op (code, type, valueize_op (op0));
+ return true;
}
else if (code == BIT_FIELD_REF)
{
tree rhs1 = gimple_assign_rhs1 (stmt);
- tree op0 = TREE_OPERAND (rhs1, 0);
- bool valueized = false;
- op0 = do_valueize (op0, top_valueize, valueized);
+ tree op0 = valueize_op (TREE_OPERAND (rhs1, 0));
res_op->set_op (code, type, op0,
TREE_OPERAND (rhs1, 1),
TREE_OPERAND (rhs1, 2),
REF_REVERSE_STORAGE_ORDER (rhs1));
- if (res_op->reverse)
- return valueized;
- return (gimple_resimplify3 (seq, res_op, valueize)
- || valueized);
+ return true;
}
- else if (code == SSA_NAME
- && top_valueize)
+ else if (code == SSA_NAME)
{
tree op0 = gimple_assign_rhs1 (stmt);
- tree valueized = top_valueize (op0);
- if (!valueized || op0 == valueized)
- return false;
- res_op->set_op (TREE_CODE (op0), type, valueized);
+ res_op->set_op (TREE_CODE (op0), type, valueize_op (op0));
return true;
}
break;
case GIMPLE_UNARY_RHS:
{
tree rhs1 = gimple_assign_rhs1 (stmt);
- bool valueized = false;
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
- res_op->set_op (code, type, rhs1);
- return (gimple_resimplify1 (seq, res_op, valueize)
- || valueized);
+ res_op->set_op (code, type, valueize_op (rhs1));
+ return true;
}
case GIMPLE_BINARY_RHS:
{
- tree rhs1 = gimple_assign_rhs1 (stmt);
- tree rhs2 = gimple_assign_rhs2 (stmt);
- bool valueized = false;
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
- rhs2 = do_valueize (rhs2, top_valueize, valueized);
+ tree rhs1 = valueize_op (gimple_assign_rhs1 (stmt));
+ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt));
res_op->set_op (code, type, rhs1, rhs2);
- return (gimple_resimplify2 (seq, res_op, valueize)
- || valueized);
+ return true;
}
case GIMPLE_TERNARY_RHS:
{
- bool valueized = false;
tree rhs1 = gimple_assign_rhs1 (stmt);
- /* If this is a [VEC_]COND_EXPR first try to simplify an
- embedded GENERIC condition. */
- if (code == COND_EXPR
- || code == VEC_COND_EXPR)
- {
- if (COMPARISON_CLASS_P (rhs1))
- {
- tree lhs = TREE_OPERAND (rhs1, 0);
- tree rhs = TREE_OPERAND (rhs1, 1);
- lhs = do_valueize (lhs, top_valueize, valueized);
- rhs = do_valueize (rhs, top_valueize, valueized);
- gimple_match_op res_op2 (res_op->cond, TREE_CODE (rhs1),
- TREE_TYPE (rhs1), lhs, rhs);
- if ((gimple_resimplify2 (seq, &res_op2, valueize)
- || valueized)
- && res_op2.code.is_tree_code ())
- {
- valueized = true;
- if (TREE_CODE_CLASS ((enum tree_code) res_op2.code)
- == tcc_comparison)
- rhs1 = build2 (res_op2.code, TREE_TYPE (rhs1),
- res_op2.ops[0], res_op2.ops[1]);
- else if (res_op2.code == SSA_NAME
- || res_op2.code == INTEGER_CST
- || res_op2.code == VECTOR_CST)
- rhs1 = res_op2.ops[0];
- else
- valueized = false;
- }
- }
- }
- tree rhs2 = gimple_assign_rhs2 (stmt);
- tree rhs3 = gimple_assign_rhs3 (stmt);
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
- rhs2 = do_valueize (rhs2, top_valueize, valueized);
- rhs3 = do_valueize (rhs3, top_valueize, valueized);
+ if (code == COND_EXPR && COMPARISON_CLASS_P (rhs1))
+ rhs1 = valueize_condition (rhs1);
+ else
+ rhs1 = valueize_op (rhs1);
+ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt));
+ tree rhs3 = valueize_op (gimple_assign_rhs3 (stmt));
res_op->set_op (code, type, rhs1, rhs2, rhs3);
- return (gimple_resimplify3 (seq, res_op, valueize)
- || valueized);
+ return true;
}
default:
gcc_unreachable ();
@@ -1013,7 +970,6 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
&& gimple_call_num_args (stmt) >= 1
&& gimple_call_num_args (stmt) <= 5)
{
- bool valueized = false;
combined_fn cfn;
if (gimple_call_internal_p (stmt))
cfn = as_combined_fn (gimple_call_internal_fn (stmt));
@@ -1023,7 +979,7 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
if (!fn)
return false;
- fn = do_valueize (fn, top_valueize, valueized);
+ fn = valueize_op (fn);
if (TREE_CODE (fn) != ADDR_EXPR
|| TREE_CODE (TREE_OPERAND (fn, 0)) != FUNCTION_DECL)
return false;
@@ -1039,47 +995,17 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
unsigned int num_args = gimple_call_num_args (stmt);
res_op->set_op (cfn, TREE_TYPE (gimple_call_lhs (stmt)), num_args);
for (unsigned i = 0; i < num_args; ++i)
- {
- tree arg = gimple_call_arg (stmt, i);
- res_op->ops[i] = do_valueize (arg, top_valueize, valueized);
- }
- if (internal_fn_p (cfn)
- && try_conditional_simplification (as_internal_fn (cfn),
- res_op, seq, valueize))
- return true;
- switch (num_args)
- {
- case 1:
- return (gimple_resimplify1 (seq, res_op, valueize)
- || valueized);
- case 2:
- return (gimple_resimplify2 (seq, res_op, valueize)
- || valueized);
- case 3:
- return (gimple_resimplify3 (seq, res_op, valueize)
- || valueized);
- case 4:
- return (gimple_resimplify4 (seq, res_op, valueize)
- || valueized);
- case 5:
- return (gimple_resimplify5 (seq, res_op, valueize)
- || valueized);
- default:
- gcc_unreachable ();
- }
+ res_op->ops[i] = valueize_op (gimple_call_arg (stmt, i));
+ return true;
}
break;
case GIMPLE_COND:
{
- tree lhs = gimple_cond_lhs (stmt);
- tree rhs = gimple_cond_rhs (stmt);
- bool valueized = false;
- lhs = do_valueize (lhs, top_valueize, valueized);
- rhs = do_valueize (rhs, top_valueize, valueized);
+ tree lhs = valueize_op (gimple_cond_lhs (stmt));
+ tree rhs = valueize_op (gimple_cond_rhs (stmt));
res_op->set_op (gimple_cond_code (stmt), boolean_type_node, lhs, rhs);
- return (gimple_resimplify2 (seq, res_op, valueize)
- || valueized);
+ return true;
}
default:
@@ -1089,6 +1015,77 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
return false;
}
+/* Try to describe STMT in RES_OP, returning true on success.
+ For GIMPLE_CONDs, describe the condition that is being tested.
+ For GIMPLE_ASSIGNs, describe the rhs of the assignment.
+ For GIMPLE_CALLs, describe the call. */
+
+bool
+gimple_extract_op (gimple *stmt, gimple_match_op *res_op)
+{
+ auto nop = [](tree op) { return op; };
+ return gimple_extract (stmt, res_op, nop, nop);
+}
+
+/* The main STMT based simplification entry. It is used by the fold_stmt
+ and the fold_stmt_to_constant APIs. */
+
+bool
+gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
+ tree (*valueize)(tree), tree (*top_valueize)(tree))
+{
+ bool valueized = false;
+ auto valueize_op = [&](tree op)
+ {
+ return do_valueize (op, top_valueize, valueized);
+ };
+ auto valueize_condition = [&](tree op) -> tree
+ {
+ bool cond_valueized = false;
+ tree lhs = do_valueize (TREE_OPERAND (op, 0), top_valueize,
+ cond_valueized);
+ tree rhs = do_valueize (TREE_OPERAND (op, 1), top_valueize,
+ cond_valueized);
+ gimple_match_op res_op2 (res_op->cond, TREE_CODE (op),
+ TREE_TYPE (op), lhs, rhs);
+ if ((gimple_resimplify2 (seq, &res_op2, valueize)
+ || cond_valueized)
+ && res_op2.code.is_tree_code ())
+ {
+ if (TREE_CODE_CLASS ((tree_code) res_op2.code) == tcc_comparison)
+ {
+ valueized = true;
+ return build2 (res_op2.code, TREE_TYPE (op),
+ res_op2.ops[0], res_op2.ops[1]);
+ }
+ else if (res_op2.code == SSA_NAME
+ || res_op2.code == INTEGER_CST
+ || res_op2.code == VECTOR_CST)
+ {
+ valueized = true;
+ return res_op2.ops[0];
+ }
+ }
+ return valueize_op (op);
+ };
+
+ if (!gimple_extract (stmt, res_op, valueize_op, valueize_condition))
+ return false;
+
+ if (res_op->code.is_internal_fn ())
+ {
+ internal_fn ifn = internal_fn (res_op->code);
+ if (try_conditional_simplification (ifn, res_op, seq, valueize))
+ return true;
+ }
+
+ if (!res_op->reverse
+ && res_op->num_ops
+ && res_op->resimplify (seq, valueize))
+ return true;
+
+ return valueized;
+}
/* Helper for the autogenerated code, valueize OP. */
diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
index 097898aed..39858c45f 100644
--- a/gcc/gimple-match.h
+++ b/gcc/gimple-match.h
@@ -33,13 +33,39 @@ public:
code_helper (combined_fn fn) : rep (-(int) fn) {}
operator tree_code () const { return (tree_code) rep; }
operator combined_fn () const { return (combined_fn) -rep; }
+ explicit operator internal_fn () const;
+ explicit operator built_in_function () const;
bool is_tree_code () const { return rep > 0; }
bool is_fn_code () const { return rep < 0; }
+ bool is_internal_fn () const;
+ bool is_builtin_fn () const;
int get_rep () const { return rep; }
private:
int rep;
};
+inline code_helper::operator internal_fn () const
+{
+ return as_internal_fn (combined_fn (*this));
+}
+
+inline code_helper::operator built_in_function () const
+{
+ return as_builtin_fn (combined_fn (*this));
+}
+
+inline bool
+code_helper::is_internal_fn () const
+{
+ return is_fn_code () && internal_fn_p (combined_fn (*this));
+}
+
+inline bool
+code_helper::is_builtin_fn () const
+{
+ return is_fn_code () && builtin_fn_p (combined_fn (*this));
+}
+
/* Represents the condition under which an operation should happen,
and the value to use otherwise. The condition applies elementwise
(as for VEC_COND_EXPR) if the values are vectors. */
@@ -333,6 +359,7 @@ gimple_simplified_result_is_gimple_val (const gimple_match_op *op)
extern tree (*mprts_hook) (gimple_match_op *);
+bool gimple_extract_op (gimple *, gimple_match_op *);
bool gimple_simplify (gimple *, gimple_match_op *, gimple_seq *,
tree (*)(tree), tree (*)(tree));
tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *,
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -1,31 +0,0 @@
From b57c55b282e7a9a7b2cc0d3843e58fd7998685e6 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Fri, 4 Nov 2022 23:19:44 +0800
Subject: [PATCH 23/35] [PHIOPT] Disable the match A?CST1:0 when the CST1 is
negative value
Fix the regression of gcc.target/aarch64/sve/vcond_3.c
gcc:
* match.pd (A?CST1:CST2): Disable the simplifcations A? (-CST1):0
---
gcc/match.pd | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index 79a0228d2..fc1a34dd3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3347,7 +3347,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (integer_onep (@1))
(convert (convert:boolean_type_node @0)))
/* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
- (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (TREE_TYPE (@1))
+ && integer_pow2p (@1))
(with {
tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
}
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -1,89 +0,0 @@
From 9af03694082c462bee86c167c78717089a93a188 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Sat, 5 Nov 2022 13:22:33 +0800
Subject: [PATCH 25/35] [PHIOPT] Add A ? B op CST : B match and simplify
optimizations
Refer to commit b6bdd7a4, use pattern matching to perform simple
A ? B op CST : B (where CST is power of 2) simplifications.
Fixes the 1st issue of https://gitee.com/openeuler/gcc/issues/I5TSG0?from=project-issue.
gcc/
* match.pd (A ? B op CST : B): Add simplifications for A ? B op POW2 : B
gcc/testsuite/
* gcc.dg/pr107190.c: New test.
---
gcc/match.pd | 21 +++++++++++++++++++++
gcc/testsuite/gcc.dg/pr107190.c | 27 +++++++++++++++++++++++++++
2 files changed, 48 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/pr107190.c
diff --git a/gcc/match.pd b/gcc/match.pd
index fc1a34dd3..5c5b5f89e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3383,6 +3383,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
)
#endif
+#if GIMPLE
+(if (canonicalize_math_p ())
+/* These patterns are mostly used by PHIOPT to move some operations outside of
+ the if statements. They should be done late because it gives jump threading
+ and few other passes to reduce what is going on. */
+/* a ? x op C : x -> x op (a << log2(C)) when C is power of 2. */
+ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate)
+ (simplify
+ (cond @0 (op:s @1 integer_pow2p@2) @1)
+ /* powerof2cst */
+ (if (INTEGRAL_TYPE_P (type))
+ (with {
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
+ }
+ (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; })))
+ )
+ )
+ )
+)
+#endif
+
/* Simplification moved from fold_cond_expr_with_comparison. It may also
be extended. */
/* This pattern implements two kinds simplification:
diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
new file mode 100644
index 000000000..235b2761a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr107190.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
+
+# define BN_BITS4 32
+# define BN_MASK2 (0xffffffffffffffffL)
+# define BN_MASK2l (0xffffffffL)
+# define BN_MASK2h (0xffffffff00000000L)
+# define BN_MASK2h1 (0xffffffff80000000L)
+# define LBITS(a) ((a)&BN_MASK2l)
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
+
+unsigned int test_m(unsigned long in0, unsigned long in1) {
+ unsigned long m, m1, lt, ht, bl, bh;
+ lt = LBITS(in0);
+ ht = HBITS(in0);
+ bl = LBITS(in1);
+ bh = HBITS(in1);
+ m = bh * lt;
+ m1 = bl * ht;
+ ht = bh * ht;
+ m = (m + m1) & BN_MASK2;
+ if (m < m1) ht += L2HBITS((unsigned long)1);
+ return ht + m;
+}
+
+/* { dg-final { scan-tree-dump "COND_EXPR in block 2 and PHI in block 4 converted to straightline code" "phiopt2" } } */
--
2.27.0.windows.1

View File

@ -1,130 +0,0 @@
From 2a2d0ba6a26d64f4c1f9352bb2c69dea8b67d6a6 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Wed, 9 Nov 2022 17:04:13 +0800
Subject: [PATCH 26/35] [FORWPROP] Fold series of instructions into mul
Merge the low part of series instructions into mul
gcc/
* match.pd: Add simplifications for low part of mul
* common.opt: Add new option fmerge-mull, enabled with -O2
* opts.c: default_options_table
gcc/testsuite/
* g++.dg/tree-ssa/mull64.C: New test.
---
gcc/common.opt | 4 +++
gcc/match.pd | 27 ++++++++++++++++++++
gcc/opts.c | 1 +
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++
4 files changed, 66 insertions(+)
create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C
diff --git a/gcc/common.opt b/gcc/common.opt
index ad147f7a9..6a7f66624 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2069,6 +2069,10 @@ fmerge-debug-strings
Common Report Var(flag_merge_debug_strings) Init(1)
Attempt to merge identical debug strings across compilation units.
+fmerge-mull
+Common Report Var(flag_merge_mull) Init(0) Optimization
+Attempt to merge series instructions into mul.
+
fmessage-length=
Common RejectNegative Joined UInteger
-fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping.
diff --git a/gcc/match.pd b/gcc/match.pd
index 5c5b5f89e..f6c5befd7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3404,6 +3404,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
)
#endif
+#if GIMPLE
+/* These patterns are mostly used by FORWPROP1 to fold some operations into more
+ simple IR. The following scenario should be matched:
+ In0Lo = In0(D) & 4294967295;
+ In0Hi = In0(D) >> 32;
+ In1Lo = In1(D) & 4294967295;
+ In1Hi = In1(D) >> 32;
+ Addc = In0Lo * In1Hi + In0Hi * In1Lo;
+ addc32 = Addc << 32;
+ ResLo = In0Lo * In1Lo + addc32 */
+(simplify
+ (plus:c (mult @4 @5)
+ (lshift
+ (plus:c
+ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3))
+ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2)))
+ INTEGER_CST@3
+ )
+ )
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
+ && TYPE_PRECISION (type) == 64)
+ (mult (convert:type @0) (convert:type @1))
+ )
+)
+#endif
+
/* Simplification moved from fold_cond_expr_with_comparison. It may also
be extended. */
/* This pattern implements two kinds simplification:
diff --git a/gcc/opts.c b/gcc/opts.c
index f12b13599..751965e46 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -511,6 +511,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
/* -O2 and above optimizations, but not -Os or -Og. */
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
new file mode 100644
index 000000000..2a3b74604
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
+
+# define BN_BITS4 32
+# define BN_MASK2 (0xffffffffffffffffL)
+# define BN_MASK2l (0xffffffffL)
+# define BN_MASK2h (0xffffffff00000000L)
+# define BN_MASK2h1 (0xffffffff80000000L)
+# define LBITS(a) ((a)&BN_MASK2l)
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
+
+void mul64(unsigned long in0, unsigned long in1,
+ unsigned long &retLo, unsigned long &retHi) {
+ unsigned long m00, m01, m10, m11, al, ah, bl, bh;
+ unsigned long Addc, addc32, low;
+ al = LBITS(in0);
+ ah = HBITS(in0);
+ bl = LBITS(in1);
+ bh = HBITS(in1);
+ m10 = bh * al;
+ m00 = bl * al;
+ m01 = bl * ah;
+ m11 = bh * ah;
+ Addc = (m10 + m01) & BN_MASK2;
+ if (Addc < m01) m11 += L2HBITS((unsigned long)1);
+ m11 += HBITS(Addc);
+ addc32 = L2HBITS(Addc);
+ low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++;
+ retLo = low;
+ retHi = m11;
+}
+
+/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
--
2.27.0.windows.1

View File

@ -1,105 +0,0 @@
From 315911bd3ae6f42366779e262ab76d9ed79359a0 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Fri, 11 Nov 2022 11:30:37 +0800
Subject: [PATCH 27/35] [FORWPROP] Fold series of instructions into umulh
Merge the high part of series instructions into umulh
gcc/
* match.pd: Add simplifications for high part of umulh
gcc/testsuite/
* g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4
---
gcc/match.pd | 56 ++++++++++++++++++++++++++
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++-
2 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index f6c5befd7..433682afb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3404,6 +3404,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
)
#endif
+#if GIMPLE
+/* These patterns are mostly used by FORWPROP4 to move some operations outside of
+ the if statements. They should be done late because it gives jump threading
+ and few other passes to reduce what is going on. */
+/* Mul64 is defined as a multiplication algorithm which compute two 64-bit
+ integers to one 128-bit integer. Try to match the high part of mul pattern
+ after the low part of mul pattern is simplified. The following scenario
+ should be matched:
+ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
+ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2
+ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3
+ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2
+ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3
+ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6
+ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult (@4 @7) @8
+ AddH = (Addc >> 32) + In0Hi * In1Hi -- (plus@11 (rshift @9 @3) (mult @5 @7))
+ addc32 = Addc << 32; -- lshift@10 @9 @3
+ ResLo = In0(D) * In1(D); -- mult @0 @1
+ ResHi = ((long unsigned int) (addc32 > ResLo)) +
+ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH;
+ } */
+(simplify
+ (plus:c
+ (plus:c
+ (convert
+ (gt (lshift@10 @9 @3)
+ (mult:c @0 @1)))
+ (lshift
+ (convert
+ (gt @8 @9))
+ @3))
+ (plus:c@11
+ (rshift
+ (plus:c@9
+ (mult:c (bit_and@4 SSA_NAME@0 @2) @7)
+ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2)))
+ @3)
+ (mult:c (rshift@5 SSA_NAME@0 @3)
+ (rshift@7 SSA_NAME@1 INTEGER_CST@3))
+ )
+ )
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
+ && TYPE_PRECISION (type) == 64)
+ (with {
+ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
+ tree shift = build_int_cst (integer_type_node, 64);
+ }
+ (convert:type (rshift
+ (mult (convert:i128_type @0)
+ (convert:i128_type @1))
+ { shift; })))
+ )
+)
+#endif
+
#if GIMPLE
/* These patterns are mostly used by FORWPROP1 to fold some operations into more
simple IR. The following scenario should be matched:
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
index 2a3b74604..f61cf5e6f 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
+/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
# define BN_BITS4 32
# define BN_MASK2 (0xffffffffffffffffL)
@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1,
retHi = m11;
}
-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */
--
2.27.0.windows.1

View File

@ -1,38 +0,0 @@
From b669b4512e8425f4d752ef76bf61097cf40d9b35 Mon Sep 17 00:00:00 2001
From: zgat <1071107108@qq.com>
Date: Thu, 17 Nov 2022 02:55:48 +0000
Subject: [PATCH 28/35] [Struct Reorg] Fix speccpu2006 462 double free #I60YUV
Modify gcc/tree.c. speccpu2006 462 runs normally after the modification.
Signed-off-by: zgat <1071107108@qq.com>
---
gcc/tree.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/gcc/tree.c b/gcc/tree.c
index 2a532d15a..a61788651 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -5224,8 +5224,7 @@ fld_simplified_type_name (tree type)
optimizations. */
if (flag_ipa_struct_reorg
&& lang_c_p ()
- && flag_lto_partition == LTO_PARTITION_ONE
- && (in_lto_p || flag_whole_program))
+ && flag_lto_partition == LTO_PARTITION_ONE)
return TYPE_NAME (type);
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
@@ -5471,8 +5470,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
optimizations. */
if (flag_ipa_struct_reorg
&& lang_c_p ()
- && flag_lto_partition == LTO_PARTITION_ONE
- && (in_lto_p || flag_whole_program))
+ && flag_lto_partition == LTO_PARTITION_ONE)
return t;
if (POINTER_TYPE_P (t))
return fld_incomplete_type_of (t, fld);
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,267 +0,0 @@
From 013544d0b477647c8835a8806c75e7b09155b8ed Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Mon, 8 Aug 2022 09:13:53 +0800
Subject: [PATCH 31/35] [loop-vect] Transfer arrays using registers between
loops For vectorized stores in a loop, if all succeeding loops immediately use the
data, transfer data using registers instead of load store to prevent overhead
from memory access.
---
gcc/testsuite/gcc.dg/vect/vect-perm-1.c | 45 ++++++
gcc/tree-vect-stmts.c | 181 ++++++++++++++++++++++++
2 files changed, 226 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-perm-1.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-perm-1.c b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
new file mode 100644
index 000000000..d8b29fbd5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -fdump-tree-vect-all-details -save-temps" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static unsigned inline abs2 (unsigned a)
+{
+ unsigned s = ((a>>15)&0x10001)*0xffff;
+ return (a+s)^s;
+}
+
+int foo (unsigned *a00, unsigned *a11, unsigned *a22, unsigned *a33)
+{
+ unsigned tmp[4][4];
+ unsigned a0, a1, a2, a3;
+ int sum = 0;
+ for (int i = 0; i < 4; i++)
+ {
+ int t0 = a00[i] + a11[i];
+ int t1 = a00[i] - a11[i];
+ int t2 = a22[i] + a33[i];
+ int t3 = a22[i] - a33[i];
+ tmp[i][0] = t0 + t2;
+ tmp[i][2] = t0 - t2;
+ tmp[i][1] = t1 + t3;
+ tmp[i][3] = t1 - t3;
+ }
+ for (int i = 0; i < 4; i++)
+ {
+ int t0 = tmp[0][i] + tmp[1][i];
+ int t1 = tmp[0][i] - tmp[1][i];
+ int t2 = tmp[2][i] + tmp[3][i];
+ int t3 = tmp[2][i] - tmp[3][i];
+ a0 = t0 + t2;
+ a2 = t0 - t2;
+ a1 = t1 + t3;
+ a3 = t1 - t3;
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
+ }
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 16 "vect" } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2c2197022..98b233718 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2276,6 +2276,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
return NULL_TREE;
}
+/* Check succeedor BB, BB without load is regarded as empty BB. Ignore empty
+ BB in DFS. */
+
+static unsigned
+mem_refs_in_bb (basic_block bb, vec<gimple *> &stmts)
+{
+ unsigned num = 0;
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (is_gimple_debug (stmt))
+ continue;
+ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt)
+ && !gimple_has_volatile_ops (stmt))
+ {
+ if (gimple_assign_rhs_code (stmt) == MEM_REF
+ || gimple_assign_rhs_code (stmt) == ARRAY_REF)
+ {
+ stmts.safe_push (stmt);
+ num++;
+ }
+ else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF
+ || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF)
+ num++;
+ }
+ }
+ return num;
+}
+
+static bool
+check_same_base (vec<data_reference_p> *datarefs, data_reference_p dr)
+{
+ for (unsigned ui = 0; ui < datarefs->length (); ui++)
+ {
+ tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0);
+ tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0);
+ if (TREE_CODE (op1) != TREE_CODE (op2))
+ continue;
+ if (TREE_CODE (op1) == ADDR_EXPR)
+ {
+ op1 = TREE_OPERAND (op1, 0);
+ op2 = TREE_OPERAND (op2, 0);
+ }
+ enum tree_code code = TREE_CODE (op1);
+ switch (code)
+ {
+ case VAR_DECL:
+ if (DECL_NAME (op1) == DECL_NAME (op2)
+ && DR_IS_READ ((*datarefs)[ui]))
+ return true;
+ break;
+ case SSA_NAME:
+ if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2)
+ && DR_IS_READ ((*datarefs)[ui]))
+ return true;
+ break;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+/* Iterate all load STMTS, if staisfying same base vectorized stmt, then return,
+ Otherwise, set false to SUCCESS. */
+
+static void
+check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts,
+ stmt_vec_info stmt_info, bool &success)
+{
+ if (stmt_info == NULL)
+ {
+ success = false;
+ return;
+ }
+ if (DR_IS_READ (stmt_info->dr_aux.dr))
+ {
+ success = false;
+ return;
+ }
+ unsigned ui = 0;
+ gimple *candidate = NULL;
+ FOR_EACH_VEC_ELT (stmts, ui, candidate)
+ {
+ if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE)
+ continue;
+
+ if (candidate->bb != candidate->bb->loop_father->header)
+ {
+ success = false;
+ return;
+ }
+ auto_vec<data_reference_p> datarefs;
+ tree res = find_data_references_in_bb (candidate->bb->loop_father,
+ candidate->bb, &datarefs);
+ if (res == chrec_dont_know)
+ {
+ success = false;
+ return;
+ }
+ if (check_same_base (&datarefs, stmt_info->dr_aux.dr))
+ return;
+ }
+ success = false;
+}
+
+/* Deep first search from present BB. If succeedor has load STMTS,
+ stop further searching. */
+
+static void
+dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info,
+ bool &success, vec<basic_block> &visited_bbs)
+{
+ if (bb == cfun->cfg->x_exit_block_ptr)
+ {
+ success = false;
+ return;
+ }
+ if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch)
+ return;
+
+ visited_bbs.safe_push (bb);
+ auto_vec<gimple *> stmts;
+ unsigned num = mem_refs_in_bb (bb, stmts);
+ /* Empty BB. */
+ if (num == 0)
+ {
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs);
+ if (!success)
+ return;
+ }
+ return;
+ }
+ /* Non-empty BB. */
+ check_vec_use (loop_vinfo, stmts, stmt_info, success);
+}
+
+/* For grouped store, if all succeedors of present BB have vectorized load
+ from same base of store. If so, set memory_access_type using
+ VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES. */
+
+static bool
+conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo)
+{
+ gimple *stmt = stmt_vinfo->stmt;
+ if (gimple_code (stmt) != GIMPLE_ASSIGN)
+ return false;
+
+ if (DR_IS_READ (stmt_vinfo->dr_aux.dr))
+ return false;
+
+ basic_block bb = stmt->bb;
+ bool success = true;
+ auto_vec<basic_block> visited_bbs;
+ visited_bbs.safe_push (bb);
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs);
+ return success;
+}
+
/* A subroutine of get_load_store_type, with a subset of the same
arguments. Handle the case where STMT_INFO is part of a grouped load
or store.
@@ -2434,6 +2601,20 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
*memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
overrun_p = would_overrun_p;
}
+
+ if (*memory_access_type == VMAT_LOAD_STORE_LANES
+ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST
+ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters),
+ loop_vinfo->vectorization_factor)
+ && conti_perm (stmt_info, loop_vinfo)
+ && (vls_type == VLS_LOAD
+ ? vect_grouped_load_supported (vectype, single_element_p,
+ group_size)
+ : vect_grouped_store_supported (vectype, group_size)))
+ {
+ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
+ overrun_p = would_overrun_p;
+ }
}
/* As a last resort, trying using a gather load or scatter store.
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -1,826 +0,0 @@
From ca2a541ed3425bec64f97fe277c6c02bf4f20049 Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Thu, 27 Oct 2022 10:26:34 +0800
Subject: [PATCH 33/35] [Loop-distribution] Insert temp arrays built from
isomorphic stmts Use option -ftree-slp-transpose-vectorize Build temp arrays
for isomorphic stmt and regard them as new seed_stmts for loop distribution.
---
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c | 67 +++
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c | 17 +
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c | 19 +
gcc/tree-loop-distribution.c | 577 +++++++++++++++++++-
4 files changed, 663 insertions(+), 17 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
new file mode 100644
index 000000000..649463647
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
@@ -0,0 +1,67 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-do run { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details -save-temps" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static unsigned inline abs2 (unsigned a)
+{
+ unsigned s = ((a>>15)&0x10001)*0xffff;
+ return (a+s)^s;
+}
+
+int foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
+{
+ unsigned tmp[4][4];
+ unsigned a0, a1, a2, a3;
+ int sum = 0;
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
+ {
+ a0 = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16);
+ a1 = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16);
+ a2 = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16);
+ a3 = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16);
+ int t0 = a0 + a1;
+ int t1 = a0 - a1;
+ int t2 = a2 + a3;
+ int t3 = a2 - a3;
+ tmp[i][0] = t0 + t2;
+ tmp[i][2] = t0 - t2;
+ tmp[i][1] = t1 + t3;
+ tmp[i][3] = t1 - t3;
+ }
+ for (int i = 0; i < 4; i++)
+ {
+ int t0 = tmp[0][i] + tmp[1][i];
+ int t1 = tmp[0][i] - tmp[1][i];
+ int t2 = tmp[2][i] + tmp[3][i];
+ int t3 = tmp[2][i] - tmp[3][i];
+ a0 = t0 + t2;
+ a2 = t0 - t2;
+ a1 = t1 + t3;
+ a3 = t1 - t3;
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
+ }
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
+}
+
+int main ()
+{
+ unsigned char oxa[128] = {0};
+ unsigned char oxb[128] = {0};
+ for (int i = 0; i < 128; i++)
+ {
+ oxa[i] += i * 3;
+ oxb[i] = i * 2;
+ }
+ int sum = foo (oxa, 16, oxb, 32);
+ if (sum != 736)
+ {
+ abort ();
+ }
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
new file mode 100644
index 000000000..1b50fd27d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */
+
+unsigned a0[4], a1[4], a2[4], a3[4];
+
+void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
+{
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
+ {
+ a0[i] = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16);
+ a1[i] = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16);
+ a2[i] = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16);
+ a3[i] = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16);
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
new file mode 100644
index 000000000..94b992b05
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */
+
+unsigned a0[4], a1[4], a2[4], a3[4];
+
+void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
+{
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
+ {
+ a0[i] = ((oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16)) + 1;
+ a1[i] = ((oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16)) - 2;
+ a2[i] = ((oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16)) * 3;
+ a3[i] = ((oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16)) / 4;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "Insertion removed" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */
\ No newline at end of file
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index c08af6562..88b56379c 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -36,6 +36,47 @@ along with GCC; see the file COPYING3. If not see
| D(I) = A(I-1)*E
|ENDDO
+ If an unvectorizable loop has grouped loads, and calculations from grouped
+ loads are isomorphic, build temp arrays using stmts where isomorphic
+ calculations end. Afer distribution, the partition built from temp
+ arrays can be vectorized in pass SLP after loop unrolling. For example,
+
+ |DO I = 1, N
+ | A = FOO (ARG_1);
+ | B = FOO (ARG_2);
+ | C = BAR_0 (A);
+ | D = BAR_1 (B);
+ |ENDDO
+
+ is transformed to
+
+ |DO I = 1, N
+ | J = FOO (ARG_1);
+ | K = FOO (ARG_2);
+ | X[I] = J;
+ | Y[I] = K;
+ | A = X[I];
+ | B = Y[I];
+ | C = BAR_0 (A);
+ | D = BAR_1 (B);
+ |ENDDO
+
+ and is then distributed to
+
+ |DO I = 1, N
+ | J = FOO (ARG_1);
+ | K = FOO (ARG_2);
+ | X[I] = J;
+ | Y[I] = K;
+ |ENDDO
+
+ |DO I = 1, N
+ | A = X[I];
+ | B = Y[I];
+ | C = BAR_0 (A);
+ | D = BAR_1 (B);
+ |ENDDO
+
Loop distribution is the dual of loop fusion. It separates statements
of a loop (or loop nest) into multiple loops (or loop nests) with the
same loop header. The major goal is to separate statements which may
@@ -44,7 +85,9 @@ along with GCC; see the file COPYING3. If not see
1) Seed partitions with specific type statements. For now we support
two types seed statements: statement defining variable used outside
- of loop; statement storing to memory.
+ of loop; statement storing to memory. Moreover, for unvectorizable
+ loops, we try to find isomorphic stmts from grouped load and build
+ temp arrays as new seed statements.
2) Build reduced dependence graph (RDG) for loop to be distributed.
The vertices (RDG:V) model all statements in the loop and the edges
(RDG:E) model flow and control dependencies between statements.
@@ -643,7 +686,8 @@ class loop_distribution
/* Returns true when PARTITION1 and PARTITION2 access the same memory
object in RDG. */
bool share_memory_accesses (struct graph *rdg,
- partition *partition1, partition *partition2);
+ partition *partition1, partition *partition2,
+ hash_set<tree> *excluded_arrays);
/* For each seed statement in STARTING_STMTS, this function builds
partition for it by adding depended statements according to RDG.
@@ -686,8 +730,9 @@ class loop_distribution
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
ALIAS_DDRS contains ddrs which need runtime alias check. */
- void finalize_partitions (class loop *loop, vec<struct partition *>
- *partitions, vec<ddr_p> *alias_ddrs);
+ void finalize_partitions (class loop *loop,
+ vec<struct partition *> *partitions,
+ vec<ddr_p> *alias_ddrs, bitmap producers);
/* Analyze loop form and if it's vectorizable to decide if we need to
insert temp arrays to distribute it. */
@@ -701,6 +746,28 @@ class loop_distribution
inline void rebuild_rdg (loop_p loop, struct graph *&rdg,
control_dependences *cd);
+
+ /* If loop is not distributed, remove inserted temp arrays. */
+ void remove_insertion (loop_p loop, struct graph *flow_only_rdg,
+ bitmap producers, struct partition *partition);
+
+ /* Insert temp arrays if isomorphic computation exists. Temp arrays will be
+ regarded as SEED_STMTS for building partitions in succeeding processes. */
+ bool insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
+ hash_set<tree> *tmp_array_vars, bitmap producers);
+
+ void build_producers (loop_p loop, bitmap producers,
+ vec<gimple *> &transformed);
+
+ void do_insertion (loop_p loop, struct graph *flow_only_rdg, tree iv,
+ bitmap cut_points, hash_set <tree> *tmp_array_vars,
+ bitmap producers);
+
+ /* Fuse PARTITIONS built from inserted temp arrays into one partition,
+ fuse the rest into another. */
+ void merge_remaining_partitions (vec<struct partition *> *partitions,
+ bitmap producers);
+
/* Distributes the code from LOOP in such a way that producer statements
are placed before consumer statements. Tries to separate only the
statements from STMTS into separate loops. Returns the number of
@@ -1913,7 +1980,8 @@ loop_distribution::classify_partition (loop_p loop,
bool
loop_distribution::share_memory_accesses (struct graph *rdg,
- partition *partition1, partition *partition2)
+ partition *partition1, partition *partition2,
+ hash_set <tree> *excluded_arrays)
{
unsigned i, j;
bitmap_iterator bi, bj;
@@ -1947,7 +2015,10 @@ loop_distribution::share_memory_accesses (struct graph *rdg,
if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
&& operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
&& operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
- && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
+ && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0)
+ /* An exception, if PARTITION1 and PARTITION2 contain the
+ temp array we inserted, do not merge them. */
+ && !excluded_arrays->contains (DR_REF (dr1)))
return true;
}
}
@@ -2909,13 +2980,47 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
}
}
+void
+loop_distribution::merge_remaining_partitions
+ (vec<struct partition *> *partitions,
+ bitmap producers)
+{
+ struct partition *partition = NULL;
+ struct partition *p1 = NULL, *p2 = NULL;
+ for (unsigned i = 0; partitions->iterate (i, &partition); i++)
+ {
+ if (bitmap_intersect_p (producers, partition->stmts))
+ {
+ if (p1 == NULL)
+ {
+ p1 = partition;
+ continue;
+ }
+ partition_merge_into (NULL, p1, partition, FUSE_FINALIZE);
+ }
+ else
+ {
+ if (p2 == NULL)
+ {
+ p2 = partition;
+ continue;
+ }
+ partition_merge_into (NULL, p2, partition, FUSE_FINALIZE);
+ }
+ partitions->unordered_remove (i);
+ partition_free (partition);
+ i--;
+ }
+}
+
void
loop_distribution::finalize_partitions (class loop *loop,
vec<struct partition *> *partitions,
- vec<ddr_p> *alias_ddrs)
+ vec<ddr_p> *alias_ddrs,
+ bitmap producers)
{
unsigned i;
- struct partition *partition, *a;
+ struct partition *partition;
if (partitions->length () == 1
|| alias_ddrs->length () > 0)
@@ -2947,13 +3052,7 @@ loop_distribution::finalize_partitions (class loop *loop,
|| (loop->inner == NULL
&& i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
{
- a = (*partitions)[0];
- for (i = 1; partitions->iterate (i, &partition); ++i)
- {
- partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
- partition_free (partition);
- }
- partitions->truncate (1);
+ merge_remaining_partitions (partitions, producers);
}
/* Fuse memset builtins if possible. */
@@ -3758,6 +3857,404 @@ find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts)
return decide_stmts_by_profit (candi_stmts, stmts);
}
+/* Get iv from SEED_STMTS and make sure each seed_stmt has only one iv as index
+ and all indices are the same. */
+
+static tree
+find_index (vec<gimple *> seed_stmts)
+{
+ if (seed_stmts.length () == 0)
+ return NULL;
+ bool found_index = false;
+ tree index = NULL;
+ unsigned ui = 0;
+ for (ui = 0; ui < seed_stmts.length (); ui++)
+ {
+ if (!gimple_vdef (seed_stmts[ui]))
+ return NULL;
+ tree lhs = gimple_assign_lhs (seed_stmts[ui]);
+ unsigned num_index = 0;
+ while (TREE_CODE (lhs) == ARRAY_REF)
+ {
+ if (TREE_CODE (TREE_OPERAND (lhs, 1)) == SSA_NAME)
+ {
+ num_index++;
+ if (num_index > 1)
+ return NULL;
+ if (index == NULL)
+ {
+ index = TREE_OPERAND (lhs, 1);
+ found_index = true;
+ }
+ else if (index != TREE_OPERAND (lhs, 1))
+ return NULL;
+ }
+ lhs = TREE_OPERAND (lhs, 0);
+ }
+ if (!found_index)
+ return NULL;
+ }
+ return index;
+}
+
+/* Check if expression of phi is an increament of a const. */
+
+static void
+check_phi_inc (struct vertex *v_phi, struct graph *rdg, bool &found_inc)
+{
+ struct graph_edge *e_phi;
+ for (e_phi = v_phi->succ; e_phi; e_phi = e_phi->succ_next)
+ {
+ struct vertex *v_inc = &(rdg->vertices[e_phi->dest]);
+ if (!is_gimple_assign (RDGV_STMT (v_inc))
+ || gimple_expr_code (RDGV_STMT (v_inc)) != PLUS_EXPR)
+ continue;
+ tree rhs1 = gimple_assign_rhs1 (RDGV_STMT (v_inc));
+ tree rhs2 = gimple_assign_rhs2 (RDGV_STMT (v_inc));
+ if (!(integer_onep (rhs1) || integer_onep (rhs2)))
+ continue;
+ struct graph_edge *e_inc;
+ /* find cycle with only two vertices inc and phi: inc <--> phi. */
+ bool found_cycle = false;
+ for (e_inc = v_inc->succ; e_inc; e_inc = e_inc->succ_next)
+ {
+ if (e_inc->dest == e_phi->src)
+ {
+ found_cycle = true;
+ break;
+ }
+ }
+ if (!found_cycle)
+ continue;
+ found_inc = true;
+ }
+}
+
+/* Check if phi satisfies form like PHI <0, i>. */
+
+static inline bool
+iv_check_phi_stmt (gimple *phi_stmt)
+{
+ return gimple_phi_num_args (phi_stmt) == 2
+ && (integer_zerop (gimple_phi_arg_def (phi_stmt, 0))
+ || integer_zerop (gimple_phi_arg_def (phi_stmt, 1)));
+}
+
+/* Make sure the iteration varible is a phi. */
+
+static tree
+get_iv_from_seed (struct graph *flow_only_rdg, vec<gimple *> seed_stmts)
+{
+ tree index = find_index (seed_stmts);
+ if (index == NULL)
+ return NULL;
+ for (int i = 0; i < flow_only_rdg->n_vertices; i++)
+ {
+ struct vertex *v = &(flow_only_rdg->vertices[i]);
+ if (RDGV_STMT (v) != seed_stmts[0])
+ continue;
+ struct graph_edge *e;
+ bool found_phi = false;
+ for (e = v->pred; e; e = e->pred_next)
+ {
+ struct vertex *v_phi = &(flow_only_rdg->vertices[e->src]);
+ gimple *phi_stmt = RDGV_STMT (v_phi);
+ if (gimple_code (phi_stmt) != GIMPLE_PHI
+ || gimple_phi_result (phi_stmt) != index)
+ continue;
+ if (!iv_check_phi_stmt (phi_stmt))
+ return NULL;
+ /* find inc expr in succ of phi. */
+ bool found_inc = false;
+ check_phi_inc (v_phi, flow_only_rdg, found_inc);
+ if (!found_inc)
+ return NULL;
+ found_phi = true;
+ break;
+ }
+ if (!found_phi)
+ return NULL;
+ break;
+ }
+ return index;
+}
+
+/* Do not distribute loop if vertexes in ROOT_MAP have antidependence with in
+ FLOW_ONLY_RDG. */
+
+static bool
+check_no_dependency (struct graph *flow_only_rdg, bitmap root_map)
+{
+ bitmap_iterator bi;
+ unsigned ui;
+ auto_vec<unsigned, 16> visited_nodes;
+ auto_bitmap visited_map;
+ EXECUTE_IF_SET_IN_BITMAP (root_map, 0, ui, bi)
+ visited_nodes.safe_push (ui);
+ for (ui = 0; ui < visited_nodes.length (); ui++)
+ {
+ struct vertex *v = &(flow_only_rdg->vertices[visited_nodes[ui]]);
+ struct graph_edge *e;
+ for (e = v->succ; e; e = e->succ_next)
+ {
+ if (bitmap_bit_p (root_map, e->dest))
+ return false;
+ if (bitmap_bit_p (visited_map, e->dest))
+ continue;
+ visited_nodes.safe_push (e->dest);
+ bitmap_set_bit (visited_map, e->dest);
+ }
+ }
+ return true;
+}
+
+/* Find isomorphic stmts from GROUPED_LOADS in VINFO and make sure
+ there is no dependency among those STMT we found. */
+
+static unsigned
+get_cut_points (struct graph *flow_only_rdg, bitmap cut_points,
+ loop_vec_info vinfo)
+{
+ unsigned n_stmts = 0;
+
+ /* STMTS that may be CUT_POINTS. */
+ auto_vec<gimple *> stmts;
+ if (!find_isomorphic_stmts (vinfo, stmts))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "No temp array insertion: no isomorphic stmts"
+ " were found.\n");
+ return 0;
+ }
+
+ for (int i = 0; i < flow_only_rdg->n_vertices; i++)
+ {
+ if (stmts.contains (RDG_STMT (flow_only_rdg, i)))
+ bitmap_set_bit (cut_points, i);
+ }
+ n_stmts = bitmap_count_bits (cut_points);
+
+ bool succ = check_no_dependency (flow_only_rdg, cut_points);
+ if (!succ)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "No temp array inserted: data dependency"
+ " among isomorphic stmts.\n");
+ return 0;
+ }
+ return n_stmts;
+}
+
+static void
+build_temp_array (struct vertex *v, gimple_stmt_iterator &gsi,
+ poly_uint64 array_extent, tree iv,
+ hash_set<tree> *tmp_array_vars, vec<gimple *> *transformed)
+{
+ gimple *stmt = RDGV_STMT (v);
+ tree lhs = gimple_assign_lhs (stmt);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "original stmt:\t");
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
+ }
+ tree var_ssa = duplicate_ssa_name (lhs, stmt);
+ gimple_assign_set_lhs (stmt, var_ssa);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "changed to:\t");
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS);
+ }
+ gimple_set_uid (gsi_stmt (gsi), -1);
+ tree vect_elt_type = TREE_TYPE (lhs);
+ tree array_type = build_array_type_nelts (vect_elt_type, array_extent);
+ tree array = create_tmp_var (array_type);
+ tree array_ssa = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
+ tmp_array_vars->add (array_ssa);
+ gimple *store = gimple_build_assign (array_ssa, var_ssa);
+ tree new_vdef = make_ssa_name (gimple_vop (cfun), store);
+ gsi_insert_after (&gsi, store, GSI_NEW_STMT);
+ gimple_set_vdef (store, new_vdef);
+ transformed->safe_push (store);
+ gimple_set_uid (gsi_stmt (gsi), -1);
+ tree array_ssa2 = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
+ tmp_array_vars->add (array_ssa2);
+ gimple *load = gimple_build_assign (lhs, array_ssa2);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "insert stmt:\t");
+ print_gimple_stmt (dump_file, store, 0, TDF_VOPS|TDF_MEMSYMS);
+ fprintf (dump_file, " and stmt:\t");
+ print_gimple_stmt (dump_file, load, 0, TDF_VOPS|TDF_MEMSYMS);
+ }
+ gimple_set_vuse (load, new_vdef);
+ gsi_insert_after (&gsi, load, GSI_NEW_STMT);
+ gimple_set_uid (gsi_stmt (gsi), -1);
+}
+
+/* Set bitmap PRODUCERS based on vec TRANSFORMED. */
+
+void
+loop_distribution::build_producers (loop_p loop, bitmap producers,
+ vec<gimple *> &transformed)
+{
+ auto_vec<gimple *, 10> stmts;
+ stmts_from_loop (loop, &stmts);
+ int i = 0;
+ gimple *stmt = NULL;
+
+ FOR_EACH_VEC_ELT (stmts, i, stmt)
+ gimple_set_uid (stmt, i);
+ i = 0;
+ FOR_EACH_VEC_ELT (transformed, i, stmt)
+ bitmap_set_bit (producers, stmt->uid);
+}
+
+/* Transform stmt
+
+ A = FOO (ARG_1);
+
+ to
+
+ STMT_1: A1 = FOO (ARG_1);
+ STMT_2: X[I] = A1;
+ STMT_3: A = X[I];
+
+ Producer is STMT_2 who defines the temp array and consumer is
+ STMT_3 who uses the temp array. */
+
+void
+loop_distribution::do_insertion (loop_p loop, struct graph *flow_only_rdg,
+ tree iv, bitmap cut_points,
+ hash_set<tree> *tmp_array_vars,
+ bitmap producers)
+{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "=== do insertion ===\n");
+
+ auto_vec<gimple *> transformed;
+
+ /* Execution times of loop. */
+ poly_uint64 array_extent
+ = tree_to_poly_uint64 (number_of_latch_executions (loop)) + 1;
+
+ basic_block *bbs = get_loop_body_in_custom_order (loop, this,
+ bb_top_order_cmp_r);
+
+ for (int i = 0; i < int (loop->num_nodes); i++)
+ {
+ basic_block bb = bbs[i];
+
+ /* Find all cut points in bb and transform them. */
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ unsigned j = gimple_uid (gsi_stmt (gsi));
+ if (bitmap_bit_p (cut_points, j))
+ {
+ struct vertex *v = &(flow_only_rdg->vertices[j]);
+ build_temp_array (v, gsi, array_extent, iv, tmp_array_vars,
+ &transformed);
+ }
+ }
+ }
+ build_producers (loop, producers, transformed);
+ update_ssa (TODO_update_ssa);
+ free (bbs);
+}
+
+/* After temp array insertion, given stmts
+ STMT_1: M = FOO (ARG_1);
+ STMT_2: X[I] = M;
+ STMT_3: A = X[I];
+ STMT_2 is the producer, STMT_1 is its prev and STMT_3 is its next.
+ Replace M with A, and remove STMT_2 and STMT_3. */
+
+static void
+reset_gimple_assign (struct graph *flow_only_rdg, struct partition *partition,
+ gimple_stmt_iterator &gsi, int j)
+{
+ struct vertex *v = &(flow_only_rdg->vertices[j]);
+ gimple *stmt = RDGV_STMT (v);
+ gimple *prev = stmt->prev;
+ gimple *next = stmt->next;
+ tree n_lhs = gimple_assign_lhs (next);
+ gimple_assign_set_lhs (prev, n_lhs);
+ unlink_stmt_vdef (stmt);
+ if (partition)
+ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ if (partition)
+ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
+ gsi_remove (&gsi, true);
+}
+
+void
+loop_distribution::remove_insertion (loop_p loop, struct graph *flow_only_rdg,
+ bitmap producers, struct partition *partition)
+{
+ basic_block *bbs = get_loop_body_in_custom_order (loop, this,
+ bb_top_order_cmp_r);
+ for (int i = 0; i < int (loop->num_nodes); i++)
+ {
+ basic_block bb = bbs[i];
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ unsigned j = gimple_uid (gsi_stmt (gsi));
+ if (bitmap_bit_p (producers, j))
+ reset_gimple_assign (flow_only_rdg, partition, gsi, j);
+ }
+ }
+ update_ssa (TODO_update_ssa);
+ free (bbs);
+}
+
+/* Insert temp arrays if isomorphic computation exists. Temp arrays will be
+ regarded as SEED_STMTS for building partitions in succeeding processes. */
+
+bool
+loop_distribution::insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
+ hash_set<tree> *tmp_array_vars, bitmap producers)
+{
+ struct graph *flow_only_rdg = build_rdg (loop, NULL);
+ gcc_checking_assert (flow_only_rdg != NULL);
+ tree iv = get_iv_from_seed (flow_only_rdg, seed_stmts);
+ if (iv == NULL)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Loop %d no temp array insertion: failed to get"
+ " iteration variable.\n", loop->num);
+ free_rdg (flow_only_rdg);
+ return false;
+ }
+ auto_bitmap cut_points;
+ loop_vec_info vinfo = loop_vec_info_for_loop (loop);
+ unsigned n_cut_points = get_cut_points (flow_only_rdg, cut_points, vinfo);
+ delete vinfo;
+ loop->aux = NULL;
+ if (n_cut_points == 0)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Loop %d no temp array insertion: no cut points"
+ " found.\n", loop->num);
+ free_rdg (flow_only_rdg);
+ return false;
+ }
+ do_insertion (loop, flow_only_rdg, iv, cut_points, tmp_array_vars, producers);
+ if (dump_enabled_p ())
+ {
+ dump_user_location_t loc = find_loop_location (loop);
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion done:"
+ " %d temp arrays inserted in Loop %d.\n",
+ n_cut_points, loop->num);
+ }
+ free_rdg (flow_only_rdg);
+ return true;
+}
+
+static bool find_seed_stmts_for_distribution (class loop *, vec<gimple *> *);
+
/* Distributes the code from LOOP in such a way that producer statements
are placed before consumer statements. Tries to separate only the
statements from STMTS into separate loops. Returns the number of
@@ -3814,6 +4311,34 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
return 0;
}
+ /* Try to distribute LOOP if LOOP is simple enough and unable to vectorize.
+ If LOOP has grouped loads, recursively find isomorphic stmts and insert
+ temp arrays, rebuild RDG and call find_seed_stmts_for_distribution
+ to replace STMTS. */
+
+ hash_set<tree> tmp_array_vars;
+
+ /* STMTs that define those inserted TMP_ARRAYs. */
+ auto_bitmap producers;
+
+ /* New SEED_STMTS after insertion. */
+ auto_vec<gimple *> work_list;
+ bool insert_success = false;
+ if (may_insert_temp_arrays (loop, rdg, cd))
+ {
+ if (insert_temp_arrays (loop, stmts, &tmp_array_vars, producers))
+ {
+ if (find_seed_stmts_for_distribution (loop, &work_list))
+ {
+ insert_success = true;
+ stmts = work_list;
+ }
+ else
+ remove_insertion (loop, rdg, producers, NULL);
+ rebuild_rdg (loop, rdg, cd);
+ }
+ }
+
data_reference_p dref;
for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
dref->aux = (void *) (uintptr_t) i;
@@ -3894,7 +4419,7 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
for (int j = i + 1;
partitions.iterate (j, &partition); ++j)
{
- if (share_memory_accesses (rdg, into, partition))
+ if (share_memory_accesses (rdg, into, partition, &tmp_array_vars))
{
partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
partitions.unordered_remove (j);
@@ -3944,7 +4469,7 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
}
}
- finalize_partitions (loop, &partitions, &alias_ddrs);
+ finalize_partitions (loop, &partitions, &alias_ddrs, producers);
/* If there is a reduction in all partitions make sure the last one
is not classified for builtin code generation. */
@@ -3962,6 +4487,24 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
}
nbp = partitions.length ();
+
+ /* If we have inserted TMP_ARRAYs but there is only one partition left in
+ the succeeding processes, remove those inserted TMP_ARRAYs back to the
+ original version. */
+
+ if (nbp == 1 && insert_success)
+ {
+ struct partition *partition = NULL;
+ partitions.iterate (0, &partition);
+ remove_insertion (loop, rdg, producers, partition);
+ if (dump_enabled_p ())
+ {
+ dump_user_location_t loc = find_loop_location (loop);
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion removed:"
+ " unable to distribute loop %d.\n", loop->num);
+ }
+ }
+
if (nbp == 0
|| (nbp == 1 && !partition_builtin_p (partitions[0]))
|| (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
--
2.27.0.windows.1

View File

@ -1,206 +0,0 @@
From 717782ec36469eb81650b07e8b5536281a59993d Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Tue, 29 Nov 2022 22:12:29 +0800
Subject: [PATCH 34/35] Revert "[Backport] tree-optimization/102880 - make
PHI-OPT recognize more CFGs"
This reverts commit 77398954ce517aa011b7a254c7aa2858521b2093.
---
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 ---------
gcc/tree-ssa-phiopt.c | 73 +++++++++-------------
2 files changed, 29 insertions(+), 75 deletions(-)
delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
deleted file mode 100644
index 21aa66e38..000000000
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */
-
-int __GIMPLE (ssa,startwith("phiopt"))
-foo (int a, int b, int flag)
-{
- int res;
-
- __BB(2):
- if (flag_2(D) != 0)
- goto __BB6;
- else
- goto __BB4;
-
- __BB(4):
- if (a_3(D) > b_4(D))
- goto __BB7;
- else
- goto __BB6;
-
- __BB(6):
- goto __BB7;
-
- __BB(7):
- res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D));
- return res_1;
-}
-
-/* We should be able to detect MAX despite the extra edge into
- the middle BB. */
-/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 079d29e74..21ac08145 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -219,6 +219,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
/* If either bb1's succ or bb2 or bb2's succ is non NULL. */
if (EDGE_COUNT (bb1->succs) == 0
+ || bb2 == NULL
|| EDGE_COUNT (bb2->succs) == 0)
continue;
@@ -278,14 +279,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|| (e1->flags & EDGE_FALLTHRU) == 0)
continue;
+ /* Also make sure that bb1 only have one predecessor and that it
+ is bb. */
+ if (!single_pred_p (bb1)
+ || single_pred (bb1) != bb)
+ continue;
+
if (do_store_elim)
{
- /* Also make sure that bb1 only have one predecessor and that it
- is bb. */
- if (!single_pred_p (bb1)
- || single_pred (bb1) != bb)
- continue;
-
/* bb1 is the middle block, bb2 the join block, bb the split block,
e1 the fallthrough edge from bb1 to bb2. We can't do the
optimization if the join block has more than two predecessors. */
@@ -330,11 +331,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
node. */
gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
- gphi *newphi;
- if (single_pred_p (bb1)
- && (newphi = factor_out_conditional_conversion (e1, e2, phi,
- arg0, arg1,
- cond_stmt)))
+ gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
+ arg0, arg1,
+ cond_stmt);
+ if (newphi != NULL)
{
phi = newphi;
/* factor_out_conditional_conversion may create a new PHI in
@@ -355,14 +355,12 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
- && single_pred_p (bb1)
&& cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
phi, arg0, arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (single_pred_p (bb1)
- && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
}
}
@@ -393,41 +391,35 @@ replace_phi_edge_with_variable (basic_block cond_block,
edge e, gphi *phi, tree new_tree)
{
basic_block bb = gimple_bb (phi);
+ basic_block block_to_remove;
gimple_stmt_iterator gsi;
/* Change the PHI argument to new. */
SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
/* Remove the empty basic block. */
- edge edge_to_remove;
if (EDGE_SUCC (cond_block, 0)->dest == bb)
- edge_to_remove = EDGE_SUCC (cond_block, 1);
- else
- edge_to_remove = EDGE_SUCC (cond_block, 0);
- if (EDGE_COUNT (edge_to_remove->dest->preds) == 1)
{
- e->flags |= EDGE_FALLTHRU;
- e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
- e->probability = profile_probability::always ();
- delete_basic_block (edge_to_remove->dest);
-
- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
- gsi = gsi_last_bb (cond_block);
- gsi_remove (&gsi, true);
+ EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
+ EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
+ EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();
+
+ block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
}
else
{
- /* If there are other edges into the middle block make
- CFG cleanup deal with the edge removal to avoid
- updating dominators here in a non-trivial way. */
- gcond *cond = as_a <gcond *> (last_stmt (cond_block));
- if (edge_to_remove->flags & EDGE_TRUE_VALUE)
- gimple_cond_make_false (cond);
- else
- gimple_cond_make_true (cond);
+ EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
+ EDGE_SUCC (cond_block, 1)->flags
+ &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
+ EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();
+
+ block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
}
+ delete_basic_block (block_to_remove);
- statistics_counter_event (cfun, "Replace PHI with variable", 1);
+ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
+ gsi = gsi_last_bb (cond_block);
+ gsi_remove (&gsi, true);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
@@ -854,9 +846,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
allow it and move it once the transformation is done. */
if (!empty_block_p (middle_bb))
{
- if (!single_pred_p (middle_bb))
- return false;
-
stmt_to_move = last_and_only_stmt (middle_bb);
if (!stmt_to_move)
return false;
@@ -1236,11 +1225,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
}
else
{
- if (!single_pred_p (middle_bb))
- return 0;
- statistics_counter_event (cfun, "Replace PHI with "
- "variable/value_replacement", 1);
-
/* Replace the PHI arguments with arg. */
SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
@@ -1255,6 +1239,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
}
return 1;
}
+
}
/* Now optimize (x != 0) ? x + y : y to just x + y. */
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -1,64 +0,0 @@
From fb86109ebb10cdb82e1e3ffa37bb7e770fb7c066 Mon Sep 17 00:00:00 2001
From: eastb233 <xiezhiheng@huawei.com>
Date: Wed, 7 Dec 2022 09:43:15 +0800
Subject: [PATCH] [MULL64] Disable mull64 transformation by default
This commit disables mull64 transformation by default since
it shows some runtime failure in workloads.
---
gcc/match.pd | 2 +-
gcc/opts.c | 1 -
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 2 +-
gcc/testsuite/gcc.dg/pr107190.c | 2 +-
4 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index 433682afb..01f81b063 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3393,7 +3393,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(simplify
(cond @0 (op:s @1 integer_pow2p@2) @1)
/* powerof2cst */
- (if (INTEGRAL_TYPE_P (type))
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type))
(with {
tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
}
diff --git a/gcc/opts.c b/gcc/opts.c
index 751965e46..f12b13599 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -511,7 +511,6 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
- { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
/* -O2 and above optimizations, but not -Os or -Og. */
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
index f61cf5e6f..cad891e62 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
+/* { dg-options "-O2 -fmerge-mull -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
# define BN_BITS4 32
# define BN_MASK2 (0xffffffffffffffffL)
diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
index 235b2761a..d1e72e5df 100644
--- a/gcc/testsuite/gcc.dg/pr107190.c
+++ b/gcc/testsuite/gcc.dg/pr107190.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
+/* { dg-options "-O2 -fmerge-mull -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
# define BN_BITS4 32
# define BN_MASK2 (0xffffffffffffffffL)
--
2.25.1

View File

@ -1,58 +0,0 @@
From d73cd8783ca930724def3e9909fc484ec15404f5 Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Mon, 19 Dec 2022 11:48:12 +0800
Subject: [PATCH 1/3] [loop-distribution] Bugfix for loop-distribution Add
exception in function BUILD_QUEUE when there is a null pointer in
grouped_loads.
---
gcc/tree-loop-distribution.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 88b56379c..b68b9c7eb 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -3208,16 +3208,31 @@ build_queue (loop_vec_info vinfo, unsigned vf,
{
unsigned group_size = stmt_info->size;
stmt_vec_info c_stmt_info = stmt_info;
+ bool succ = true;
while (group_size >= vf)
{
vec_alloc (worklist, vf);
for (unsigned j = 0; j < vf; ++j)
{
+ if (c_stmt_info == NULL)
+ {
+ succ = false;
+ break;
+ }
ginfo = new _group_info ();
ginfo->stmt = c_stmt_info->stmt;
worklist->safe_push (ginfo);
c_stmt_info = c_stmt_info->next_element;
}
+ if (!succ)
+ {
+ unsigned k = 0;
+ ginfo = NULL;
+ FOR_EACH_VEC_ELT (*worklist, k, ginfo)
+ delete ginfo;
+ vec_free (worklist);
+ break;
+ }
worklists.safe_push (worklist);
group_size -= vf;
}
@@ -3711,6 +3726,7 @@ free_ginfos (vec<vec<group_info> *> &worklists)
unsigned j = 0;
FOR_EACH_VEC_ELT (*worklist, j, ginfo)
delete ginfo;
+ vec_free (worklist);
}
}
--
2.27.0.windows.1

View File

@ -1,26 +0,0 @@
From b2b710238e13eb2fced77d89cd8dcc86f77b6c6c Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Mon, 19 Dec 2022 15:12:24 +0800
Subject: [PATCH 2/3] [semi-relayout] Bugfix for struct semi-relayout Bugfix
when relayout candidate type is null.
---
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index 4751711fe..2cac340c7 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -6408,6 +6408,8 @@ ipa_struct_reorg::is_semi_relayout_candidate (tree xhs)
{
tree type = TREE_TYPE (mem);
srtype *old_type = get_relayout_candidate_type (type);
+ if (!old_type)
+ return false;
if (types_compatible_p (type, old_type->type)
&& old_type->semi_relayout)
return true;
--
2.27.0.windows.1

View File

@ -1,55 +0,0 @@
From ae15300352b0fa47a533af852f88e7244c2820cc Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Tue, 29 Sep 2020 14:38:06 +0200
Subject: [PATCH 3/3] [Backport] tree-optimization/97238 - fix typo causing ICE
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=29aef377d814bd342dd5a306f99e0d614623ce0e
This fixes a typo causing a NULL dereference.
2020-09-29 Richard Biener <rguenther@suse.de>
PR tree-optimization/97238
* tree-ssa-reassoc.c (ovce_extract_ops): Fix typo.
* gcc.dg/pr97238.c: New testcase.
---
gcc/testsuite/gcc.dg/pr97238.c | 12 ++++++++++++
gcc/tree-ssa-reassoc.c | 2 +-
2 files changed, 13 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/pr97238.c
diff --git a/gcc/testsuite/gcc.dg/pr97238.c b/gcc/testsuite/gcc.dg/pr97238.c
new file mode 100644
index 000000000..746e93a97
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr97238.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -Wno-psabi -w" } */
+
+typedef int __attribute__ ((__vector_size__ (8))) V;
+int b, c, e;
+V d;
+
+V
+foo (void)
+{
+ return (b || e) | c > d | ((b || e) | c > d);
+}
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index 5f978ac78..62e7c8dca 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -3853,7 +3853,7 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
return ERROR_MARK;
gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
- if (stmt == NULL
+ if (assign == NULL
|| TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
return ERROR_MARK;
--
2.27.0.windows.1

View File

@ -1,25 +0,0 @@
From d631be52d401d834261f86113b3a738014540b6c Mon Sep 17 00:00:00 2001
From: xiongzhou4 <xiongzhou4@huawei.com>
Date: Fri, 30 Dec 2022 20:15:11 +0800
Subject: [PATCH] Replace *vcond with vcond as we check for NULL pointer.
---
gcc/tree-ssa-reassoc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index 62e7c8dca..1ad43dba1 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -3839,7 +3839,7 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
if (stmt == NULL)
return ERROR_MARK;
- if (*vcond)
+ if (vcond)
*vcond = stmt;
/* ??? If we start creating more COND_EXPR, we could perform
--
2.33.0

View File

@ -1,46 +0,0 @@
From 655fcd3bad37d6d601c869a6471284cc8255e220 Mon Sep 17 00:00:00 2001
From: liyancheng <412998149@qq.com>
Date: Wed, 23 Nov 2022 21:12:51 +0800
Subject: [PATCH] [Backport] libsanitizer: cherry-pick 9cf13067cb5088626ba7
from upstream
[Reference] https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=2701442d0cf6292f6624443c15813d6d1a3562fe
9cf13067cb5088626ba7ee1ec4c42ec59c7995a0
[sanitizer] Remove #include <linux/fs.h> to resolve fsconfig_command/mount_attr conflict with glibc 2.36
---
.../sanitizer_platform_limits_posix.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp
index aa845df4d..8b4162bcd 100644
--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp
+++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp
@@ -72,7 +72,9 @@
#include <sys/vt.h>
#include <linux/cdrom.h>
#include <linux/fd.h>
+#if SANITIZER_ANDROID
#include <linux/fs.h>
+#endif
#include <linux/hdreg.h>
#include <linux/input.h>
#include <linux/ioctl.h>
@@ -833,10 +835,10 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
unsigned IOCTL_EVIOCGPROP = IOCTL_NOT_PRESENT;
unsigned IOCTL_EVIOCSKEYCODE_V2 = IOCTL_NOT_PRESENT;
#endif
- unsigned IOCTL_FS_IOC_GETFLAGS = FS_IOC_GETFLAGS;
- unsigned IOCTL_FS_IOC_GETVERSION = FS_IOC_GETVERSION;
- unsigned IOCTL_FS_IOC_SETFLAGS = FS_IOC_SETFLAGS;
- unsigned IOCTL_FS_IOC_SETVERSION = FS_IOC_SETVERSION;
+ unsigned IOCTL_FS_IOC_GETFLAGS = _IOR('f', 1, long);
+ unsigned IOCTL_FS_IOC_GETVERSION = _IOR('v', 1, long);
+ unsigned IOCTL_FS_IOC_SETFLAGS = _IOW('f', 2, long);
+ unsigned IOCTL_FS_IOC_SETVERSION = _IOW('v', 2, long);
unsigned IOCTL_GIO_CMAP = GIO_CMAP;
unsigned IOCTL_GIO_FONT = GIO_FONT;
unsigned IOCTL_GIO_UNIMAP = GIO_UNIMAP;
--
2.25.1

View File

@ -1,30 +0,0 @@
From a7c23eb36641d605df37f5942d188a764a2480f9 Mon Sep 17 00:00:00 2001
From: huitailangzju <804544223@qq.com>
Date: Tue, 14 Feb 2023 10:54:10 +0800
Subject: [PATCH] State --sysroot option as validated once processed
[Reference] https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=8e86086bd33134467cc9c2a75327d1238dc71df9
Since we now save the option in the "switches" table
to let specs use it more generally, we need to explicitly
state that the option was validated else the driver
will consider it "unrecognized".
---
gcc/gcc.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/gcc/gcc.c b/gcc/gcc.c
index 655beffcc..efa0b53ce 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -4193,6 +4193,7 @@ driver_handle_option (struct gcc_options *opts,
/* Saving this option is useful to let self-specs decide to
provide a default one. */
do_save = true;
+ validated = true;
break;
case OPT_time_:
--
2.28.0.windows.1

View File

@ -1,129 +0,0 @@
From bf537e82d452ee9b79f438df721c2e0dfaae12a0 Mon Sep 17 00:00:00 2001
From: Xiong Zhou <xiongzhou4@huawei.com>
Date: Fri, 5 May 2023 11:57:40 +0800
Subject: [PATCH 1/2] - bogus -Wstringop-overflow with VLA of elements larger
than byte
---
gcc/calls.c | 5 ++
gcc/testsuite/gcc.dg/Wstringop-overflow-67.c | 92 ++++++++++++++++++++
2 files changed, 97 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/Wstringop-overflow-67.c
diff --git a/gcc/calls.c b/gcc/calls.c
index 26894342c..45c137cee 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -2112,6 +2112,11 @@ maybe_warn_rdwr_sizes (rdwr_map *rwm, tree fndecl, tree fntype, tree exp)
}
else
{
+ /* If the size cannot be determined clear it to keep it from
+ being taken as real (and excessive). */
+ if (objsize && integer_all_onesp (objsize))
+ objsize = NULL_TREE;
+
/* For read-only and read-write attributes also set the source
size. */
srcsize = objsize;
diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-67.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-67.c
new file mode 100644
index 000000000..7b8f3f014
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-67.c
@@ -0,0 +1,92 @@
+/* PR middle-end/100571 - bogus -Wstringop-overflow with VLA of elements
+ larger than byte
+ { dg-do compile }
+ { dg-options "-O2 -Wall" } */
+
+__attribute__ ((access (read_only, 1, 2))) void fro (int *, int);
+__attribute__ ((access (write_only, 1, 2))) void fwo (int *, int);
+__attribute__ ((access (read_write, 1, 2))) void frw (int *, int);
+
+extern __SIZE_TYPE__ n;
+
+void alloca_ro (void)
+{
+ int *a = __builtin_alloca (n * sizeof *a);
+ a[0] = 0;
+ fro (a, n);
+}
+
+void alloca_wo (void)
+{
+ int *a = __builtin_alloca (n * sizeof *a);
+ fwo (a, n);
+}
+
+void alloca_rw (void)
+{
+ int *a = __builtin_alloca (n * sizeof *a);
+ a[0] = 0;
+ frw (a, n);
+}
+
+
+void calloc_ro (void)
+{
+ int *a = __builtin_calloc (n, sizeof *a);
+ fro (a, n);
+}
+
+void calloc_wo (void)
+{
+ int *a = __builtin_calloc (n, sizeof *a);
+ fwo (a, n);
+}
+
+void calloc_rw (void)
+{
+ int *a = __builtin_calloc (n, sizeof *a);
+ a[0] = 0;
+ frw (a, n);
+}
+
+
+void malloc_ro (void)
+{
+ int *a = __builtin_malloc (n * sizeof *a);
+ a[0] = 0;
+ fro (a, n);
+}
+
+void malloc_wo (void)
+{
+ int *a = __builtin_malloc (n * sizeof *a);
+ fwo (a, n);
+}
+
+void malloc_rw (void)
+{
+ int *a = __builtin_malloc (n * sizeof *a);
+ a[0] = 0;
+ frw (a, n);
+}
+
+
+void vla_ro (void)
+{
+ int a[n];
+ a[0] = 0;
+ fro (a, n);
+}
+
+void vla_wo (void)
+{
+ int a[n];
+ fwo (a, n);
+}
+
+void vla_rw (void)
+{
+ int a[n];
+ a[0] = 0;
+ frw (a, n);
+}
--
2.33.0

View File

@ -1,183 +0,0 @@
From bc6537191e91c854cc6bee3319290d7a86768957 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Wed, 10 May 2023 18:39:47 +0800
Subject: [PATCH 2/2] [phiopt2] Add option to control the simplify
The phiopt is brought in https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=c4574d23cb07340918793a5a98ae7bb2988b3791
But it may also have bugs that were fixed by later commits, so disable it by default temporarily.
This optimization is expected to be enabled after we rebase GCC onto the GCC 12 release version.
---
gcc/common.opt | 4 ++++
gcc/testsuite/gcc.dg/tree-ssa/20040514-1.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/bool-1.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/bool-2.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-10.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-4.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-7.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-8.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/pr18134.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/pr21829.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c | 4 ++--
gcc/tree-ssa-phiopt.c | 3 +++
13 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/gcc/common.opt b/gcc/common.opt
index be7bfee60..5ad2def18 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2781,6 +2781,10 @@ ftree-store-ccp
Common Ignore
Does nothing. Preserved for backward compatibility.
+ftree-fold-phiopt
+Common Report Var(flag_fold_phiopt) Init(0) Optimization
+Attempt to simplify the phi node with ssa form.
+
ftree-ch
Common Report Var(flag_tree_ch) Optimization
Enable loop header copying on trees.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040514-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040514-1.c
index 364ce6a69..b04316d55 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/20040514-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/20040514-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-phiopt2-details" } */
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-phiopt2-details" } */
int t( int i)
{
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bool-1.c b/gcc/testsuite/gcc.dg/tree-ssa/bool-1.c
index 401357f2f..892654108 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bool-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bool-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
int f(_Bool x)
{
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bool-2.c b/gcc/testsuite/gcc.dg/tree-ssa/bool-2.c
index add9cca1e..5ead90f06 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bool-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bool-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
int f(_Bool x)
{
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-10.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-10.c
index 4c190e6af..7b678fafc 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-10.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-10.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
int nem1_phi (unsigned long a) { return a ? -1 : 0; }
int eqm1_phi (unsigned long a) { return a ? 0 : -1; }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
index fd3706666..23b679644 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
@@ -1,6 +1,6 @@
/* PR tree-optimization/97690 */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-options "-O2 -ftree-fold-phiopt -fdump-tree-phiopt2" } */
int foo (_Bool d) { return d ? 2 : 0; }
int bar (_Bool d) { return d ? 1 : 0; }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-4.c
index 3bdb85609..4efd9afc4 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-4.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
_Bool t();
_Bool t1();
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-7.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-7.c
index 18ecbd52a..60dcc6733 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-7.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-7.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
int g(int,int);
int f(int t, int c)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-8.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-8.c
index 98c596b6a..aaa71a317 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-8.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-8.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-optimized -fdump-tree-phiopt2" } */
+/* { dg-options "-O -ftree-fold-phiopt -fdump-tree-optimized -fdump-tree-phiopt2" } */
int g(int,int);
int f(int t, int c)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr18134.c b/gcc/testsuite/gcc.dg/tree-ssa/pr18134.c
index cd40ab2c1..efb1907cf 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr18134.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr18134.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O1 -ftree-fold-phiopt -fdump-tree-optimized" } */
int foo (int a)
{
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c b/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c
index 8f5ae5127..8c8ada905 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -ftree-fold-phiopt -fdump-tree-optimized" } */
int test(int v)
{
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
index a2770e5e8..88c13806a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
@@ -1,9 +1,9 @@
/* PR tree-optimization/96928 */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-options "-O2 -ftree-fold-phiopt -fdump-tree-phiopt2 -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
-/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 51a2d3684..b7012932f 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -839,6 +839,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
tree result;
gimple *stmt_to_move = NULL;
+ if (!flag_fold_phiopt)
+ return false;
+
/* Special case A ? B : B as this will always simplify to B. */
if (operand_equal_for_phi_arg_p (arg0, arg1))
return false;
--
2.33.0

743
gcc.spec

File diff suppressed because it is too large Load Diff