gcc/div-opti.patch

From dbf3dc75888623e9d4bb7cc5e9c30caa9b24ffe7 Mon Sep 17 00:00:00 2001
From: Bu Le <bule1@huawei.com>
Date: Thu, 12 Mar 2020 22:39:12 +0000
Subject: [PATCH] aarch64: Add --params to control the number of recip steps
 [PR94154]

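-mlow-precision-div hard-coded the number of iterations to 2 for double
and 1 for float.  This patch adds a --param to control the number.

Note: in this version of the patch the two params are defined in
gcc/params.def (see the second hunk below).  The ChangeLog entry and
diffstat that follow still refer to config/aarch64/aarch64.opt and
doc/invoke.texi, which are not modified by the hunks in this file; they
appear to have been carried over unchanged from another version of the
change.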

2020-03-13  Bu Le  <bule1@huawei.com>

gcc/
	PR target/94154
	* config/aarch64/aarch64.opt (-param=aarch64-float-recp-precision=)
	(-param=aarch64-double-recp-precision=): New options.
	* doc/invoke.texi: Document them.
	* config/aarch64/aarch64.c (aarch64_emit_approx_div): Use them
	instead of hard-coding the choice of 1 for float and 2 for double.
---
 gcc/ChangeLog                  |  9 +++++++++
 gcc/config/aarch64/aarch64.c   |  8 +++++---
 gcc/config/aarch64/aarch64.opt |  9 +++++++++
 gcc/doc/invoke.texi            | 11 +++++++++++
 4 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c320d5ba51d..2c81f86dd2a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -12911,10 +12911,12 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
   /* Iterate over the series twice for SF and thrice for DF.  */
   int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
 
-  /* Optionally iterate over the series once less for faster performance,
-     while sacrificing the accuracy.  */
+  /* Optionally iterate over the series less for faster performance,
+     while sacrificing the accuracy.  The default is 2 for DF and 1 for SF.  */
   if (flag_mlow_precision_div)
-    iterations--;
+    iterations = (GET_MODE_INNER (mode) == DFmode
+		  ? PARAM_VALUE (PARAM_AARCH64_DOUBLE_RECP_PRECISION)
+		  : PARAM_VALUE (PARAM_AARCH64_FLOAT_RECP_PRECISION));
 
   /* Iterate over the series to calculate the approximate reciprocal.  */
   rtx xtmp = gen_reg_rtx (mode);

--- a/gcc/params.def	2020-04-15 17:24:31.984000000 +0800
+++ b/gcc/params.def	2020-04-15 16:59:21.752000000 +0800
@@ -1420,6 +1414,17 @@ DEFPARAM(PARAM_SSA_NAME_DEF_CHAIN_LIMIT,
 	 "a value.",
 	 512, 0, 0)
 
+DEFPARAM(PARAM_AARCH64_FLOAT_RECP_PRECISION,
+	 "aarch64-float-recp-precision",
+	 "The number of Newton iterations for calculating the reciprocal "
+	 "for float type.",
+	 1, 1, 5)
+
+DEFPARAM(PARAM_AARCH64_DOUBLE_RECP_PRECISION,
+	 "aarch64-double-recp-precision",
+	 "The number of Newton iterations for calculating the reciprocal "
+	 "for double type.",
+	 2, 1, 5)
 /*
 
 Local variables:
-- 
2.18.2
Init gcc-9.3.0 2020-05-14 10:48:46 +08:00
			`From dbf3dc75888623e9d4bb7cc5e9c30caa9b24ffe7 Mon Sep 17 00:00:00 2001`
			`From: Bu Le <bule1@huawei.com>`
			`Date: Thu, 12 Mar 2020 22:39:12 +0000`
			`Subject: [PATCH] aarch64: Add --params to control the number of recip steps`
			`[PR94154]`

			`-mlow-precision-div hard-coded the number of iterations to 2 for double`
			`and 1 for float. This patch adds a --param to control the number.`

			`2020-03-13 Bu Le <bule1@huawei.com>`

			`gcc/`
			`PR target/94154`
			`* config/aarch64/aarch64.opt (-param=aarch64-float-recp-precision=)`
			`(-param=aarch64-double-recp-precision=): New options.`
			`* doc/invoke.texi: Document them.`
			`* config/aarch64/aarch64.c (aarch64_emit_approx_div): Use them`
			`instead of hard-coding the choice of 1 for float and 2 for double.`
			`---`
			`gcc/ChangeLog | 9 +++++++++`
			`gcc/config/aarch64/aarch64.c | 8 +++++---`
			`gcc/config/aarch64/aarch64.opt | 9 +++++++++`
			`gcc/doc/invoke.texi | 11 +++++++++++`
			`4 files changed, 34 insertions(+), 3 deletions(-)`

			`diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c`
			`index c320d5ba51d..2c81f86dd2a 100644`
			`--- a/gcc/config/aarch64/aarch64.c`
			`+++ b/gcc/config/aarch64/aarch64.c`
			`@@ -12911,10 +12911,12 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den)`
			`/* Iterate over the series twice for SF and thrice for DF. */`
			`int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;`

			`- /* Optionally iterate over the series once less for faster performance,`
			`- while sacrificing the accuracy. */`
			`+ /* Optionally iterate over the series less for faster performance,`
			`+ while sacrificing the accuracy. The default is 2 for DF and 1 for SF. */`
			`if (flag_mlow_precision_div)`
			`- iterations--;`
			`+ iterations = (GET_MODE_INNER (mode) == DFmode`
			`+ ? PARAM_VALUE (PARAM_AARCH64_DOUBLE_RECP_PRECISION)`
			`+ : PARAM_VALUE (PARAM_AARCH64_FLOAT_RECP_PRECISION));`

			`/* Iterate over the series to calculate the approximate reciprocal. */`
			`rtx xtmp = gen_reg_rtx (mode);`

			`--- a/gcc/params.def 2020-04-15 17:24:31.984000000 +0800`
			`+++ b/gcc/params.def 2020-04-15 16:59:21.752000000 +0800`
			`@@ -1420,6 +1414,17 @@ DEFPARAM(PARAM_SSA_NAME_DEF_CHAIN_LIMIT,`
			`"a value.",`
			`512, 0, 0)`

			`+DEFPARAM(PARAM_AARCH64_FLOAT_RECP_PRECISION,`
			`+ "aarch64-float-recp-precision",`
			`+ "The number of Newton iterations for calculating the reciprocal "`
			`+ "for float type. ",`
			`+ 1, 1, 5)`
			`+`
			`+DEFPARAM(PARAM_AARCH64_DOUBLE_RECP_PRECISION,`
			`+ "aarch64-double-recp-precision",`
			`+ "The number of Newton iterations for calculating the reciprocal "`
			`+ "for double type.",`
			`+ 2, 1, 5)`
			`/*`

			`Local variables:`
			`--`
			`2.18.2`