From 8eef73d0714c8bb5a4fc5a8b6ae007752d0061b2 Mon Sep 17 00:00:00 2001
From: tangbinzy <tangbin_yewu@cmss.chinamobile.com>
Date: Wed, 23 Nov 2022 15:35:25 -0800
Subject: [PATCH 13/29] tcg/optimize: Fix folding of vector ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

mainline inclusion
commit c578ff18584666499c3141b2d770b9e36b5e9d7e
category: bugfix

------------------------------------------------------------

Bitwise operations are easy to fold, because the operation is
identical regardless of element size. But add and sub need
extra element size info that is not currently propagated.

Fixes: 2f9f08ba43d
Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/799
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Signed-off-by: tangbinzy <tangbin_yewu@cmss.chinamobile.com>
---
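
Not part of the upstream patch, but a minimal standalone C sketch of the
reasoning in the commit message above: a bitwise op folds to the same bits
whether a 64-bit constant is treated as one lane or as four 16-bit lanes,
while an add does not, because carries cross lane boundaries. The lane
helpers (and_lanes16, add_lanes16) are invented purely for this demo.

/*
 * Illustration only (not from tcg/optimize.c): why bitwise folding needs no
 * element size but add folding does.  The helpers apply the operation to
 * four independent 16-bit lanes packed in a uint64_t.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t and_lanes16(uint64_t x, uint64_t y)
{
    uint64_t r = 0;
    for (int i = 0; i < 4; i++) {
        uint64_t lx = (x >> (16 * i)) & 0xffff;
        uint64_t ly = (y >> (16 * i)) & 0xffff;
        r |= (lx & ly) << (16 * i);
    }
    return r;
}

static uint64_t add_lanes16(uint64_t x, uint64_t y)
{
    uint64_t r = 0;
    for (int i = 0; i < 4; i++) {
        uint64_t lx = (x >> (16 * i)) & 0xffff;
        uint64_t ly = (y >> (16 * i)) & 0xffff;
        r |= ((lx + ly) & 0xffff) << (16 * i);  /* carry cannot leave the lane */
    }
    return r;
}

int main(void)
{
    uint64_t x = 0x0001ffff0001ffffULL;
    uint64_t y = 0x0000000100000001ULL;

    /* Bitwise: per-lane and whole-register folding always agree. */
    printf("and matches: %d\n", and_lanes16(x, y) == (x & y));  /* prints 1 */

    /* Add: the two disagree whenever a lane addition carries out. */
    printf("add matches: %d\n", add_lanes16(x, y) == (x + y));  /* prints 0 */
    return 0;
}
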
 tcg/optimize.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 2397f2cf93..e573000951 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -308,13 +308,13 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
     CASE_OP_32_64(mul):
         return x * y;

-    CASE_OP_32_64(and):
+    CASE_OP_32_64_VEC(and):
         return x & y;

-    CASE_OP_32_64(or):
+    CASE_OP_32_64_VEC(or):
         return x | y;

-    CASE_OP_32_64(xor):
+    CASE_OP_32_64_VEC(xor):
         return x ^ y;

     case INDEX_op_shl_i32:
@@ -347,16 +347,16 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
     case INDEX_op_rotl_i64:
         return rol64(x, y & 63);

-    CASE_OP_32_64(not):
+    CASE_OP_32_64_VEC(not):
         return ~x;

     CASE_OP_32_64(neg):
         return -x;

-    CASE_OP_32_64(andc):
+    CASE_OP_32_64_VEC(andc):
         return x & ~y;

-    CASE_OP_32_64(orc):
+    CASE_OP_32_64_VEC(orc):
         return x | ~y;

     CASE_OP_32_64(eqv):
@@ -751,6 +751,12 @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
     return false;
 }

+static bool fold_commutative(OptContext *ctx, TCGOp *op)
+{
+    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
+    return false;
+}
+
 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
 {
     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
@@ -905,6 +911,16 @@ static bool fold_add(OptContext *ctx, TCGOp *op)
     return false;
 }

+/* We cannot as yet do_constant_folding with vectors. */
+static bool fold_add_vec(OptContext *ctx, TCGOp *op)
+{
+    if (fold_commutative(ctx, op) ||
+        fold_xi_to_x(ctx, op, 0)) {
+        return true;
+    }
+    return false;
+}
+
 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
 {
     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
@@ -1938,10 +1954,10 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
     return false;
 }

-static bool fold_sub(OptContext *ctx, TCGOp *op)
+/* We cannot as yet do_constant_folding with vectors. */
+static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
 {
-    if (fold_const2(ctx, op) ||
-        fold_xx_to_i(ctx, op, 0) ||
+    if (fold_xx_to_i(ctx, op, 0) ||
         fold_xi_to_x(ctx, op, 0) ||
         fold_sub_to_neg(ctx, op)) {
         return true;
@@ -1949,6 +1965,11 @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
     return false;
 }

+static bool fold_sub(OptContext *ctx, TCGOp *op)
+{
+    return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
+}
+
 static bool fold_sub2(OptContext *ctx, TCGOp *op)
 {
     return fold_addsub2(ctx, op, false);
@@ -2052,9 +2073,12 @@ void tcg_optimize(TCGContext *s)
          * Sorted alphabetically by opcode as much as possible.
          */
         switch (opc) {
-        CASE_OP_32_64_VEC(add):
+        CASE_OP_32_64(add):
             done = fold_add(&ctx, op);
             break;
+        case INDEX_op_add_vec:
+            done = fold_add_vec(&ctx, op);
+            break;
         CASE_OP_32_64(add2):
             done = fold_add2(&ctx, op);
             break;
@@ -2193,9 +2217,12 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(sextract):
             done = fold_sextract(&ctx, op);
             break;
-        CASE_OP_32_64_VEC(sub):
+        CASE_OP_32_64(sub):
             done = fold_sub(&ctx, op);
             break;
+        case INDEX_op_sub_vec:
+            done = fold_sub_vec(&ctx, op);
+            break;
         CASE_OP_32_64(sub2):
             done = fold_sub2(&ctx, op);
             break;
--
2.27.0