256 lines
10 KiB
Diff
256 lines
10 KiB
Diff
From d33a3a2e991ef69db9711df53ba74a0a7ff918a6 Mon Sep 17 00:00:00 2001
|
|
Date: Fri, 22 Jan 2021 16:30:59 +0800
|
|
Subject: 8160425: Vectorization with signalling NaN returns
|
|
wrong result
|
|
|
|
Summary: <hotspot>: Should not use doubles/floats for vector constants in the C code
|
|
---
|
|
hotspot/src/cpu/sparc/vm/sparc.ad | 10 +--
|
|
hotspot/src/cpu/x86/vm/x86.ad | 10 +--
|
|
hotspot/src/share/vm/asm/assembler.hpp | 9 ++
|
|
hotspot/src/share/vm/opto/compile.cpp | 3 +
|
|
hotspot/src/share/vm/opto/compile.hpp | 9 ++
|
|
.../compiler/vectorization/TestNaNVector.java | 84 +++++++++++++++++++
|
|
6 files changed, 113 insertions(+), 12 deletions(-)
|
|
create mode 100644 hotspot/test/compiler/vectorization/TestNaNVector.java
|
|
|
|
diff --git a/hotspot/src/cpu/sparc/vm/sparc.ad b/hotspot/src/cpu/sparc/vm/sparc.ad
|
|
index 20ec462a7..c8763c411 100644
|
|
--- a/hotspot/src/cpu/sparc/vm/sparc.ad
|
|
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad
|
|
@@ -720,7 +720,7 @@ intptr_t get_offset_from_base_2(const MachNode* n, const TypePtr* atype, int dis
|
|
return offset;
|
|
}
|
|
|
|
-static inline jdouble replicate_immI(int con, int count, int width) {
|
|
+static inline jlong replicate_immI(int con, int count, int width) {
|
|
// Load a constant replicated "count" times with width "width"
|
|
assert(count*width == 8 && width <= 4, "sanity");
|
|
int bit_width = width * 8;
|
|
@@ -729,17 +729,15 @@ static inline jdouble replicate_immI(int con, int count, int width) {
|
|
for (int i = 0; i < count - 1; i++) {
|
|
val |= (val << bit_width);
|
|
}
|
|
- jdouble dval = *((jdouble*) &val); // coerce to double type
|
|
- return dval;
|
|
+ return val;
|
|
}
|
|
|
|
-static inline jdouble replicate_immF(float con) {
|
|
+static inline jlong replicate_immF(float con) {
|
|
// Replicate float con 2 times and pack into vector.
|
|
int val = *((int*)&con);
|
|
jlong lval = val;
|
|
lval = (lval << 32) | (lval & 0xFFFFFFFFl);
|
|
- jdouble dval = *((jdouble*) &lval); // coerce to double type
|
|
- return dval;
|
|
+ return lval;
|
|
}
|
|
|
|
// Standard Sparc opcode form2 field breakdown
|
|
diff --git a/hotspot/src/cpu/x86/vm/x86.ad b/hotspot/src/cpu/x86/vm/x86.ad
|
|
index 36d6d96ae..48a0b95b4 100644
|
|
--- a/hotspot/src/cpu/x86/vm/x86.ad
|
|
+++ b/hotspot/src/cpu/x86/vm/x86.ad
|
|
@@ -856,7 +856,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
|
|
return 5+offset_size;
|
|
}
|
|
|
|
-static inline jfloat replicate4_imm(int con, int width) {
|
|
+static inline jint replicate4_imm(int con, int width) {
|
|
// Load a constant of "width" (in bytes) and replicate it to fill 32bit.
|
|
assert(width == 1 || width == 2, "only byte or short types here");
|
|
int bit_width = width * 8;
|
|
@@ -866,11 +866,10 @@ static inline jfloat replicate4_imm(int con, int width) {
|
|
val |= (val << bit_width);
|
|
bit_width <<= 1;
|
|
}
|
|
- jfloat fval = *((jfloat*) &val); // coerce to float type
|
|
- return fval;
|
|
+ return val;
|
|
}
|
|
|
|
-static inline jdouble replicate8_imm(int con, int width) {
|
|
+static inline jlong replicate8_imm(int con, int width) {
|
|
// Load a constant of "width" (in bytes) and replicate it to fill 64bit.
|
|
assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
|
|
int bit_width = width * 8;
|
|
@@ -880,8 +879,7 @@ static inline jdouble replicate8_imm(int con, int width) {
|
|
val |= (val << bit_width);
|
|
bit_width <<= 1;
|
|
}
|
|
- jdouble dval = *((jdouble*) &val); // coerce to double type
|
|
- return dval;
|
|
+ return val;
|
|
}
|
|
|
|
#ifndef PRODUCT
|
|
diff --git a/hotspot/src/share/vm/asm/assembler.hpp b/hotspot/src/share/vm/asm/assembler.hpp
|
|
index 889dd361d..ec72bc3a1 100644
|
|
--- a/hotspot/src/share/vm/asm/assembler.hpp
|
|
+++ b/hotspot/src/share/vm/asm/assembler.hpp
|
|
@@ -368,6 +368,15 @@ class AbstractAssembler : public ResourceObj {
|
|
//
|
|
// We must remember the code section (insts or stubs) in c1
|
|
// so we can reset to the proper section in end_a_const().
|
|
+ address int_constant(jint c) {
|
|
+ CodeSection* c1 = _code_section;
|
|
+ address ptr = start_a_const(sizeof(c), sizeof(c));
|
|
+ if (ptr != NULL) {
|
|
+ emit_int32(c);
|
|
+ end_a_const(c1);
|
|
+ }
|
|
+ return ptr;
|
|
+ }
|
|
address long_constant(jlong c) {
|
|
CodeSection* c1 = _code_section;
|
|
address ptr = start_a_const(sizeof(c), sizeof(c));
|
|
diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp
|
|
index b1faf5dc4..540cae5d6 100644
|
|
--- a/hotspot/src/share/vm/opto/compile.cpp
|
|
+++ b/hotspot/src/share/vm/opto/compile.cpp
|
|
@@ -3765,6 +3765,7 @@ bool Compile::Constant::operator==(const Constant& other) {
|
|
if (can_be_reused() != other.can_be_reused()) return false;
|
|
// For floating point values we compare the bit pattern.
|
|
switch (type()) {
|
|
+ case T_INT:
|
|
case T_FLOAT: return (_v._value.i == other._v._value.i);
|
|
case T_LONG:
|
|
case T_DOUBLE: return (_v._value.j == other._v._value.j);
|
|
@@ -3779,6 +3780,7 @@ bool Compile::Constant::operator==(const Constant& other) {
|
|
|
|
static int type_to_size_in_bytes(BasicType t) {
|
|
switch (t) {
|
|
+ case T_INT: return sizeof(jint );
|
|
case T_LONG: return sizeof(jlong );
|
|
case T_FLOAT: return sizeof(jfloat );
|
|
case T_DOUBLE: return sizeof(jdouble);
|
|
@@ -3847,6 +3849,7 @@ void Compile::ConstantTable::emit(CodeBuffer& cb) {
|
|
Constant con = _constants.at(i);
|
|
address constant_addr = NULL;
|
|
switch (con.type()) {
|
|
+ case T_INT: constant_addr = _masm.int_constant( con.get_jint() ); break;
|
|
case T_LONG: constant_addr = _masm.long_constant( con.get_jlong() ); break;
|
|
case T_FLOAT: constant_addr = _masm.float_constant( con.get_jfloat() ); break;
|
|
case T_DOUBLE: constant_addr = _masm.double_constant(con.get_jdouble()); break;
|
|
diff --git a/hotspot/src/share/vm/opto/compile.hpp b/hotspot/src/share/vm/opto/compile.hpp
|
|
index cea7ad867..fd750b10e 100644
|
|
--- a/hotspot/src/share/vm/opto/compile.hpp
|
|
+++ b/hotspot/src/share/vm/opto/compile.hpp
|
|
@@ -203,6 +203,7 @@ class Compile : public Phase {
|
|
|
|
BasicType type() const { return _type; }
|
|
|
|
+ jint get_jint() const { return _v._value.i; }
|
|
jlong get_jlong() const { return _v._value.j; }
|
|
jfloat get_jfloat() const { return _v._value.f; }
|
|
jdouble get_jdouble() const { return _v._value.d; }
|
|
@@ -259,6 +260,14 @@ class Compile : public Phase {
|
|
Constant add(MachConstantNode* n, BasicType type, jvalue value);
|
|
Constant add(Metadata* metadata);
|
|
Constant add(MachConstantNode* n, MachOper* oper);
|
|
+ Constant add(MachConstantNode* n, jint i) {
|
|
+ jvalue value; value.i = i;
|
|
+ return add(n, T_INT, value);
|
|
+ }
|
|
+ Constant add(MachConstantNode* n, jlong j) {
|
|
+ jvalue value; value.j = j;
|
|
+ return add(n, T_LONG, value);
|
|
+ }
|
|
Constant add(MachConstantNode* n, jfloat f) {
|
|
jvalue value; value.f = f;
|
|
return add(n, T_FLOAT, value);
|
|
diff --git a/hotspot/test/compiler/vectorization/TestNaNVector.java b/hotspot/test/compiler/vectorization/TestNaNVector.java
|
|
new file mode 100644
|
|
index 000000000..302657951
|
|
--- /dev/null
|
|
+++ b/hotspot/test/compiler/vectorization/TestNaNVector.java
|
|
@@ -0,0 +1,84 @@
|
|
+/*
|
|
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ */
|
|
+
|
|
+/**
|
|
+ * @test
|
|
+ * @bug 8160425
|
|
+ * @summary Test vectorization with a signalling NaN.
|
|
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-OptimizeFill TestNaNVector
|
|
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-OptimizeFill -XX:MaxVectorSize=4 TestNaNVector
|
|
+ */
|
|
+public class TestNaNVector {
|
|
+ private char[] array;
|
|
+ private static final int LEN = 1024;
|
|
+
|
|
+ public static void main(String args[]) {
|
|
+ TestNaNVector test = new TestNaNVector();
|
|
+ // Check double precision NaN
|
|
+ for (int i = 0; i < 10_000; ++i) {
|
|
+ test.vectorizeNaNDP();
|
|
+ }
|
|
+ System.out.println("Checking double precision Nan");
|
|
+ test.checkResult(0xfff7);
|
|
+
|
|
+ // Check single precision NaN
|
|
+ for (int i = 0; i < 10_000; ++i) {
|
|
+ test.vectorizeNaNSP();
|
|
+ }
|
|
+ System.out.println("Checking single precision Nan");
|
|
+ test.checkResult(0xff80);
|
|
+ }
|
|
+
|
|
+ public TestNaNVector() {
|
|
+ array = new char[LEN];
|
|
+ }
|
|
+
|
|
+ public void vectorizeNaNDP() {
|
|
+ // This loop will be vectorized and the array store will be replaced by
|
|
+ // a 64-bit vector store to four subsequent array elements. The vector
|
|
+ // should look like this '0xfff7fff7fff7fff7' and is read from the constant
|
|
+ // table. However, in floating point arithmetic this is a signalling NaN
|
|
+ // which may be converted to a quiet NaN when processed by the x87 FPU.
|
|
+ // If the signalling bit is set, the vector ends up in the constant table
|
|
+ // as '0xfffffff7fff7fff7' which leads to an incorrect result.
|
|
+ for (int i = 0; i < LEN; ++i) {
|
|
+ array[i] = 0xfff7;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public void vectorizeNaNSP() {
|
|
+ // Same as above but with single precision
|
|
+ for (int i = 0; i < LEN; ++i) {
|
|
+ array[i] = 0xff80;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public void checkResult(int expected) {
|
|
+ for (int i = 0; i < LEN; ++i) {
|
|
+ if (array[i] != expected) {
|
|
+ throw new RuntimeException("Invalid result: array[" + i + "] = " + (int)array[i] + " != " + expected);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
--
|
|
2.19.0
|
|
|