Add several enhancement patches
This commit is contained in:
parent
c8453dae8b
commit
1d2c0ca210
87
8046294-Generate-the-4-byte-timestamp-randomly.patch
Normal file
87
8046294-Generate-the-4-byte-timestamp-randomly.patch
Normal file
@ -0,0 +1,87 @@
|
||||
diff --git a/jdk/src/share/classes/sun/security/ssl/RandomCookie.java b/jdk/src/share/classes/sun/security/ssl/RandomCookie.java
|
||||
index 5f414c408..ce27f0df4 100644
|
||||
--- a/jdk/src/share/classes/sun/security/ssl/RandomCookie.java
|
||||
+++ b/jdk/src/share/classes/sun/security/ssl/RandomCookie.java
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (c) 1996, 2007, Oracle and/or its affiliates. All rights reserved.
|
||||
+ * Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@@ -41,21 +41,8 @@ final class RandomCookie {
|
||||
byte random_bytes[]; // exactly 32 bytes
|
||||
|
||||
RandomCookie(SecureRandom generator) {
|
||||
- long temp = System.currentTimeMillis() / 1000;
|
||||
- int gmt_unix_time;
|
||||
- if (temp < Integer.MAX_VALUE) {
|
||||
- gmt_unix_time = (int) temp;
|
||||
- } else {
|
||||
- gmt_unix_time = Integer.MAX_VALUE; // Whoops!
|
||||
- }
|
||||
-
|
||||
random_bytes = new byte[32];
|
||||
generator.nextBytes(random_bytes);
|
||||
-
|
||||
- random_bytes[0] = (byte)(gmt_unix_time >> 24);
|
||||
- random_bytes[1] = (byte)(gmt_unix_time >> 16);
|
||||
- random_bytes[2] = (byte)(gmt_unix_time >> 8);
|
||||
- random_bytes[3] = (byte)gmt_unix_time;
|
||||
}
|
||||
|
||||
RandomCookie(HandshakeInStream m) throws IOException {
|
||||
@@ -68,22 +55,15 @@ final class RandomCookie {
|
||||
}
|
||||
|
||||
void print(PrintStream s) {
|
||||
- int i, gmt_unix_time;
|
||||
-
|
||||
- gmt_unix_time = random_bytes[0] << 24;
|
||||
- gmt_unix_time += random_bytes[1] << 16;
|
||||
- gmt_unix_time += random_bytes[2] << 8;
|
||||
- gmt_unix_time += random_bytes[3];
|
||||
-
|
||||
- s.print("GMT: " + gmt_unix_time + " ");
|
||||
- s.print("bytes = { ");
|
||||
-
|
||||
- for (i = 4; i < 32; i++) {
|
||||
- if (i != 4) {
|
||||
- s.print(", ");
|
||||
+ s.print("random_bytes = {");
|
||||
+ for (int i = 0; i < 32; i++) {
|
||||
+ int k = random_bytes[i] & 0xFF;
|
||||
+ if (i != 0) {
|
||||
+ s.print(' ');
|
||||
}
|
||||
- s.print(random_bytes[i] & 0x0ff);
|
||||
+ s.print(Utilities.hexDigits[k >>> 4]);
|
||||
+ s.print(Utilities.hexDigits[k & 0xf]);
|
||||
}
|
||||
- s.println(" }");
|
||||
+ s.println("}");
|
||||
}
|
||||
}
|
||||
diff --git a/jdk/src/share/classes/sun/security/ssl/Utilities.java b/jdk/src/share/classes/sun/security/ssl/Utilities.java
|
||||
index aefb02c9a..9b267f6e1 100644
|
||||
--- a/jdk/src/share/classes/sun/security/ssl/Utilities.java
|
||||
+++ b/jdk/src/share/classes/sun/security/ssl/Utilities.java
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@@ -33,6 +33,11 @@ import sun.net.util.IPAddressUtil;
|
||||
* A utility class to share the static methods.
|
||||
*/
|
||||
final class Utilities {
|
||||
+ /**
|
||||
+ * hex digits
|
||||
+ */
|
||||
+ static final char[] hexDigits = "0123456789ABCDEF".toCharArray();
|
||||
+
|
||||
/**
|
||||
* Puts {@code hostname} into the {@code serverNames} list.
|
||||
* <P>
|
||||
@ -0,0 +1,28 @@
|
||||
diff --git a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp
|
||||
index 65a441240..1e534d3da 100644
|
||||
--- a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp
|
||||
+++ b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp
|
||||
@@ -71,10 +71,20 @@ bool frame::safe_for_sender(JavaThread *thread) {
|
||||
return false;
|
||||
}
|
||||
|
||||
- // unextended sp must be within the stack and above or equal sp
|
||||
- bool unextended_sp_safe = (unextended_sp < thread->stack_base()) &&
|
||||
- (unextended_sp >= sp);
|
||||
+ // When we are running interpreted code the machine stack pointer, SP, is
|
||||
+ // set low enough so that the Java expression stack can grow and shrink
|
||||
+ // without ever exceeding the machine stack bounds. So, ESP >= SP.
|
||||
|
||||
+ // When we call out of an interpreted method, SP is incremented so that
|
||||
+ // the space between SP and ESP is removed. The SP saved in the callee's
|
||||
+ // frame is the SP *before* this increment. So, when we walk a stack of
|
||||
+ // interpreter frames the sender's SP saved in a frame might be less than
|
||||
+ // the SP at the point of call.
|
||||
+
|
||||
+ // So unextended sp must be within the stack, but we need not check
|
||||
+ // that unextended sp >= sp
|
||||
+
|
||||
+ bool unextended_sp_safe = (unextended_sp < thread->stack_base());
|
||||
if (!unextended_sp_safe) {
|
||||
return false;
|
||||
}
|
||||
@ -0,0 +1,558 @@
|
||||
diff --git a/hotspot/src/share/vm/opto/chaitin.hpp b/hotspot/src/share/vm/opto/chaitin.hpp
|
||||
index de6d443cd..abbd4449f 100644
|
||||
--- a/hotspot/src/share/vm/opto/chaitin.hpp
|
||||
+++ b/hotspot/src/share/vm/opto/chaitin.hpp
|
||||
@@ -111,9 +111,9 @@ public:
|
||||
_msize_valid=1;
|
||||
if (_is_vector) {
|
||||
assert(!_fat_proj, "sanity");
|
||||
- _mask.verify_sets(_num_regs);
|
||||
+ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
|
||||
} else if (_num_regs == 2 && !_fat_proj) {
|
||||
- _mask.verify_pairs();
|
||||
+ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
diff --git a/hotspot/src/share/vm/opto/regmask.cpp b/hotspot/src/share/vm/opto/regmask.cpp
|
||||
index 352ccfb9d..d92f09eb6 100644
|
||||
--- a/hotspot/src/share/vm/opto/regmask.cpp
|
||||
+++ b/hotspot/src/share/vm/opto/regmask.cpp
|
||||
@@ -74,7 +74,8 @@ int find_lowest_bit( uint32 mask ) {
|
||||
}
|
||||
|
||||
// Find highest 1, or return 32 if empty
|
||||
-int find_hihghest_bit( uint32 mask ) {
|
||||
+int find_highest_bit( uint32 mask ) {
|
||||
+ assert(mask != 0, "precondition");
|
||||
int n = 0;
|
||||
if( mask > 0xffff ) {
|
||||
mask >>= 16;
|
||||
@@ -167,13 +168,14 @@ OptoReg::Name RegMask::find_first_pair() const {
|
||||
//------------------------------ClearToPairs-----------------------------------
|
||||
// Clear out partial bits; leave only bit pairs
|
||||
void RegMask::clear_to_pairs() {
|
||||
- for( int i = 0; i < RM_SIZE; i++ ) {
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
+ for( int i = _lwm; i <= _hwm; i++ ) { // _hwm is inclusive: the highest word that may hold bits
|
||||
int bits = _A[i];
|
||||
bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
|
||||
bits |= (bits>>1); // Smear 1 hi-bit into a pair
|
||||
_A[i] = bits;
|
||||
}
|
||||
- verify_pairs();
|
||||
+ assert(is_aligned_pairs(), "mask is not aligned, adjacent pairs");
|
||||
}
|
||||
|
||||
//------------------------------SmearToPairs-----------------------------------
|
||||
@@ -188,10 +190,14 @@ void RegMask::smear_to_pairs() {
|
||||
verify_pairs();
|
||||
}
|
||||
|
||||
-//------------------------------is_aligned_pairs-------------------------------
|
||||
+bool RegMask::is_misaligned_pair() const {
|
||||
+ return Size() == 2 && !is_aligned_pairs();
|
||||
+}
|
||||
+
|
||||
bool RegMask::is_aligned_pairs() const {
|
||||
// Assert that the register mask contains only bit pairs.
|
||||
- for( int i = 0; i < RM_SIZE; i++ ) {
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
+ for( int i = _lwm; i <= _hwm; i++ ) { // _hwm is inclusive: the highest word that may hold bits
|
||||
int bits = _A[i];
|
||||
while( bits ) { // Check bits for pairing
|
||||
int bit = bits & -bits; // Extract low bit
|
||||
@@ -206,39 +212,28 @@ bool RegMask::is_aligned_pairs() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
-//------------------------------is_bound1--------------------------------------
|
||||
-// Return TRUE if the mask contains a single bit
|
||||
-int RegMask::is_bound1() const {
|
||||
- if( is_AllStack() ) return false;
|
||||
- int bit = -1; // Set to hold the one bit allowed
|
||||
- for( int i = 0; i < RM_SIZE; i++ ) {
|
||||
- if( _A[i] ) { // Found some bits
|
||||
- if( bit != -1 ) return false; // Already had bits, so fail
|
||||
- bit = _A[i] & -_A[i]; // Extract 1 bit from mask
|
||||
- if( bit != _A[i] ) return false; // Found many bits, so fail
|
||||
- }
|
||||
- }
|
||||
- // True for both the empty mask and for a single bit
|
||||
- return true;
|
||||
+bool RegMask::is_bound1() const {
|
||||
+ if (is_AllStack()) return false;
|
||||
+ return Size() == 1;
|
||||
}
|
||||
|
||||
//------------------------------is_bound2--------------------------------------
|
||||
// Return TRUE if the mask contains an adjacent pair of bits and no other bits.
|
||||
-int RegMask::is_bound_pair() const {
|
||||
+bool RegMask::is_bound_pair() const {
|
||||
if( is_AllStack() ) return false;
|
||||
-
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
int bit = -1; // Set to hold the one bit allowed
|
||||
- for( int i = 0; i < RM_SIZE; i++ ) {
|
||||
- if( _A[i] ) { // Found some bits
|
||||
- if( bit != -1 ) return false; // Already had bits, so fail
|
||||
- bit = _A[i] & -(_A[i]); // Extract 1 bit from mask
|
||||
- if( (bit << 1) != 0 ) { // Bit pair stays in same word?
|
||||
+ for( int i = _lwm; i <= _hwm; i++ ) {
|
||||
+ if( _A[i] ) { // Found some bits
|
||||
+ if( bit != -1) return false; // Already had bits, so fail
|
||||
+ bit = _A[i] & -(_A[i]); // Extract 1 bit from mask
|
||||
+ if( (bit << 1) != 0 ) { // Bit pair stays in same word?
|
||||
if( (bit | (bit<<1)) != _A[i] )
|
||||
- return false; // Require adjacent bit pair and no more bits
|
||||
- } else { // Else its a split-pair case
|
||||
+ return false; // Require adjacent bit pair and no more bits
|
||||
+ } else { // Else its a split-pair case
|
||||
if( bit != _A[i] ) return false; // Found many bits, so fail
|
||||
- i++; // Skip iteration forward
|
||||
- if( i >= RM_SIZE || _A[i] != 1 )
|
||||
+ i++; // Skip iteration forward
|
||||
+ if( i > _hwm || _A[i] != 1 )
|
||||
return false; // Require 1 lo bit in next word
|
||||
}
|
||||
}
|
||||
@@ -247,31 +242,44 @@ int RegMask::is_bound_pair() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
+// Test for a single adjacent set of ideal register's size.
|
||||
+bool RegMask::is_bound(uint ireg) const {
|
||||
+ if (is_vector(ireg)) {
|
||||
+ if (is_bound_set(num_registers(ireg)))
|
||||
+ return true;
|
||||
+ } else if (is_bound1() || is_bound_pair()) {
|
||||
+ return true;
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+
|
||||
+
|
||||
static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 };
|
||||
-//------------------------------find_first_set---------------------------------
|
||||
+
|
||||
// Find the lowest-numbered register set in the mask. Return the
|
||||
// HIGHEST register number in the set, or BAD if no sets.
|
||||
// Works also for size 1.
|
||||
OptoReg::Name RegMask::find_first_set(const int size) const {
|
||||
- verify_sets(size);
|
||||
- for (int i = 0; i < RM_SIZE; i++) {
|
||||
+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
+ for (int i = _lwm; i <= _hwm; i++) {
|
||||
if (_A[i]) { // Found some bits
|
||||
- int bit = _A[i] & -_A[i]; // Extract low bit
|
||||
// Convert to bit number, return hi bit in pair
|
||||
- return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1));
|
||||
+ return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(_A[i])+(size-1));
|
||||
}
|
||||
}
|
||||
return OptoReg::Bad;
|
||||
}
|
||||
|
||||
-//------------------------------clear_to_sets----------------------------------
|
||||
// Clear out partial bits; leave only aligned adjacent bit pairs
|
||||
void RegMask::clear_to_sets(const int size) {
|
||||
if (size == 1) return;
|
||||
assert(2 <= size && size <= 8, "update low bits table");
|
||||
assert(is_power_of_2(size), "sanity");
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
int low_bits_mask = low_bits[size>>2];
|
||||
- for (int i = 0; i < RM_SIZE; i++) {
|
||||
+ for (int i = _lwm; i <= _hwm; i++) {
|
||||
int bits = _A[i];
|
||||
int sets = (bits & low_bits_mask);
|
||||
for (int j = 1; j < size; j++) {
|
||||
@@ -286,17 +294,17 @@ void RegMask::clear_to_sets(const int size) {
|
||||
}
|
||||
_A[i] = sets;
|
||||
}
|
||||
- verify_sets(size);
|
||||
+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
|
||||
}
|
||||
|
||||
-//------------------------------smear_to_sets----------------------------------
|
||||
// Smear out partial bits to aligned adjacent bit sets
|
||||
void RegMask::smear_to_sets(const int size) {
|
||||
if (size == 1) return;
|
||||
assert(2 <= size && size <= 8, "update low bits table");
|
||||
assert(is_power_of_2(size), "sanity");
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
int low_bits_mask = low_bits[size>>2];
|
||||
- for (int i = 0; i < RM_SIZE; i++) {
|
||||
+ for (int i = _lwm; i <= _hwm; i++) {
|
||||
int bits = _A[i];
|
||||
int sets = 0;
|
||||
for (int j = 0; j < size; j++) {
|
||||
@@ -312,17 +320,17 @@ void RegMask::smear_to_sets(const int size) {
|
||||
}
|
||||
_A[i] = sets;
|
||||
}
|
||||
- verify_sets(size);
|
||||
+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
|
||||
}
|
||||
|
||||
-//------------------------------is_aligned_set--------------------------------
|
||||
+// Assert that the register mask contains only bit sets.
|
||||
bool RegMask::is_aligned_sets(const int size) const {
|
||||
if (size == 1) return true;
|
||||
assert(2 <= size && size <= 8, "update low bits table");
|
||||
assert(is_power_of_2(size), "sanity");
|
||||
int low_bits_mask = low_bits[size>>2];
|
||||
- // Assert that the register mask contains only bit sets.
|
||||
- for (int i = 0; i < RM_SIZE; i++) {
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
+ for (int i = _lwm; i <= _hwm; i++) {
|
||||
int bits = _A[i];
|
||||
while (bits) { // Check bits for pairing
|
||||
int bit = bits & -bits; // Extract low bit
|
||||
@@ -339,14 +347,14 @@ bool RegMask::is_aligned_sets(const int size) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
-//------------------------------is_bound_set-----------------------------------
|
||||
// Return TRUE if the mask contains one adjacent set of bits and no other bits.
|
||||
// Works also for size 1.
|
||||
int RegMask::is_bound_set(const int size) const {
|
||||
if( is_AllStack() ) return false;
|
||||
assert(1 <= size && size <= 8, "update low bits table");
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
int bit = -1; // Set to hold the one bit allowed
|
||||
- for (int i = 0; i < RM_SIZE; i++) {
|
||||
+ for (int i = _lwm; i <= _hwm; i++) {
|
||||
if (_A[i] ) { // Found some bits
|
||||
if (bit != -1)
|
||||
return false; // Already had bits, so fail
|
||||
@@ -364,7 +372,7 @@ int RegMask::is_bound_set(const int size) const {
|
||||
int set = bit>>24;
|
||||
set = set & -set; // Remove sign extension.
|
||||
set = (((set << size) - 1) >> 8);
|
||||
- if (i >= RM_SIZE || _A[i] != set)
|
||||
+ if (i > _hwm || _A[i] != set)
|
||||
return false; // Require expected low bits in next word
|
||||
}
|
||||
}
|
||||
@@ -373,7 +381,6 @@ int RegMask::is_bound_set(const int size) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
-//------------------------------is_UP------------------------------------------
|
||||
// UP means register only, Register plus stack, or stack only is DOWN
|
||||
bool RegMask::is_UP() const {
|
||||
// Quick common case check for DOWN (any stack slot is legal)
|
||||
@@ -386,22 +393,22 @@ bool RegMask::is_UP() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
-//------------------------------Size-------------------------------------------
|
||||
// Compute size of register mask in bits
|
||||
uint RegMask::Size() const {
|
||||
extern uint8 bitsInByte[256];
|
||||
uint sum = 0;
|
||||
- for( int i = 0; i < RM_SIZE; i++ )
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
+ for( int i = _lwm; i <= _hwm; i++ ) {
|
||||
sum +=
|
||||
bitsInByte[(_A[i]>>24) & 0xff] +
|
||||
bitsInByte[(_A[i]>>16) & 0xff] +
|
||||
bitsInByte[(_A[i]>> 8) & 0xff] +
|
||||
bitsInByte[ _A[i] & 0xff];
|
||||
+ }
|
||||
return sum;
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
-//------------------------------print------------------------------------------
|
||||
void RegMask::dump(outputStream *st) const {
|
||||
st->print("[");
|
||||
RegMask rm = *this; // Structure copy into local temp
|
||||
diff --git a/hotspot/src/share/vm/opto/regmask.hpp b/hotspot/src/share/vm/opto/regmask.hpp
|
||||
index 5ceebb3fb..6cef16ad7 100644
|
||||
--- a/hotspot/src/share/vm/opto/regmask.hpp
|
||||
+++ b/hotspot/src/share/vm/opto/regmask.hpp
|
||||
@@ -44,27 +44,12 @@
|
||||
# include "adfiles/adGlobals_ppc_64.hpp"
|
||||
#endif
|
||||
|
||||
-// Some fun naming (textual) substitutions:
|
||||
-//
|
||||
-// RegMask::get_low_elem() ==> RegMask::find_first_elem()
|
||||
-// RegMask::Special ==> RegMask::Empty
|
||||
-// RegMask::_flags ==> RegMask::is_AllStack()
|
||||
-// RegMask::operator<<=() ==> RegMask::Insert()
|
||||
-// RegMask::operator>>=() ==> RegMask::Remove()
|
||||
-// RegMask::Union() ==> RegMask::OR
|
||||
-// RegMask::Inter() ==> RegMask::AND
|
||||
-//
|
||||
-// OptoRegister::RegName ==> OptoReg::Name
|
||||
-//
|
||||
-// OptoReg::stack0() ==> _last_Mach_Reg or ZERO in core version
|
||||
-//
|
||||
-// numregs in chaitin ==> proper degree in chaitin
|
||||
|
||||
//-------------Non-zero bit search methods used by RegMask---------------------
|
||||
// Find lowest 1, or return 32 if empty
|
||||
int find_lowest_bit( uint32 mask );
|
||||
// Find highest 1, or return 32 if empty
|
||||
-int find_hihghest_bit( uint32 mask );
|
||||
+int find_highest_bit( uint32 mask );
|
||||
|
||||
//------------------------------RegMask----------------------------------------
|
||||
// The ADL file describes how to print the machine-specific registers, as well
|
||||
@@ -97,6 +82,12 @@ class RegMask VALUE_OBJ_CLASS_SPEC {
|
||||
|
||||
public:
|
||||
enum { CHUNK_SIZE = RM_SIZE*_WordBits };
|
||||
+ // The low and high water marks represent the lowest and highest word
|
||||
+ // that might contain set register mask bits, respectively. We guarantee
|
||||
+ // that there are no bits in words outside this range, but any word at
|
||||
+ // and between the two marks can still be 0.
|
||||
+ int _lwm;
|
||||
+ int _hwm;
|
||||
|
||||
// SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
|
||||
// Also, consider the maximum alignment size for a normally allocated
|
||||
@@ -126,13 +117,21 @@ public:
|
||||
# define BODY(I) _A[I] = a##I;
|
||||
FORALL_BODY
|
||||
# undef BODY
|
||||
+ _lwm = 0;
|
||||
+ _hwm = RM_SIZE - 1;
|
||||
+ while (_hwm > 0 && _A[_hwm] == 0) _hwm--;
|
||||
+ while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++;
|
||||
+ assert(valid_watermarks(), "post-condition");
|
||||
}
|
||||
|
||||
// Handy copying constructor
|
||||
RegMask( RegMask *rm ) {
|
||||
-# define BODY(I) _A[I] = rm->_A[I];
|
||||
- FORALL_BODY
|
||||
-# undef BODY
|
||||
+ _hwm = rm->_hwm;
|
||||
+ _lwm = rm->_lwm;
|
||||
+ for (int i = 0; i < RM_SIZE; i++) {
|
||||
+ _A[i] = rm->_A[i];
|
||||
+ }
|
||||
+ assert(valid_watermarks(), "post-condition");
|
||||
}
|
||||
|
||||
// Construct an empty mask
|
||||
@@ -162,30 +161,36 @@ public:
|
||||
|
||||
// Test for being a not-empty mask.
|
||||
int is_NotEmpty( ) const {
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
int tmp = 0;
|
||||
-# define BODY(I) tmp |= _A[I];
|
||||
- FORALL_BODY
|
||||
-# undef BODY
|
||||
+ for (int i = _lwm; i <= _hwm; i++) {
|
||||
+ tmp |= _A[i];
|
||||
+ }
|
||||
return tmp;
|
||||
}
|
||||
|
||||
// Find lowest-numbered register from mask, or BAD if mask is empty.
|
||||
OptoReg::Name find_first_elem() const {
|
||||
- int base, bits;
|
||||
-# define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else
|
||||
- FORALL_BODY
|
||||
-# undef BODY
|
||||
- { base = OptoReg::Bad; bits = 1<<0; }
|
||||
- return OptoReg::Name(base + find_lowest_bit(bits));
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
+ for (int i = _lwm; i <= _hwm; i++) {
|
||||
+ int bits = _A[i];
|
||||
+ if (bits) {
|
||||
+ return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits));
|
||||
+ }
|
||||
+ }
|
||||
+ return OptoReg::Name(OptoReg::Bad);
|
||||
}
|
||||
+
|
||||
// Get highest-numbered register from mask, or BAD if mask is empty.
|
||||
OptoReg::Name find_last_elem() const {
|
||||
- int base, bits;
|
||||
-# define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else
|
||||
- FORALL_BODY
|
||||
-# undef BODY
|
||||
- { base = OptoReg::Bad; bits = 1<<0; }
|
||||
- return OptoReg::Name(base + find_hihghest_bit(bits));
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
+ for (int i = _hwm; i >= _lwm; i--) {
|
||||
+ int bits = _A[i];
|
||||
+ if (bits) {
|
||||
+ return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits));
|
||||
+ }
|
||||
+ }
|
||||
+ return OptoReg::Name(OptoReg::Bad);
|
||||
}
|
||||
|
||||
// Find the lowest-numbered register pair in the mask. Return the
|
||||
@@ -199,25 +204,34 @@ public:
|
||||
void smear_to_pairs();
|
||||
// Verify that the mask contains only aligned adjacent bit pairs
|
||||
void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
|
||||
+
|
||||
+#ifdef ASSERT
|
||||
+ // Verify watermarks are sane, i.e., within bounds and that no
|
||||
+ // register words below or above the watermarks have bits set.
|
||||
+ bool valid_watermarks() const {
|
||||
+ assert(_hwm >= 0 && _hwm < RM_SIZE, err_msg("_hwm out of range: %d", _hwm));
|
||||
+ assert(_lwm >= 0 && _lwm < RM_SIZE, err_msg("_lwm out of range: %d", _lwm));
|
||||
+ for (int i = 0; i < _lwm; i++) {
|
||||
+ assert(_A[i] == 0, err_msg("_lwm too high: %d regs at: %d", _lwm, i));
|
||||
+ }
|
||||
+ for (int i = _hwm + 1; i < RM_SIZE; i++) {
|
||||
+ assert(_A[i] == 0, err_msg("_hwm too low: %d regs at: %d", _hwm, i));
|
||||
+ }
|
||||
+ return true;
|
||||
+ }
|
||||
+#endif // ASSERT
|
||||
+
|
||||
// Test that the mask contains only aligned adjacent bit pairs
|
||||
bool is_aligned_pairs() const;
|
||||
|
||||
// mask is a pair of misaligned registers
|
||||
- bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }
|
||||
+ bool is_misaligned_pair() const;
|
||||
// Test for single register
|
||||
- int is_bound1() const;
|
||||
+ bool is_bound1() const;
|
||||
// Test for a single adjacent pair
|
||||
- int is_bound_pair() const;
|
||||
+ bool is_bound_pair() const;
|
||||
// Test for a single adjacent set of ideal register's size.
|
||||
- int is_bound(uint ireg) const {
|
||||
- if (is_vector(ireg)) {
|
||||
- if (is_bound_set(num_registers(ireg)))
|
||||
- return true;
|
||||
- } else if (is_bound1() || is_bound_pair()) {
|
||||
- return true;
|
||||
- }
|
||||
- return false;
|
||||
- }
|
||||
+ bool is_bound(uint ireg) const;
|
||||
|
||||
// Find the lowest-numbered register set in the mask. Return the
|
||||
// HIGHEST register number in the set, or BAD if no sets.
|
||||
@@ -228,8 +242,6 @@ public:
|
||||
void clear_to_sets(const int size);
|
||||
// Smear out partial bits to aligned adjacent bit sets.
|
||||
void smear_to_sets(const int size);
|
||||
- // Verify that the mask contains only aligned adjacent bit sets
|
||||
- void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
|
||||
// Test that the mask contains only aligned adjacent bit sets
|
||||
bool is_aligned_sets(const int size) const;
|
||||
|
||||
@@ -244,11 +256,14 @@ public:
|
||||
|
||||
// Fast overlap test. Non-zero if any registers in common.
|
||||
int overlap( const RegMask &rm ) const {
|
||||
- return
|
||||
-# define BODY(I) (_A[I] & rm._A[I]) |
|
||||
- FORALL_BODY
|
||||
-# undef BODY
|
||||
- 0 ;
|
||||
+ assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
|
||||
+ int hwm = MIN2(_hwm, rm._hwm);
|
||||
+ int lwm = MAX2(_lwm, rm._lwm);
|
||||
+ int result = 0;
|
||||
+ for (int i = lwm; i <= hwm; i++) {
|
||||
+ result |= _A[i] & rm._A[i];
|
||||
+ }
|
||||
+ return result;
|
||||
}
|
||||
|
||||
// Special test for register pressure based splitting
|
||||
@@ -257,22 +272,29 @@ public:
|
||||
|
||||
// Clear a register mask
|
||||
void Clear( ) {
|
||||
-# define BODY(I) _A[I] = 0;
|
||||
- FORALL_BODY
|
||||
-# undef BODY
|
||||
+ _lwm = RM_SIZE - 1;
|
||||
+ _hwm = 0;
|
||||
+ memset(_A, 0, sizeof(int)*RM_SIZE);
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
}
|
||||
|
||||
// Fill a register mask with 1's
|
||||
void Set_All( ) {
|
||||
-# define BODY(I) _A[I] = -1;
|
||||
- FORALL_BODY
|
||||
-# undef BODY
|
||||
+ _lwm = 0;
|
||||
+ _hwm = RM_SIZE - 1;
|
||||
+ memset(_A, 0xFF, sizeof(int)*RM_SIZE);
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
}
|
||||
|
||||
// Insert register into mask
|
||||
void Insert( OptoReg::Name reg ) {
|
||||
- assert( reg < CHUNK_SIZE, "" );
|
||||
- _A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1)));
|
||||
+ assert(reg < CHUNK_SIZE, "sanity");
|
||||
+ assert(valid_watermarks(), "pre-condition");
|
||||
+ int index = reg>>_LogWordBits;
|
||||
+ if (index > _hwm) _hwm = index;
|
||||
+ if (index < _lwm) _lwm = index;
|
||||
+ _A[index] |= (1<<(reg&(_WordBits-1)));
|
||||
+ assert(valid_watermarks(), "post-condition");
|
||||
}
|
||||
|
||||
// Remove register from mask
|
||||
@@ -283,23 +305,38 @@ public:
|
||||
|
||||
// OR 'rm' into 'this'
|
||||
void OR( const RegMask &rm ) {
|
||||
-# define BODY(I) this->_A[I] |= rm._A[I];
|
||||
- FORALL_BODY
|
||||
-# undef BODY
|
||||
+ assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
|
||||
+ // OR widens the live range
|
||||
+ if (_lwm > rm._lwm) _lwm = rm._lwm;
|
||||
+ if (_hwm < rm._hwm) _hwm = rm._hwm;
|
||||
+ for (int i = _lwm; i <= _hwm; i++) {
|
||||
+ _A[i] |= rm._A[i];
|
||||
+ }
|
||||
+ assert(valid_watermarks(), "sanity");
|
||||
}
|
||||
|
||||
// AND 'rm' into 'this'
|
||||
void AND( const RegMask &rm ) {
|
||||
-# define BODY(I) this->_A[I] &= rm._A[I];
|
||||
- FORALL_BODY
|
||||
-# undef BODY
|
||||
+ assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
|
||||
+ // Do not evaluate words outside the current watermark range, as they are
|
||||
+ // already zero and an &= would not change that
|
||||
+ for (int i = _lwm; i <= _hwm; i++) {
|
||||
+ _A[i] &= rm._A[i];
|
||||
+ }
|
||||
+ // Narrow the watermarks if &rm spans a narrower range.
|
||||
+ // Update after to ensure non-overlapping words are zeroed out.
|
||||
+ if (_lwm < rm._lwm) _lwm = rm._lwm;
|
||||
+ if (_hwm > rm._hwm) _hwm = rm._hwm;
|
||||
}
|
||||
|
||||
// Subtract 'rm' from 'this'
|
||||
void SUBTRACT( const RegMask &rm ) {
|
||||
-# define BODY(I) _A[I] &= ~rm._A[I];
|
||||
- FORALL_BODY
|
||||
-# undef BODY
|
||||
+ assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
|
||||
+ int hwm = MIN2(_hwm, rm._hwm);
|
||||
+ int lwm = MAX2(_lwm, rm._lwm);
|
||||
+ for (int i = lwm; i <= hwm; i++) {
|
||||
+ _A[i] &= ~rm._A[i];
|
||||
+ }
|
||||
}
|
||||
|
||||
// Compute size of register mask: number of bits
|
||||
1186
8234003-Improve-IndexSet-iteration.patch
Normal file
1186
8234003-Improve-IndexSet-iteration.patch
Normal file
File diff suppressed because it is too large
Load Diff
479
Ddot-intrinsic-implement.patch
Normal file
479
Ddot-intrinsic-implement.patch
Normal file
@ -0,0 +1,479 @@
|
||||
diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
|
||||
index 1e9b1cb91..c0fd37d05 100644
|
||||
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
|
||||
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
|
||||
@@ -2061,6 +2061,14 @@ public:
|
||||
ld_st(Vt, T, a, op1, op2); \
|
||||
}
|
||||
|
||||
+ void ld1_d(FloatRegister Vt, int index, const Address &a) {
|
||||
+ starti;
|
||||
+ assert(index == 0 || index == 1, "Index must be 0 or 1 for Vx.2D");
|
||||
+ f(0, 31), f(index & 1, 30);
|
||||
+ f(0b001101110, 29, 21), rf(a.index(), 16), f(0b1000, 15, 12);
|
||||
+ f(0b01, 11, 10), rf(a.base(), 5), rf(Vt, 0);
|
||||
+ }
|
||||
+
|
||||
INSN1(ld1, 0b001100010, 0b0111);
|
||||
INSN2(ld1, 0b001100010, 0b1010);
|
||||
INSN3(ld1, 0b001100010, 0b0110);
|
||||
@@ -2186,6 +2194,13 @@ public:
|
||||
|
||||
#undef INSN
|
||||
|
||||
+ void faddp_d(FloatRegister Vd, FloatRegister Vn) {
|
||||
+ starti;
|
||||
+ f(0b01, 31, 30), f(0b1111100, 29, 23), f(0b1, 22), f(0b11000, 21, 17);
|
||||
+ f(0b0110110, 16, 10);
|
||||
+ rf(Vn, 5), rf(Vd, 0);
|
||||
+ }
|
||||
+
|
||||
#define INSN(NAME, opc) \
|
||||
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
|
||||
starti; \
|
||||
diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
|
||||
index f2f85df60..873da580b 100644
|
||||
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
|
||||
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
|
||||
@@ -2853,6 +2853,124 @@ void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp,
|
||||
eor(crc, crc, tmp);
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * Multiply and accumulate one pair of double-precision floating-point numbers (sparse)
|
||||
+ */
|
||||
+void MacroAssembler::f2j_ddot_s1(Register dx, Register incx,
|
||||
+ Register dy, Register incy) {
|
||||
+ const FloatRegister tmpx = v2;
|
||||
+ const FloatRegister tmpy = v3;
|
||||
+
|
||||
+ ld1_d(tmpx, 0, Address(dx, incx));
|
||||
+ ld1_d(tmpy, 0, Address(dy, incy));
|
||||
+ fmaddd(v0, tmpx, tmpy, v0);
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * Multiply and accumulate one pair of double-precision floating-point numbers (dense)
|
||||
+ */
|
||||
+void MacroAssembler::f2j_ddot_d1(Register dx, Register dy, int size) {
|
||||
+ const FloatRegister tmpx = v2;
|
||||
+ const FloatRegister tmpy = v3;
|
||||
+
|
||||
+ ldrd(tmpx, post(dx, size));
|
||||
+ ldrd(tmpy, post(dy, size));
|
||||
+ fmaddd(v0, tmpx, tmpy, v0);
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * Multiply and accumulate 4 pairs of double-precision floating-point numbers
|
||||
+ */
|
||||
+void MacroAssembler::f2j_ddot_d4(Register dx, Register dy) {
|
||||
+ ld1(v2, v3, T2D, post(dx, 32));
|
||||
+ ld1(v4, v5, T2D, post(dy, 32));
|
||||
+ fmul(v2, T2D, v2, v4);
|
||||
+ fmul(v3, T2D, v3, v5);
|
||||
+ fadd(v0, T2D, v0, v2);
|
||||
+ fadd(v6, T2D, v6, v3);
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * @param n register containing the number of doubles in array
|
||||
+ * @param dx register pointing to input array
|
||||
+ * @param incx register containing step len for dx
|
||||
+ * @param dy register pointing to another input array
|
||||
+ * @param incy register containing step len for dy
|
||||
+ * @param temp_reg register containing loop variable
|
||||
+ */
|
||||
+void MacroAssembler::f2j_ddot(Register n, Register dx, Register incx,
|
||||
+ Register dy, Register incy, Register temp_reg) {
|
||||
+ Label Ldot_EXIT, Ldot_S_BEGIN, Ldot_S1, Ldot_S10, Ldot_S4, Ldot_D_BEGIN,
|
||||
+ Ldot_D1, Ldot_D10, Ldot_D4;
|
||||
+
|
||||
+ const int SZ = 8;
|
||||
+
|
||||
+ enter();
|
||||
+ fmovd(v0, zr);
|
||||
+ fmovd(v6, v0);
|
||||
+
|
||||
+ cmp(n, zr);
|
||||
+ br(Assembler::LE, Ldot_EXIT);
|
||||
+
|
||||
+ cmp(incx, 1);
|
||||
+ br(Assembler::NE, Ldot_S_BEGIN);
|
||||
+ cmp(incy, 1);
|
||||
+ br(Assembler::NE, Ldot_S_BEGIN);
|
||||
+
|
||||
+ BIND(Ldot_D_BEGIN);
|
||||
+ asr(temp_reg, n, 2);
|
||||
+ cmp(temp_reg, zr);
|
||||
+ br(Assembler::LE, Ldot_D1);
|
||||
+
|
||||
+ BIND(Ldot_D4);
|
||||
+ f2j_ddot_d4(dx, dy);
|
||||
+ subs(temp_reg, temp_reg, 1);
|
||||
+ br(Assembler::NE, Ldot_D4);
|
||||
+
|
||||
+ fadd(v0, T2D, v0, v6);
|
||||
+ faddp_d(v0, v0);
|
||||
+
|
||||
+ BIND(Ldot_D1);
|
||||
+ ands(temp_reg, n, 3);
|
||||
+ br(Assembler::LE, Ldot_EXIT);
|
||||
+
|
||||
+ BIND(Ldot_D10);
|
||||
+ f2j_ddot_d1(dx, dy, SZ);
|
||||
+ subs(temp_reg, temp_reg, 1);
|
||||
+ br(Assembler::NE, Ldot_D10);
|
||||
+ leave();
|
||||
+ ret(lr);
|
||||
+
|
||||
+ BIND(Ldot_S_BEGIN);
|
||||
+ lsl(incx, incx, 3);
|
||||
+ lsl(incy, incy, 3);
|
||||
+
|
||||
+ asr(temp_reg, n, 2);
|
||||
+ cmp(temp_reg, zr);
|
||||
+ br(Assembler::LE, Ldot_S1);
|
||||
+
|
||||
+ BIND(Ldot_S4);
|
||||
+ f2j_ddot_s1(dx, incx, dy, incy);
|
||||
+ f2j_ddot_s1(dx, incx, dy, incy);
|
||||
+ f2j_ddot_s1(dx, incx, dy, incy);
|
||||
+ f2j_ddot_s1(dx, incx, dy, incy);
|
||||
+ subs(temp_reg, temp_reg, 1);
|
||||
+ br(Assembler::NE, Ldot_S4);
|
||||
+
|
||||
+ BIND(Ldot_S1);
|
||||
+ ands(temp_reg, n, 3);
|
||||
+ br(Assembler::LE, Ldot_EXIT);
|
||||
+
|
||||
+ BIND(Ldot_S10);
|
||||
+ f2j_ddot_s1(dx, incx, dy, incy);
|
||||
+ subs(temp_reg, temp_reg, 1);
|
||||
+ br(Assembler::NE, Ldot_S10);
|
||||
+
|
||||
+ BIND(Ldot_EXIT);
|
||||
+ leave();
|
||||
+ ret(lr);
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* @param crc register containing existing CRC (32-bit)
|
||||
* @param buf register pointing to input byte buffer (byte*)
|
||||
diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
|
||||
index 388177589..1abc7e3b0 100644
|
||||
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
|
||||
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
|
||||
@@ -1180,6 +1180,9 @@ public:
|
||||
Register table0, Register table1, Register table2, Register table3,
|
||||
bool upper = false);
|
||||
|
||||
+ void f2j_ddot(Register n, Register dx, Register incx,
|
||||
+ Register dy, Register incy, Register temp_reg);
|
||||
+
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
Register tmp1);
|
||||
@@ -1236,6 +1239,11 @@ private:
|
||||
// Uses rscratch2 if the address is not directly reachable
|
||||
Address spill_address(int size, int offset, Register tmp=rscratch2);
|
||||
|
||||
+private:
|
||||
+ void f2j_ddot_s1(Register dx, Register incx, Register dy, Register incy);
|
||||
+ void f2j_ddot_d1(Register dx, Register dy, int size);
|
||||
+ void f2j_ddot_d4(Register dx, Register dy);
|
||||
+
|
||||
public:
|
||||
void spill(Register Rx, bool is64, int offset) {
|
||||
if (is64) {
|
||||
diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
|
||||
index 0d73c0c0c..337d5c1dd 100644
|
||||
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
|
||||
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
|
||||
@@ -45,6 +45,7 @@
|
||||
|
||||
#include "stubRoutines_aarch64.hpp"
|
||||
|
||||
+
|
||||
#ifdef COMPILER2
|
||||
#include "opto/runtime.hpp"
|
||||
#endif
|
||||
@@ -3220,6 +3221,39 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
+ /**
|
||||
+ * Generates the "f2jblas_ddot" stub; its body is emitted by f2j_ddot. Arguments:
|
||||
+ *
|
||||
+ * Inputs:
|
||||
+ * c_rarg0 - int n
|
||||
+ * c_rarg1 - double[] dx (inline_ddotF2jBLAS passes the address of element 0)
|
||||
+ * c_rarg2 - int incx
|
||||
+ * c_rarg3 - double[] dy (inline_ddotF2jBLAS passes the address of element 0)
|
||||
+ * c_rarg4 - int incy
|
||||
+ *
|
||||
+ * Output:
|
||||
+ * d0 - ddot result
|
||||
+ *
|
||||
+ */
|
||||
+ address generate_ddotF2jBLAS() {
|
||||
+ __ align(CodeEntryAlignment);
|
||||
+ StubCodeMark mark(this, "StubRoutines", "f2jblas_ddot");
|
||||
+
|
||||
+ address start = __ pc(); // entry address handed back to the caller (stored in StubRoutines::_ddotF2jBLAS)
|
||||
+
|
||||
+ const Register n = c_rarg0;
|
||||
+ const Register dx = c_rarg1;
|
||||
+ const Register incx = c_rarg2;
|
||||
+ const Register dy = c_rarg3;
|
||||
+ const Register incy = c_rarg4;
|
||||
+
|
||||
+ BLOCK_COMMENT("Entry:");
|
||||
+
|
||||
+ __ f2j_ddot(n, dx, incx, dy, incy, rscratch2); // rscratch2 is the temp (loop counter); f2j_ddot itself ends with leave()/ret(lr), so no return sequence follows
|
||||
+
|
||||
+ return start;
|
||||
+ }
|
||||
+
|
||||
/**
|
||||
* Arguments:
|
||||
*
|
||||
@@ -4262,6 +4296,10 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_montgomerySquare = g.generate_multiply();
|
||||
}
|
||||
|
||||
+ if (UseF2jBLASIntrinsics) {
|
||||
+ StubRoutines::_ddotF2jBLAS = generate_ddotF2jBLAS();
|
||||
+ }
|
||||
+
|
||||
if (UseAESIntrinsics) {
|
||||
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
|
||||
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
|
||||
diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp
|
||||
index 148f9212e..6bd8dbedd 100644
|
||||
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
|
||||
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
|
||||
@@ -852,6 +852,12 @@
|
||||
do_name( implCompress_name, "implCompress0") \
|
||||
do_signature(implCompress_signature, "([BI)V") \
|
||||
\
|
||||
+ /* support for com.github.fommil.netlib.F2jBLAS */ \
|
||||
+ do_class(com_github_fommil_netlib_f2jblas, "com/github/fommil/netlib/F2jBLAS") \
|
||||
+ do_intrinsic(_f2jblas_ddot, com_github_fommil_netlib_f2jblas, ddot_name, ddot_signature, F_R) \
|
||||
+ do_name( ddot_name, "ddot") \
|
||||
+ do_signature(ddot_signature, "(I[DI[DI)D") \
|
||||
+ \
|
||||
/* support for sun.security.provider.SHA2 */ \
|
||||
do_class(sun_security_provider_sha2, "sun/security/provider/SHA2") \
|
||||
do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R) \
|
||||
diff --git a/hotspot/src/share/vm/oops/method.cpp b/hotspot/src/share/vm/oops/method.cpp
|
||||
index 24fae4d30..64cdae9c7 100644
|
||||
--- a/hotspot/src/share/vm/oops/method.cpp
|
||||
+++ b/hotspot/src/share/vm/oops/method.cpp
|
||||
@@ -1281,7 +1281,9 @@ vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) {
|
||||
// which does not use the class default class loader so we check for its loader here
|
||||
InstanceKlass* ik = InstanceKlass::cast(holder);
|
||||
if ((ik->class_loader() != NULL) && !SystemDictionary::is_ext_class_loader(ik->class_loader())) {
|
||||
- return vmSymbols::NO_SID; // regardless of name, no intrinsics here
|
||||
+ if (!EnableIntrinsicExternal) {
|
||||
+ return vmSymbols::NO_SID; // regardless of name, no intrinsics here
|
||||
+ }
|
||||
}
|
||||
|
||||
// see if the klass name is well-known:
|
||||
diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp
|
||||
index 9ef1c5e69..aa1b1ac3a 100644
|
||||
--- a/hotspot/src/share/vm/opto/escape.cpp
|
||||
+++ b/hotspot/src/share/vm/opto/escape.cpp
|
||||
@@ -978,7 +978,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
|
||||
strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
|
||||
- strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0)
|
||||
+ strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 ||
|
||||
+ strcmp(call->as_CallLeaf()->_name, "f2jblas_ddot") == 0)
|
||||
))) {
|
||||
call->dump();
|
||||
fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
|
||||
diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp
|
||||
index 89ebabe6f..5cbc0f012 100644
|
||||
--- a/hotspot/src/share/vm/opto/library_call.cpp
|
||||
+++ b/hotspot/src/share/vm/opto/library_call.cpp
|
||||
@@ -335,6 +335,7 @@ class LibraryCallKit : public GraphKit {
|
||||
bool inline_mulAdd();
|
||||
bool inline_montgomeryMultiply();
|
||||
bool inline_montgomerySquare();
|
||||
+ bool inline_ddotF2jBLAS();
|
||||
|
||||
bool inline_profileBoolean();
|
||||
};
|
||||
@@ -587,6 +588,10 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
|
||||
if (!UseCRC32Intrinsics) return NULL;
|
||||
break;
|
||||
|
||||
+ case vmIntrinsics::_f2jblas_ddot:
|
||||
+ if (!UseF2jBLASIntrinsics) return NULL;
|
||||
+ break;
|
||||
+
|
||||
case vmIntrinsics::_incrementExactI:
|
||||
case vmIntrinsics::_addExactI:
|
||||
if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL;
|
||||
@@ -983,6 +988,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
||||
|
||||
case vmIntrinsics::_profileBoolean:
|
||||
return inline_profileBoolean();
|
||||
+ case vmIntrinsics::_f2jblas_ddot:
|
||||
+ return inline_ddotF2jBLAS();
|
||||
|
||||
default:
|
||||
// If you get here, it may be that someone has added a new intrinsic
|
||||
@@ -6303,6 +6310,49 @@ bool LibraryCallKit::inline_updateBytesCRC32() {
|
||||
return true;
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * C2 intrinsic for double com.github.fommil.netlib.F2jBLAS.ddot(int n, double[] dx, int incx, double[] dy, int incy): emits a leaf call to the f2jblas_ddot stub; returns false when it cannot be applied.
|
||||
+ */
|
||||
+bool LibraryCallKit::inline_ddotF2jBLAS() {
|
||||
+ assert(callee()->signature()->size() == 5, "ddot has 5 parameters");
|
||||
+ address stubAddr = StubRoutines::ddotF2jBLAS(); // stays NULL unless a stub generator produced f2jblas_ddot
|
||||
+ if (stubAddr == NULL) return false; // no stub available: do not intrinsify, never emit a call to address 0
|
||||
+ Node* n = argument(1); // type: int (argument(0) is the receiver; ddot is declared F_R)
|
||||
+ Node* dx = argument(2); // type: double[]
|
||||
+ Node* incx = argument(3); // type: int
|
||||
+ Node* dy = argument(4); // type: double[]
|
||||
+ Node* incy = argument(5); // type: int
|
||||
+
|
||||
+ const Type* dx_type = dx->Value(&_gvn);
|
||||
+ const Type* dy_type = dy->Value(&_gvn);
|
||||
+ const TypeAryPtr* dx_top_src = dx_type->isa_aryptr();
|
||||
+ const TypeAryPtr* dy_top_src = dy_type->isa_aryptr();
|
||||
+ if (dx_top_src == NULL || dx_top_src->klass() == NULL ||
|
||||
+ dy_top_src == NULL || dy_top_src->klass() == NULL) {
|
||||
+ // failed array check: the operands are not known to be arrays
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ // Determine the element type of each array; only double[] operands are accepted.
|
||||
+ BasicType dx_elem = dx_top_src->klass()->as_array_klass()->element_type()->basic_type();
|
||||
+ BasicType dy_elem = dy_top_src->klass()->as_array_klass()->element_type()->basic_type();
|
||||
+ if (dx_elem != T_DOUBLE || dy_elem != T_DOUBLE) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ // dx_start/dy_start are the addresses of element 0 of dx and dy. NOTE(review): no null or range check is emitted in this function - confirm that is acceptable.
|
||||
+ Node* dx_start = array_element_address(dx, intcon(0), dx_elem);
|
||||
+ Node* dy_start = array_element_address(dy, intcon(0), dy_elem);
|
||||
+
|
||||
+ const char *stubName = "f2jblas_ddot";
|
||||
+ Node* call = make_runtime_call(RC_LEAF, OptoRuntime::ddotF2jBLAS_Type(),
|
||||
+ stubAddr, stubName, TypePtr::BOTTOM,
|
||||
+ n, dx_start, incx, dy_start, incy);
|
||||
+ Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms)); // projection of the call's result (a double per ddotF2jBLAS_Type)
|
||||
+ set_result(result);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* Calculate CRC32 for ByteBuffer.
|
||||
* int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
|
||||
diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp
|
||||
index ba8f42e49..f1fe4d666 100644
|
||||
--- a/hotspot/src/share/vm/opto/runtime.cpp
|
||||
+++ b/hotspot/src/share/vm/opto/runtime.cpp
|
||||
@@ -920,6 +920,30 @@ const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * Call signature used for the f2jblas_ddot leaf call: double ddot(int n, double *dx, int incx, double *dy, int incy)
|
||||
+ */
|
||||
+const TypeFunc* OptoRuntime::ddotF2jBLAS_Type() {
|
||||
+ // create input type (domain): five arguments, in stub-argument order
|
||||
+ int num_args = 5;
|
||||
+ int argcnt = num_args;
|
||||
+ const Type** fields = TypeTuple::fields(argcnt);
|
||||
+ int argp = TypeFunc::Parms;
|
||||
+ fields[argp++] = TypeInt::INT; // n
|
||||
+ fields[argp++] = TypeAryPtr::DOUBLES; // dx
|
||||
+ fields[argp++] = TypeInt::INT; // incx
|
||||
+ fields[argp++] = TypeAryPtr::DOUBLES; // dy
|
||||
+ fields[argp++] = TypeInt::INT; // incy
|
||||
+ assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
|
||||
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
|
||||
+
|
||||
+ // result type: a single double. NOTE(review): only one range slot is declared; confirm whether a Type::HALF slot is expected after DOUBLE.
|
||||
+ fields = TypeTuple::fields(1);
|
||||
+ fields[TypeFunc::Parms + 0] = Type::DOUBLE;
|
||||
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields);
|
||||
+ return TypeFunc::make(domain, range);
|
||||
+}
|
||||
+
|
||||
// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
|
||||
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
|
||||
// create input type (domain)
|
||||
diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp
|
||||
index e3bdfdf9c..66d393c5c 100644
|
||||
--- a/hotspot/src/share/vm/opto/runtime.hpp
|
||||
+++ b/hotspot/src/share/vm/opto/runtime.hpp
|
||||
@@ -317,6 +317,8 @@ private:
|
||||
|
||||
static const TypeFunc* updateBytesCRC32_Type();
|
||||
|
||||
+ static const TypeFunc* ddotF2jBLAS_Type();
|
||||
+
|
||||
// leaf on stack replacement interpreter accessor types
|
||||
static const TypeFunc* osr_end_Type();
|
||||
|
||||
diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp
|
||||
index 7b17e623b..520cc3187 100644
|
||||
--- a/hotspot/src/share/vm/runtime/globals.hpp
|
||||
+++ b/hotspot/src/share/vm/runtime/globals.hpp
|
||||
@@ -743,6 +743,12 @@ class CommandLineFlags {
|
||||
product(bool, UseCRC32Intrinsics, false, \
|
||||
"use intrinsics for java.util.zip.CRC32") \
|
||||
\
|
||||
+ experimental(bool, UseF2jBLASIntrinsics, false, \
|
||||
+ "use intrinsics for com.github.fommil.netlib.F2jBLAS on aarch64") \
|
||||
+ \
|
||||
+ experimental(bool, EnableIntrinsicExternal, false, \
|
||||
+ "enable intrinsics for methods of external packages") \
|
||||
+ \
|
||||
develop(bool, TraceCallFixup, false, \
|
||||
"Trace all call fixups") \
|
||||
\
|
||||
diff --git a/hotspot/src/share/vm/runtime/stubRoutines.cpp b/hotspot/src/share/vm/runtime/stubRoutines.cpp
|
||||
index d943248da..10f438bc5 100644
|
||||
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp
|
||||
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp
|
||||
@@ -136,6 +136,8 @@ address StubRoutines::_sha512_implCompressMB = NULL;
|
||||
address StubRoutines::_updateBytesCRC32 = NULL;
|
||||
address StubRoutines::_crc_table_adr = NULL;
|
||||
|
||||
+address StubRoutines::_ddotF2jBLAS = NULL;
|
||||
+
|
||||
address StubRoutines::_multiplyToLen = NULL;
|
||||
address StubRoutines::_squareToLen = NULL;
|
||||
address StubRoutines::_mulAdd = NULL;
|
||||
diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp
|
||||
index e18b9127d..a4eeb910d 100644
|
||||
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp
|
||||
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp
|
||||
@@ -214,6 +214,8 @@ class StubRoutines: AllStatic {
|
||||
static address _updateBytesCRC32;
|
||||
static address _crc_table_adr;
|
||||
|
||||
+ static address _ddotF2jBLAS;
|
||||
+
|
||||
static address _multiplyToLen;
|
||||
static address _squareToLen;
|
||||
static address _mulAdd;
|
||||
@@ -377,6 +379,8 @@ class StubRoutines: AllStatic {
|
||||
static address updateBytesCRC32() { return _updateBytesCRC32; }
|
||||
static address crc_table_addr() { return _crc_table_adr; }
|
||||
|
||||
+ static address ddotF2jBLAS() { return _ddotF2jBLAS; } // entry of the f2jblas_ddot stub; NULL (its initial value) until a stub generator assigns it
|
||||
+
|
||||
static address multiplyToLen() {return _multiplyToLen; }
|
||||
static address squareToLen() {return _squareToLen; }
|
||||
static address mulAdd() {return _mulAdd; }
|
||||
@ -0,0 +1,46 @@
|
||||
diff --git a/jdk/src/share/classes/java/lang/Long.java b/jdk/src/share/classes/java/lang/Long.java
|
||||
index 58c2cc3ba..7b6e14a97 100644
|
||||
--- a/jdk/src/share/classes/java/lang/Long.java
|
||||
+++ b/jdk/src/share/classes/java/lang/Long.java
|
||||
@@ -812,12 +812,11 @@ public final class Long extends Number implements Comparable<Long> {
|
||||
static final Long cache[];
|
||||
|
||||
static {
|
||||
-
|
||||
+ int h = 127;
|
||||
String longCacheHighPropValue =
|
||||
sun.misc.VM.getSavedProperty("java.lang.Long.LongCache.high");
|
||||
if (longCacheHighPropValue != null) {
|
||||
// high value may be configured by property
|
||||
- int h = 0;
|
||||
try {
|
||||
int i = Integer.parseInt(longCacheHighPropValue);
|
||||
i = Math.max(i, 127);
|
||||
@@ -826,21 +825,13 @@ public final class Long extends Number implements Comparable<Long> {
|
||||
} catch( NumberFormatException nfe) {
|
||||
// If the property cannot be parsed into an int, ignore it.
|
||||
}
|
||||
- high = h;
|
||||
- low = -h - 1;
|
||||
- cache = new Long[(high - low) + 1];
|
||||
- int j = low;
|
||||
- for(int k = 0; k < cache.length; k++)
|
||||
- cache[k] = new Long(j++);
|
||||
-
|
||||
- } else {
|
||||
- low = -128;
|
||||
- high = 127;
|
||||
- cache = new Long[(high - low) + 1];
|
||||
- int j = low;
|
||||
- for(int k = 0; k < cache.length; k++)
|
||||
- cache[k] = new Long(j++);
|
||||
}
|
||||
+ high = h;
|
||||
+ low = -h - 1;
|
||||
+ cache = new Long[(high - low) + 1];
|
||||
+ int j = low;
|
||||
+ for(int k = 0; k < cache.length; k++)
|
||||
+ cache[k] = new Long(j++);
|
||||
}
|
||||
}
|
||||
|
||||
@ -915,7 +915,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r
|
||||
|
||||
Name: java-%{javaver}-%{origin}
|
||||
Version: %{javaver}.%{updatever}.%{buildver}
|
||||
Release: 0
|
||||
Release: 1
|
||||
# java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons
|
||||
# and this change was brought into RHEL-4. java-1.5.0-ibm packages
|
||||
# also included the epoch in their virtual provides. This created a
|
||||
@ -1037,6 +1037,13 @@ Patch95: 8205921-Optimizing-best-of-2-work-stealing-queue-selection.patch
|
||||
|
||||
# 8u265
|
||||
Patch96: fix-Long-cache-range-and-remove-VM-option-java.lang.IntegerCache.high-by-default.patch
|
||||
Patch97: leaf-optimize-in-ParallelScanvageGC.patch
|
||||
Patch98: 8046294-Generate-the-4-byte-timestamp-randomly.patch
|
||||
Patch100: 8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch
|
||||
Patch102: fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch
|
||||
Patch103: Ddot-intrinsic-implement.patch
|
||||
Patch104: 8234003-Improve-IndexSet-iteration.patch
|
||||
Patch105: 8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch
|
||||
|
||||
#############################################
|
||||
#
|
||||
@ -1432,6 +1439,14 @@ pushd %{top_level_dir_name}
|
||||
%patch94 -p1
|
||||
%patch95 -p1
|
||||
%patch96 -p1
|
||||
%patch97 -p1
|
||||
%patch98 -p1
|
||||
%patch100 -p1
|
||||
%patch102 -p1
|
||||
%patch103 -p1
|
||||
%patch104 -p1
|
||||
%patch105 -p1
|
||||
|
||||
|
||||
popd
|
||||
|
||||
@ -2051,6 +2066,16 @@ require "copy_jdk_configs.lua"
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Sat Aug 29 2020 jdkboy <guoge1@huawei.com> - 1:1.8.0.265-b10.1
|
||||
- Add leaf-optimize-in-ParallelScanvageGC.patch
|
||||
- Add 8046294-Generate-the-4-byte-timestamp-randomly.patch
|
||||
- Add 8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch
|
||||
- Add fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch
|
||||
- Add Ddot-intrinsic-implement.patch
|
||||
- Add 8234003-Improve-IndexSet-iteration.patch
|
||||
- Add 8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch
|
||||
- Remove prohibition-of-irreducible-loop-in-mergers.patch
|
||||
|
||||
* Tue Aug 25 2020 noah <hedongbo@huawei.com> - 1:1.8.0.265-b10.0
|
||||
- Update to aarch64-shenandoah-jdk8u-8u265-b01
|
||||
- add fix-Long-cache-range-and-remove-VM-option-java.lang.IntegerCache.high-by-default.patch
|
||||
|
||||
210
leaf-optimize-in-ParallelScanvageGC.patch
Normal file
210
leaf-optimize-in-ParallelScanvageGC.patch
Normal file
@ -0,0 +1,210 @@
|
||||
diff --git a/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp b/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
|
||||
index fba64e15f..1c92314f9 100644
|
||||
--- a/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
|
||||
+++ b/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
|
||||
@@ -131,6 +131,14 @@ inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t*
|
||||
return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
|
||||
}
|
||||
|
||||
+inline intptr_t Atomic::relax_cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) // compare-and-swap on *dest with relaxed memory ordering
|
||||
+{
|
||||
+ intptr_t value = compare_value; // expected value; the builtin overwrites it with the current *dest when the comparison fails
|
||||
+ __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */false, // strong variant (weak == false)
|
||||
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED); // both the success and the failure ordering are relaxed
|
||||
+ return value; // equals compare_value exactly when the exchange took place
|
||||
+}
|
||||
+
|
||||
inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value)
|
||||
{
|
||||
return (void *) cmpxchg_ptr((intptr_t) exchange_value,
|
||||
diff --git a/hotspot/src/share/vm/classfile/classFileParser.cpp b/hotspot/src/share/vm/classfile/classFileParser.cpp
|
||||
index 07d07e4f2..f001a94e7 100644
|
||||
--- a/hotspot/src/share/vm/classfile/classFileParser.cpp
|
||||
+++ b/hotspot/src/share/vm/classfile/classFileParser.cpp
|
||||
@@ -4393,6 +4393,11 @@ void ClassFileParser::fill_oop_maps(instanceKlassHandle k,
|
||||
OopMapBlock* this_oop_map = k->start_of_nonstatic_oop_maps();
|
||||
const InstanceKlass* const super = k->superklass();
|
||||
const unsigned int super_count = super ? super->nonstatic_oop_map_count() : 0;
|
||||
+
|
||||
+ const bool super_is_gc_leaf = super ? super->oop_is_gc_leaf() : true;
|
||||
+ bool this_is_gc_leaf = super_is_gc_leaf && (nonstatic_oop_map_count == 0);
|
||||
+ k->set_oop_is_gc_leaf(this_is_gc_leaf);
|
||||
+
|
||||
if (super_count > 0) {
|
||||
// Copy maps from superklass
|
||||
OopMapBlock* super_oop_map = super->start_of_nonstatic_oop_maps();
|
||||
diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
|
||||
index b2de74d41..dde9ac426 100644
|
||||
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
|
||||
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
|
||||
@@ -49,7 +49,12 @@ inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) {
|
||||
}
|
||||
oopDesc::encode_store_heap_oop_not_null(p, o);
|
||||
} else {
|
||||
- push_depth(p);
|
||||
+ // copy leaf objects right away instead of queueing them, saving a push and a pop
|
||||
+ if (!o->klass()->oop_is_gc_leaf()) {
|
||||
+ push_depth(p);
|
||||
+ } else {
|
||||
+ PSScavenge::copy_and_push_safe_barrier<T, false>(this, p);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -171,7 +176,15 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) {
|
||||
Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size);
|
||||
|
||||
// Now we have to CAS in the header.
|
||||
+#ifdef AARCH64
|
||||
+ // A CAS with a memory fence is expensive in copy_to_survivor_space on aarch64.
|
||||
+ // To reduce the cost, we forward the object with a relaxed CAS and issue a
|
||||
+ // memory fence only when the CAS succeeds. To reduce fences further,
|
||||
+ // we skip the fence for leaf objects (objects that have no reference fields).
|
||||
+ if (o->relax_cas_forward_to(new_obj, test_mark)) {
|
||||
+#else
|
||||
if (o->cas_forward_to(new_obj, test_mark)) {
|
||||
+#endif
|
||||
// We won any races, we "own" this object.
|
||||
assert(new_obj == o->forwardee(), "Sanity");
|
||||
|
||||
@@ -195,10 +208,13 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) {
|
||||
push_depth(masked_o);
|
||||
TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes);
|
||||
} else {
|
||||
- // we'll just push its contents
|
||||
- new_obj->push_contents(this);
|
||||
+ // leaf objects have no contents to scan, so push_contents is never needed for them
|
||||
+ if (!o->klass()->oop_is_gc_leaf()) {
|
||||
+ // we'll just push its contents
|
||||
+ new_obj->push_contents(this);
|
||||
+ }
|
||||
}
|
||||
- } else {
|
||||
+ } else {
|
||||
// We lost, someone else "owns" this object
|
||||
guarantee(o->is_forwarded(), "Object must be forwarded if the cas failed.");
|
||||
|
||||
diff --git a/hotspot/src/share/vm/oops/klass.cpp b/hotspot/src/share/vm/oops/klass.cpp
|
||||
index 7fda7ce62..6e8f9acde 100644
|
||||
--- a/hotspot/src/share/vm/oops/klass.cpp
|
||||
+++ b/hotspot/src/share/vm/oops/klass.cpp
|
||||
@@ -207,6 +207,8 @@ Klass::Klass() {
|
||||
clear_modified_oops();
|
||||
clear_accumulated_modified_oops();
|
||||
_shared_class_path_index = -1;
|
||||
+
|
||||
+ set_oop_is_gc_leaf(false);
|
||||
}
|
||||
|
||||
jint Klass::array_layout_helper(BasicType etype) {
|
||||
diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp
|
||||
index 22ae48f5c..4aea54795 100644
|
||||
--- a/hotspot/src/share/vm/oops/klass.hpp
|
||||
+++ b/hotspot/src/share/vm/oops/klass.hpp
|
||||
@@ -177,6 +177,8 @@ class Klass : public Metadata {
|
||||
jbyte _modified_oops; // Card Table Equivalent (YC/CMS support)
|
||||
jbyte _accumulated_modified_oops; // Mod Union Equivalent (CMS support)
|
||||
|
||||
+ bool _is_gc_leaf;
|
||||
+
|
||||
private:
|
||||
// This is an index into FileMapHeader::_classpath_entry_table[], to
|
||||
// associate this class with the JAR file where it's loaded from during
|
||||
@@ -569,6 +571,9 @@ protected:
|
||||
oop_is_typeArray_slow()); }
|
||||
#undef assert_same_query
|
||||
|
||||
+ void set_oop_is_gc_leaf(bool is_gc_leaf) { _is_gc_leaf = is_gc_leaf; } // set by ClassFileParser::fill_oop_maps: true when neither this class nor its super has nonstatic oop maps
|
||||
+ inline bool oop_is_gc_leaf() const { return _is_gc_leaf; } // false by default (Klass::Klass()); read by the parallel-scavenge copy path to skip push_contents and the post-CAS fence
|
||||
+
|
||||
// Access flags
|
||||
AccessFlags access_flags() const { return _access_flags; }
|
||||
void set_access_flags(AccessFlags flags) { _access_flags = flags; }
|
||||
diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp
|
||||
index a703a54ef..41a7bce4d 100644
|
||||
--- a/hotspot/src/share/vm/oops/oop.hpp
|
||||
+++ b/hotspot/src/share/vm/oops/oop.hpp
|
||||
@@ -76,6 +76,9 @@ class oopDesc {
|
||||
|
||||
void release_set_mark(markOop m);
|
||||
markOop cas_set_mark(markOop new_mark, markOop old_mark);
|
||||
+#ifdef AARCH64
|
||||
+ markOop relax_cas_set_mark(markOop new_mark, markOop old_mark);
|
||||
+#endif
|
||||
|
||||
// Used only to re-initialize the mark word (e.g., of promoted
|
||||
// objects during a GC) -- requires a valid klass pointer
|
||||
@@ -317,6 +320,10 @@ class oopDesc {
|
||||
void forward_to(oop p);
|
||||
bool cas_forward_to(oop p, markOop compare);
|
||||
|
||||
+#ifdef AARCH64
|
||||
+ bool relax_cas_forward_to(oop p, markOop compare);
|
||||
+#endif
|
||||
+
|
||||
#if INCLUDE_ALL_GCS
|
||||
// Like "forward_to", but inserts the forwarding pointer atomically.
|
||||
// Exactly one thread succeeds in inserting the forwarding pointer, and
|
||||
diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp
|
||||
index d4c4d75c0..c3abdb128 100644
|
||||
--- a/hotspot/src/share/vm/oops/oop.inline.hpp
|
||||
+++ b/hotspot/src/share/vm/oops/oop.inline.hpp
|
||||
@@ -76,6 +76,12 @@ inline markOop oopDesc::cas_set_mark(markOop new_mark, markOop old_mark) {
|
||||
return (markOop) Atomic::cmpxchg_ptr(new_mark, &_mark, old_mark);
|
||||
}
|
||||
|
||||
+#ifdef AARCH64
|
||||
+inline markOop oopDesc::relax_cas_set_mark(markOop new_mark, markOop old_mark) { // counterpart of cas_set_mark that goes through Atomic::relax_cmpxchg_ptr
|
||||
+ return (markOop)Atomic::relax_cmpxchg_ptr((intptr_t)new_mark, (volatile intptr_t*)&_mark, (intptr_t)old_mark); // yields old_mark when new_mark was installed into _mark
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
inline Klass* oopDesc::klass() const {
|
||||
if (UseCompressedClassPointers) {
|
||||
return Klass::decode_klass_not_null(_metadata._compressed_klass);
|
||||
@@ -715,6 +721,30 @@ inline bool oopDesc::cas_forward_to(oop p, markOop compare) {
|
||||
return cas_set_mark(m, compare) == compare;
|
||||
}
|
||||
|
||||
+#ifdef AARCH64
|
||||
+inline bool oopDesc::relax_cas_forward_to(oop p, markOop compare) { // variant of cas_forward_to built on relax_cas_set_mark; returns true if this call installed the forwarding pointer
|
||||
+ assert(check_obj_alignment(p),
|
||||
+ "forwarding to something not aligned");
|
||||
+ assert(Universe::heap()->is_in_reserved(p),
|
||||
+ "forwarding to something not in heap");
|
||||
+ markOop m = markOopDesc::encode_pointer_as_mark(p);
|
||||
+ assert(m->decode_pointer() == p, "encoding must be reversable");
|
||||
+ markOop old_markoop = relax_cas_set_mark(m, compare);
|
||||
+ // If the CAS succeeded, we must make the copy visible to threads that read the forwardee.
|
||||
+ // (The fence could be delayed until the contents are pushed onto the task stack, since other
|
||||
+ // threads only need to touch the copied object after stealing the task.)
|
||||
+ if (old_markoop == compare) {
|
||||
+ // Once the CAS succeeds, a leaf object never needs to be visible to other threads (the current
|
||||
+ // thread has already finished collecting it), so we can skip the fence.
|
||||
+ if (!p->klass()->oop_is_gc_leaf()) {
|
||||
+ OrderAccess::fence();
|
||||
+ }
|
||||
+ return true;
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
// Note that the forwardee is not the same thing as the displaced_mark.
|
||||
// The forwardee is used when copying during scavenge and mark-sweep.
|
||||
// It does need to clear the low two locking- and GC-related bits.
|
||||
diff --git a/hotspot/src/share/vm/runtime/atomic.hpp b/hotspot/src/share/vm/runtime/atomic.hpp
|
||||
index 9ca5fce97..015178b61 100644
|
||||
--- a/hotspot/src/share/vm/runtime/atomic.hpp
|
||||
+++ b/hotspot/src/share/vm/runtime/atomic.hpp
|
||||
@@ -94,6 +94,10 @@ class Atomic : AllStatic {
|
||||
unsigned int compare_value);
|
||||
|
||||
inline static intptr_t cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value);
|
||||
+#ifdef AARCH64
|
||||
+ inline static intptr_t relax_cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value);
|
||||
+#endif
|
||||
+
|
||||
inline static void* cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value);
|
||||
};
|
||||
|
||||
@ -1,27 +0,0 @@
|
||||
From 34712f6bbc3c2c664ee641c78d4a2f8cfe427880 Mon Sep 17 00:00:00 2001
|
||||
Date: Fri, 28 Feb 2020 15:17:44 +0000
|
||||
Subject: [PATCH] prohibition of irreducible loop in mergers
|
||||
|
||||
Summary: C2Compiler: irreducible loop should not enter merge_many_backedges
|
||||
LLT: NA
|
||||
Bug url: NA
|
||||
---
|
||||
hotspot/src/share/vm/opto/loopnode.cpp | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/hotspot/src/share/vm/opto/loopnode.cpp b/hotspot/src/share/vm/opto/loopnode.cpp
|
||||
index e2c0645cf8..bbb2e2bf98 100644
|
||||
--- a/hotspot/src/share/vm/opto/loopnode.cpp
|
||||
+++ b/hotspot/src/share/vm/opto/loopnode.cpp
|
||||
@@ -1542,7 +1542,7 @@ bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) {
|
||||
// If I am a shared header (multiple backedges), peel off the many
|
||||
// backedges into a private merge point and use the merge point as
|
||||
// the one true backedge.
|
||||
- if( _head->req() > 3 ) {
|
||||
+ if( _head->req() > 3 && !_irreducible) {
|
||||
// Merge the many backedges into a single backedge but leave
|
||||
// the hottest backedge as separate edge for the following peel.
|
||||
merge_many_backedges( phase );
|
||||
--
|
||||
2.12.3
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user