openjdk-1.8.0/8160369.patch

From be5a699028cd0b8fd49eb2df0c4b3d1653eca4f3 Mon Sep 17 00:00:00 2001
Date: Mon, 25 Jan 2021 17:22:52 +0800
Subject: Backport of JDK-8160369
Summary:<GC>:[Backport of JDK-8160369 and its subtasks] Memory fences needed around setting and reading object lengths
LLT:
bug url: https://bugs.openjdk.java.net/browse/JDK-8160369
---
.../g1/g1BlockOffsetTable.cpp | 2 +-
.../g1/g1BlockOffsetTable.inline.hpp | 4 +-
.../vm/gc_implementation/g1/g1RemSet.cpp | 101 +++++++++----
.../vm/gc_implementation/g1/heapRegion.cpp | 140 ++++++++++--------
.../vm/gc_implementation/g1/heapRegion.hpp | 2 +
.../gc_implementation/g1/heapRegionType.hpp | 3 +
.../parNew/parNewGeneration.cpp | 21 ++-
7 files changed, 173 insertions(+), 100 deletions(-)
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp
index ead98e24a..1977fc83d 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp
@@ -264,7 +264,7 @@ G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q,
while (n <= next_boundary) {
q = n;
oop obj = oop(q);
- if (obj->klass_or_null() == NULL) return q;
+ if (obj->klass_or_null_acquire() == NULL) return q;
n += block_size(q);
}
assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp
index bcfd52a4a..ffc56a0ba 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp
@@ -166,7 +166,7 @@ forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
while (n <= addr) {
q = n;
oop obj = oop(q);
- if (obj->klass_or_null() == NULL) return q;
+ if (obj->klass_or_null_acquire() == NULL) return q;
n += block_size(q);
}
assert(q <= n, "wrong order for q and addr");
@@ -177,7 +177,7 @@ forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
inline HeapWord*
G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
const void* addr) {
- if (oop(q)->klass_or_null() == NULL) return q;
+ if (oop(q)->klass_or_null_acquire() == NULL) return q;
HeapWord* n = q + block_size(q);
// In the normal case, where the query "addr" is a card boundary, and the
// offset table chunks are the same size as cards, the block starting at
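
The two hunks above switch the block-offset-table walk from klass_or_null() to klass_or_null_acquire(). The sketch below is illustrative only (FakeKlass, FakeObjHeader, publish and block_size_or_zero are made-up names, not HotSpot code); it shows the release/acquire pairing this relies on: the allocating thread publishes a formatted object by storing its klass last with release semantics, and a concurrent reader must acquire-load the klass before trusting the object's length.

#include <atomic>
#include <cstddef>

struct FakeKlass { std::size_t layout_words; };

struct FakeObjHeader {
  std::atomic<FakeKlass*> klass{nullptr};
  std::size_t array_len = 0;   // written before the klass is published
};

// Writer (allocation path): format the object, then publish it.
void publish(FakeObjHeader* obj, FakeKlass* k, std::size_t len) {
  obj->array_len = len;                             // plain store
  obj->klass.store(k, std::memory_order_release);   // publish last
}

// Reader (concurrent refinement / BOT walk): only use the length if the
// klass is visible; otherwise treat the block as unparsable and bail out.
std::size_t block_size_or_zero(const FakeObjHeader* obj) {
  FakeKlass* k = obj->klass.load(std::memory_order_acquire);
  if (k == nullptr) return 0;   // not yet published: caller must give up
  return obj->array_len;        // safe to read after the acquire
}
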
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
index 4cad9234c..b062947c8 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@@ -460,18 +460,26 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i,
// And find the region containing it.
HeapRegion* r = _g1->heap_region_containing(start);
- // Why do we have to check here whether a card is on a young region,
- // given that we dirty young regions and, as a result, the
- // post-barrier is supposed to filter them out and never to enqueue
- // them? When we allocate a new region as the "allocation region" we
- // actually dirty its cards after we release the lock, since card
- // dirtying while holding the lock was a performance bottleneck. So,
- // as a result, it is possible for other threads to actually
- // allocate objects in the region (after the acquire the lock)
- // before all the cards on the region are dirtied. This is unlikely,
- // and it doesn't happen often, but it can happen. So, the extra
- // check below filters out those cards.
- if (r->is_young()) {
+ // This check is needed for some uncommon cases where we should
+ // ignore the card.
+ //
+ // The region could be young. Cards for young regions are
+ // distinctly marked (set to g1_young_gen), so the post-barrier will
+ // filter them out. However, that marking is performed
+ // concurrently. A write to a young object could occur before the
+ // card has been marked young, slipping past the filter.
+ //
+ // The card could be stale, because the region has been freed since
+ // the card was recorded. In this case the region type could be
+ // anything. If (still) free or (reallocated) young, just ignore
+ // it. If (reallocated) old or humongous, the later card trimming
+ // and additional checks in iteration may detect staleness. At
+ // worst, we end up processing a stale card unnecessarily.
+ //
+ // In the normal (non-stale) case, the synchronization between the
+ // enqueueing of the card and processing it here will have ensured
+ // we see the up-to-date region type here.
+ if (!r->is_old_or_humongous()) {
return false;
}
@@ -503,26 +511,69 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i,
assert(!check_for_refs_into_cset, "sanity");
assert(!SafepointSynchronize::is_at_safepoint(), "sanity");
+ const jbyte* orig_card_ptr = card_ptr;
card_ptr = hot_card_cache->insert(card_ptr);
if (card_ptr == NULL) {
// There was no eviction. Nothing to do.
return false;
- }
-
- start = _ct_bs->addr_for(card_ptr);
- r = _g1->heap_region_containing(start);
+ } else if (card_ptr != orig_card_ptr) {
+ // Original card was inserted and an old card was evicted.
+ start = _ct_bs->addr_for(card_ptr);
+ r = _g1->heap_region_containing(start);
+
+ // Check whether the region formerly in the cache should be
+ // ignored, as discussed earlier for the original card. The
+ // region could have been freed while in the cache. The cset is
+ // not relevant here, since we're in concurrent phase.
+ if (!r->is_old_or_humongous()) {
+ return false;
+ }
+ } // Else we still have the original card.
+ }
- // Checking whether the region we got back from the cache
- // is young here is inappropriate. The region could have been
- // freed, reallocated and tagged as young while in the cache.
- // Hence we could see its young type change at any time.
+ // Trim the region designated by the card to what's been allocated
+ // in the region. The card could be stale, or the card could cover
+ // (part of) an object at the end of the allocated space and extend
+ // beyond the end of allocation.
+ HeapWord* scan_limit;
+ if (_g1->is_gc_active()) {
+ // If we're in a STW GC, then a card might be in a GC alloc region
+ // and extend onto a GC LAB, which may not be parsable. Stop such
+ // at the "scan_top" of the region.
+ scan_limit = r->scan_top();
+ } else {
+ // Non-humongous objects are only allocated in the old-gen during
+ // GC, so if region is old then top is stable. Humongous object
+ // allocation sets top last; if top has not yet been set, this is
+ // a stale card and we'll end up with an empty intersection. If
+ // this is not a stale card, the synchronization between the
+ // enqueuing of the card and processing it here will have ensured
+ // we see the up-to-date top here.
+ scan_limit = r->top();
+ }
+ if (scan_limit <= start) {
+ // If the trimmed region is empty, the card must be stale.
+ return false;
}
+ // Okay to clean and process the card now. There are still some
+ // stale card cases that may be detected by iteration and dealt with
+ // as iteration failure.
+ *const_cast<volatile jbyte*>(card_ptr) = CardTableModRefBS::clean_card_val();
+
+ // This fence serves two purposes. First, the card must be cleaned
+ // before processing the contents. Second, we can't proceed with
+ // processing until after the read of top, for synchronization with
+ // possibly concurrent humongous object allocation. It's okay that
+ // reading top and reading type were racy wrto each other. We need
+ // both set, in any order, to proceed.
+ OrderAccess::fence();
+
// Don't use addr_for(card_ptr + 1) which can ask for
- // a card beyond the heap. This is not safe without a perm
- // gen at the upper end of the heap.
- HeapWord* end = start + CardTableModRefBS::card_size_in_words;
- MemRegion dirtyRegion(start, end);
+ // a card beyond the heap.
+ HeapWord* end = start + CardTableModRefBS::card_size_in_words;
+ MemRegion dirty_region(start, MIN2(scan_limit, end));
+ assert(!dirty_region.is_empty(), "sanity");
#if CARD_REPEAT_HISTO
init_ct_freq_table(_g1->max_capacity());
@@ -570,7 +621,7 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i,
// allocation in this region and making it safe to check the young type.
bool card_processed =
- r->oops_on_card_seq_iterate_careful(dirtyRegion,
+ r->oops_on_card_seq_iterate_careful(dirty_region,
&filter_then_update_rs_oop_cl,
card_ptr);
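
The rewritten refine_card path above trims the card to the allocated part of the region (scan_top() during a STW GC, top() otherwise), cleans the card, and only then scans it, with a fence between cleaning and scanning. Below is a minimal sketch of that ordering using assumed names (FakeRegion, refine_card_sketch), not the real G1 types:

#include <atomic>
#include <algorithm>
#include <cstdint>

using HeapWordAddr = std::uintptr_t;

struct FakeRegion {
  HeapWordAddr bottom;
  std::atomic<HeapWordAddr> top;   // humongous allocation sets top last
  bool is_old_or_humongous;
};

constexpr std::uint8_t kCleanCard = 0xff;

// Returns true if the card was actually scanned.
bool refine_card_sketch(std::atomic<std::uint8_t>* card,
                        HeapWordAddr card_start, HeapWordAddr card_end,
                        FakeRegion* r) {
  if (!r->is_old_or_humongous) return false;       // young or free: ignore
  HeapWordAddr scan_limit = r->top.load(std::memory_order_relaxed);
  if (scan_limit <= card_start) return false;      // stale card

  card->store(kCleanCard, std::memory_order_relaxed);
  // The card must be clean before we look at the contents, and we must
  // not start scanning until the read of top above is complete.
  std::atomic_thread_fence(std::memory_order_seq_cst);

  HeapWordAddr end = std::min(scan_limit, card_end);
  // ... scan [card_start, end) for references into the collection set ...
  (void)end;
  return true;
}
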
diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
index 794911ef6..7c48501f3 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
@@ -375,6 +375,50 @@ void HeapRegion::note_self_forwarding_removal_end(bool during_initial_mark,
_prev_marked_bytes = marked_bytes;
}
+// Humongous objects are allocated directly in the old-gen. Need
+// special handling for concurrent processing encountering an
+// in-progress allocation.
+static bool do_oops_on_card_in_humongous(MemRegion mr,
+ FilterOutOfRegionClosure* cl,
+ HeapRegion* hr,
+ G1CollectedHeap* g1h) {
+ assert(hr->isHumongous(), "precondition");
+ HeapRegion* sr = hr->humongous_start_region();
+ oop obj = oop(sr->bottom());
+
+ // If concurrent and klass_or_null is NULL, then space has been
+ // allocated but the object has not yet been published by setting
+ // the klass. That can only happen if the card is stale. However,
+ // we've already set the card clean, so we must return failure,
+ // since the allocating thread could have performed a write to the
+ // card that might be missed otherwise.
+ if (!g1h->is_gc_active() && (obj->klass_or_null_acquire() == NULL)) {
+ return false;
+ }
+
+ // We have a well-formed humongous object at the start of sr.
+ // Only filler objects follow a humongous object in the containing
+ // regions, and we can ignore those. So only process the one
+ // humongous object.
+ if (!g1h->is_obj_dead(obj, sr)) {
+ if (obj->is_objArray() || (sr->bottom() < mr.start())) {
+ // objArrays are always marked precisely, so limit processing
+ // with mr. Non-objArrays might be precisely marked, and since
+ // it's humongous it's worthwhile avoiding full processing.
+ // However, the card could be stale and only cover filler
+ // objects. That should be rare, so not worth checking for;
+ // instead let it fall out from the bounded iteration.
+ obj->oop_iterate(cl, mr);
+ } else {
+ // If obj is not an objArray and mr contains the start of the
+ // obj, then this could be an imprecise mark, and we need to
+ // process the entire object.
+ obj->oop_iterate(cl);
+ }
+ }
+ return true;
+}
+
HeapWord*
HeapRegion::object_iterate_mem_careful(MemRegion mr,
ObjectClosure* cl) {
@@ -399,9 +443,6 @@ HeapRegion::object_iterate_mem_careful(MemRegion mr,
} else if (!g1h->is_obj_dead(obj)) {
cl->do_object(obj);
}
- if (cl->abort()) return cur;
- // The check above must occur before the operation below, since an
- // abort might invalidate the "size" operation.
cur += block_size(cur);
}
return NULL;
@@ -411,30 +452,14 @@ bool HeapRegion::oops_on_card_seq_iterate_careful(MemRegion mr,
FilterOutOfRegionClosure* cl,
jbyte* card_ptr) {
assert(card_ptr != NULL, "pre-condition");
+ assert(MemRegion(bottom(), end()).contains(mr), "Card region not in heap region");
G1CollectedHeap* g1h = G1CollectedHeap::heap();
- // If we're within a stop-world GC, then we might look at a card in a
- // GC alloc region that extends onto a GC LAB, which may not be
- // parseable. Stop such at the "scan_top" of the region.
- if (g1h->is_gc_active()) {
- mr = mr.intersection(MemRegion(bottom(), scan_top()));
- } else {
- mr = mr.intersection(used_region());
- }
- if (mr.is_empty()) {
- return true;
- }
- // Otherwise, find the obj that extends onto mr.start().
-
- // The intersection of the incoming mr (for the card) and the
- // allocated part of the region is non-empty. This implies that
- // we have actually allocated into this region. The code in
- // G1CollectedHeap.cpp that allocates a new region sets the
- // is_young tag on the region before allocating. Thus we
- // safely know if this region is young.
- if (is_young()) {
- return true;
+ // Special handling for humongous regions.
+ if (isHumongous()) {
+ return do_oops_on_card_in_humongous(mr, cl, this, g1h);
}
+ assert(is_old(), "precondition");
// We can only clean the card here, after we make the decision that
// the card is not young.
@@ -446,50 +471,37 @@ bool HeapRegion::oops_on_card_seq_iterate_careful(MemRegion mr,
HeapWord* const start = mr.start();
HeapWord* const end = mr.end();
- // Update BOT as needed while finding start of (potential) object.
+ // Find the obj that extends onto mr.start().
+ // Update BOT as needed while finding start of (possibly dead)
+ // object containing the start of the region.
HeapWord* cur = block_start(start);
- assert(cur <= start, "Postcondition");
-
- oop obj;
-
- HeapWord* next = cur;
- do {
- cur = next;
- obj = oop(cur);
- if (obj->klass_or_null() == NULL) {
- // Ran into an unparseable point.
- assert(!g1h->is_gc_active(),
- err_msg("Unparsable heap during GC at " PTR_FORMAT, p2i(cur)));
- return false;
- }
- // Otherwise...
- next = cur + block_size(cur);
- } while (next <= start);
-
- // If we finish the above loop...We have a parseable object that
- // begins on or before the start of the memory region, and ends
- // inside or spans the entire region.
- assert(cur <= start, "Loop postcondition");
- assert(obj->klass_or_null() != NULL, "Loop postcondition");
+#ifdef ASSERT
+ {
+ assert(cur <= start,
+ err_msg("cur: " PTR_FORMAT ", start: " PTR_FORMAT, p2i(cur), p2i(start)));
+ HeapWord* next = cur + block_size(cur);
+ assert(start < next,
+ err_msg("start: " PTR_FORMAT ", next: " PTR_FORMAT, p2i(start), p2i(next)));
+ }
+#endif
do {
- obj = oop(cur);
- assert((cur + block_size(cur)) > (HeapWord*)obj, "Loop invariant");
- if (obj->klass_or_null() == NULL) {
- // Ran into an unparseable point.
- assert(!g1h->is_gc_active(),
- err_msg("Unparsable heap during GC at " PTR_FORMAT, p2i(cur)));
- return false;
- }
-
- // Advance the current pointer. "obj" still points to the object to iterate.
- cur = cur + block_size(cur);
+ oop obj = oop(cur);
+ assert(obj->is_oop(true), err_msg("Not an oop at " PTR_FORMAT, p2i(obj)));
+ assert(obj->klass_or_null() != NULL,
+ err_msg("Unparsable heap at " PTR_FORMAT, p2i(obj)));
+
+ if (g1h->is_obj_dead(obj, this)) {
+ // Carefully step over dead object.
+ cur += block_size(cur);
+ } else {
+ // Step over live object, and process its references.
+ cur += obj->size();
+ // Non-objArrays are usually marked imprecise at the object
+ // start, in which case we need to iterate over them in full.
+ // objArrays are precisely marked, but can still be iterated
+ // over in full if completely covered.
- if (!g1h->is_obj_dead(obj)) {
- // Non-objArrays are sometimes marked imprecise at the object start. We
- // always need to iterate over them in full.
- // We only iterate over object arrays in full if they are completely contained
- // in the memory region.
if (!obj->is_objArray() || (((HeapWord*)obj) >= start && cur <= end)) {
obj->oop_iterate(cl);
} else {
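
The new do_oops_on_card_in_humongous() above hinges on one decision: a concurrent refiner that finds an unpublished klass must report failure, because the card has already been cleaned and a write by the allocating thread could otherwise be missed. The following sketch uses assumed types and names (FakeHumongousObj, CardResult, process_humongous_card_sketch), not the real HeapRegion code:

#include <atomic>

struct FakeKlass;

struct FakeHumongousObj {
  std::atomic<FakeKlass*> klass{nullptr};  // set last by the allocator
};

enum class CardResult { Processed, MustRedirty };

CardResult process_humongous_card_sketch(const FakeHumongousObj* obj,
                                         bool at_safepoint) {
  FakeKlass* k = obj->klass.load(std::memory_order_acquire);
  if (!at_safepoint && k == nullptr) {
    // Space is allocated but the object is not yet published, so the card
    // must be stale. Since the card was already cleaned, the caller has to
    // redirty it rather than silently drop it.
    return CardResult::MustRedirty;
  }
  // ... iterate the (single) humongous object's references here ...
  return CardResult::Processed;
}
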
diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
index 52ef1d0d2..8a45b3915 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
@@ -422,6 +422,8 @@ class HeapRegion: public G1OffsetTableContigSpace {
bool is_old() const { return _type.is_old(); }
+ bool is_old_or_humongous() const { return _type.is_old_or_humongous(); }
+
// For a humongous region, region in which it starts.
HeapRegion* humongous_start_region() const {
return _humongous_start_region;
diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp
index a9a4fbc25..007dabf19 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp
@@ -111,6 +111,9 @@ public:
bool is_old() const { return get() == OldTag; }
+ bool is_old_or_humongous() const { return (get() & (OldTag | HumMask)) != 0; }
+
+
// Setters
void set_free() { set(FreeTag); }
diff --git a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
index f05b4f177..9481dba10 100644
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
@@ -1551,22 +1551,25 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
return false;
}
assert(prefix != NULL && prefix != BUSY, "Error");
- size_t i = 1;
oop cur = prefix;
- while (i < objsFromOverflow && cur->klass_or_null() != NULL) {
- i++; cur = cur->list_ptr_from_klass();
+ for (size_t i = 1; i < objsFromOverflow; ++i) {
+ oop next = cur->list_ptr_from_klass();
+ if (next == NULL) break;
+ cur = next;
}
+ assert(cur != NULL, "Loop postcondition");
// Reattach remaining (suffix) to overflow list
- if (cur->klass_or_null() == NULL) {
+ oop suffix = cur->list_ptr_from_klass();
+ if (suffix == NULL) {
// Write back the NULL in lieu of the BUSY we wrote
// above and it is still the same value.
if (_overflow_list == BUSY) {
(void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
}
} else {
- assert(cur->klass_or_null() != (Klass*)(address)BUSY, "Error");
- oop suffix = cur->list_ptr_from_klass(); // suffix will be put back on global list
+ assert(suffix != BUSY, "Error");
+ // suffix will be put back on global list
cur->set_klass_to_list_ptr(NULL); // break off suffix
// It's possible that the list is still in the empty(busy) state
// we left it in a short while ago; in that case we may be
@@ -1586,8 +1589,10 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
// Too bad, someone else got in in between; we'll need to do a splice.
// Find the last item of suffix list
oop last = suffix;
- while (last->klass_or_null() != NULL) {
- last = last->list_ptr_from_klass();
+ while (true) {
+ oop next = last->list_ptr_from_klass();
+ if (next == NULL) break;
+ last = next;
}
// Atomically prepend suffix to current overflow list
observed_overflow_list = _overflow_list;
--
2.19.0
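
For context on the parNewGeneration.cpp hunks: the overflow list is a lock-free stack whose links are stored in the objects' klass fields (list_ptr_from_klass / set_klass_to_list_ptr), with a BUSY sentinel as head while a thread is detaching a prefix. The sketch below shows only the final splice step, the CAS loop behind "Atomically prepend suffix to current overflow list"; it uses an ordinary Node type and omits the klass-pointer encoding and the BUSY handling, so it is an assumption-laden illustration rather than the ParNew code.

#include <atomic>

struct Node { std::atomic<Node*> next{nullptr}; };

void prepend_suffix(std::atomic<Node*>& overflow_head, Node* suffix_first) {
  // Find the last node of the suffix being put back on the shared list.
  Node* last = suffix_first;
  while (Node* n = last->next.load(std::memory_order_relaxed)) {
    last = n;
  }
  // Classic CAS loop: link the suffix in front of whatever head we saw,
  // and retry if another thread moved the head underneath us.
  Node* observed = overflow_head.load(std::memory_order_relaxed);
  do {
    last->next.store(observed, std::memory_order_relaxed);
  } while (!overflow_head.compare_exchange_weak(
               observed, suffix_first,
               std::memory_order_release,
               std::memory_order_relaxed));
}
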