From be5a699028cd0b8fd49eb2df0c4b3d1653eca4f3 Mon Sep 17 00:00:00 2001
Date: Mon, 25 Jan 2021 17:22:52 +0800
Subject: Backport of JDK-8160369

Summary: [Backport of JDK-8160369 and its subtasks] Memory fences needed around setting and reading object lengths
LLT:
bug url: https://bugs.openjdk.java.net/browse/JDK-8160369
---
 .../g1/g1BlockOffsetTable.cpp                |   2 +-
 .../g1/g1BlockOffsetTable.inline.hpp         |   4 +-
 .../vm/gc_implementation/g1/g1RemSet.cpp     | 101 +++++++++----
 .../vm/gc_implementation/g1/heapRegion.cpp   | 140 ++++++++++--------
 .../vm/gc_implementation/g1/heapRegion.hpp   |   2 +
 .../gc_implementation/g1/heapRegionType.hpp  |   3 +
 .../parNew/parNewGeneration.cpp              |  21 ++-
 7 files changed, 173 insertions(+), 100 deletions(-)

diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp
index ead98e24a..1977fc83d 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp
@@ -264,7 +264,7 @@ G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q,
   while (n <= next_boundary) {
     q = n;
     oop obj = oop(q);
-    if (obj->klass_or_null() == NULL) return q;
+    if (obj->klass_or_null_acquire() == NULL) return q;
     n += block_size(q);
   }
   assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp
index bcfd52a4a..ffc56a0ba 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp
@@ -166,7 +166,7 @@ forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
   while (n <= addr) {
     q = n;
     oop obj = oop(q);
-    if (obj->klass_or_null() == NULL) return q;
+    if (obj->klass_or_null_acquire() == NULL) return q;
     n += block_size(q);
   }
   assert(q <= n, "wrong order for q and addr");
@@ -177,7 +177,7 @@ forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
 inline HeapWord*
 G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
                                                      const void* addr) {
-  if (oop(q)->klass_or_null() == NULL) return q;
+  if (oop(q)->klass_or_null_acquire() == NULL) return q;
   HeapWord* n = q + block_size(q);
   // In the normal case, where the query "addr" is a card boundary, and the
   // offset table chunks are the same size as cards, the block starting at
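For the two block-offset-table files above: the walk may race with a thread that has allocated an object but not yet published it, and the switch to klass_or_null_acquire() is what makes the following size read safe. Below is a minimal, hypothetical C++11 sketch of that reader side (std::atomic stands in for HotSpot's OrderAccess; BlockHeader and walk_blocks are invented names): a load-acquire of the klass decides whether the block's size word may be trusted.

#include <atomic>
#include <cstddef>
#include <vector>

struct Klass;  // opaque class metadata, never dereferenced here

// Hypothetical block header: the allocating thread fills in the size
// first and publishes the block by store-releasing the klass pointer.
struct BlockHeader {
  std::atomic<const Klass*> klass{nullptr};
  size_t size_in_words{0};  // only meaningful once klass is non-null
};

// Reader side, shaped like forward_to_block_containing_addr_slow():
// walk consecutive blocks, but stop at the first block whose klass has
// not been published yet, because its size word may still be garbage.
size_t walk_blocks(const std::vector<const BlockHeader*>& blocks) {
  size_t words = 0;
  for (const BlockHeader* b : blocks) {
    // Load-acquire pairs with the allocator's store-release of klass.
    if (b->klass.load(std::memory_order_acquire) == nullptr) {
      break;  // unpublished block: do not trust size_in_words
    }
    words += b->size_in_words;  // ordered after the acquire load
  }
  return words;
}

The matching writer side of this protocol is sketched after the heapRegion.cpp hunk further down.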
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
index 4cad9234c..b062947c8 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@@ -460,18 +460,26 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i,
   // And find the region containing it.
   HeapRegion* r = _g1->heap_region_containing(start);
 
-  // Why do we have to check here whether a card is on a young region,
-  // given that we dirty young regions and, as a result, the
-  // post-barrier is supposed to filter them out and never to enqueue
-  // them? When we allocate a new region as the "allocation region" we
-  // actually dirty its cards after we release the lock, since card
-  // dirtying while holding the lock was a performance bottleneck. So,
-  // as a result, it is possible for other threads to actually
-  // allocate objects in the region (after the acquire the lock)
-  // before all the cards on the region are dirtied. This is unlikely,
-  // and it doesn't happen often, but it can happen. So, the extra
-  // check below filters out those cards.
-  if (r->is_young()) {
+  // This check is needed for some uncommon cases where we should
+  // ignore the card.
+  //
+  // The region could be young. Cards for young regions are
+  // distinctly marked (set to g1_young_gen), so the post-barrier will
+  // filter them out. However, that marking is performed
+  // concurrently. A write to a young object could occur before the
+  // card has been marked young, slipping past the filter.
+  //
+  // The card could be stale, because the region has been freed since
+  // the card was recorded. In this case the region type could be
+  // anything. If (still) free or (reallocated) young, just ignore
+  // it. If (reallocated) old or humongous, the later card trimming
+  // and additional checks in iteration may detect staleness. At
+  // worst, we end up processing a stale card unnecessarily.
+  //
+  // In the normal (non-stale) case, the synchronization between the
+  // enqueueing of the card and processing it here will have ensured
+  // we see the up-to-date region type here.
+  if (!r->is_old_or_humongous()) {
     return false;
   }
 
@@ -503,26 +511,69 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i,
       assert(!check_for_refs_into_cset, "sanity");
       assert(!SafepointSynchronize::is_at_safepoint(), "sanity");
 
+      const jbyte* orig_card_ptr = card_ptr;
       card_ptr = hot_card_cache->insert(card_ptr);
       if (card_ptr == NULL) {
         // There was no eviction. Nothing to do.
         return false;
-      }
-
-      start = _ct_bs->addr_for(card_ptr);
-      r = _g1->heap_region_containing(start);
+      } else if (card_ptr != orig_card_ptr) {
+        // Original card was inserted and an old card was evicted.
+        start = _ct_bs->addr_for(card_ptr);
+        r = _g1->heap_region_containing(start);
+
+        // Check whether the region formerly in the cache should be
+        // ignored, as discussed earlier for the original card. The
+        // region could have been freed while in the cache. The cset is
+        // not relevant here, since we're in concurrent phase.
+        if (!r->is_old_or_humongous()) {
+          return false;
+        }
+      } // Else we still have the original card.
+    }
 
-    // Checking whether the region we got back from the cache
-    // is young here is inappropriate. The region could have been
-    // freed, reallocated and tagged as young while in the cache.
-    // Hence we could see its young type change at any time.
+  // Trim the region designated by the card to what's been allocated
+  // in the region. The card could be stale, or the card could cover
+  // (part of) an object at the end of the allocated space and extend
+  // beyond the end of allocation.
+  HeapWord* scan_limit;
+  if (_g1->is_gc_active()) {
+    // If we're in a STW GC, then a card might be in a GC alloc region
+    // and extend onto a GC LAB, which may not be parsable. Stop such
+    // at the "scan_top" of the region.
+    scan_limit = r->scan_top();
+  } else {
+    // Non-humongous objects are only allocated in the old-gen during
+    // GC, so if region is old then top is stable. Humongous object
+    // allocation sets top last; if top has not yet been set, this is
+    // a stale card and we'll end up with an empty intersection. If
+    // this is not a stale card, the synchronization between the
+    // enqueuing of the card and processing it here will have ensured
+    // we see the up-to-date top here.
+    scan_limit = r->top();
+  }
+  if (scan_limit <= start) {
+    // If the trimmed region is empty, the card must be stale.
+    return false;
   }
 
+  // Okay to clean and process the card now. There are still some
+  // stale card cases that may be detected by iteration and dealt with
+  // as iteration failure.
+  *const_cast<volatile jbyte*>(card_ptr) = CardTableModRefBS::clean_card_val();
+
+  // This fence serves two purposes. First, the card must be cleaned
+  // before processing the contents. Second, we can't proceed with
+  // processing until after the read of top, for synchronization with
+  // possibly concurrent humongous object allocation. It's okay that
+  // reading top and reading type were racy wrto each other. We need
+  // both set, in any order, to proceed.
+  OrderAccess::fence();
+
   // Don't use addr_for(card_ptr + 1) which can ask for
-  // a card beyond the heap. This is not safe without a perm
-  // gen at the upper end of the heap.
-  HeapWord* end = start + CardTableModRefBS::card_size_in_words;
-  MemRegion dirtyRegion(start, end);
+  // a card beyond the heap.
+  HeapWord* end = start + CardTableModRefBS::card_size_in_words;
+  MemRegion dirty_region(start, MIN2(scan_limit, end));
+  assert(!dirty_region.is_empty(), "sanity");
 
 #if CARD_REPEAT_HISTO
   init_ct_freq_table(_g1->max_capacity());
@@ -570,7 +621,7 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i,
   // allocation in this region and making it safe to check the young type.
   bool card_processed =
-    r->oops_on_card_seq_iterate_careful(dirtyRegion,
+    r->oops_on_card_seq_iterate_careful(dirty_region,
                                         &filter_then_update_rs_oop_cl,
                                         card_ptr);
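The rewritten refine_card() above reads the region's allocation limit, discards stale cards, cleans the card, and only then fences before scanning. A small, hypothetical C++11 sketch of that ordering (std::atomic and atomic_thread_fence stand in for OrderAccess::fence(); Region, refine_card_sketch and kCardWords are invented names):

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <algorithm>

static const size_t kCardWords = 64;  // assumed card size, in words

struct Region {
  std::atomic<size_t> top_words{0};  // words allocated so far, set by the allocator
};

enum CardValue : uint8_t { kClean = 0, kDirty = 1 };

// Returns true if the card was scanned, false if it was ignored as stale.
bool refine_card_sketch(Region& r, std::atomic<uint8_t>& card, size_t card_start_word,
                        void (*scan)(size_t begin_word, size_t end_word)) {
  // Read the allocation limit first; a card at or beyond it is stale.
  size_t limit = r.top_words.load(std::memory_order_acquire);
  if (limit <= card_start_word) {
    return false;  // stale card: nothing allocated under it yet
  }

  // Clean the card before scanning, so a concurrent write that dirties
  // it again causes the card to be re-enqueued rather than lost.
  card.store(kClean, std::memory_order_relaxed);

  // Full fence: the clean must be visible before the scan starts, and the
  // scan must not run ahead of the read of top (mirrors OrderAccess::fence()).
  std::atomic_thread_fence(std::memory_order_seq_cst);

  // Scan only the intersection of the card and the allocated space.
  scan(card_start_word, std::min(limit, card_start_word + kCardWords));
  return true;
}

Cleaning before the fence-and-scan is what keeps a concurrent update from being missed: a write after the clean simply re-dirties the card and it is processed again later.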
diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
index 794911ef6..7c48501f3 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
@@ -375,6 +375,50 @@ void HeapRegion::note_self_forwarding_removal_end(bool during_initial_mark,
   _prev_marked_bytes = marked_bytes;
 }
 
+// Humongous objects are allocated directly in the old-gen. Need
+// special handling for concurrent processing encountering an
+// in-progress allocation.
+static bool do_oops_on_card_in_humongous(MemRegion mr,
+                                         FilterOutOfRegionClosure* cl,
+                                         HeapRegion* hr,
+                                         G1CollectedHeap* g1h) {
+  assert(hr->isHumongous(), "precondition");
+  HeapRegion* sr = hr->humongous_start_region();
+  oop obj = oop(sr->bottom());
+
+  // If concurrent and klass_or_null is NULL, then space has been
+  // allocated but the object has not yet been published by setting
+  // the klass. That can only happen if the card is stale. However,
+  // we've already set the card clean, so we must return failure,
+  // since the allocating thread could have performed a write to the
+  // card that might be missed otherwise.
+  if (!g1h->is_gc_active() && (obj->klass_or_null_acquire() == NULL)) {
+    return false;
+  }
+
+  // We have a well-formed humongous object at the start of sr.
+  // Only filler objects follow a humongous object in the containing
+  // regions, and we can ignore those. So only process the one
+  // humongous object.
+  if (!g1h->is_obj_dead(obj, sr)) {
+    if (obj->is_objArray() || (sr->bottom() < mr.start())) {
+      // objArrays are always marked precisely, so limit processing
+      // with mr. Non-objArrays might be precisely marked, and since
+      // it's humongous it's worthwhile avoiding full processing.
+      // However, the card could be stale and only cover filler
+      // objects. That should be rare, so not worth checking for;
+      // instead let it fall out from the bounded iteration.
+      obj->oop_iterate(cl, mr);
+    } else {
+      // If obj is not an objArray and mr contains the start of the
+      // obj, then this could be an imprecise mark, and we need to
+      // process the entire object.
+      obj->oop_iterate(cl);
+    }
+  }
+  return true;
+}
+
 HeapWord*
 HeapRegion::object_iterate_mem_careful(MemRegion mr,
                                        ObjectClosure* cl) {
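do_oops_on_card_in_humongous() above relies on the allocating thread publishing a humongous object by initializing its length and fields first and storing the klass last, so a concurrent scanner that loads a null klass (with acquire, as in the earlier sketch) can simply treat the card as stale. A minimal, hypothetical C++11 sketch of that writer side (ObjHeader and publish_object are invented names; std::atomic stands in for HotSpot's release store of the klass):

#include <atomic>
#include <cstddef>

struct Klass;  // opaque class metadata

// Hypothetical humongous object header: klass doubles as the
// publication flag for the whole allocation.
struct ObjHeader {
  size_t length_in_words{0};
  std::atomic<const Klass*> klass{nullptr};
};

// Allocator side: initialize everything a concurrent card scanner might
// read, then publish with a release store of the klass. The scanner's
// acquire load either sees nullptr (and bails out) or sees the klass and,
// via the release/acquire pairing, a fully initialized length.
void publish_object(ObjHeader* obj, const Klass* k, size_t len) {
  obj->length_in_words = len;                      // ordinary store
  obj->klass.store(k, std::memory_order_release);  // publication point
}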
@@ -399,9 +443,6 @@ HeapRegion::object_iterate_mem_careful(MemRegion mr,
     } else if (!g1h->is_obj_dead(obj)) {
       cl->do_object(obj);
     }
-    if (cl->abort()) return cur;
-    // The check above must occur before the operation below, since an
-    // abort might invalidate the "size" operation.
     cur += block_size(cur);
   }
   return NULL;
@@ -411,30 +452,14 @@ bool HeapRegion::oops_on_card_seq_iterate_careful(MemRegion mr,
                                                  FilterOutOfRegionClosure* cl,
                                                  jbyte* card_ptr) {
   assert(card_ptr != NULL, "pre-condition");
+  assert(MemRegion(bottom(), end()).contains(mr), "Card region not in heap region");
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  // If we're within a stop-world GC, then we might look at a card in a
-  // GC alloc region that extends onto a GC LAB, which may not be
-  // parseable. Stop such at the "scan_top" of the region.
-  if (g1h->is_gc_active()) {
-    mr = mr.intersection(MemRegion(bottom(), scan_top()));
-  } else {
-    mr = mr.intersection(used_region());
-  }
-  if (mr.is_empty()) {
-    return true;
-  }
-  // Otherwise, find the obj that extends onto mr.start().
-
-  // The intersection of the incoming mr (for the card) and the
-  // allocated part of the region is non-empty. This implies that
-  // we have actually allocated into this region. The code in
-  // G1CollectedHeap.cpp that allocates a new region sets the
-  // is_young tag on the region before allocating. Thus we
-  // safely know if this region is young.
-  if (is_young()) {
-    return true;
+  // Special handling for humongous regions.
+  if (isHumongous()) {
+    return do_oops_on_card_in_humongous(mr, cl, this, g1h);
   }
+  assert(is_old(), "precondition");
 
   // We can only clean the card here, after we make the decision that
   // the card is not young.
@@ -446,50 +471,37 @@ bool HeapRegion::oops_on_card_seq_iterate_careful(MemRegion mr,
   HeapWord* const start = mr.start();
   HeapWord* const end = mr.end();
 
-  // Update BOT as needed while finding start of (potential) object.
+  // Find the obj that extends onto mr.start().
+  // Update BOT as needed while finding start of (possibly dead)
+  // object containing the start of the region.
   HeapWord* cur = block_start(start);
-  assert(cur <= start, "Postcondition");
-
-  oop obj;
-
-  HeapWord* next = cur;
-  do {
-    cur = next;
-    obj = oop(cur);
-    if (obj->klass_or_null() == NULL) {
-      // Ran into an unparseable point.
-      assert(!g1h->is_gc_active(),
-             err_msg("Unparsable heap during GC at " PTR_FORMAT, p2i(cur)));
-      return false;
-    }
-    // Otherwise...
-    next = cur + block_size(cur);
-  } while (next <= start);
-
-  // If we finish the above loop...We have a parseable object that
-  // begins on or before the start of the memory region, and ends
-  // inside or spans the entire region.
-  assert(cur <= start, "Loop postcondition");
-  assert(obj->klass_or_null() != NULL, "Loop postcondition");
+#ifdef ASSERT
+  {
+    assert(cur <= start,
+           err_msg("cur: " PTR_FORMAT ", start: " PTR_FORMAT, p2i(cur), p2i(start)));
+    HeapWord* next = cur + block_size(cur);
+    assert(start < next,
+           err_msg("start: " PTR_FORMAT ", next: " PTR_FORMAT, p2i(start), p2i(next)));
+  }
+#endif
 
   do {
-    obj = oop(cur);
-    assert((cur + block_size(cur)) > (HeapWord*)obj, "Loop invariant");
-    if (obj->klass_or_null() == NULL) {
-      // Ran into an unparseable point.
-      assert(!g1h->is_gc_active(),
-             err_msg("Unparsable heap during GC at " PTR_FORMAT, p2i(cur)));
-      return false;
-    }
-
-    // Advance the current pointer. "obj" still points to the object to iterate.
-    cur = cur + block_size(cur);
+    oop obj = oop(cur);
+    assert(obj->is_oop(true), err_msg("Not an oop at " PTR_FORMAT, p2i(obj)));
+    assert(obj->klass_or_null() != NULL,
+           err_msg("Unparsable heap at " PTR_FORMAT, p2i(obj)));
+
+    if (g1h->is_obj_dead(obj, this)) {
+      // Carefully step over dead object.
+      cur += block_size(cur);
+    } else {
+      // Step over live object, and process its references.
+      cur += obj->size();
+      // Non-objArrays are usually marked imprecise at the object
+      // start, in which case we need to iterate over them in full.
+      // objArrays are precisely marked, but can still be iterated
+      // over in full if completely covered.
-    if (!g1h->is_obj_dead(obj)) {
-      // Non-objArrays are sometimes marked imprecise at the object start. We
-      // always need to iterate over them in full.
-      // We only iterate over object arrays in full if they are completely contained
-      // in the memory region.
       if (!obj->is_objArray() || (((HeapWord*)obj) >= start && cur <= end)) {
         obj->oop_iterate(cl);
       } else {
diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
index 52ef1d0d2..8a45b3915 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
@@ -422,6 +422,8 @@ class HeapRegion: public G1OffsetTableContigSpace {
 
   bool is_old() const { return _type.is_old(); }
 
+  bool is_old_or_humongous() const { return _type.is_old_or_humongous(); }
+
   // For a humongous region, region in which it starts.
   HeapRegion* humongous_start_region() const {
     return _humongous_start_region;
diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp
index a9a4fbc25..007dabf19 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp
@@ -111,6 +111,9 @@ public:
   bool is_old() const { return get() == OldTag; }
 
+  bool is_old_or_humongous() const { return (get() & (OldTag | HumMask)) != 0; }
+
+
   // Setters
   void set_free() { set(FreeTag); }
 
diff --git a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
index f05b4f177..9481dba10 100644
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
@@ -1551,22 +1551,25 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
     return false;
   }
   assert(prefix != NULL && prefix != BUSY, "Error");
-  size_t i = 1;
   oop cur = prefix;
-  while (i < objsFromOverflow && cur->klass_or_null() != NULL) {
-    i++; cur = cur->list_ptr_from_klass();
+  for (size_t i = 1; i < objsFromOverflow; ++i) {
+    oop next = cur->list_ptr_from_klass();
+    if (next == NULL) break;
+    cur = next;
   }
+  assert(cur != NULL, "Loop postcondition");
 
   // Reattach remaining (suffix) to overflow list
-  if (cur->klass_or_null() == NULL) {
+  oop suffix = cur->list_ptr_from_klass();
+  if (suffix == NULL) {
     // Write back the NULL in lieu of the BUSY we wrote
     // above and it is still the same value.
     if (_overflow_list == BUSY) {
       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
     }
   } else {
-    assert(cur->klass_or_null() != (Klass*)(address)BUSY, "Error");
-    oop suffix = cur->list_ptr_from_klass();       // suffix will be put back on global list
+    assert(suffix != BUSY, "Error");
+    // suffix will be put back on global list
     cur->set_klass_to_list_ptr(NULL);     // break off suffix
     // It's possible that the list is still in the empty(busy) state
     // we left it in a short while ago; in that case we may be
@@ -1586,8 +1589,10 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
       // Too bad, someone else got in in between; we'll need to do a splice.
       // Find the last item of suffix list
       oop last = suffix;
-      while (last->klass_or_null() != NULL) {
-        last = last->list_ptr_from_klass();
+      while (true) {
+        oop next = last->list_ptr_from_klass();
+        if (next == NULL) break;
+        last = next;
       }
       // Atomically prepend suffix to current overflow list
       observed_overflow_list = _overflow_list;
-- 
2.19.0
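On the parNewGeneration.cpp hunks above: the overflow list threads its links through the objects' klass words, and the rewritten loops now ask list_ptr_from_klass() for the next element and stop on NULL instead of re-reading klass_or_null(). Below is a small, hypothetical C++11 sketch of the same splice-back step (Node, list_ptr_from_header and prepend_suffix are invented names, a plain member stands in for the klass-encoded link, and the BUSY sentinel handling is omitted): find the last element of the suffix, then prepend it to the shared list with a CAS loop.

#include <atomic>
#include <cstddef>

// Hypothetical stand-in for an object whose "next" link is stashed in a
// header word while the object sits on an overflow list (in HotSpot the
// link is encoded in the klass field; here it is just a plain member).
struct Node {
  Node* list_ptr{nullptr};           // next node, or nullptr at the end of the list
  Node* list_ptr_from_header() const { return list_ptr; }
  void set_header_to_list_ptr(Node* n) { list_ptr = n; }
};

// Global overflow list head, updated lock-free.
static std::atomic<Node*> g_overflow_list{nullptr};

// Splice a privately held suffix back onto the global overflow list,
// mirroring the "find the last item, then prepend atomically" step in
// take_from_overflow_list_work().
void prepend_suffix(Node* suffix) {
  if (suffix == nullptr) return;

  // Find the last item of the suffix list; nullptr marks the end.
  Node* last = suffix;
  while (true) {
    Node* next = last->list_ptr_from_header();
    if (next == nullptr) break;
    last = next;
  }

  // Atomically prepend: point the tail at the observed head and try to
  // swing the head to the start of the suffix; retry on contention.
  Node* observed = g_overflow_list.load(std::memory_order_relaxed);
  do {
    last->set_header_to_list_ptr(observed);
  } while (!g_overflow_list.compare_exchange_weak(observed, suffix,
                                                  std::memory_order_release,
                                                  std::memory_order_relaxed));
}

The real code additionally has to cope with a BUSY sentinel that marks the list as claimed, but the find-last-then-CAS-retry shape is the same.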