From 822f93a00af8300b0516707c2fded59c7487b3ea Mon Sep 17 00:00:00 2001 Date: Mon, 14 Oct 2019 16:34:52 +0000 Subject: [PATCH] Backport of JDK-8160369 Summary::[Backport of JDK-8160369 and it's subtasks] Memory fences needed around setting and reading object lengths LLT: bug url: https://bugs.openjdk.java.net/browse/JDK-8160369 --- .../vm/gc_implementation/g1/g1BlockOffsetTable.cpp | 2 +- .../g1/g1BlockOffsetTable.inline.hpp | 4 +- .../src/share/vm/gc_implementation/g1/g1RemSet.cpp | 137 ++++++++++------- .../share/vm/gc_implementation/g1/heapRegion.cpp | 166 ++++++++++----------- .../share/vm/gc_implementation/g1/heapRegion.hpp | 26 ++-- .../vm/gc_implementation/g1/heapRegionType.hpp | 3 + .../gc_implementation/parNew/parNewGeneration.cpp | 21 ++- 7 files changed, 203 insertions(+), 156 deletions(-) diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp index ead98e24a0..1977fc83da 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp @@ -264,7 +264,7 @@ G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q, while (n <= next_boundary) { q = n; oop obj = oop(q); - if (obj->klass_or_null() == NULL) return q; + if (obj->klass_or_null_acquire() == NULL) return q; n += block_size(q); } assert(q <= next_boundary && n > next_boundary, "Consequence of loop"); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp index bcfd52a4a2..ffc56a0ba0 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp @@ -166,7 +166,7 @@ forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n, while (n <= addr) { q = n; oop obj = oop(q); - if (obj->klass_or_null() == NULL) return q; + if (obj->klass_or_null_acquire() == NULL) return q; n += block_size(q); } assert(q <= n, "wrong order for q and addr"); @@ -177,7 +177,7 @@ forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n, inline HeapWord* G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q, const void* addr) { - if (oop(q)->klass_or_null() == NULL) return q; + if (oop(q)->klass_or_null_acquire() == NULL) return q; HeapWord* n = q + block_size(q); // In the normal case, where the query "addr" is a card boundary, and the // offset table chunks are the same size as cards, the block starting at diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp index da4d632487..da417fb725 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp @@ -460,18 +460,26 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, // And find the region containing it. HeapRegion* r = _g1->heap_region_containing(start); - // Why do we have to check here whether a card is on a young region, - // given that we dirty young regions and, as a result, the - // post-barrier is supposed to filter them out and never to enqueue - // them? When we allocate a new region as the "allocation region" we - // actually dirty its cards after we release the lock, since card - // dirtying while holding the lock was a performance bottleneck. 
So, - // as a result, it is possible for other threads to actually - // allocate objects in the region (after the acquire the lock) - // before all the cards on the region are dirtied. This is unlikely, - // and it doesn't happen often, but it can happen. So, the extra - // check below filters out those cards. - if (r->is_young()) { + // This check is needed for some uncommon cases where we should + // ignore the card. + // + // The region could be young. Cards for young regions are + // distinctly marked (set to g1_young_gen), so the post-barrier will + // filter them out. However, that marking is performed + // concurrently. A write to a young object could occur before the + // card has been marked young, slipping past the filter. + // + // The card could be stale, because the region has been freed since + // the card was recorded. In this case the region type could be + // anything. If (still) free or (reallocated) young, just ignore + // it. If (reallocated) old or humongous, the later card trimming + // and additional checks in iteration may detect staleness. At + // worst, we end up processing a stale card unnecessarily. + // + // In the normal (non-stale) case, the synchronization between the + // enqueueing of the card and processing it here will have ensured + // we see the up-to-date region type here. + if (!r->is_old_or_humongous()) { return false; } @@ -503,26 +511,69 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, assert(!check_for_refs_into_cset, "sanity"); assert(!SafepointSynchronize::is_at_safepoint(), "sanity"); + const jbyte* orig_card_ptr = card_ptr; card_ptr = hot_card_cache->insert(card_ptr); if (card_ptr == NULL) { // There was no eviction. Nothing to do. return false; - } - - start = _ct_bs->addr_for(card_ptr); - r = _g1->heap_region_containing(start); + } else if (card_ptr != orig_card_ptr) { + // Original card was inserted and an old card was evicted. + start = _ct_bs->addr_for(card_ptr); + r = _g1->heap_region_containing(start); + + // Check whether the region formerly in the cache should be + // ignored, as discussed earlier for the original card. The + // region could have been freed while in the cache. The cset is + // not relevant here, since we're in concurrent phase. + if (!r->is_old_or_humongous()) { + return false; + } + } // Else we still have the original card. + } - // Checking whether the region we got back from the cache - // is young here is inappropriate. The region could have been - // freed, reallocated and tagged as young while in the cache. - // Hence we could see its young type change at any time. + // Trim the region designated by the card to what's been allocated + // in the region. The card could be stale, or the card could cover + // (part of) an object at the end of the allocated space and extend + // beyond the end of allocation. + HeapWord* scan_limit; + if (_g1->is_gc_active()) { + // If we're in a STW GC, then a card might be in a GC alloc region + // and extend onto a GC LAB, which may not be parsable. Stop such + // at the "scan_top" of the region. + scan_limit = r->scan_top(); + } else { + // Non-humongous objects are only allocated in the old-gen during + // GC, so if region is old then top is stable. Humongous object + // allocation sets top last; if top has not yet been set, this is + // a stale card and we'll end up with an empty intersection. If + // this is not a stale card, the synchronization between the + // enqueuing of the card and processing it here will have ensured + // we see the up-to-date top here. 
+ scan_limit = r->top(); + } + if (scan_limit <= start) { + // If the trimmed region is empty, the card must be stale. + return false; } + // Okay to clean and process the card now. There are still some + // stale card cases that may be detected by iteration and dealt with + // as iteration failure. + *const_cast(card_ptr) = CardTableModRefBS::clean_card_val(); + + // This fence serves two purposes. First, the card must be cleaned + // before processing the contents. Second, we can't proceed with + // processing until after the read of top, for synchronization with + // possibly concurrent humongous object allocation. It's okay that + // reading top and reading type were racy wrto each other. We need + // both set, in any order, to proceed. + OrderAccess::fence(); + // Don't use addr_for(card_ptr + 1) which can ask for - // a card beyond the heap. This is not safe without a perm - // gen at the upper end of the heap. - HeapWord* end = start + CardTableModRefBS::card_size_in_words; - MemRegion dirtyRegion(start, end); + // a card beyond the heap. + HeapWord* end = start + CardTableModRefBS::card_size_in_words; + MemRegion dirty_region(start, MIN2(scan_limit, end)); + assert(!dirty_region.is_empty(), "sanity"); #if CARD_REPEAT_HISTO init_ct_freq_table(_g1->max_capacity()); @@ -555,33 +606,17 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, (OopClosure*)&mux : (OopClosure*)&update_rs_oop_cl)); - // The region for the current card may be a young region. The - // current card may have been a card that was evicted from the - // card cache. When the card was inserted into the cache, we had - // determined that its region was non-young. While in the cache, - // the region may have been freed during a cleanup pause, reallocated - // and tagged as young. - // - // We wish to filter out cards for such a region but the current - // thread, if we're running concurrently, may "see" the young type - // change at any time (so an earlier "is_young" check may pass or - // fail arbitrarily). We tell the iteration code to perform this - // filtering when it has been determined that there has been an actual - // allocation in this region and making it safe to check the young type. - bool filter_young = true; - - HeapWord* stop_point = - r->oops_on_card_seq_iterate_careful(dirtyRegion, - &filter_then_update_rs_oop_cl, - filter_young, - card_ptr); - - // If stop_point is non-null, then we encountered an unallocated region - // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the - // card and re-enqueue: if we put off the card until a GC pause, then the - // unallocated portion will be filled in. Alternatively, we might try - // the full complexity of the technique used in "regular" precleaning. - if (stop_point != NULL) { + bool card_processed = + r->oops_on_card_seq_iterate_careful(dirty_region, + &filter_then_update_rs_oop_cl); + + // If unable to process the card then we encountered an unparsable + // part of the heap (e.g. a partially allocated object) while + // processing a stale card. Despite the card being stale, redirty + // and re-enqueue, because we've already cleaned the card. Without + // this we could incorrectly discard a non-stale card. + if (!card_processed) { + assert(!_g1->is_gc_active(), "Unparsable heap during GC"); // The card might have gotten re-dirtied and re-enqueued while we // worked. (In fact, it's pretty likely.) 
if (*card_ptr != CardTableModRefBS::dirty_card_val()) { diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp index eefa1c9499..5d1578a248 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp @@ -339,6 +339,50 @@ void HeapRegion::note_self_forwarding_removal_end(bool during_initial_mark, _prev_marked_bytes = marked_bytes; } +// Humongous objects are allocated directly in the old-gen. Need +// special handling for concurrent processing encountering an +// in-progress allocation. +static bool do_oops_on_card_in_humongous(MemRegion mr, + FilterOutOfRegionClosure* cl, + HeapRegion* hr, + G1CollectedHeap* g1h) { + assert(hr->isHumongous(), "precondition"); + HeapRegion* sr = hr->humongous_start_region(); + oop obj = oop(sr->bottom()); + + // If concurrent and klass_or_null is NULL, then space has been + // allocated but the object has not yet been published by setting + // the klass. That can only happen if the card is stale. However, + // we've already set the card clean, so we must return failure, + // since the allocating thread could have performed a write to the + // card that might be missed otherwise. + if (!g1h->is_gc_active() && (obj->klass_or_null_acquire() == NULL)) { + return false; + } + + // We have a well-formed humongous object at the start of sr. + // Only filler objects follow a humongous object in the containing + // regions, and we can ignore those. So only process the one + // humongous object. + if (!g1h->is_obj_dead(obj, sr)) { + if (obj->is_objArray() || (sr->bottom() < mr.start())) { + // objArrays are always marked precisely, so limit processing + // with mr. Non-objArrays might be precisely marked, and since + // it's humongous it's worthwhile avoiding full processing. + // However, the card could be stale and only cover filler + // objects. That should be rare, so not worth checking for; + // instead let it fall out from the bounded iteration. + obj->oop_iterate(cl, mr); + } else { + // If obj is not an objArray and mr contains the start of the + // obj, then this could be an imprecise mark, and we need to + // process the entire object. + obj->oop_iterate(cl); + } + } + return true; +} + HeapWord* HeapRegion::object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl) { @@ -363,106 +407,62 @@ HeapRegion::object_iterate_mem_careful(MemRegion mr, } else if (!g1h->is_obj_dead(obj)) { cl->do_object(obj); } - if (cl->abort()) return cur; - // The check above must occur before the operation below, since an - // abort might invalidate the "size" operation. cur += block_size(cur); } return NULL; } -HeapWord* -HeapRegion:: -oops_on_card_seq_iterate_careful(MemRegion mr, - FilterOutOfRegionClosure* cl, - bool filter_young, - jbyte* card_ptr) { - // Currently, we should only have to clean the card if filter_young - // is true and vice versa. - if (filter_young) { - assert(card_ptr != NULL, "pre-condition"); - } else { - assert(card_ptr == NULL, "pre-condition"); - } +bool HeapRegion::oops_on_card_seq_iterate_careful(MemRegion mr, + FilterOutOfRegionClosure* cl) { + assert(MemRegion(bottom(), end()).contains(mr), "Card region not in heap region"); G1CollectedHeap* g1h = G1CollectedHeap::heap(); - // If we're within a stop-world GC, then we might look at a card in a - // GC alloc region that extends onto a GC LAB, which may not be - // parseable. Stop such at the "scan_top" of the region. 
- if (g1h->is_gc_active()) { - mr = mr.intersection(MemRegion(bottom(), scan_top())); - } else { - mr = mr.intersection(used_region()); - } - if (mr.is_empty()) return NULL; - // Otherwise, find the obj that extends onto mr.start(). - - // The intersection of the incoming mr (for the card) and the - // allocated part of the region is non-empty. This implies that - // we have actually allocated into this region. The code in - // G1CollectedHeap.cpp that allocates a new region sets the - // is_young tag on the region before allocating. Thus we - // safely know if this region is young. - if (is_young() && filter_young) { - return NULL; + // Special handling for humongous regions. + if (isHumongous()) { + return do_oops_on_card_in_humongous(mr, cl, this, g1h); } + assert(is_old(), "precondition"); - assert(!is_young(), "check value of filter_young"); - - // We can only clean the card here, after we make the decision that - // the card is not young. And we only clean the card if we have been - // asked to (i.e., card_ptr != NULL). - if (card_ptr != NULL) { - *card_ptr = CardTableModRefBS::clean_card_val(); - // We must complete this write before we do any of the reads below. - OrderAccess::storeload(); - } + // Because mr has been trimmed to what's been allocated in this + // region, the parts of the heap that are examined here are always + // parsable; there's no need to use klass_or_null to detect + // in-progress allocation. // Cache the boundaries of the memory region in some const locals HeapWord* const start = mr.start(); HeapWord* const end = mr.end(); - // We used to use "block_start_careful" here. But we're actually happy - // to update the BOT while we do this... + // Find the obj that extends onto mr.start(). + // Update BOT as needed while finding start of (possibly dead) + // object containing the start of the region. HeapWord* cur = block_start(start); - assert(cur <= start, "Postcondition"); - - oop obj; - - HeapWord* next = cur; - do { - cur = next; - obj = oop(cur); - if (obj->klass_or_null() == NULL) { - // Ran into an unparseable point. - return cur; - } - // Otherwise... - next = cur + block_size(cur); - } while (next <= start); - // If we finish the above loop...We have a parseable object that - // begins on or before the start of the memory region, and ends - // inside or spans the entire region. - assert(cur <= start, "Loop postcondition"); - assert(obj->klass_or_null() != NULL, "Loop postcondition"); +#ifdef ASSERT + { + assert(cur <= start, + "cur: " PTR_FORMAT ", start: " PTR_FORMAT); + HeapWord* next = cur + block_size(cur); + assert(start < next, + "start: " PTR_FORMAT ", next: " PTR_FORMAT); + } +#endif do { - obj = oop(cur); - assert((cur + block_size(cur)) > (HeapWord*)obj, "Loop invariant"); - if (obj->klass_or_null() == NULL) { - // Ran into an unparseable point. - return cur; - } - - // Advance the current pointer. "obj" still points to the object to iterate. - cur = cur + block_size(cur); - - if (!g1h->is_obj_dead(obj)) { - // Non-objArrays are sometimes marked imprecise at the object start. We - // always need to iterate over them in full. - // We only iterate over object arrays in full if they are completely contained - // in the memory region. + oop obj = oop(cur); + assert(obj->is_oop(true), "Not an oop at " PTR_FORMAT); + assert(obj->klass_or_null() != NULL, + "Unparsable heap at " PTR_FORMAT); + + if (g1h->is_obj_dead(obj, this)) { + // Carefully step over dead object. 
+ cur += block_size(cur); + } else { + // Step over live object, and process its references. + cur += obj->size(); + // Non-objArrays are usually marked imprecise at the object + // start, in which case we need to iterate over them in full. + // objArrays are precisely marked, but can still be iterated + // over in full if completely covered. if (!obj->is_objArray() || (((HeapWord*)obj) >= start && cur <= end)) { obj->oop_iterate(cl); } else { @@ -471,7 +471,7 @@ oops_on_card_seq_iterate_careful(MemRegion mr, } } while (cur < end); - return NULL; + return true; } // Code roots support diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp index 76627e7ba4..a3f5e506a5 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp @@ -418,6 +418,8 @@ class HeapRegion: public G1OffsetTableContigSpace { bool is_old() const { return _type.is_old(); } + bool is_old_or_humongous() const { return _type.is_old_or_humongous(); } + // For a humongous region, region in which it starts. HeapRegion* humongous_start_region() const { return _humongous_start_region; @@ -702,7 +704,7 @@ class HeapRegion: public G1OffsetTableContigSpace { _next_marked_bytes = 0; } } - + // Requires that "mr" be entirely within the region. // Apply "cl->do_object" to all objects that intersect with "mr". // If the iteration encounters an unparseable portion of the region, @@ -714,16 +716,18 @@ class HeapRegion: public G1OffsetTableContigSpace { HeapWord* object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl); - // filter_young: if true and the region is a young region then we - // skip the iteration. - // card_ptr: if not NULL, and we decide that the card is not young - // and we iterate over it, we'll clean the card before we start the - // iteration. - HeapWord* - oops_on_card_seq_iterate_careful(MemRegion mr, - FilterOutOfRegionClosure* cl, - bool filter_young, - jbyte* card_ptr); + // Iterate over the objects overlapping part of a card, applying cl + // to all references in the region. This is a helper for + // G1RemSet::refine_card, and is tightly coupled with it. + // mr: the memory region covered by the card, trimmed to the + // allocated space for this region. Must not be empty. + // This region must be old or humongous. + // Returns true if the designated objects were successfully + // processed, false if an unparsable part of the heap was + // encountered; that only happens when invoked concurrently with the + // mutator. 
+ bool oops_on_card_seq_iterate_careful(MemRegion mr, + FilterOutOfRegionClosure* cl); size_t recorded_rs_length() const { return _recorded_rs_length; } double predicted_elapsed_time_ms() const { return _predicted_elapsed_time_ms; } diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp index b00590a6b7..3b9904c39b 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionType.hpp @@ -110,6 +110,9 @@ public: bool is_old() const { return get() == OldTag; } + bool is_old_or_humongous() const { return (get() & (OldTag | HumMask)) != 0; } + + // Setters void set_free() { set(FreeTag); } diff --git a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp index 67b0421ebf..2b9fb53293 100644 --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @@ -1551,22 +1551,25 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan return false; } assert(prefix != NULL && prefix != BUSY, "Error"); - size_t i = 1; oop cur = prefix; - while (i < objsFromOverflow && cur->klass_or_null() != NULL) { - i++; cur = cur->list_ptr_from_klass(); + for (size_t i = 1; i < objsFromOverflow; ++i) { + oop next = cur->list_ptr_from_klass(); + if (next == NULL) break; + cur = next; } + assert(cur != NULL, "Loop postcondition"); // Reattach remaining (suffix) to overflow list - if (cur->klass_or_null() == NULL) { + oop suffix = cur->list_ptr_from_klass(); + if (suffix == NULL) { // Write back the NULL in lieu of the BUSY we wrote // above and it is still the same value. if (_overflow_list == BUSY) { (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); } } else { - assert(cur->klass_or_null() != (Klass*)(address)BUSY, "Error"); - oop suffix = cur->list_ptr_from_klass(); // suffix will be put back on global list + assert(suffix != BUSY, "Error"); + // suffix will be put back on global list cur->set_klass_to_list_ptr(NULL); // break off suffix // It's possible that the list is still in the empty(busy) state // we left it in a short while ago; in that case we may be @@ -1586,8 +1589,10 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan // Too bad, someone else got in in between; we'll need to do a splice. // Find the last item of suffix list oop last = suffix; - while (last->klass_or_null() != NULL) { - last = last->list_ptr_from_klass(); + while (true) { + oop next = last->list_ptr_from_klass(); + if (next == NULL) break; + last = next; } // Atomically prepend suffix to current overflow list observed_overflow_list = _overflow_list; -- 2.12.3
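For readers of this backport, the ordering contract behind the klass_or_null_acquire() changes above can be restated outside HotSpot. The following is a minimal, self-contained C++ sketch of the publication pattern, not HotSpot code: the type and function names (FakeKlass, FakeObject, publish, try_block_size) are invented for illustration, and the object layout is deliberately simplified. The allocating thread writes the object's length first and then publishes the object with a release store of its klass; a concurrent refinement thread loads the klass with acquire semantics and only trusts the length once the klass is non-NULL, otherwise it treats the card as stale, mirroring what refine_card() and do_oops_on_card_in_humongous() do in the patch.

#include <atomic>
#include <cstddef>

struct FakeKlass {};                       // stand-in for a Klass*

struct FakeObject {
  std::atomic<FakeKlass*> klass{nullptr};  // NULL until the object is published
  std::size_t length_in_words{0};          // written before publication
};

// Allocating thread: write the length, then publish with a release store
// (the analogue of setting the klass last with release semantics).
void publish(FakeObject* obj, FakeKlass* k, std::size_t len) {
  obj->length_in_words = len;                      // plain store
  obj->klass.store(k, std::memory_order_release);  // makes the length visible
}

// Refinement thread: the analogue of klass_or_null_acquire() + block_size().
// Returns false if the object is not yet published (a "stale card" case);
// the length is read only after a non-NULL klass has been observed.
bool try_block_size(const FakeObject* obj, std::size_t* size_out) {
  FakeKlass* k = obj->klass.load(std::memory_order_acquire);
  if (k == nullptr) {
    return false;                    // allocation still in progress; bail out
  }
  *size_out = obj->length_in_words;  // ordered after the acquire load
  return true;
}

The OrderAccess::fence() added to refine_card() addresses the reader-side ordering this pattern alone does not cover: the store that cleans the card must be ordered before the subsequent loads that scan the card's contents, and the load of the region's top must also complete first, which calls for a full store-load fence rather than acquire/release semantics alone.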