From cf8d7d0cd7bac06db1d2493086e913e554f9b493 Mon Sep 17 00:00:00 2001 Subject: Backport 8069330 and adapt G1GC related optimizations --- .../g1/collectionSetChooser.cpp | 28 +- .../g1/collectionSetChooser.hpp | 17 +- .../g1/concurrentG1Refine.hpp | 2 + .../gc_implementation/g1/concurrentMark.cpp | 1074 +++-------------- .../gc_implementation/g1/concurrentMark.hpp | 200 +-- .../g1/concurrentMark.inline.hpp | 330 ++--- .../g1/concurrentMarkThread.cpp | 6 + .../vm/gc_implementation/g1/g1Allocator.cpp | 3 - .../g1/g1BlockOffsetTable.cpp | 74 +- .../g1/g1BlockOffsetTable.hpp | 8 +- .../g1/g1BlockOffsetTable.inline.hpp | 1 - .../gc_implementation/g1/g1CollectedHeap.cpp | 819 +++++-------- .../gc_implementation/g1/g1CollectedHeap.hpp | 43 +- .../g1/g1CollectedHeap.inline.hpp | 30 +- .../g1/g1CollectorPolicy.cpp | 78 +- .../g1/g1CollectorPolicy.hpp | 17 +- .../vm/gc_implementation/g1/g1ErgoVerbose.cpp | 1 + .../vm/gc_implementation/g1/g1ErgoVerbose.hpp | 1 + .../vm/gc_implementation/g1/g1EvacFailure.hpp | 12 +- .../vm/gc_implementation/g1/g1FullGCScope.cpp | 83 ++ .../vm/gc_implementation/g1/g1FullGCScope.hpp | 68 ++ .../gc_implementation/g1/g1GCPhaseTimes.cpp | 3 + .../gc_implementation/g1/g1GCPhaseTimes.hpp | 1 + .../vm/gc_implementation/g1/g1HRPrinter.cpp | 1 - .../vm/gc_implementation/g1/g1HRPrinter.hpp | 1 - .../gc_implementation/g1/g1HotCardCache.cpp | 23 +- .../gc_implementation/g1/g1HotCardCache.hpp | 9 +- .../vm/gc_implementation/g1/g1MarkSweep.cpp | 51 +- .../vm/gc_implementation/g1/g1MarkSweep.hpp | 4 +- .../vm/gc_implementation/g1/g1OopClosures.cpp | 17 +- .../vm/gc_implementation/g1/g1OopClosures.hpp | 150 +-- .../g1/g1OopClosures.inline.hpp | 272 ++--- .../g1/g1ParScanThreadState.cpp | 8 +- .../g1/g1ParScanThreadState.hpp | 2 +- .../g1/g1ParScanThreadState.inline.hpp | 7 +- .../g1/g1RegionMarkStatsCache.cpp | 65 + .../g1/g1RegionMarkStatsCache.hpp | 130 ++ .../g1/g1RegionMarkStatsCache.inline.hpp | 54 + .../vm/gc_implementation/g1/g1RemSet.cpp | 655 ++++++---- .../vm/gc_implementation/g1/g1RemSet.hpp | 72 +- .../gc_implementation/g1/g1RemSet.inline.hpp | 2 +- .../g1/g1RemSetTrackingPolicy.cpp | 110 ++ .../g1/g1RemSetTrackingPolicy.hpp | 59 + .../gc_implementation/g1/g1RootProcessor.cpp | 6 +- .../gc_implementation/g1/g1RootProcessor.hpp | 4 +- .../g1/g1SerialFullCollector.cpp | 168 +++ .../g1/g1SerialFullCollector.hpp | 49 + .../vm/gc_implementation/g1/g1StringDedup.cpp | 2 +- .../vm/gc_implementation/g1/g1_globals.hpp | 3 + .../g1/g1_specialized_oop_closures.hpp | 27 +- .../vm/gc_implementation/g1/heapRegion.cpp | 283 +---- .../vm/gc_implementation/g1/heapRegion.hpp | 136 +-- .../g1/heapRegion.inline.hpp | 166 ++- .../g1/heapRegionManager.cpp | 58 +- .../g1/heapRegionManager.hpp | 8 + .../g1/heapRegionManager.inline.hpp | 12 + .../gc_implementation/g1/heapRegionRemSet.cpp | 134 +- .../gc_implementation/g1/heapRegionRemSet.hpp | 86 +- .../vm/gc_implementation/g1/heapRegionSet.cpp | 1 - .../vm/gc_implementation/g1/satbQueue.cpp | 2 +- hotspot/src/share/vm/memory/allocation.hpp | 13 + .../src/share/vm/memory/allocation.inline.hpp | 46 + .../src/share/vm/memory/collectorPolicy.hpp | 2 + hotspot/src/share/vm/runtime/atomic.hpp | 3 + .../src/share/vm/runtime/atomic.inline.hpp | 16 + hotspot/src/share/vm/utilities/bitMap.cpp | 4 + hotspot/src/share/vm/utilities/bitMap.hpp | 12 +- hotspot/test/gc/g1/Test2GbHeap.java | 3 + hotspot/test/gc/g1/TestGCLogMessages.java | 4 +- 69 files changed, 2847 insertions(+), 2992 deletions(-) create mode 100644 
hotspot/src/share/vm/gc_implementation/g1/g1FullGCScope.cpp create mode 100644 hotspot/src/share/vm/gc_implementation/g1/g1FullGCScope.hpp create mode 100644 hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.cpp create mode 100644 hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.hpp create mode 100644 hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.inline.hpp create mode 100644 hotspot/src/share/vm/gc_implementation/g1/g1RemSetTrackingPolicy.cpp create mode 100644 hotspot/src/share/vm/gc_implementation/g1/g1RemSetTrackingPolicy.hpp create mode 100644 hotspot/src/share/vm/gc_implementation/g1/g1SerialFullCollector.cpp create mode 100644 hotspot/src/share/vm/gc_implementation/g1/g1SerialFullCollector.hpp diff --git a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp index ff1b6f0cd..a1bbd9d48 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "gc_implementation/g1/collectionSetChooser.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/heapRegionRemSet.hpp" #include "gc_implementation/g1/g1CollectorPolicy.hpp" #include "gc_implementation/g1/g1ErgoVerbose.hpp" #include "memory/space.inline.hpp" @@ -84,8 +85,7 @@ CollectionSetChooser::CollectionSetChooser() : 100), true /* C_Heap */), _curr_index(0), _length(0), _first_par_unreserved_idx(0), _region_live_threshold_bytes(0), _remaining_reclaimable_bytes(0) { - _region_live_threshold_bytes = - HeapRegion::GrainBytes * (size_t) G1MixedGCLiveThresholdPercent / 100; + _region_live_threshold_bytes = mixed_gc_live_threshold_bytes(); } #ifndef PRODUCT @@ -151,6 +151,8 @@ void CollectionSetChooser::add_region(HeapRegion* hr) { assert(!hr->isHumongous(), "Humongous regions shouldn't be added to the collection set"); assert(!hr->is_young(), "should not be young!"); + assert(hr->rem_set()->is_complete(), + err_msg("Trying to add region %u to the collection set with incomplete remembered set", hr->hrm_index())); _regions.append(hr); _length++; _remaining_reclaimable_bytes += hr->reclaimable_bytes(); @@ -208,9 +210,31 @@ void CollectionSetChooser::update_totals(uint region_num, } } +void CollectionSetChooser::iterate(HeapRegionClosure* cl) { + for (uint i = _curr_index; i < _length; i++) { + HeapRegion* r = regions_at(i); + if (cl->doHeapRegion(r)) { + cl->incomplete(); + break; + } + } +} + void CollectionSetChooser::clear() { _regions.clear(); _curr_index = 0; _length = 0; _remaining_reclaimable_bytes = 0; }; + +bool CollectionSetChooser::region_occupancy_low_enough_for_evac(size_t live_bytes) { + return live_bytes < mixed_gc_live_threshold_bytes(); +} + +bool CollectionSetChooser::should_add(HeapRegion* hr) const { + assert(hr->is_marked(), "pre-condition"); + assert(!hr->is_young(), "should never consider young regions"); + return !hr->isHumongous() && + region_occupancy_low_enough_for_evac(hr->live_bytes()) && + hr->rem_set()->is_complete(); +} \ No newline at end of file diff --git a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp index c36852e0c..7bf056cd9 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp @@ -102,16 +102,18 @@ public: 
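For reference, the collectionSetChooser change above ties mixed-GC candidacy to two conditions: live bytes below the G1MixedGCLiveThresholdPercent cut-off, and a complete remembered set. A standalone sketch of the arithmetic involved (the 8M region size and the 65 percent default are assumptions for illustration only; HeapRegion::GrainBytes is chosen ergonomically from the heap size):

    #include <cstddef>
    #include <cstdio>

    // Assumed values, for illustration only.
    static const size_t kGrainBytes = 8u * 1024 * 1024;     // stand-in for HeapRegion::GrainBytes
    static const size_t kLiveThresholdPercent = 65;         // stand-in for G1MixedGCLiveThresholdPercent

    static size_t mixed_gc_live_threshold_bytes() {
      return kGrainBytes * kLiveThresholdPercent / 100;     // 5452595 bytes for 8M regions
    }

    // Mirrors the shape of CollectionSetChooser::should_add(): not humongous,
    // live data under the threshold, and a complete remembered set.
    static bool should_add(size_t live_bytes, bool is_humongous, bool remset_complete) {
      return !is_humongous
          && live_bytes < mixed_gc_live_threshold_bytes()
          && remset_complete;
    }

    int main() {
      printf("%d\n", should_add(2u * 1024 * 1024, false, true));   // 1: candidate
      printf("%d\n", should_add(6u * 1024 * 1024, false, true));   // 0: too much live data
      printf("%d\n", should_add(2u * 1024 * 1024, false, false));  // 0: remembered set incomplete
      return 0;
    }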
void sort_regions(); + static size_t mixed_gc_live_threshold_bytes() { + return HeapRegion::GrainBytes * (size_t) G1MixedGCLiveThresholdPercent / 100; + } + + static bool region_occupancy_low_enough_for_evac(size_t live_bytes); + // Determine whether to add the given region to the CSet chooser or // not. Currently, we skip humongous regions (we never add them to // the CSet, we only reclaim them during cleanup) and regions whose // live bytes are over the threshold. - bool should_add(HeapRegion* hr) { - assert(hr->is_marked(), "pre-condition"); - assert(!hr->is_young(), "should never consider young regions"); - return !hr->isHumongous() && - hr->live_bytes() < _region_live_threshold_bytes; - } + // Regions also need a complete remembered set to be a candidate. + bool should_add(HeapRegion* hr) const ; // Returns the number candidate old regions added uint length() { return _length; } @@ -133,6 +135,9 @@ public: // and the amount of reclaimable bytes by reclaimable_bytes. void update_totals(uint region_num, size_t reclaimable_bytes); + // Iterate over all collection set candidate regions. + void iterate(HeapRegionClosure* cl); + void clear(); // Return the number of candidate regions that remain to be collected. diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp index 7f7716381..94907a6d9 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp @@ -107,6 +107,8 @@ class ConcurrentG1Refine: public CHeapObj { int thread_threshold_step() const { return _thread_threshold_step; } G1HotCardCache* hot_card_cache() { return &_hot_card_cache; } + + static bool hot_card_cache_enabled() { return G1HotCardCache::default_use_cache(); } }; #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTG1REFINE_HPP diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp index 34c0684fc..457002859 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp @@ -30,6 +30,7 @@ #include "gc_implementation/g1/concurrentMarkThread.inline.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1CollectorPolicy.hpp" +#include "gc_implementation/g1/g1RegionMarkStatsCache.inline.hpp" #include "gc_implementation/g1/g1ErgoVerbose.hpp" #include "gc_implementation/g1/g1Log.hpp" #include "gc_implementation/g1/g1OopClosures.inline.hpp" @@ -530,17 +531,13 @@ ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev _cleanup_sleep_factor(0.0), _cleanup_task_overhead(1.0), _cleanup_list("Cleanup List"), - _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/), - _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >> - CardTableModRefBS::card_shift, - false /* in_resource_area*/), _prevMarkBitMap(&_markBitMap1), _nextMarkBitMap(&_markBitMap2), _markStack(this), // _finger set in set_non_marking_state - + _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads), _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)), // _active_tasks set in set_non_marking_state // _tasks set inside the constructor @@ -560,12 +557,10 @@ ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 
_cleanup_times(), _total_counting_time(0.0), - _total_rs_scrub_time(0.0), _parallel_workers(NULL), - - _count_card_bitmaps(NULL), - _count_marked_bytes(NULL), + _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)), + _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC)), _completed_initialization(false) { CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel; if (verbose_level < no_verbose) { @@ -718,40 +713,19 @@ ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC); _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); - _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC); - _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC); - - BitMap::idx_t card_bm_size = _card_bm.size(); - // so that the assertion in MarkingTaskQueue::task_queue doesn't fail _active_tasks = _max_worker_id; - size_t max_regions = (size_t) _g1h->max_regions(); for (uint i = 0; i < _max_worker_id; ++i) { CMTaskQueue* task_queue = new CMTaskQueue(); task_queue->initialize(); _task_queues->register_queue(i, task_queue); - _count_card_bitmaps[i] = BitMap(card_bm_size, false); - _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC); - - _tasks[i] = new CMTask(i, this, - _count_marked_bytes[i], - &_count_card_bitmaps[i], - task_queue, _task_queues); + _tasks[i] = new CMTask(i, this, task_queue, _task_queues, _region_mark_stats, _g1h->max_regions()); _accum_task_vtime[i] = 0.0; } - // Calculate the card number for the bottom of the heap. Used - // in biasing indexes into the accounting card bitmaps. - _heap_bottom_card_num = - intptr_t(uintptr_t(_g1h->reserved_region().start()) >> - CardTableModRefBS::card_shift); - - // Clear all the liveness counting data - clear_all_count_data(); - // so that the call below can read a sensible value _heap_start = g1h->reserved_region().start(); set_non_marking_state(); @@ -777,18 +751,51 @@ void ConcurrentMark::reset() { gclog_or_tty->print_cr("[global] resetting"); } - // We do reset all of them, since different phases will use - // different number of active threads. So, it's easiest to have all - // of them ready. + // Reset all tasks, since different phases will use different number of active + // threads. So, it's easiest to have all of them ready. for (uint i = 0; i < _max_worker_id; ++i) { _tasks[i]->reset(_nextMarkBitMap); } + uint max_regions = _g1h->max_regions(); + for (uint i = 0; i < max_regions; i++) { + _top_at_rebuild_starts[i] = NULL; + _region_mark_stats[i].clear(); + } + // we need this to make sure that the flag is on during the evac // pause with initial mark piggy-backed set_concurrent_marking_in_progress(); } +void ConcurrentMark::clear_statistics_in_region(uint region_idx) { + for (uint j = 0; j < _max_worker_id; ++j) { + _tasks[j]->clear_mark_stats_cache(region_idx); + } + _top_at_rebuild_starts[region_idx] = NULL; + _region_mark_stats[region_idx].clear(); +} + +void ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) { + assert(SafepointSynchronize::is_at_safepoint(), "May only be called at a safepoint."); + // Need to clear mark bit of the humongous object if already set and during a marking cycle. + if (_nextMarkBitMap->isMarked(r->bottom())) { + _nextMarkBitMap->clear(r->bottom()); + } + + // Clear any statistics about the region gathered so far. 
+ uint const region_idx = r->hrm_index(); + if (r->isHumongous()) { + assert(r->startsHumongous(), "Got humongous continues region here"); + uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size()); + for (uint j = region_idx; j < (region_idx + size_in_regions); j++) { + clear_statistics_in_region(j); + } + } else { + clear_statistics_in_region(region_idx); + } +} + void ConcurrentMark::reset_marking_state(bool clear_overflow) { _markStack.setEmpty(); // Also clears the _markStack overflow flag @@ -796,6 +803,11 @@ void ConcurrentMark::reset_marking_state(bool clear_overflow) { // Expand the marking stack, if we have to and if we can. if (has_overflown()) { _markStack.expand(); + + uint max_regions = _g1h->max_regions(); + for (uint i = 0; i < max_regions; i++) { + _region_mark_stats[i].clear_during_overflow(); + } } if (clear_overflow) { @@ -852,6 +864,8 @@ void ConcurrentMark::set_non_marking_state() { } ConcurrentMark::~ConcurrentMark() { + FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts, mtGC); + FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats, mtGC); // The ConcurrentMark instance is never freed. ShouldNotReachHere(); } @@ -872,12 +886,6 @@ void ConcurrentMark::clearNextBitmap() { ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */); g1h->heap_region_iterate(&cl); - // Clear the liveness counting data. If the marking has been aborted, the abort() - // call already did that. - if (cl.complete()) { - clear_all_count_data(); - } - // Repeat the asserts from above. guarantee(cmThread()->during_cycle(), "invariant"); guarantee(!g1h->mark_in_progress(), "invariant"); @@ -894,12 +902,8 @@ class CheckBitmapClearHRClosure : public HeapRegionClosure { // This closure can be called concurrently to the mutator, so we must make sure // that the result of the getNextMarkedWordAddress() call is compared to the // value passed to it as limit to detect any found bits. - // We can use the region's orig_end() for the limit and the comparison value - // as it always contains the "real" end of the region that never changes and - // has no side effects. - // Due to the latter, there can also be no problem with the compiler generating - // reloads of the orig_end() call. - HeapWord* end = r->orig_end(); + // end never changes in G1. + HeapWord* end = r->end(); return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end; } }; @@ -913,9 +917,7 @@ bool ConcurrentMark::nextMarkBitmapIsClear() { class NoteStartOfMarkHRClosure: public HeapRegionClosure { public: bool doHeapRegion(HeapRegion* r) { - if (!r->continuesHumongous()) { - r->note_start_of_marking(); - } + r->note_start_of_marking(); return false; } }; @@ -1025,32 +1027,7 @@ void ConcurrentMark::enter_first_sync_barrier(uint worker_id) { return; } - // If we're executing the concurrent phase of marking, reset the marking - // state; otherwise the marking state is reset after reference processing, - // during the remark pause. - // If we reset here as a result of an overflow during the remark we will - // see assertion failures from any subsequent set_concurrency_and_phase() - // calls. - if (concurrent()) { - // let the task associated with with worker 0 do this - if (worker_id == 0) { - // task 0 is responsible for clearing the global data structures - // We should be here because of an overflow. 
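The humongous_object_eagerly_reclaimed() hook above clears the per-region statistics of every region spanned by the reclaimed object. The span bound comes from humongous_obj_size_in_regions(), which is plain ceiling division by the region size; a standalone sketch of that arithmetic (the region size in words is an assumed example value):

    #include <cstddef>
    #include <cstdio>

    static const size_t kRegionWords = 1u * 1024 * 1024;  // assumed region size in words

    // Number of regions covered by a humongous object of word_size words,
    // i.e. the bound of the clear_statistics_in_region() loop above.
    static size_t humongous_obj_size_in_regions(size_t word_size) {
      return (word_size + kRegionWords - 1) / kRegionWords;  // ceiling division
    }

    int main() {
      printf("%zu\n", humongous_obj_size_in_regions(kRegionWords / 2));  // 1
      printf("%zu\n", humongous_obj_size_in_regions(kRegionWords + 1));  // 2
      printf("%zu\n", humongous_obj_size_in_regions(3 * kRegionWords));  // 3
      return 0;
    }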
During STW we should - // not clear the overflow flag since we rely on it being true when - // we exit this method to abort the pause and restart concurent - // marking. - reset_marking_state(true /* clear_overflow */); - force_overflow()->update(); - - if (G1Log::fine()) { - gclog_or_tty->gclog_stamp(concurrent_gc_id()); - gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); - } - } - } - // after this, each task should reset its own data structures then - // then go into the second barrier } void ConcurrentMark::enter_second_sync_barrier(uint worker_id) { @@ -1135,7 +1112,7 @@ public: double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; _cm->clear_has_overflown(); - _cm->do_yield_check(worker_id); + _cm->do_yield_check(); jlong sleep_time_ms; if (!_cm->has_aborted() && the_task->has_aborted()) { @@ -1289,6 +1266,46 @@ void ConcurrentMark::markFromRoots() { print_stats(); } +class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure { + G1CollectedHeap* _g1h; + ConcurrentMark* _cm; + + uint _num_regions_selected_for_rebuild; // The number of regions actually selected for rebuild. + + void update_remset_before_rebuild(HeapRegion * hr) { + G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker(); + + size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize; + bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes); + if (selected_for_rebuild) { + _num_regions_selected_for_rebuild++; + } + _cm->update_top_at_rebuild_start(hr); + } + + public: + G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, ConcurrentMark* cm) : + _g1h(g1h), _cm(cm), _num_regions_selected_for_rebuild(0) { } + + virtual bool doHeapRegion(HeapRegion* r) { + update_remset_before_rebuild(r); + return false; + } + + uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; } +}; + +class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure { + G1CollectedHeap* _g1h; + public: + G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { } + + virtual bool doHeapRegion(HeapRegion* r) { + _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r); + return false; + } +}; + void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { // world is stopped at this checkpoint assert(SafepointSynchronize::is_at_safepoint(), @@ -1308,7 +1325,7 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { HandleMark hm; // handle scope Universe::heap()->prepare_for_verify(); Universe::verify(VerifyOption_G1UsePrevMarking, - " VerifyDuringGC:(before)"); + " VerifyDuringGC:(Remark before)"); } g1h->check_bitmaps("Remark Start"); @@ -1335,17 +1352,13 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { HandleMark hm; // handle scope Universe::heap()->prepare_for_verify(); Universe::verify(VerifyOption_G1UsePrevMarking, - " VerifyDuringGC:(overflow)"); + " VerifyDuringGC:(Remark overflow)"); } // Clear the marking state because we will be restarting // marking due to overflowing the global mark stack. reset_marking_state(); } else { - // Aggregate the per-task counting data that we have accumulated - // while marking. - aggregate_count_data(); - SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); // We're done with marking. 
// This is the end of the marking cycle, we're expected all @@ -1353,12 +1366,27 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { satb_mq_set.set_active_all_threads(false, /* new active value */ true /* expected_active */); + { + GCTraceTime t("Flush Task Caches", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id()); + flush_all_task_caches(); + } + + { + GCTraceTime t("Update Remembered Set Tracking Before Rebuild", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id()); + G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this); + g1h->heap_region_iterate(&cl); + if (verbose_low()) { + gclog_or_tty->print_cr("Remembered Set Tracking update regions total %u, selected %u", + _g1h->num_regions(), cl.num_selected_for_rebuild()); + } + } + g1h->shrink_heap_at_remark(); if (VerifyDuringGC) { HandleMark hm; // handle scope Universe::heap()->prepare_for_verify(); Universe::verify(VerifyOption_G1UseNextMarking, - " VerifyDuringGC:(after)"); + " VerifyDuringGC:(Remark after)"); } g1h->check_bitmaps("Remark End"); assert(!restart_for_overflow(), "sanity"); @@ -1378,457 +1406,6 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive); } -// Base class of the closures that finalize and verify the -// liveness counting data. -class CMCountDataClosureBase: public HeapRegionClosure { -protected: - G1CollectedHeap* _g1h; - ConcurrentMark* _cm; - CardTableModRefBS* _ct_bs; - - BitMap* _region_bm; - BitMap* _card_bm; - - // Takes a region that's not empty (i.e., it has at least one - // live object in it and sets its corresponding bit on the region - // bitmap to 1. If the region is "starts humongous" it will also set - // to 1 the bits on the region bitmap that correspond to its - // associated "continues humongous" regions. - void set_bit_for_region(HeapRegion* hr) { - assert(!hr->continuesHumongous(), "should have filtered those out"); - - BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index(); - if (!hr->startsHumongous()) { - // Normal (non-humongous) case: just set the bit. - _region_bm->par_at_put(index, true); - } else { - // Starts humongous case: calculate how many regions are part of - // this humongous region and then set the bit range. - BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index(); - _region_bm->par_at_put_range(index, end_index, true); - } - } - -public: - CMCountDataClosureBase(G1CollectedHeap* g1h, - BitMap* region_bm, BitMap* card_bm): - _g1h(g1h), _cm(g1h->concurrent_mark()), - _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), - _region_bm(region_bm), _card_bm(card_bm) { } -}; - -// Closure that calculates the # live objects per region. Used -// for verification purposes during the cleanup pause. -class CalcLiveObjectsClosure: public CMCountDataClosureBase { - CMBitMapRO* _bm; - size_t _region_marked_bytes; - -public: - CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h, - BitMap* region_bm, BitMap* card_bm) : - CMCountDataClosureBase(g1h, region_bm, card_bm), - _bm(bm), _region_marked_bytes(0) { } - - bool doHeapRegion(HeapRegion* hr) { - - if (hr->continuesHumongous()) { - // We will ignore these here and process them when their - // associated "starts humongous" region is processed (see - // set_bit_for_heap_region()). 
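The "Update Remembered Set Tracking Before Rebuild" step above walks every region once during the remark pause and asks the tracking policy whether that region's remembered set should be rebuilt concurrently afterwards. The real decision logic lives in the new g1RemSetTrackingPolicy.cpp (listed in the diffstat but not shown in this excerpt); the following standalone sketch only illustrates the shape of that decision, with field names and exact conditions as assumptions:

    #include <cstddef>

    // Simplified stand-in for the fields the decision looks at.
    struct HeapRegion {
      size_t live_bytes;      // liveness measured by concurrent marking
      bool   remset_tracked;  // remembered set currently maintained and complete?
      bool   is_free;
    };

    // Sketch of G1RemSetTrackingPolicy::update_before_rebuild(): select regions
    // worth turning into collection set candidates. Returns true if the region
    // was selected for rebuild, matching the counter kept by
    // G1UpdateRemSetTrackingBeforeRebuild above.
    static bool update_before_rebuild(HeapRegion* r, size_t live_bytes,
                                      size_t candidate_threshold_bytes) {
      r->live_bytes = live_bytes;
      if (r->is_free || r->remset_tracked) {
        return false;  // empty, or the remembered set is already complete
      }
      if (live_bytes < candidate_threshold_bytes) {
        r->remset_tracked = true;  // start tracking; the rebuild fills it in
        return true;
      }
      return false;    // too much live data to ever be a cheap candidate
    }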
Note that we cannot rely on their - // associated "starts humongous" region to have their bit set to - // 1 since, due to the region chunking in the parallel region - // iteration, a "continues humongous" region might be visited - // before its associated "starts humongous". - return false; - } - - HeapWord* ntams = hr->next_top_at_mark_start(); - HeapWord* start = hr->bottom(); - - assert(start <= hr->end() && start <= ntams && ntams <= hr->end(), - err_msg("Preconditions not met - " - "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT, - p2i(start), p2i(ntams), p2i(hr->end()))); - - // Find the first marked object at or after "start". - start = _bm->getNextMarkedWordAddress(start, ntams); - - size_t marked_bytes = 0; - - while (start < ntams) { - oop obj = oop(start); - int obj_sz = obj->size(); - HeapWord* obj_end = start + obj_sz; - - BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); - BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end); - - // Note: if we're looking at the last region in heap - obj_end - // could be actually just beyond the end of the heap; end_idx - // will then correspond to a (non-existent) card that is also - // just beyond the heap. - if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) { - // end of object is not card aligned - increment to cover - // all the cards spanned by the object - end_idx += 1; - } - - // Set the bits in the card BM for the cards spanned by this object. - _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); - - // Add the size of this object to the number of marked bytes. - marked_bytes += (size_t)obj_sz * HeapWordSize; - - // Find the next marked object after this one. - start = _bm->getNextMarkedWordAddress(obj_end, ntams); - } - - // Mark the allocated-since-marking portion... - HeapWord* top = hr->top(); - if (ntams < top) { - BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); - BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); - - // Note: if we're looking at the last region in heap - top - // could be actually just beyond the end of the heap; end_idx - // will then correspond to a (non-existent) card that is also - // just beyond the heap. - if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { - // end of object is not card aligned - increment to cover - // all the cards spanned by the object - end_idx += 1; - } - _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); - - // This definitely means the region has live objects. - set_bit_for_region(hr); - } - - // Update the live region bitmap. - if (marked_bytes > 0) { - set_bit_for_region(hr); - } - - // Set the marked bytes for the current region so that - // it can be queried by a calling verificiation routine - _region_marked_bytes = marked_bytes; - - return false; - } - - size_t region_marked_bytes() const { return _region_marked_bytes; } -}; - -// Heap region closure used for verifying the counting data -// that was accumulated concurrently and aggregated during -// the remark pause. This closure is applied to the heap -// regions during the STW cleanup pause. - -class VerifyLiveObjectDataHRClosure: public HeapRegionClosure { - G1CollectedHeap* _g1h; - ConcurrentMark* _cm; - CalcLiveObjectsClosure _calc_cl; - BitMap* _region_bm; // Region BM to be verified - BitMap* _card_bm; // Card BM to be verified - bool _verbose; // verbose output? 
- - BitMap* _exp_region_bm; // Expected Region BM values - BitMap* _exp_card_bm; // Expected card BM values - - int _failures; - -public: - VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h, - BitMap* region_bm, - BitMap* card_bm, - BitMap* exp_region_bm, - BitMap* exp_card_bm, - bool verbose) : - _g1h(g1h), _cm(g1h->concurrent_mark()), - _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm), - _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose), - _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm), - _failures(0) { } - - int failures() const { return _failures; } - - bool doHeapRegion(HeapRegion* hr) { - if (hr->continuesHumongous()) { - // We will ignore these here and process them when their - // associated "starts humongous" region is processed (see - // set_bit_for_heap_region()). Note that we cannot rely on their - // associated "starts humongous" region to have their bit set to - // 1 since, due to the region chunking in the parallel region - // iteration, a "continues humongous" region might be visited - // before its associated "starts humongous". - return false; - } - - int failures = 0; - - // Call the CalcLiveObjectsClosure to walk the marking bitmap for - // this region and set the corresponding bits in the expected region - // and card bitmaps. - bool res = _calc_cl.doHeapRegion(hr); - assert(res == false, "should be continuing"); - - MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL), - Mutex::_no_safepoint_check_flag); - - // Verify the marked bytes for this region. - size_t exp_marked_bytes = _calc_cl.region_marked_bytes(); - size_t act_marked_bytes = hr->next_marked_bytes(); - - // We're not OK if expected marked bytes > actual marked bytes. It means - // we have missed accounting some objects during the actual marking. - if (exp_marked_bytes > act_marked_bytes) { - if (_verbose) { - gclog_or_tty->print_cr("Region %u: marked bytes mismatch: " - "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT, - hr->hrm_index(), exp_marked_bytes, act_marked_bytes); - } - failures += 1; - } - - // Verify the bit, for this region, in the actual and expected - // (which was just calculated) region bit maps. - // We're not OK if the bit in the calculated expected region - // bitmap is set and the bit in the actual region bitmap is not. - BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index(); - - bool expected = _exp_region_bm->at(index); - bool actual = _region_bm->at(index); - if (expected && !actual) { - if (_verbose) { - gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " - "expected: %s, actual: %s", - hr->hrm_index(), - BOOL_TO_STR(expected), BOOL_TO_STR(actual)); - } - failures += 1; - } - - // Verify that the card bit maps for the cards spanned by the current - // region match. We have an error if we have a set bit in the expected - // bit map and the corresponding bit in the actual bitmap is not set. 
- - BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); - BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); - - for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { - expected = _exp_card_bm->at(i); - actual = _card_bm->at(i); - - if (expected && !actual) { - if (_verbose) { - gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " - "expected: %s, actual: %s", - hr->hrm_index(), i, - BOOL_TO_STR(expected), BOOL_TO_STR(actual)); - } - failures += 1; - } - } - - if (failures > 0 && _verbose) { - gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " - "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, - HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()), - _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); - } - - _failures += failures; - - // We could stop iteration over the heap when we - // find the first violating region by returning true. - return false; - } -}; - -class G1ParVerifyFinalCountTask: public AbstractGangTask { -protected: - G1CollectedHeap* _g1h; - ConcurrentMark* _cm; - BitMap* _actual_region_bm; - BitMap* _actual_card_bm; - - uint _n_workers; - - BitMap* _expected_region_bm; - BitMap* _expected_card_bm; - - int _failures; - bool _verbose; - - HeapRegionClaimer _hrclaimer; - -public: - G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, - BitMap* region_bm, BitMap* card_bm, - BitMap* expected_region_bm, BitMap* expected_card_bm) - : AbstractGangTask("G1 verify final counting"), - _g1h(g1h), _cm(_g1h->concurrent_mark()), - _actual_region_bm(region_bm), _actual_card_bm(card_bm), - _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), - _failures(0), _verbose(false), - _n_workers(0) { - assert(VerifyDuringGC, "don't call this otherwise"); - - // Use the value already set as the number of active threads - // in the call to run_task(). - if (G1CollectedHeap::use_parallel_gc_threads()) { - assert( _g1h->workers()->active_workers() > 0, - "Should have been previously set"); - _n_workers = _g1h->workers()->active_workers(); - } else { - _n_workers = 1; - } - _hrclaimer.set_workers(_n_workers); - - assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); - assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); - - _verbose = _cm->verbose_medium(); - } - - void work(uint worker_id) { - assert(worker_id < _n_workers, "invariant"); - - VerifyLiveObjectDataHRClosure verify_cl(_g1h, - _actual_region_bm, _actual_card_bm, - _expected_region_bm, - _expected_card_bm, - _verbose); - - if (G1CollectedHeap::use_parallel_gc_threads()) { - _g1h->heap_region_par_iterate_chunked(&verify_cl, - worker_id, - &_hrclaimer); - } else { - _g1h->heap_region_iterate(&verify_cl); - } - - Atomic::add(verify_cl.failures(), &_failures); - } - - int failures() const { return _failures; } -}; - -// Closure that finalizes the liveness counting data. -// Used during the cleanup pause. -// Sets the bits corresponding to the interval [NTAMS, top] -// (which contains the implicitly live objects) in the -// card liveness bitmap. Also sets the bit for each region, -// containing live data, in the region liveness bitmap. 
- -class FinalCountDataUpdateClosure: public CMCountDataClosureBase { - public: - FinalCountDataUpdateClosure(G1CollectedHeap* g1h, - BitMap* region_bm, - BitMap* card_bm) : - CMCountDataClosureBase(g1h, region_bm, card_bm) { } - - bool doHeapRegion(HeapRegion* hr) { - - if (hr->continuesHumongous()) { - // We will ignore these here and process them when their - // associated "starts humongous" region is processed (see - // set_bit_for_heap_region()). Note that we cannot rely on their - // associated "starts humongous" region to have their bit set to - // 1 since, due to the region chunking in the parallel region - // iteration, a "continues humongous" region might be visited - // before its associated "starts humongous". - return false; - } - - HeapWord* ntams = hr->next_top_at_mark_start(); - HeapWord* top = hr->top(); - - assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); - - // Mark the allocated-since-marking portion... - if (ntams < top) { - // This definitely means the region has live objects. - set_bit_for_region(hr); - - // Now set the bits in the card bitmap for [ntams, top) - BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); - BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); - - // Note: if we're looking at the last region in heap - top - // could be actually just beyond the end of the heap; end_idx - // will then correspond to a (non-existent) card that is also - // just beyond the heap. - if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { - // end of object is not card aligned - increment to cover - // all the cards spanned by the object - end_idx += 1; - } - - assert(end_idx <= _card_bm->size(), - err_msg("oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, - end_idx, _card_bm->size())); - assert(start_idx < _card_bm->size(), - err_msg("oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, - start_idx, _card_bm->size())); - - _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); - } - - // Set the bit for the region if it contains live data - if (hr->next_marked_bytes() > 0) { - set_bit_for_region(hr); - } - - return false; - } -}; - -class G1ParFinalCountTask: public AbstractGangTask { -protected: - G1CollectedHeap* _g1h; - ConcurrentMark* _cm; - BitMap* _actual_region_bm; - BitMap* _actual_card_bm; - - uint _n_workers; - HeapRegionClaimer _hrclaimer; - -public: - G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) - : AbstractGangTask("G1 final counting"), - _g1h(g1h), _cm(_g1h->concurrent_mark()), - _actual_region_bm(region_bm), _actual_card_bm(card_bm), - _n_workers(0) { - // Use the value already set as the number of active threads - // in the call to run_task(). 
- if (G1CollectedHeap::use_parallel_gc_threads()) { - assert( _g1h->workers()->active_workers() > 0, - "Should have been previously set"); - _n_workers = _g1h->workers()->active_workers(); - } else { - _n_workers = 1; - } - _hrclaimer.set_workers(_n_workers); - } - - void work(uint worker_id) { - assert(worker_id < _n_workers, "invariant"); - - FinalCountDataUpdateClosure final_update_cl(_g1h, - _actual_region_bm, - _actual_card_bm); - - if (G1CollectedHeap::use_parallel_gc_threads()) { - _g1h->heap_region_par_iterate_chunked(&final_update_cl, - worker_id, - &_hrclaimer); - } else { - _g1h->heap_region_iterate(&final_update_cl); - } - } -}; - class G1ParNoteEndTask; class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { @@ -1861,9 +1438,6 @@ public: const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } bool doHeapRegion(HeapRegion *hr) { - if (hr->continuesHumongous()) { - return false; - } // We use a claim value of zero here because all regions // were claimed with value 1 in the FinalCount task. _g1->reset_gc_time_stamps(hr); @@ -1876,7 +1450,6 @@ public: _freed_bytes += hr->used(); hr->set_containing_set(NULL); if (hr->isHumongous()) { - assert(hr->startsHumongous(), "we should only see starts humongous"); _humongous_regions_removed.increment(1u, hr->capacity()); _g1->free_humongous_region(hr, _local_cleanup_list, true); } else { @@ -1967,28 +1540,6 @@ public: size_t freed_bytes() { return _freed_bytes; } }; -class G1ParScrubRemSetTask: public AbstractGangTask { -protected: - G1RemSet* _g1rs; - BitMap* _region_bm; - BitMap* _card_bm; - HeapRegionClaimer _hrclaimer; -public: - G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, - BitMap* card_bm, uint n_workers) : - AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), - _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) { } - - void work(uint worker_id) { - if (G1CollectedHeap::use_parallel_gc_threads()) { - _g1rs->scrub_par(_region_bm, _card_bm, worker_id, &_hrclaimer); - } else { - _g1rs->scrub(_region_bm, _card_bm); - } - } - -}; - void ConcurrentMark::cleanup() { // world is stopped at this checkpoint assert(SafepointSynchronize::is_at_safepoint(), @@ -2003,11 +1554,11 @@ void ConcurrentMark::cleanup() { g1h->verify_region_sets_optional(); - if (VerifyDuringGC) { + if (VerifyDuringGC) { // While rebuilding the remembered set we used the next marking... HandleMark hm; // handle scope Universe::heap()->prepare_for_verify(); - Universe::verify(VerifyOption_G1UsePrevMarking, - " VerifyDuringGC:(before)"); + Universe::verify(VerifyOption_G1UseNextMarking, + " VerifyDuringGC:(Cleanup before)"); } g1h->check_bitmaps("Cleanup Start"); @@ -2018,48 +1569,12 @@ void ConcurrentMark::cleanup() { HeapRegionRemSet::reset_for_cleanup_tasks(); - uint n_workers; - - // Do counting once more with the world stopped for good measure. - G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); - - if (G1CollectedHeap::use_parallel_gc_threads()) { - g1h->set_par_threads(); - n_workers = g1h->n_par_threads(); - assert(g1h->n_par_threads() == n_workers, - "Should not have been reset"); - g1h->workers()->run_task(&g1_par_count_task); - // Done with the parallel phase so reset to 0. - g1h->set_par_threads(0); - } else { - n_workers = 1; - g1_par_count_task.work(0); - } - - if (VerifyDuringGC) { - // Verify that the counting data accumulated during marking matches - // that calculated by walking the marking bitmap. 
- - // Bitmaps to hold expected values - BitMap expected_region_bm(_region_bm.size(), true); - BitMap expected_card_bm(_card_bm.size(), true); - - G1ParVerifyFinalCountTask g1_par_verify_task(g1h, - &_region_bm, - &_card_bm, - &expected_region_bm, - &expected_card_bm); - - if (G1CollectedHeap::use_parallel_gc_threads()) { - g1h->set_par_threads((int)n_workers); - g1h->workers()->run_task(&g1_par_verify_task); - // Done with the parallel phase so reset to 0. - g1h->set_par_threads(0); - } else { - g1_par_verify_task.work(0); - } - - guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); + uint n_workers = G1CollectedHeap::use_parallel_gc_threads() ? + g1h->workers()->active_workers() : + 1; + { + G1UpdateRemSetTrackingAfterRebuild cl(_g1h); + g1h->heap_region_iterate(&cl); } size_t start_used_bytes = g1h->used(); @@ -2070,7 +1585,7 @@ _total_counting_time += this_final_counting_time; if (G1PrintRegionLivenessInfo) { - G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); + G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Cleanup"); _g1h->heap_region_iterate(&cl); } @@ -2097,24 +1612,6 @@ g1h->set_free_regions_coming(); } - // call below, since it affects the metric by which we sort the heap - // regions. - if (G1ScrubRemSets) { - double rs_scrub_start = os::elapsedTime(); - G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers); - if (G1CollectedHeap::use_parallel_gc_threads()) { - g1h->set_par_threads((int)n_workers); - g1h->workers()->run_task(&g1_par_scrub_rs_task); - g1h->set_par_threads(0); - } else { - g1_par_scrub_rs_task.work(0); - } - - double rs_scrub_end = os::elapsedTime(); - double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); - _total_rs_scrub_time += this_rs_scrub_time; - } - // this will also free any regions totally full of garbage objects, // and sort the regions. g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); @@ -2257,14 +1754,12 @@ class G1CMKeepAliveAndDrainClosure: public OopClosure { template <class T> void do_oop_work(T* p) { if (!_cm->has_overflown()) { - oop obj = oopDesc::load_decode_heap_oop(p); if (_cm->verbose_high()) { - gclog_or_tty->print_cr("\t[%u] we're looking at location " - "*" PTR_FORMAT " = " PTR_FORMAT, - _task->worker_id(), p2i(p), p2i((void*) obj)); + gclog_or_tty->print_cr("\t[%u] we're looking at location " PTR_FORMAT "", + _task->worker_id(), p2i(p)); } - _task->deal_with_reference(obj); + _task->deal_with_reference(p); _ref_counter--; if (_ref_counter == 0) { @@ -2633,15 +2128,8 @@ private: // circumspect about treating the argument as an object. void do_entry(void* entry) const { _task->increment_refs_reached(); - HeapRegion* hr = _g1h->heap_region_containing_raw(entry); - if (entry < hr->next_top_at_mark_start()) { - // Until we get here, we don't know whether entry refers to a valid - // object; it could instead have been a stale reference.
- oop obj = static_cast<oop>(entry); - assert(obj->is_oop(true /* ignore mark word */), - err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj))); - _task->make_reference_grey(obj, hr); - } + oop const obj = static_cast<oop>(entry); + _task->make_reference_grey(obj); } public: @@ -2784,6 +2272,21 @@ void ConcurrentMark::checkpointRootsFinalWork() { print_stats(); } +void ConcurrentMark::flush_all_task_caches() { + size_t hits = 0; + size_t misses = 0; + for (uint i = 0; i < _max_worker_id; i++) { + Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache(); + hits += stats.first; + misses += stats.second; + } + size_t sum = hits + misses; + if (G1Log::finer()) { + gclog_or_tty->print("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf", + hits, misses, sum != 0 ? double(hits) / sum * 100.0 : 0.0); + } +} + #ifndef PRODUCT class PrintReachableOopClosure: public OopClosure { @@ -2975,30 +2478,7 @@ ConcurrentMark::claim_region(uint worker_id) { while (finger < _heap_end) { assert(_g1h->is_in_g1_reserved(finger), "invariant"); - // Note on how this code handles humongous regions. In the - // normal case the finger will reach the start of a "starts - // humongous" (SH) region. Its end will either be the end of the - // last "continues humongous" (CH) region in the sequence, or the - // standard end of the SH region (if the SH is the only region in - // the sequence). That way claim_region() will skip over the CH - // regions. However, there is a subtle race between a CM thread - // executing this method and a mutator thread doing a humongous - // object allocation. The two are not mutually exclusive as the CM - // thread does not need to hold the Heap_lock when it gets - // here. So there is a chance that claim_region() will come across - // a free region that's in the progress of becoming a SH or a CH - // region. In the former case, it will either - // a) Miss the update to the region's end, in which case it will - // visit every subsequent CH region, will find their bitmaps - // empty, and do nothing, or - // b) Will observe the update of the region's end (in which case - // it will skip the subsequent CH regions). - // If it comes across a region that suddenly becomes CH, the - // scenario will be similar to b). So, the race between - // claim_region() and a humongous object allocation might force us - // to do a bit of unnecessary work (due to some unnecessary bitmap - // iterations) but it should not introduce and correctness issues. - HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); + HeapRegion* curr_region = _g1h->heap_region_containing(finger); // Make sure that the reads below do not float before loading curr_region. OrderAccess::loadload(); @@ -3146,16 +2626,9 @@ void ConcurrentMark::verify_no_cset_oops() { // Verify the global finger HeapWord* global_finger = finger(); if (global_finger != NULL && global_finger < _heap_end) { - // The global finger always points to a heap region boundary. We - // use heap_region_containing_raw() to get the containing region - // given that the global finger could be pointing to a free region - // which subsequently becomes continues humongous. If that - // happens, heap_region_containing() will return the bottom of the - // corresponding starts humongous region and the check below will - // not hold any more. // Since we always iterate over all regions, we might get a NULL HeapRegion // here.
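flush_all_task_caches() above only sums per-task hit and miss counters; the cache itself is the new G1RegionMarkStatsCache (g1RegionMarkStatsCache.{cpp,hpp,inline.hpp} in the diffstat, not shown in this excerpt). The idea is that each marking worker accumulates live-word counts per region locally and writes back to the shared _region_mark_stats array only on eviction. A standalone sketch of that scheme (the direct-mapped layout and the 1024-entry size are assumptions):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    struct G1RegionMarkStats { std::atomic<size_t> live_words; };

    // Per-worker, direct-mapped cache: buffer live-word counts locally so
    // concurrent marking threads rarely contend on the shared counters.
    class RegionMarkStatsCache {
      struct Entry { uint32_t region_idx; size_t live_words; };
      static const size_t kNumEntries = 1024;  // assumed cache size
      Entry _entries[kNumEntries];
      G1RegionMarkStats* _global;
      size_t _hits, _misses;

      void evict(Entry& e) {
        if (e.live_words != 0) {
          _global[e.region_idx].live_words.fetch_add(e.live_words,
                                                     std::memory_order_relaxed);
          e.live_words = 0;
        }
      }

    public:
      explicit RegionMarkStatsCache(G1RegionMarkStats* global)
          : _global(global), _hits(0), _misses(0) {
        for (size_t i = 0; i < kNumEntries; i++) {
          _entries[i].region_idx = 0;
          _entries[i].live_words = 0;
        }
      }

      // Called for every live object found; this is the hot path.
      void add_live_words(uint32_t region_idx, size_t words) {
        Entry& e = _entries[region_idx % kNumEntries];
        if (e.region_idx != region_idx) {  // collision: write back the old owner
          evict(e);
          e.region_idx = region_idx;
          _misses++;
        } else {
          _hits++;
        }
        e.live_words += words;
      }

      // Write everything back, e.g. at the end of marking (cf. evict_all()).
      void flush() {
        for (size_t i = 0; i < kNumEntries; i++) evict(_entries[i]);
      }

      size_t hits() const   { return _hits; }
      size_t misses() const { return _misses; }
    };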
- HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); + HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); guarantee(global_hr == NULL || global_finger == global_hr->bottom(), err_msg("global finger: " PTR_FORMAT " region: " HR_FORMAT, p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); @@ -3168,7 +2641,7 @@ void ConcurrentMark::verify_no_cset_oops() { HeapWord* task_finger = task->finger(); if (task_finger != NULL && task_finger < _heap_end) { // See above note on the global finger verification. - HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); + HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); guarantee(task_hr == NULL || task_finger == task_hr->bottom() || !task_hr->in_collection_set(), err_msg("task finger: " PTR_FORMAT " region: " HR_FORMAT, @@ -3178,187 +2651,10 @@ void ConcurrentMark::verify_no_cset_oops() { } #endif // PRODUCT -// Aggregate the counting data that was constructed concurrently -// with marking. -class AggregateCountDataHRClosure: public HeapRegionClosure { - G1CollectedHeap* _g1h; - ConcurrentMark* _cm; - CardTableModRefBS* _ct_bs; - BitMap* _cm_card_bm; - uint _max_worker_id; - - public: - AggregateCountDataHRClosure(G1CollectedHeap* g1h, - BitMap* cm_card_bm, - uint max_worker_id) : - _g1h(g1h), _cm(g1h->concurrent_mark()), - _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), - _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } - - bool doHeapRegion(HeapRegion* hr) { - if (hr->continuesHumongous()) { - // We will ignore these here and process them when their - // associated "starts humongous" region is processed. - // Note that we cannot rely on their associated - // "starts humongous" region to have their bit set to 1 - // since, due to the region chunking in the parallel region - // iteration, a "continues humongous" region might be visited - // before its associated "starts humongous". - return false; - } - - HeapWord* start = hr->bottom(); - HeapWord* limit = hr->next_top_at_mark_start(); - HeapWord* end = hr->end(); - - assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), - err_msg("Preconditions not met - " - "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", " - "top: " PTR_FORMAT ", end: " PTR_FORMAT, - p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()))); - - assert(hr->next_marked_bytes() == 0, "Precondition"); - - if (start == limit) { - // NTAMS of this region has not been set so nothing to do. - return false; - } - - // 'start' should be in the heap. - assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); - // 'end' *may* be just beyone the end of the heap (if hr is the last region) - assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); - - BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); - BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); - BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); - - // If ntams is not card aligned then we bump card bitmap index - // for limit so that we get the all the cards spanned by - // the object ending at ntams. - // Note: if this is the last region in the heap then ntams - // could be actually just beyond the end of the the heap; - // limit_idx will then correspond to a (non-existent) card - // that is also outside the heap. 
- if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { - limit_idx += 1; - } - - assert(limit_idx <= end_idx, "or else use atomics"); - - // Aggregate the "stripe" in the count data associated with hr. - uint hrm_index = hr->hrm_index(); - size_t marked_bytes = 0; - - for (uint i = 0; i < _max_worker_id; i += 1) { - size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); - BitMap* task_card_bm = _cm->count_card_bitmap_for(i); - - // Fetch the marked_bytes in this region for task i and - // add it to the running total for this region. - marked_bytes += marked_bytes_array[hrm_index]; - - // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) - // into the global card bitmap. - BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); - - while (scan_idx < limit_idx) { - assert(task_card_bm->at(scan_idx) == true, "should be"); - _cm_card_bm->set_bit(scan_idx); - assert(_cm_card_bm->at(scan_idx) == true, "should be"); - - // BitMap::get_next_one_offset() can handle the case when - // its left_offset parameter is greater than its right_offset - // parameter. It does, however, have an early exit if - // left_offset == right_offset. So let's limit the value - // passed in for left offset here. - BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); - scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); - } - } - - // Update the marked bytes for this region. - hr->add_to_marked_bytes(marked_bytes); - - // Next heap region - return false; - } -}; - -class G1AggregateCountDataTask: public AbstractGangTask { -protected: - G1CollectedHeap* _g1h; - ConcurrentMark* _cm; - BitMap* _cm_card_bm; - uint _max_worker_id; - int _active_workers; - HeapRegionClaimer _hrclaimer; - -public: - G1AggregateCountDataTask(G1CollectedHeap* g1h, - ConcurrentMark* cm, - BitMap* cm_card_bm, - uint max_worker_id, - int n_workers) : - AbstractGangTask("Count Aggregation"), - _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), - _max_worker_id(max_worker_id), - _active_workers(n_workers), - _hrclaimer(_active_workers) { } - - void work(uint worker_id) { - AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); - - if (G1CollectedHeap::use_parallel_gc_threads()) { - _g1h->heap_region_par_iterate_chunked(&cl, worker_id, &_hrclaimer); - } else { - _g1h->heap_region_iterate(&cl); - } - } -}; - - -void ConcurrentMark::aggregate_count_data() { - int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? - _g1h->workers()->active_workers() : - 1); - - G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, - _max_worker_id, n_workers); - - if (G1CollectedHeap::use_parallel_gc_threads()) { - _g1h->set_par_threads(n_workers); - _g1h->workers()->run_task(&g1_par_agg_task); - _g1h->set_par_threads(0); - } else { - g1_par_agg_task.work(0); - } -} - -// Clear the per-worker arrays used to store the per-region counting data -void ConcurrentMark::clear_all_count_data() { - // Clear the global card bitmap - it will be filled during - // liveness count aggregation (during remark) and the - // final counting task. - _card_bm.clear(); - - // Clear the global region bitmap - it will be filled as part - // of the final counting task. 
- _region_bm.clear(); - - uint max_regions = _g1h->max_regions(); - assert(_max_worker_id > 0, "uninitialized"); - - for (uint i = 0; i < _max_worker_id; i += 1) { - BitMap* task_card_bm = count_card_bitmap_for(i); - size_t* marked_bytes_array = count_marked_bytes_array_for(i); - - assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); - assert(marked_bytes_array != NULL, "uninitialized"); - - memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); - task_card_bm->clear(); - } +void ConcurrentMark::rebuild_rem_set_concurrently() { + uint num_workers = MAX2(1U, calc_parallel_marking_threads()); + bool use_parallel = use_parallel_marking_threads(); + _g1h->g1_rem_set()->rebuild_rem_set(this, _parallel_workers, use_parallel, num_workers, _worker_id_offset); } void ConcurrentMark::print_stats() { @@ -3381,8 +2677,6 @@ void ConcurrentMark::abort() { // since VerifyDuringGC verifies the objects marked during // a full GC against the previous bitmap. - // Clear the liveness counting data - clear_all_count_data(); // Empty mark stack reset_marking_state(); for (uint i = 0; i < _max_worker_id; ++i) { @@ -3437,18 +2731,12 @@ void ConcurrentMark::print_summary_info() { } print_ms_time_info(" ", "cleanups", _cleanup_times); - gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", + gclog_or_tty->print_cr(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); - if (G1ScrubRemSets) { - gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", - _total_rs_scrub_time, - (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / - (double)_cleanup_times.num() - : 0.0)); - } + gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); @@ -3471,19 +2759,6 @@ void ConcurrentMark::print_on_error(outputStream* st) const { _nextMarkBitMap->print_on_error(st, " Next Bits: "); } -// We take a break if someone is trying to stop the world. 
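rebuild_rem_set_concurrently() above delegates to G1RemSet::rebuild_rem_set() in the reworked g1RemSet.cpp (not shown in this excerpt). Conceptually, each region selected at remark is scanned from bottom() up to the top-at-rebuild-start (TARS) recorded for it, and the references found are re-inserted into the remembered sets; objects allocated after TARS are covered by the normal write barrier. A pseudocode-style sketch of that per-region loop, with the helper names as assumptions:

    #include <cstddef>

    struct HeapRegion;
    typedef char HeapWord;

    // Assumed helpers for the sketch; the real work happens in g1RemSet.cpp.
    HeapWord* bottom(HeapRegion* r);
    HeapWord* top_at_rebuild_start(HeapRegion* r);  // NULL => skip this region
    HeapWord* scan_object_and_update_remsets(HeapRegion* r, HeapWord* obj);

    // Scan live objects from the bottom of the region up to TARS.
    void rebuild_region(HeapRegion* r) {
      HeapWord* tars = top_at_rebuild_start(r);
      if (tars == NULL) {
        return;  // region was not selected for rebuild at remark
      }
      for (HeapWord* cur = bottom(r); cur < tars; ) {
        cur = scan_object_and_update_remsets(r, cur);  // returns the next object
      }
    }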
-bool ConcurrentMark::do_yield_check(uint worker_id) { - if (SuspendibleThreadSet::should_yield()) { - if (worker_id == 0) { - _g1h->g1_policy()->record_concurrent_pause(); - } - SuspendibleThreadSet::yield(); - return true; - } else { - return false; - } -} - #ifndef PRODUCT // for debugging purposes void ConcurrentMark::print_finger() { @@ -3575,8 +2850,6 @@ G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, void CMTask::setup_for_region(HeapRegion* hr) { assert(hr != NULL, "claim_region() should have filtered out NULL regions"); - assert(!hr->continuesHumongous(), - "claim_region() should have filtered out continues humongous regions"); if (_cm->verbose_low()) { gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT, @@ -3665,6 +2938,8 @@ void CMTask::reset(CMBitMap* nextMarkBitMap) { _elapsed_time_ms = 0.0; _termination_time_ms = 0.0; _termination_start_time_ms = 0.0; + _mark_stats_cache.reset(); + #if _MARKING_STATS_ _local_pushes = 0; @@ -4021,6 +3296,14 @@ void CMTask::drain_satb_buffers() { decrease_limits(); } +void CMTask::clear_mark_stats_cache(uint region_idx) { + _mark_stats_cache.reset(region_idx); +} + +Pair<size_t, size_t> CMTask::flush_mark_stats_cache() { + return _mark_stats_cache.evict_all(); +} + void CMTask::print_stats() { gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", _worker_id, _calls); @@ -4031,7 +3314,11 @@ _step_times_ms.sd()); gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", _step_times_ms.maximum(), _step_times_ms.sum()); - + size_t const hits = _mark_stats_cache.hits(); + size_t const misses = _mark_stats_cache.misses(); + gclog_or_tty->print_cr(" Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f", + hits, misses, + hits + misses != 0 ? double(hits) / (hits + misses) * 100.0 : 0.0); #if _MARKING_STATS_ gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), @@ -4557,16 +3844,34 @@ void CMTask::do_marking_step(double time_target_ms, // When we exit this sync barrier we know that all tasks have // stopped doing marking work. So, it's now safe to - // re-initialise our data structures. At the end of this method, - // task 0 will clear the global data structures. + // re-initialise our data structures. } statsOnly( ++_aborted_overflow ); // We clear the local state of this task... clear_region_fields(); + flush_mark_stats_cache(); if (!is_serial) { + // If we're executing the concurrent phase of marking, reset the marking + // state; otherwise the marking state is reset after reference processing, + // during the remark pause. + // If we reset here as a result of an overflow during the remark we will + // see assertion failures from any subsequent set_concurrency_and_phase() + // calls. + if (_cm->concurrent() && _worker_id == 0) { + // Worker 0 is responsible for clearing the global data structures because + // of an overflow. During STW we should not clear the overflow flag (in + // ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit + // this method to abort the pause and restart concurrent marking. + _cm->reset_marking_state(); + _cm->force_overflow()->update(); + + if (G1Log::finer()) { + gclog_or_tty->print_cr("Concurrent Mark reset for overflow"); + } + } // ...and enter the second barrier.
_cm->enter_second_sync_barrier(_worker_id);
     }
@@ -4597,20 +3902,19 @@ void CMTask::do_marking_step(double time_target_ms,
 
 CMTask::CMTask(uint worker_id, ConcurrentMark* cm,
-               size_t* marked_bytes,
-               BitMap* card_bm,
                CMTaskQueue* task_queue,
-               CMTaskQueueSet* task_queues)
+               CMTaskQueueSet* task_queues,
+               G1RegionMarkStats* mark_stats,
+               uint max_regions)
   : _g1h(G1CollectedHeap::heap()),
     _worker_id(worker_id), _cm(cm),
     _objArray_processor(this),
     _claimed(false),
     _nextMarkBitMap(NULL),
     _task_queue(task_queue),
+    _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize),
    _task_queues(task_queues),
-    _cm_oop_closure(NULL),
-    _marked_bytes_array(marked_bytes),
-    _card_bm(card_bm) {
+    _cm_oop_closure(NULL) {
   guarantee(task_queue != NULL, "invariant");
   guarantee(task_queues != NULL, "invariant");
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
index 172caef29..e4da1dfdc 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@@ -27,6 +27,7 @@
 #include "classfile/javaClasses.hpp"
 #include "gc_implementation/g1/g1ConcurrentMarkObjArrayProcessor.hpp"
+#include "gc_implementation/g1/g1RegionMarkStatsCache.hpp"
 #include "gc_implementation/g1/heapRegionSet.hpp"
 #include "gc_implementation/g1/g1RegionToSpaceMapper.hpp"
 #include "gc_implementation/shared/gcId.hpp"
@@ -82,6 +83,7 @@ class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
     return _bm.at(heapWordToOffset(addr));
   }
+  bool isMarked(oop obj) const { return isMarked((HeapWord*)obj); }
   // iteration
   inline bool iterate(BitMapClosure* cl, MemRegion mr);
   inline bool iterate(BitMapClosure* cl);
@@ -377,7 +379,7 @@ class ConcurrentMark: public CHeapObj<mtGC> {
   friend class CMRemarkTask;
   friend class CMConcurrentMarkingTask;
   friend class G1ParNoteEndTask;
-  friend class CalcLiveObjectsClosure;
+  friend class G1VerifyLiveDataClosure;
   friend class G1CMRefProcTaskProxy;
   friend class G1CMRefProcTaskExecutor;
   friend class G1CMKeepAliveAndDrainClosure;
@@ -408,9 +410,6 @@ protected:
   CMBitMapRO* _prevMarkBitMap; // completed mark bitmap
   CMBitMap* _nextMarkBitMap; // under-construction mark bitmap
 
-  BitMap _region_bm;
-  BitMap _card_bm;
-
   // Heap bounds
   HeapWord* _heap_start;
   HeapWord* _heap_end;
@@ -425,6 +424,7 @@ protected:
   // last claimed region
 
   // marking tasks
+  uint _worker_id_offset;
   uint _max_worker_id;// maximum worker id
   uint _active_tasks; // task num currently active
   CMTask** _tasks; // task queue array (max_worker_id len)
@@ -472,7 +472,6 @@ protected:
   NumberSeq _remark_weak_ref_times;
   NumberSeq _cleanup_times;
   double _total_counting_time;
-  double _total_rs_scrub_time;
 
   double* _accum_task_vtime; // accumulated task vtime
@@ -559,7 +558,9 @@ protected:
   // Returns the task with the given id
   CMTask* task(int id) {
-    assert(0 <= id && id < (int) _active_tasks,
+    // During initial mark we use the parallel gc threads to do some work, so
+    // we can only compare against _max_worker_id.
+    assert(0 <= id && id < (int) _max_worker_id,
            "task id not within active bounds");
     return _tasks[id];
   }
@@ -601,23 +602,6 @@ protected:
     }
   }
 
-  // Live Data Counting data structures...
-  // These data structures are initialized at the start of
-  // marking. They are written to while marking is active.
-  // They are aggregated during remark; the aggregated values
-  // are then used to populate the _region_bm, _card_bm, and
-  // the total live bytes, which are then subsequently updated
-  // during cleanup.
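The new CMTask constructor above takes the shared G1RegionMarkStats array and the region count instead of per-worker marked-bytes arrays and card bitmaps. The allocation site is outside this hunk; a sketch of how ConcurrentMark plausibly wires it up — the loop mirrors the existing task-creation loop in the JDK 8 ConcurrentMark constructor, while the NEW_C_HEAP_ARRAY allocation is an assumption:

// Sketch of the task wiring; only the CMTask ctor signature is the patch's.
_region_mark_stats = NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC);
for (uint i = 0; i < _max_worker_id; ++i) {
  CMTaskQueue* task_queue = new CMTaskQueue();
  task_queue->initialize();
  _task_queues->register_queue(i, task_queue);
  _tasks[i] = new CMTask(i, this, task_queue, _task_queues,
                         _region_mark_stats, _g1h->max_regions());
}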
-
-  // An array of bitmaps (one bit map per task). Each bitmap
-  // is used to record the cards spanned by the live objects
-  // marked by that task/worker.
-  BitMap* _count_card_bitmaps;
-
-  // Used to record the number of marked live bytes
-  // (for each region, by worker thread).
-  size_t** _count_marked_bytes;
-
   // Card index of the bottom of the G1 heap. Used for biasing indices into
   // the card bitmaps.
   intptr_t _heap_bottom_card_num;
@@ -625,7 +609,29 @@
   // Set to true when initialization is complete
   bool _completed_initialization;
+  // Clear statistics gathered during the concurrent cycle for the given region after
+  // it has been reclaimed.
+  void clear_statistics_in_region(uint region_idx);
+  // Region statistics gathered during marking.
+  G1RegionMarkStats* _region_mark_stats;
+  // Top pointer for each region at the start of the rebuild remembered set process
+  // for regions which remembered sets need to be rebuilt. A NULL for a given region
+  // means that this region will not be scanned during the rebuilding remembered
+  // set phase at all.
+  HeapWord** _top_at_rebuild_starts;
 public:
+  void add_to_liveness(uint worker_id, oop const obj, size_t size);
+  // Liveness of the given region as determined by concurrent marking, i.e. the amount of
+  // live words between bottom and nTAMS.
+  size_t liveness(uint region) { return _region_mark_stats[region]._live_words; }
+
+  // Sets the internal top_at_rebuild_start for the given region to the current top of the region.
+  inline void update_top_at_rebuild_start(HeapRegion* r);
+  // TARS for the given region during remembered set rebuilding.
+  inline HeapWord* top_at_rebuild_start(uint region) const;
+
+  // Notification for eagerly reclaimed regions to clean up.
+  void humongous_object_eagerly_reclaimed(HeapRegion* r);
   // Manipulation of the global mark stack.
   // Notice that the first mark_stack_push is CAS-based, whereas the
   // two below are Mutex-based. This is OK since the first one is only
@@ -702,23 +708,8 @@ public:
   // Calculates the number of GC threads to be used in a concurrent phase.
   uint calc_parallel_marking_threads();
 
-  // The following three are interaction between CM and
-  // G1CollectedHeap
-
-  // This notifies CM that a root during initial-mark needs to be
-  // grayed. It is MT-safe. word_size is the size of the object in
-  // words. It is passed explicitly as sometimes we cannot calculate
-  // it from the given object because it might be in an inconsistent
-  // state (e.g., in to-space and being copied). So the caller is
-  // responsible for dealing with this issue (e.g., get the size from
-  // the from-space image when the to-space image might be
-  // inconsistent) and always passing the size. hr is the region that
-  // contains the object and it's passed optionally from callers who
-  // might already have it (no point in recalculating it).
-  inline void grayRoot(oop obj,
-                       size_t word_size,
-                       uint worker_id,
-                       HeapRegion* hr = NULL);
+  // Moves all per-task cached data into global state.
+  void flush_all_task_caches();
 
   // It iterates over the heap and for each object it comes across it
   // will dump the contents of its reference fields, as well as
@@ -807,7 +798,7 @@ public:
     return _prevMarkBitMap->isMarked(addr);
   }
 
-  inline bool do_yield_check(uint worker_i = 0);
+  inline bool do_yield_check();
 
   // Called to abort the marking cycle after a Full GC takes place.
void abort(); @@ -841,97 +832,20 @@ public: return _MARKING_VERBOSE_ && _verbose_level >= high_verbose; } - // Liveness counting - - // Utility routine to set an exclusive range of cards on the given - // card liveness bitmap - inline void set_card_bitmap_range(BitMap* card_bm, - BitMap::idx_t start_idx, - BitMap::idx_t end_idx, - bool is_par); - - // Returns the card number of the bottom of the G1 heap. - // Used in biasing indices into accounting card bitmaps. - intptr_t heap_bottom_card_num() const { - return _heap_bottom_card_num; - } - - // Returns the card bitmap for a given task or worker id. - BitMap* count_card_bitmap_for(uint worker_id) { - assert(0 <= worker_id && worker_id < _max_worker_id, "oob"); - assert(_count_card_bitmaps != NULL, "uninitialized"); - BitMap* task_card_bm = &_count_card_bitmaps[worker_id]; - assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); - return task_card_bm; - } - - // Returns the array containing the marked bytes for each region, - // for the given worker or task id. - size_t* count_marked_bytes_array_for(uint worker_id) { - assert(0 <= worker_id && worker_id < _max_worker_id, "oob"); - assert(_count_marked_bytes != NULL, "uninitialized"); - size_t* marked_bytes_array = _count_marked_bytes[worker_id]; - assert(marked_bytes_array != NULL, "uninitialized"); - return marked_bytes_array; - } - - // Returns the index in the liveness accounting card table bitmap - // for the given address - inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr); - - // Counts the size of the given memory region in the the given - // marked_bytes array slot for the given HeapRegion. - // Sets the bits in the given card bitmap that are associated with the - // cards that are spanned by the memory region. - inline void count_region(MemRegion mr, - HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm); - - // Counts the given memory region in the task/worker counting - // data structures for the given worker id. - inline void count_region(MemRegion mr, HeapRegion* hr, uint worker_id); - - // Counts the given object in the given task/worker counting - // data structures. - inline void count_object(oop obj, - HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm); - - // Attempts to mark the given object and, if successful, counts - // the object in the given task/worker counting structures. - inline bool par_mark_and_count(oop obj, - HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm); - - // Attempts to mark the given object and, if successful, counts - // the object in the task/worker counting structures for the - // given worker id. - inline bool par_mark_and_count(oop obj, - size_t word_size, - HeapRegion* hr, - uint worker_id); + // Mark the given object on the next bitmap if it is below nTAMS. + // If the passed obj_size is zero, it is recalculated from the given object if + // needed. This is to be as lazy as possible with accessing the object's size. + inline bool mark_in_next_bitmap(uint worker_id, HeapRegion* const hr, oop const obj, size_t const obj_size = 0); + inline bool mark_in_next_bitmap(uint worker_id, oop const obj, size_t const obj_size = 0); // Returns true if initialization was successfully completed. bool completed_initialization() const { return _completed_initialization; } -protected: - // Clear all the per-task bitmaps and arrays used to store the - // counting data. 
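The TARS accessors declared above define the scan bound for the rebuild phase. A hedged sketch of a consumer — the closure context is illustrative, only the accessor comes from the patch:

// Illustrative rebuild-time use of TARS. For regions that need no scan the
// patch stores bottom(), so the range below comes out empty and is skipped.
uint const region_idx = r->hrm_index();
MemRegion rebuild_range(r->bottom(), _cm->top_at_rebuild_start(region_idx));
if (!rebuild_range.is_empty()) {
  // Walk objects in [bottom, TARS) and re-insert their outgoing references
  // into the remembered sets of the regions they point into.
}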
-  void clear_all_count_data();
-
-  // Aggregates the counting data for each worker/task
-  // that was constructed while marking. Also sets
-  // the amount of marked bytes for each region and
-  // the top at concurrent mark count.
-  void aggregate_count_data();
-
-  // Verification routine
-  void verify_count_data();
+private:
+  // Rebuilds the remembered sets for chosen regions in parallel, concurrently with the application.
+  void rebuild_rem_set_concurrently();
 };
 
 // A class representing a marking task.
@@ -951,6 +865,10 @@ private:
     global_stack_transfer_size = 16
   };
 
+  // Number of entries in the per-task stats cache. This seems enough to have a very
+  // low cache miss rate.
+  static const uint RegionMarkStatsCacheSize = 1024;
+
   G1CMObjArrayProcessor _objArray_processor;
 
   uint _worker_id;
@@ -959,6 +877,8 @@ private:
   CMBitMap* _nextMarkBitMap;
   // the task queue of this task
   CMTaskQueue* _task_queue;
+
+  G1RegionMarkStatsCache _mark_stats_cache;
 private:
   // the task queue set---needed for stealing
   CMTaskQueueSet* _task_queues;
@@ -1033,12 +953,6 @@ private:
 
   TruncatedSeq _marking_step_diffs_ms;
 
-  // Counting data structures. Embedding the task's marked_bytes_array
-  // and card bitmap into the actual task saves having to go through
-  // the ConcurrentMark object.
-  size_t* _marked_bytes_array;
-  BitMap* _card_bm;
-
   // LOTS of statistics related with this task
 #if _MARKING_STATS_
   NumberSeq _all_clock_intervals_ms;
@@ -1166,14 +1080,14 @@ public:
 
   // Grey the object by marking it. If not already marked, push it on
   // the local queue if below the finger.
-  // Precondition: obj is in region.
-  // Precondition: obj is below region's NTAMS.
-  inline void make_reference_grey(oop obj, HeapRegion* region);
+  // obj is below its region's NTAMS.
+  inline void make_reference_grey(oop obj);
 
   // Grey the object (by calling make_grey_reference) if required,
   // e.g. obj is below its containing region's NTAMS.
   // Precondition: obj is a valid heap object.
-  inline void deal_with_reference(oop obj);
+  template <class T>
+  inline void deal_with_reference(T* p);
 
   // It scans an object and visits its children.
   void scan_object(oop obj) { process_grey_object(obj); }
@@ -1206,10 +1120,18 @@ public:
   CMTask(uint worker_id, ConcurrentMark *cm,
-         size_t* marked_bytes,
-         BitMap* card_bm,
          CMTaskQueue* task_queue,
-         CMTaskQueueSet* task_queues);
+         CMTaskQueueSet* task_queues,
+         G1RegionMarkStats* mark_stats,
+         uint max_regions);
+
+  inline void update_liveness(oop const obj, size_t const obj_size);
+
+  // Clear (without flushing) the mark cache entry for the given region.
+  void clear_mark_stats_cache(uint region_idx);
+  // Evict the whole statistics cache into the global statistics. Returns the
+  // number of cache hits and misses so far.
+  Pair<size_t, size_t> flush_mark_stats_cache();
 
   // it prints statistics associated with this task
   void print_stats();
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
index 7dc2855ca..48864c778 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
@@ -27,143 +27,44 @@
 #include "gc_implementation/g1/concurrentMark.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/shared/suspendibleThreadSet.hpp"
 #include "gc_implementation/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp"
-
-// Utility routine to set an exclusive range of cards on the given
-// card liveness bitmap
-inline void ConcurrentMark::set_card_bitmap_range(BitMap* card_bm,
-                                                  BitMap::idx_t start_idx,
-                                                  BitMap::idx_t end_idx,
-                                                  bool is_par) {
-
-  // Set the exclusive bit range [start_idx, end_idx).
-  assert((end_idx - start_idx) > 0, "at least one card");
-  assert(end_idx <= card_bm->size(), "sanity");
-
-  // Silently clip the end index
-  end_idx = MIN2(end_idx, card_bm->size());
-
-  // For small ranges use a simple loop; otherwise use set_range or
-  // use par_at_put_range (if parallel). The range is made up of the
-  // cards that are spanned by an object/mem region so 8 cards will
-  // allow up to object sizes up to 4K to be handled using the loop.
-  if ((end_idx - start_idx) <= 8) {
-    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
-      if (is_par) {
-        card_bm->par_set_bit(i);
-      } else {
-        card_bm->set_bit(i);
-      }
-    }
-  } else {
-    // Note BitMap::par_at_put_range() and BitMap::set_range() are exclusive.
-    if (is_par) {
-      card_bm->par_at_put_range(start_idx, end_idx, true);
-    } else {
-      card_bm->set_range(start_idx, end_idx);
-    }
-  }
+#include "gc_implementation/g1/g1RegionMarkStatsCache.inline.hpp"
+#include "gc_implementation/g1/g1RemSetTrackingPolicy.hpp"
+#include "gc_implementation/g1/heapRegionRemSet.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+
+inline bool ConcurrentMark::mark_in_next_bitmap(uint const worker_id, oop const obj, size_t const obj_size) {
+  HeapRegion* const hr = _g1h->heap_region_containing(obj);
+  return mark_in_next_bitmap(worker_id, hr, obj, obj_size);
+}
 
-// Returns the index in the liveness accounting card bitmap
-// for the given address
-inline BitMap::idx_t ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
-  // Below, the term "card num" means the result of shifting an address
-  // by the card shift -- address 0 corresponds to card number 0. One
-  // must subtract the card num of the bottom of the heap to obtain a
-  // card table index.
-  intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
-  return card_num - heap_bottom_card_num();
-}
+inline bool ConcurrentMark::mark_in_next_bitmap(uint const worker_id,
+                                                HeapRegion* const hr, oop const obj, size_t const obj_size) {
+  assert(hr != NULL, "just checking");
+  assert(hr->is_in_reserved(obj), err_msg("Attempting to mark object at " PTR_FORMAT " that"
+         " is not contained in the given region %u", p2i(obj), hr->hrm_index()));
 
-// Counts the given memory region in the given task/worker
-// counting data structures.
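RegionMarkStatsCacheSize = 1024 and the hit/miss pair returned by flush_mark_stats_cache() suggest a small, power-of-two-sized, direct-mapped cache sitting in front of the shared per-region array. The cache itself lives in the new g1RegionMarkStatsCache files, which this excerpt does not show; a minimal sketch of the idea, with all member names assumed (the real backport extends Atomic — see the atomic.hpp hunks in the file list — here Atomic::add_ptr stands in):

// Assumed shape, not the patch's code: one slot per (region index mod size).
class RegionMarkStatsCacheSketch {
  struct Entry { uint _region_idx; size_t _live_words; };
  Entry* _cache;               // RegionMarkStatsCacheSize entries
  G1RegionMarkStats* _target;  // shared per-region array
  size_t _num_hits;
  size_t _num_misses;
public:
  void add_live_words(uint region_idx, size_t live_words) {
    Entry* e = &_cache[region_idx & (1024 - 1)];
    if (e->_region_idx != region_idx) {
      // Conflict miss: push the displaced slot into the shared array with an
      // atomic add (other tasks may be flushing the same region), then rebind.
      Atomic::add_ptr((intptr_t)e->_live_words,
                      (volatile intptr_t*)&_target[e->_region_idx]._live_words);
      e->_region_idx = region_idx;
      e->_live_words = 0;
      _num_misses++;
    } else {
      _num_hits++;
    }
    e->_live_words += live_words;
  }
};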
-inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm) { - G1CollectedHeap* g1h = _g1h; - CardTableModRefBS* ct_bs = g1h->g1_barrier_set(); - - HeapWord* start = mr.start(); - HeapWord* end = mr.end(); - size_t region_size_bytes = mr.byte_size(); - uint index = hr->hrm_index(); - - assert(!hr->continuesHumongous(), "should not be HC region"); - assert(hr == g1h->heap_region_containing(start), "sanity"); - assert(hr == g1h->heap_region_containing(mr.last()), "sanity"); - assert(marked_bytes_array != NULL, "pre-condition"); - assert(task_card_bm != NULL, "pre-condition"); - - // Add to the task local marked bytes for this region. - marked_bytes_array[index] += region_size_bytes; - - BitMap::idx_t start_idx = card_bitmap_index_for(start); - BitMap::idx_t end_idx = card_bitmap_index_for(end); - - // Note: if we're looking at the last region in heap - end - // could be actually just beyond the end of the heap; end_idx - // will then correspond to a (non-existent) card that is also - // just beyond the heap. - if (g1h->is_in_g1_reserved(end) && !ct_bs->is_card_aligned(end)) { - // end of region is not card aligned - incremement to cover - // all the cards spanned by the region. - end_idx += 1; + if (hr->obj_allocated_since_next_marking(obj)) { + return false; } - // The card bitmap is task/worker specific => no need to use - // the 'par' BitMap routines. - // Set bits in the exclusive bit range [start_idx, end_idx). - set_card_bitmap_range(task_card_bm, start_idx, end_idx, false /* is_par */); -} -// Counts the given memory region in the task/worker counting -// data structures for the given worker id. -inline void ConcurrentMark::count_region(MemRegion mr, - HeapRegion* hr, - uint worker_id) { - size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id); - BitMap* task_card_bm = count_card_bitmap_for(worker_id); - count_region(mr, hr, marked_bytes_array, task_card_bm); -} + // Some callers may have stale objects to mark above nTAMS after humongous reclaim. + assert(obj->is_oop(true /* ignore mark word */), err_msg("Address " PTR_FORMAT " to mark is not an oop", p2i(obj))); + assert(!hr->continuesHumongous(), err_msg("Should not try to mark object " PTR_FORMAT " in Humongous" + " continues region %u above nTAMS " PTR_FORMAT, p2i(obj), hr->hrm_index(), p2i(hr->next_top_at_mark_start()))); -// Counts the given object in the given task/worker counting data structures. -inline void ConcurrentMark::count_object(oop obj, - HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm) { - MemRegion mr((HeapWord*)obj, obj->size()); - count_region(mr, hr, marked_bytes_array, task_card_bm); -} - -// Attempts to mark the given object and, if successful, counts -// the object in the given task/worker counting structures. -inline bool ConcurrentMark::par_mark_and_count(oop obj, - HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm) { - HeapWord* addr = (HeapWord*)obj; - if (_nextMarkBitMap->parMark(addr)) { - // Update the task specific count data for the object. - count_object(obj, hr, marked_bytes_array, task_card_bm); - return true; + HeapWord* const obj_addr = (HeapWord*)obj; + // Dirty read to avoid CAS. + if (_nextMarkBitMap->isMarked(obj_addr)) { + return false; } - return false; -} -// Attempts to mark the given object and, if successful, counts -// the object in the task/worker counting structures for the -// given worker id. 
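The obj_size parameter of mark_in_next_bitmap() preserves the caveat that the removed grayRoot() comment spelled out: during evacuation an object may be mid-copy, so its size cannot always be read from the to-space image. A hedged caller sketch — the handler around the call is illustrative, mark_in_next_bitmap() is the patch's API:

// Illustrative evacuation-failure path: word_size was obtained from the
// from-space image, so we pass it instead of letting mark_in_next_bitmap()
// call obj->size() on a possibly inconsistent object.
void mark_after_evac_failure(uint worker_id, oop old_obj, size_t word_size) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  HeapRegion* hr = g1h->heap_region_containing(old_obj);
  g1h->concurrent_mark()->mark_in_next_bitmap(worker_id, hr, old_obj, word_size);
}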
-inline bool ConcurrentMark::par_mark_and_count(oop obj, - size_t word_size, - HeapRegion* hr, - uint worker_id) { - HeapWord* addr = (HeapWord*)obj; - if (_nextMarkBitMap->parMark(addr)) { - MemRegion mr(addr, word_size); - count_region(mr, hr, worker_id); - return true; + bool success = _nextMarkBitMap->parMark(obj_addr); + if (success) { + add_to_liveness(worker_id, obj, obj_size == 0 ? obj->size() : obj_size); } - return false; + return success; } inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { @@ -296,80 +197,79 @@ inline void CMTask::abort_marking_if_regular_check_fail() { } } -inline void CMTask::make_reference_grey(oop obj, HeapRegion* hr) { - if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) { +inline void CMTask::update_liveness(oop const obj, const size_t obj_size) { + _mark_stats_cache.add_live_words(_g1h->addr_to_region((HeapWord*)obj), obj_size); +} - if (_cm->verbose_high()) { - gclog_or_tty->print_cr("[%u] marked object " PTR_FORMAT, - _worker_id, p2i(obj)); - } +inline void ConcurrentMark::add_to_liveness(uint worker_id, oop const obj, size_t size) { + task(worker_id)->update_liveness(obj, size); +} + +inline void CMTask::make_reference_grey(oop obj) { + if (!_cm->mark_in_next_bitmap(_worker_id, obj)) { + return; + } - // No OrderAccess:store_load() is needed. It is implicit in the - // CAS done in CMBitMap::parMark() call in the routine above. - HeapWord* global_finger = _cm->finger(); - - // We only need to push a newly grey object on the mark - // stack if it is in a section of memory the mark bitmap - // scan has already examined. Mark bitmap scanning - // maintains progress "fingers" for determining that. - // - // Notice that the global finger might be moving forward - // concurrently. This is not a problem. In the worst case, we - // mark the object while it is above the global finger and, by - // the time we read the global finger, it has moved forward - // past this object. In this case, the object will probably - // be visited when a task is scanning the region and will also - // be pushed on the stack. So, some duplicate work, but no - // correctness problems. - if (is_below_finger(obj, global_finger)) { - if (obj->is_typeArray()) { - // Immediately process arrays of primitive types, rather - // than pushing on the mark stack. This keeps us from - // adding humongous objects to the mark stack that might - // be reclaimed before the entry is processed - see - // selection of candidates for eager reclaim of humongous - // objects. The cost of the additional type test is - // mitigated by avoiding a trip through the mark stack, - // by only doing a bookkeeping update and avoiding the - // actual scan of the object - a typeArray contains no - // references, and the metadata is built-in. - process_grey_object(obj); - } else { - if (_cm->verbose_high()) { - gclog_or_tty->print_cr("[%u] below a finger (local: " PTR_FORMAT - ", global: " PTR_FORMAT ") pushing " - PTR_FORMAT " on mark stack", - _worker_id, p2i(_finger), - p2i(global_finger), p2i(obj)); - } - push(obj); + if (_cm->verbose_high()) { + gclog_or_tty->print_cr("[%u] marked object " PTR_FORMAT, + _worker_id, p2i(obj)); + } + + // No OrderAccess:store_load() is needed. It is implicit in the + // CAS done in CMBitMap::parMark() call in the routine above. + HeapWord* global_finger = _cm->finger(); + + // We only need to push a newly grey object on the mark + // stack if it is in a section of memory the mark bitmap + // scan has already examined. 
Mark bitmap scanning
+  // maintains progress "fingers" for determining that.
+  //
+  // Notice that the global finger might be moving forward
+  // concurrently. This is not a problem. In the worst case, we
+  // mark the object while it is above the global finger and, by
+  // the time we read the global finger, it has moved forward
+  // past this object. In this case, the object will probably
+  // be visited when a task is scanning the region and will also
+  // be pushed on the stack. So, some duplicate work, but no
+  // correctness problems.
+  if (is_below_finger(obj, global_finger)) {
+    if (obj->is_typeArray()) {
+      // Immediately process arrays of primitive types, rather
+      // than pushing on the mark stack. This keeps us from
+      // adding humongous objects to the mark stack that might
+      // be reclaimed before the entry is processed - see
+      // selection of candidates for eager reclaim of humongous
+      // objects. The cost of the additional type test is
+      // mitigated by avoiding a trip through the mark stack,
+      // by only doing a bookkeeping update and avoiding the
+      // actual scan of the object - a typeArray contains no
+      // references, and the metadata is built-in.
+      process_grey_object(obj);
+    } else {
+      if (_cm->verbose_high()) {
+        gclog_or_tty->print_cr("[%u] below a finger (local: " PTR_FORMAT
+                               ", global: " PTR_FORMAT ") pushing "
+                               PTR_FORMAT " on mark stack",
+                               _worker_id, p2i(_finger),
+                               p2i(global_finger), p2i(obj));
+      }
+      push(obj);
+    }
+  }
+}
 
-inline void CMTask::deal_with_reference(oop obj) {
+template <class T>
+inline void CMTask::deal_with_reference(T *p) {
+  oop obj = oopDesc::load_decode_heap_oop(p);
   if (_cm->verbose_high()) {
     gclog_or_tty->print_cr("[%u] we're dealing with reference = " PTR_FORMAT,
                            _worker_id, p2i((void*) obj));
   }
 
-  increment_refs_reached();
-
-  HeapWord* objAddr = (HeapWord*) obj;
-  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
-  if (_g1h->is_in_g1_reserved(objAddr)) {
-    assert(obj != NULL, "null check is implicit");
-    if (!_nextMarkBitMap->isMarked(objAddr)) {
-      // Only get the containing region if the object is not marked on the
-      // bitmap (otherwise, it's a waste of time since we won't do
-      // anything with it).
-      HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
-      if (!hr->obj_allocated_since_next_marking(obj)) {
-        make_reference_grey(obj, hr);
-      }
-    }
+  if (obj == NULL) {
+    return;
   }
+  make_reference_grey(obj);
 }
 
 inline size_t CMTask::scan_objArray(objArrayOop obj, MemRegion mr) {
@@ -377,6 +277,27 @@ inline size_t CMTask::scan_objArray(objArrayOop obj, MemRegion mr) {
   return mr.word_size();
 }
 
+inline HeapWord* ConcurrentMark::top_at_rebuild_start(uint region) const {
+  assert(region < _g1h->max_regions(), err_msg("Tried to access TARS for region %u out of bounds", region));
+  return _top_at_rebuild_starts[region];
+}
+
+inline void ConcurrentMark::update_top_at_rebuild_start(HeapRegion* r) {
+  uint const region = r->hrm_index();
+  assert(region < _g1h->max_regions(), err_msg("Tried to access TARS for region %u out of bounds", region));
+  assert(_top_at_rebuild_starts[region] == NULL,
+         err_msg("TARS for region %u has already been set to " PTR_FORMAT " should be NULL",
+                 region, p2i(_top_at_rebuild_starts[region])));
+  G1RemSetTrackingPolicy* tracker = _g1h->g1_policy()->remset_tracker();
+  if (tracker->needs_scan_for_rebuild(r)) {
+    _top_at_rebuild_starts[region] = r->top();
+  } else {
+    // We could leave the TARS for this region at NULL, but we would not catch
+    // accidental double assignment then.
+ _top_at_rebuild_starts[region] = r->bottom(); + } +} + inline void ConcurrentMark::markPrev(oop p) { assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity"); // Note we are overriding the read-only view of the prev map here, via @@ -384,34 +305,13 @@ inline void ConcurrentMark::markPrev(oop p) { ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p); } -inline void ConcurrentMark::grayRoot(oop obj, size_t word_size, - uint worker_id, HeapRegion* hr) { - assert(obj != NULL, "pre-condition"); - HeapWord* addr = (HeapWord*) obj; - if (hr == NULL) { - hr = _g1h->heap_region_containing_raw(addr); +// We take a break if someone is trying to stop the world. +inline bool ConcurrentMark::do_yield_check() { + if (SuspendibleThreadSet::should_yield()) { + SuspendibleThreadSet::yield(); + return true; } else { - assert(hr->is_in(addr), "pre-condition"); - } - assert(hr != NULL, "sanity"); - // Given that we're looking for a region that contains an object - // header it's impossible to get back a HC region. - assert(!hr->continuesHumongous(), "sanity"); - - // We cannot assert that word_size == obj->size() given that obj - // might not be in a consistent state (another thread might be in - // the process of copying it). So the best thing we can do is to - // assert that word_size is under an upper bound which is its - // containing region's capacity. - assert(word_size * HeapWordSize <= hr->capacity(), - err_msg("size: " SIZE_FORMAT " capacity: " SIZE_FORMAT " " HR_FORMAT, - word_size * HeapWordSize, hr->capacity(), - HR_FORMAT_PARAMS(hr))); - - if (addr < hr->next_top_at_mark_start()) { - if (!_nextMarkBitMap->isMarked(addr)) { - par_mark_and_count(obj, word_size, hr, worker_id); - } + return false; } } diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp index 3c4553bf7..7a26679d4 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp @@ -25,10 +25,12 @@ #include "precompiled.hpp" #include "classfile/classLoaderData.hpp" #include "gc_implementation/g1/concurrentMarkThread.inline.hpp" +#include "gc_implementation/g1/concurrentMark.inline.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1CollectorPolicy.hpp" #include "gc_implementation/g1/g1Log.hpp" #include "gc_implementation/g1/g1MMUTracker.hpp" +#include "gc_implementation/g1/g1RemSet.hpp" #include "gc_implementation/g1/vm_operations_g1.hpp" #include "gc_implementation/shared/gcTrace.hpp" #include "memory/resourceArea.hpp" @@ -176,6 +178,10 @@ void ConcurrentMarkThread::run() { } } while (cm()->restart_for_overflow()); + if (!cm()->has_aborted()) { + cm()->rebuild_rem_set_concurrently(); + } + double end_time = os::elapsedVTime(); // Update the total virtual time before doing this, since it will try // to measure it to get the vtime for this marking. We purposely diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp index f6fb2cdee..3a0e4de9f 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp @@ -154,9 +154,6 @@ void G1DefaultAllocator::release_gc_alloc_regions(uint no_of_gc_workers, Evacuat // want either way so no reason to check explicitly for either // condition. 
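do_yield_check() dropped its worker_id parameter above because the record_concurrent_pause() bookkeeping went away, so every concurrent loop can now use the same pattern. A sketch of that canonical loop — the per-region work and the claiming scheme are placeholders:

// Canonical yield pattern for concurrent G1 phases.
for (uint i = 0; i < _g1h->max_regions(); i++) {
  if (!claim_region_for_rebuild(i)) continue;  // hypothetical claim
  process_region(_g1h->region_at(i));          // hypothetical work
  if (_cm->do_yield_check() && _cm->has_aborted()) {
    // A Full GC (or shutdown) aborted marking while we were yielded.
    return;
  }
}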
_retained_old_gc_alloc_region = old_gc_alloc_region(context)->release(); - if (_retained_old_gc_alloc_region != NULL) { - _retained_old_gc_alloc_region->record_retained_region(); - } if (ResizePLAB) { _g1h->_survivor_plab_stats.adjust_desired_plab_sz(no_of_gc_workers); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp index 1977fc83d..b908e8faf 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp @@ -370,33 +370,44 @@ void G1BlockOffsetArray::alloc_block_work2(HeapWord** threshold_, size_t* index_ #endif } -bool -G1BlockOffsetArray::verify_for_object(HeapWord* obj_start, - size_t word_size) const { - size_t first_card = _array->index_for(obj_start); - size_t last_card = _array->index_for(obj_start + word_size - 1); - if (!_array->is_card_boundary(obj_start)) { - // If the object is not on a card boundary the BOT entry of the - // first card should point to another object so we should not - // check that one. - first_card += 1; - } - for (size_t card = first_card; card <= last_card; card += 1) { - HeapWord* card_addr = _array->address_for_index(card); - HeapWord* block_start = block_start_const(card_addr); - if (block_start != obj_start) { - gclog_or_tty->print_cr("block start: " PTR_FORMAT " is incorrect - " - "card index: " SIZE_FORMAT " " - "card addr: " PTR_FORMAT " BOT entry: %u " - "obj: " PTR_FORMAT " word size: " SIZE_FORMAT " " - "cards: [" SIZE_FORMAT "," SIZE_FORMAT "]", - block_start, card, card_addr, - _array->offset_array(card), - obj_start, word_size, first_card, last_card); - return false; +void G1BlockOffsetArray::verify() const { + size_t start_card = _array->index_for(gsp()->bottom()); + size_t end_card = _array->index_for(gsp()->top()); + + for (size_t current_card = start_card; current_card < end_card; current_card++) { + u_char entry = _array->offset_array(current_card); + if (entry < N_words) { + // The entry should point to an object before the current card. Verify that + // it is possible to walk from that object in to the current card by just + // iterating over the objects following it. + HeapWord* card_address = _array->address_for_index(current_card); + HeapWord* obj_end = card_address - entry; + while (obj_end < card_address) { + HeapWord* obj = obj_end; + size_t obj_size = block_size(obj); + obj_end = obj + obj_size; + guarantee(obj_end > obj && obj_end <= gsp()->top(), + err_msg("Invalid object end. obj: " PTR_FORMAT " obj_size: " SIZE_FORMAT " obj_end: " PTR_FORMAT " top: " PTR_FORMAT, + p2i(obj), obj_size, p2i(obj_end), p2i(gsp()->top()))); + } + } else { + // Because we refine the BOT based on which cards are dirty there is not much we can verify here. + // We need to make sure that we are going backwards and that we don't pass the start of the + // corresponding heap region. But that is about all we can verify. + size_t backskip = BlockOffsetArray::entry_to_cards_back(entry); + guarantee(backskip >= 1, "Must be going back at least one card."); + + size_t max_backskip = current_card - start_card; + guarantee(backskip <= max_backskip, + err_msg("Going backwards beyond the start_card. 
start_card: " SIZE_FORMAT " current_card: " SIZE_FORMAT " backskip: " SIZE_FORMAT, + start_card, current_card, backskip)); + + HeapWord* backskip_address = _array->address_for_index(current_card - backskip); + guarantee(backskip_address >= gsp()->bottom(), + err_msg("Going backwards beyond bottom of the region: bottom: " PTR_FORMAT ", backskip_address: " PTR_FORMAT, + p2i(gsp()->bottom()), p2i(backskip_address))); } } - return true; } #ifndef PRODUCT @@ -470,13 +481,14 @@ HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold() { } void -G1BlockOffsetArrayContigSpace::set_for_starts_humongous(HeapWord* new_top) { - assert(new_top <= _end, "_end should have already been updated"); - - // The first BOT entry should have offset 0. +G1BlockOffsetArrayContigSpace::set_for_starts_humongous(HeapWord* obj_top, size_t fill_size) { + // The first BOT entry should have offset 0. reset_bot(); - alloc_block(_bottom, new_top); - } + alloc_block(_bottom, obj_top); + if (fill_size > 0) { + alloc_block(obj_top, fill_size); + } +} #ifndef PRODUCT void diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp index 2b360f0cd..e0f49ddd3 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp @@ -302,11 +302,7 @@ public: virtual HeapWord* block_start_unsafe(const void* addr); virtual HeapWord* block_start_unsafe_const(const void* addr) const; - // Used by region verification. Checks that the contents of the - // BOT reflect that there's a single object that spans the address - // range [obj_start, obj_start + word_size); returns true if this is - // the case, returns false if it's not. - bool verify_for_object(HeapWord* obj_start, size_t word_size) const; + void verify() const; void check_all_cards(size_t left_card, size_t right_card) const; @@ -367,7 +363,7 @@ class G1BlockOffsetArrayContigSpace: public G1BlockOffsetArray { HeapWord* block_start_unsafe(const void* addr); HeapWord* block_start_unsafe_const(const void* addr) const; - void set_for_starts_humongous(HeapWord* new_top); + void set_for_starts_humongous(HeapWord* obj_top, size_t fill_size); virtual void print_on(outputStream* out) PRODUCT_RETURN; }; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp index 9a8cb877d..5bf959c1b 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp @@ -151,7 +151,6 @@ G1BlockOffsetArray::block_at_or_preceding(const void* addr, // to go back by. 
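For orientation while reading the verification walk above: with the usual 512-byte cards and 8-byte heap words, N_words is 64, so an entry below 64 is a direct word offset from the card boundary back to the block that crosses it, while larger entries encode multi-card backskips decoded by BlockOffsetArray::entry_to_cards_back(). A worked instance of the direct-offset case (the addresses are made up):

// entry = 3 for the card at 0x0000000700000200:
//   obj_end = 0x0000000700000200 - 3 heap words = 0x00000007000001e8
// The verify loop then walks block_size() forward from 0x00000007000001e8;
// every step must satisfy obj < obj_end <= top(), and the walk must reach or
// cross the card boundary, proving the BOT entry really leads into this card.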
size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset); q -= (N_words * n_cards_back); - assert(q >= gsp()->bottom(), "Went below bottom!"); index -= n_cards_back; offset = _array->offset_array(index); } diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 3ff5586c1..aeec4e576 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -35,6 +35,7 @@ #include "gc_implementation/g1/g1CollectorPolicy.hpp" #include "gc_implementation/g1/g1ErgoVerbose.hpp" #include "gc_implementation/g1/g1EvacFailure.hpp" +#include "gc_implementation/g1/g1FullGCScope.hpp" #include "gc_implementation/g1/g1GCPhaseTimes.hpp" #include "gc_implementation/g1/g1Log.hpp" #include "gc_implementation/g1/g1MarkSweep.hpp" @@ -43,6 +44,7 @@ #include "gc_implementation/g1/g1RegionToSpaceMapper.hpp" #include "gc_implementation/g1/g1RemSet.inline.hpp" #include "gc_implementation/g1/g1RootProcessor.hpp" +#include "gc_implementation/g1/g1SerialFullCollector.hpp" #include "gc_implementation/g1/g1StringDedup.hpp" #include "gc_implementation/g1/g1YCTypes.hpp" #include "gc_implementation/g1/heapRegion.inline.hpp" @@ -95,11 +97,7 @@ public: RefineCardTableEntryClosure() : _concurrent(true) { } bool do_card_ptr(jbyte* card_ptr, uint worker_i) { - bool oops_into_cset = G1CollectedHeap::heap()->g1_rem_set()->refine_card(card_ptr, worker_i, false); - // This path is executed by the concurrent refine or mutator threads, - // concurrently, and so we do not care if card_ptr contains references - // that point into the collection set. - assert(!oops_into_cset, "should be"); + G1CollectedHeap::heap()->g1_rem_set()->refine_card_concurrently(card_ptr, worker_i); if (_concurrent && SuspendibleThreadSet::should_yield()) { // Caller will actually yield. @@ -614,8 +612,8 @@ G1CollectedHeap::humongous_obj_allocate_initialize_regions(uint first, assert(isHumongous(word_size), "word_size should be humongous"); assert(num_regions * HeapRegion::GrainWords >= word_size, "pre-condition"); - // Index of last region in the series + 1. - uint last = first + num_regions; + // Index of last region in the series. + uint last = first + num_regions - 1; // We need to initialize the region(s) we just discovered. This is // a bit tricky given that it can happen concurrently with @@ -634,12 +632,8 @@ G1CollectedHeap::humongous_obj_allocate_initialize_regions(uint first, // The header of the new object will be placed at the bottom of // the first region. HeapWord* new_obj = first_hr->bottom(); - // This will be the new end of the first region in the series that - // should also match the end of the last region in the series. - HeapWord* new_end = new_obj + word_size_sum; - // This will be the new top of the first region that will reflect - // this allocation. - HeapWord* new_top = new_obj + word_size; + // This will be the new top of the new object. + HeapWord* obj_top = new_obj + word_size; // First, we need to zero the header of the space that we will be // allocating. When we update top further down, some refinement @@ -656,23 +650,35 @@ G1CollectedHeap::humongous_obj_allocate_initialize_regions(uint first, // thread to calculate the object size incorrectly. Copy::fill_to_words(new_obj, oopDesc::header_size(), 0); + // How many words we use for filler objects. 
+  size_t word_fill_size = word_size_sum - word_size;
+
+  // How many words of memory we "waste" that cannot hold a filler object.
+  size_t words_not_fillable = 0;
+  if (word_fill_size >= min_fill_size()) {
+    fill_with_objects(obj_top, word_fill_size);
+  } else if (word_fill_size > 0) {
+    // We have space to fill, but we cannot fit an object there.
+    words_not_fillable = word_fill_size;
+    word_fill_size = 0;
+  }
+
   // We will set up the first region as "starts humongous". This
   // will also update the BOT covering all the regions to reflect
   // that there is a single object that starts at the bottom of the
   // first region.
-  first_hr->set_startsHumongous(new_top, new_end);
+  first_hr->set_startsHumongous(obj_top, word_fill_size);
+  _g1_policy->remset_tracker()->update_at_allocate(first_hr);
   first_hr->set_allocation_context(context);
   // Then, if there are any, we will set up the "continues
   // humongous" regions.
   HeapRegion* hr = NULL;
-  for (uint i = first + 1; i < last; ++i) {
+  for (uint i = first + 1; i <= last; ++i) {
     hr = region_at(i);
     hr->set_continuesHumongous(first_hr);
+    _g1_policy->remset_tracker()->update_at_allocate(hr);
     hr->set_allocation_context(context);
   }
-  // If we have "continues humongous" regions (hr != NULL), then the
-  // end of the last one should match new_end.
-  assert(hr == NULL || hr->end() == new_end, "sanity");
 
   // Up to this point no concurrent thread would have been able to
   // do any scanning on any region in this series. All the top
@@ -683,64 +689,44 @@ G1CollectedHeap::humongous_obj_allocate_initialize_regions(uint first,
   // object header and the BOT initialization.
   OrderAccess::storestore();
 
-  // Now that the BOT and the object header have been initialized,
-  // we can update top of the "starts humongous" region.
-  assert(first_hr->bottom() < new_top && new_top <= first_hr->end(),
-         "new_top should be in this region");
-  first_hr->set_top(new_top);
-  if (_hr_printer.is_active()) {
-    HeapWord* bottom = first_hr->bottom();
-    HeapWord* end = first_hr->orig_end();
-    if ((first + 1) == last) {
-      // the series has a single humongous region
-      _hr_printer.alloc(G1HRPrinter::SingleHumongous, first_hr, new_top);
-    } else {
-      // the series has more than one humongous regions
-      _hr_printer.alloc(G1HRPrinter::StartsHumongous, first_hr, end);
-    }
-  }
-
   // Now, we will update the top fields of the "continues humongous"
-  // regions. The reason we need to do this is that, otherwise,
-  // these regions would look empty and this will confuse parts of
-  // G1. For example, the code that looks for a consecutive number
-  // of empty regions will consider them empty and try to
-  // re-allocate them. We can extend is_empty() to also include
-  // !continuesHumongous(), but it is easier to just update the top
-  // fields here. The way we set top for all regions (i.e., top ==
-  // end for all regions but the last one, top == new_top for the
-  // last one) is actually used when we will free up the humongous
-  // region in free_humongous_region().
-  hr = NULL;
-  for (uint i = first + 1; i < last; ++i) {
+  // regions except the last one.
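A worked example of the filler bookkeeping above, assuming 1 MB regions (GrainWords = 131072 with 8-byte words) and a 2-word min_fill_size; both values are configuration-dependent:

// Request: word_size = 150000 words.
//   num_regions    = ceil(150000 / 131072)   = 2
//   word_size_sum  = 2 * 131072              = 262144
//   word_fill_size = 262144 - 150000         = 112144 >= min_fill_size
//   -> fill_with_objects(obj_top, 112144), words_not_fillable = 0.
// Request: word_size = 262143 words (one word short of two full regions).
//   word_fill_size = 1 < min_fill_size
//   -> words_not_fillable = 1 and the last region's top is set to
//      end() - 1, keeping the heap walkable without a filler object.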
+ for (uint i = first; i < last; ++i) { hr = region_at(i); - if ((i + 1) == last) { - // last continues humongous region - assert(hr->bottom() < new_top && new_top <= hr->end(), - "new_top should fall on this region"); - hr->set_top(new_top); - _hr_printer.alloc(G1HRPrinter::ContinuesHumongous, hr, new_top); - } else { - // not last one - assert(new_top > hr->end(), "new_top should be above this region"); - hr->set_top(hr->end()); - _hr_printer.alloc(G1HRPrinter::ContinuesHumongous, hr, hr->end()); - } + hr->set_top(hr->end()); } - // If we have continues humongous regions (hr != NULL), then the - // end of the last one should match new_end and its top should - // match new_top. - assert(hr == NULL || - (hr->end() == new_end && hr->top() == new_top), "sanity"); + + hr = region_at(last); + // If we cannot fit a filler object, we must set top to the end + // of the humongous object, otherwise we cannot iterate the heap + // and the BOT will not be complete. + hr->set_top(hr->end() - words_not_fillable); + + assert(hr->bottom() < obj_top && obj_top <= hr->end(), "obj_top should be in last region"); check_bitmaps("Humongous Region Allocation", first_hr); - assert(first_hr->used() == word_size * HeapWordSize, "invariant"); - _allocator->increase_used(first_hr->used()); - _humongous_set.add(first_hr); + assert(words_not_fillable == 0 || + first_hr->bottom() + word_size_sum - words_not_fillable == hr->top(), + "Miscalculation in humongous allocation"); + _allocator->increase_used((word_size_sum - words_not_fillable)* HeapWordSize); + for (uint i = first; i <= last; ++i) { + hr = region_at(i); + _humongous_set.add(hr); + if (i == first) { + _hr_printer.alloc(G1HRPrinter::StartsHumongous, hr, hr->top()); + } else { + _hr_printer.alloc(G1HRPrinter::ContinuesHumongous, hr, hr->top()); + } + } return new_obj; } +size_t G1CollectedHeap::humongous_obj_size_in_regions(size_t word_size) { + assert(isHumongous(word_size), err_msg("Object of size " SIZE_FORMAT " must be humongous here", word_size)); + return align_size_up_(word_size, HeapRegion::GrainWords) / HeapRegion::GrainWords; +} + // If could fit into free regions w/o expansion, try. // Otherwise, if can expand, do so. // Otherwise, if using ex regions might help, try with ex given back. @@ -750,7 +736,7 @@ HeapWord* G1CollectedHeap::humongous_obj_allocate(size_t word_size, AllocationCo verify_region_sets_optional(); uint first = G1_NO_HRM_INDEX; - uint obj_regions = (uint)(align_size_up_(word_size, HeapRegion::GrainWords) / HeapRegion::GrainWords); + uint obj_regions = (uint) humongous_obj_size_in_regions(word_size); if (obj_regions == 1) { // Only one region to allocate, try to use a fast path by directly allocating @@ -1159,77 +1145,6 @@ HeapWord* G1CollectedHeap::attempt_allocation_at_safepoint(size_t word_size, ShouldNotReachHere(); } -class PostMCRemSetClearClosure: public HeapRegionClosure { - G1CollectedHeap* _g1h; - ModRefBarrierSet* _mr_bs; -public: - PostMCRemSetClearClosure(G1CollectedHeap* g1h, ModRefBarrierSet* mr_bs) : - _g1h(g1h), _mr_bs(mr_bs) {} - - bool doHeapRegion(HeapRegion* r) { - HeapRegionRemSet* hrrs = r->rem_set(); - - if (r->continuesHumongous()) { - // We'll assert that the strong code root list and RSet is empty - assert(hrrs->strong_code_roots_list_length() == 0, "sanity"); - assert(hrrs->occupied() == 0, "RSet should be empty"); - return false; - } - - _g1h->reset_gc_time_stamps(r); - hrrs->clear(); - // You might think here that we could clear just the cards - // corresponding to the used region. 
But no: if we leave a dirty card - // in a region we might allocate into, then it would prevent that card - // from being enqueued, and cause it to be missed. - // Re: the performance cost: we shouldn't be doing full GC anyway! - _mr_bs->clear(MemRegion(r->bottom(), r->end())); - - return false; - } -}; - -void G1CollectedHeap::clear_rsets_post_compaction() { - PostMCRemSetClearClosure rs_clear(this, g1_barrier_set()); - heap_region_iterate(&rs_clear); -} - -class RebuildRSOutOfRegionClosure: public HeapRegionClosure { - G1CollectedHeap* _g1h; - UpdateRSOopClosure _cl; - int _worker_i; -public: - RebuildRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) : - _cl(g1->g1_rem_set(), worker_i), - _worker_i(worker_i), - _g1h(g1) - { } - - bool doHeapRegion(HeapRegion* r) { - if (!r->continuesHumongous()) { - _cl.set_from(r); - r->oop_iterate(&_cl); - } - return false; - } -}; - -class ParRebuildRSTask: public AbstractGangTask { - G1CollectedHeap* _g1; - HeapRegionClaimer _hrclaimer; - -public: - ParRebuildRSTask(G1CollectedHeap* g1) - : AbstractGangTask("ParRebuildRSTask"), - _g1(g1), _hrclaimer(g1->workers()->active_workers()) - { } - - void work(uint worker_id) { - RebuildRSOutOfRegionClosure rebuild_rs(_g1, worker_id); - _g1->heap_region_par_iterate_chunked(&rebuild_rs, worker_id, &_hrclaimer); - } -}; - class PostCompactionPrinterClosure: public HeapRegionClosure { private: G1HRPrinter* _hr_printer; @@ -1239,12 +1154,7 @@ public: if (hr->is_free()) { // We only generate output for non-empty regions. } else if (hr->startsHumongous()) { - if (hr->region_num() == 1) { - // single humongous region - _hr_printer->post_compaction(hr, G1HRPrinter::SingleHumongous); - } else { _hr_printer->post_compaction(hr, G1HRPrinter::StartsHumongous); - } } else if (hr->continuesHumongous()) { _hr_printer->post_compaction(hr, G1HRPrinter::ContinuesHumongous); } else if (hr->is_old()) { @@ -1264,295 +1174,205 @@ void G1CollectedHeap::print_hrm_post_compaction() { heap_region_iterate(&cl); } -bool G1CollectedHeap::do_collection(bool explicit_gc, - bool clear_all_soft_refs, - size_t word_size) { - assert_at_safepoint(true /* should_be_vm_thread */); +void G1CollectedHeap::abort_concurrent_cycle() { + // Note: When we have a more flexible GC logging framework that + // allows us to add optional attributes to a GC log record we + // could consider timing and reporting how long we wait in the + // following two methods. + wait_while_free_regions_coming(); + // If we start the compaction before the CM threads finish + // scanning the root regions we might trip them over as we'll + // be moving objects / updating references. So let's wait until + // they are done. By telling them to abort, they should complete + // early. + _cm->root_regions()->abort(); + _cm->root_regions()->wait_until_scan_finished(); + append_secondary_free_list_if_not_empty_with_lock(); - if (GC_locker::check_active_before_gc()) { - return false; - } + // Disable discovery and empty the discovered lists + // for the CM ref processor. + ref_processor_cm()->disable_discovery(); + ref_processor_cm()->abandon_partial_discovery(); + ref_processor_cm()->verify_no_references_recorded(); - STWGCTimer* gc_timer = G1MarkSweep::gc_timer(); - gc_timer->register_gc_start(); + // Abandon current iterations of concurrent marking and concurrent + // refinement, if any are in progress. 
+ concurrent_mark()->abort(); +} - SerialOldTracer* gc_tracer = G1MarkSweep::gc_tracer(); - gc_tracer->report_gc_start(gc_cause(), gc_timer->gc_start()); +void G1CollectedHeap::prepare_heap_for_full_collection() { + // Make sure we'll choose a new allocation region afterwards. + _allocator->release_mutator_alloc_regions(); + _allocator->abandon_gc_alloc_regions(); + g1_rem_set()->cleanupHRRS(); - SvcGCMarker sgcm(SvcGCMarker::FULL); - ResourceMark rm; + // We should call this after we retire any currently active alloc + // regions so that all the ALLOC / RETIRE events are generated + // before the start GC event. + _hr_printer.start_gc(true /* full */, (size_t) total_collections()); - print_heap_before_gc(); - trace_heap_before_gc(gc_tracer); + // We may have added regions to the current incremental collection + // set between the last GC or pause and now. We need to clear the + // incremental collection set and then start rebuilding it afresh + // after this full GC. + abandon_collection_set(g1_policy()->inc_cset_head()); + g1_policy()->clear_incremental_cset(); + g1_policy()->stop_incremental_cset_building(); - size_t metadata_prev_used = MetaspaceAux::used_bytes(); + tear_down_region_sets(false /* free_list_only */); + g1_policy()->set_gcs_are_young(true); +} +void G1CollectedHeap::verify_before_full_collection(bool explicit_gc) { + assert(!GCCause::is_user_requested_gc(gc_cause()) || explicit_gc, "invariant"); + assert(used() == recalculate_used(), "Should be equal"); verify_region_sets_optional(); + verify_before_gc(); + check_bitmaps("Full GC Start"); +} - const bool do_clear_all_soft_refs = clear_all_soft_refs || - collector_policy()->should_clear_all_soft_refs(); - - ClearedAllSoftRefs casr(do_clear_all_soft_refs, collector_policy()); - - { - IsGCActiveMark x; - - // Timing - assert(gc_cause() != GCCause::_java_lang_system_gc || explicit_gc, "invariant"); - TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty); - - { - GCTraceTime t(GCCauseString("Full GC", gc_cause()), G1Log::fine(), true, NULL, gc_tracer->gc_id()); - TraceCollectorStats tcs(g1mm()->full_collection_counters()); - TraceMemoryManagerStats tms(true /* fullGC */, gc_cause()); - - // Pause native trimming for the duration of the GC - GCTrimNative::pause_periodic_trim(); - - double start = os::elapsedTime(); - g1_policy()->record_full_collection_start(); - - // Note: When we have a more flexible GC logging framework that - // allows us to add optional attributes to a GC log record we - // could consider timing and reporting how long we wait in the - // following two methods. - wait_while_free_regions_coming(); - // If we start the compaction before the CM threads finish - // scanning the root regions we might trip them over as we'll - // be moving objects / updating references. So let's wait until - // they are done. By telling them to abort, they should complete - // early. - _cm->root_regions()->abort(); - _cm->root_regions()->wait_until_scan_finished(); - append_secondary_free_list_if_not_empty_with_lock(); - - gc_prologue(true); - increment_total_collections(true /* full gc */); - increment_old_marking_cycles_started(); - - assert(used() == recalculate_used(), "Should be equal"); - - verify_before_gc(); - - check_bitmaps("Full GC Start"); - pre_full_gc_dump(gc_timer); - - COMPILER2_PRESENT(DerivedPointerTable::clear()); - - // Disable discovery and empty the discovered lists - // for the CM ref processor. 
- ref_processor_cm()->disable_discovery(); - ref_processor_cm()->abandon_partial_discovery(); - ref_processor_cm()->verify_no_references_recorded(); - - // Abandon current iterations of concurrent marking and concurrent - // refinement, if any are in progress. - concurrent_mark()->abort(); - - // Make sure we'll choose a new allocation region afterwards. - _allocator->release_mutator_alloc_regions(); - _allocator->abandon_gc_alloc_regions(); - g1_rem_set()->cleanupHRRS(); - - // We should call this after we retire any currently active alloc - // regions so that all the ALLOC / RETIRE events are generated - // before the start GC event. - _hr_printer.start_gc(true /* full */, (size_t) total_collections()); - - // We may have added regions to the current incremental collection - // set between the last GC or pause and now. We need to clear the - // incremental collection set and then start rebuilding it afresh - // after this full GC. - abandon_collection_set(g1_policy()->inc_cset_head()); - g1_policy()->clear_incremental_cset(); - g1_policy()->stop_incremental_cset_building(); - - tear_down_region_sets(false /* free_list_only */); - g1_policy()->set_gcs_are_young(true); - - // See the comments in g1CollectedHeap.hpp and - // G1CollectedHeap::ref_processing_init() about - // how reference processing currently works in G1. - - // Temporarily make discovery by the STW ref processor single threaded (non-MT). - ReferenceProcessorMTDiscoveryMutator stw_rp_disc_ser(ref_processor_stw(), G1ParallelFullGC); - - // Temporarily clear the STW ref processor's _is_alive_non_header field. - ReferenceProcessorIsAliveMutator stw_rp_is_alive_null(ref_processor_stw(), NULL); - - ref_processor_stw()->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/); - ref_processor_stw()->setup_policy(do_clear_all_soft_refs); - - // Do collection work - { - HandleMark hm; // Discard invalid handles created during gc - G1MarkSweep::invoke_at_safepoint(ref_processor_stw(), do_clear_all_soft_refs); - } - - assert(num_free_regions() == 0, "we should not have added any free regions"); - rebuild_region_sets(false /* free_list_only */); - - // Enqueue any discovered reference objects that have - // not been removed from the discovered lists. - ref_processor_stw()->enqueue_discovered_references(); - - COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); - - MemoryService::track_memory_usage(); +void G1CollectedHeap::prepare_heap_for_mutators(bool explicit_gc, size_t word_size) { + // Delete metaspaces for unloaded class loaders and clean up loader_data graph + ClassLoaderDataGraph::purge(); + MetaspaceAux::verify_metrics(); - assert(!ref_processor_stw()->discovery_enabled(), "Postcondition"); - ref_processor_stw()->verify_no_references_recorded(); + // Prepare heap for normal collections. + assert(num_free_regions() == 0, "we should not have added any free regions"); + rebuild_region_sets(false /* free_list_only */); + abort_refinement(); + resize_if_necessary_after_full_collection(explicit_gc ? 0 : word_size); - // Delete metaspaces for unloaded class loaders and clean up loader_data graph - ClassLoaderDataGraph::purge(); - MetaspaceAux::verify_metrics(); + // Rebuild the strong code root lists for each region + rebuild_strong_code_roots(); - // Note: since we've just done a full GC, concurrent - // marking is no longer active. Therefore we need not - // re-enable reference discovery for the CM ref processor. - // That will be done at the start of the next marking cycle. 
- assert(!ref_processor_cm()->discovery_enabled(), "Postcondition"); - ref_processor_cm()->verify_no_references_recorded(); + // Purge code root memory + purge_code_root_memory(); - reset_gc_time_stamp(); - // Since everything potentially moved, we will clear all remembered - // sets, and clear all cards. Later we will rebuild remembered - // sets. We will also reset the GC time stamps of the regions. - clear_rsets_post_compaction(); - check_gc_time_stamps(); + // Start a new incremental collection set for the next pause + assert(g1_policy()->collection_set() == NULL, "must be"); + g1_policy()->start_incremental_cset_building(); - // Resize the heap if necessary. - resize_if_necessary_after_full_collection(explicit_gc ? 0 : word_size); + clear_cset_fast_test(); - if (_hr_printer.is_active()) { - // We should do this after we potentially resize the heap so - // that all the COMMIT / UNCOMMIT events are generated before - // the end GC event. + _allocator->init_mutator_alloc_regions(); - print_hrm_post_compaction(); - _hr_printer.end_gc(true /* full */, (size_t) total_collections()); - } + // Post collection state updates. + MetaspaceGC::compute_new_size(); +} - G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache(); - if (hot_card_cache->use_cache()) { - hot_card_cache->reset_card_counts(); - hot_card_cache->reset_hot_cache(); - } +void G1CollectedHeap::abort_refinement() { + G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache(); + if (hot_card_cache->use_cache()) { + hot_card_cache->reset_card_counts(); + hot_card_cache->reset_hot_cache(); + } - // Rebuild remembered sets of all regions. - if (G1CollectedHeap::use_parallel_gc_threads()) { - uint n_workers = - AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(), - workers()->active_workers(), - Threads::number_of_non_daemon_threads()); - assert(UseDynamicNumberOfGCThreads || - n_workers == workers()->total_workers(), - "If not dynamic should be using all the workers"); - workers()->set_active_workers(n_workers); - // Set parallel threads in the heap (_n_par_threads) only - // before a parallel phase and always reset it to 0 after - // the phase so that the number of parallel threads does - // no get carried forward to a serial phase where there - // may be code that is "possibly_parallel". - set_par_threads(n_workers); - - ParRebuildRSTask rebuild_rs_task(this); - assert(UseDynamicNumberOfGCThreads || - workers()->active_workers() == workers()->total_workers(), - "Unless dynamic should use total workers"); - // Use the most recent number of active workers - assert(workers()->active_workers() > 0, - "Active workers not properly set"); - set_par_threads(workers()->active_workers()); - workers()->run_task(&rebuild_rs_task); - set_par_threads(0); - } else { - RebuildRSOutOfRegionClosure rebuild_rs(this); - heap_region_iterate(&rebuild_rs); - } + // Discard all remembered set updates. + JavaThread::dirty_card_queue_set().abandon_logs(); + assert(dirty_card_queue_set().completed_buffers_num() == 0, "DCQS should be empty"); +} - // Rebuild the strong code root lists for each region - rebuild_strong_code_roots(); +void G1CollectedHeap::verify_after_full_collection() { + check_gc_time_stamps(); + _hrm.verify_optional(); + verify_region_sets_optional(); + verify_after_gc(); + // Clear the previous marking bitmap, if needed for bitmap verification. 
+ // Note we cannot do this when we clear the next marking bitmap in + // G1ConcurrentMark::abort() above since VerifyDuringGC verifies the + // objects marked during a full GC against the previous bitmap. + // But we need to clear it before calling check_bitmaps below since + // the full GC has compacted objects and updated TAMS but not updated + // the prev bitmap. + if (G1VerifyBitmaps) { + ((CMBitMap*) concurrent_mark()->prevMarkBitMap())->clearAll(); + } + check_bitmaps("Full GC End"); - // Purge code root memory - purge_code_root_memory(); + _young_list->reset_sampled_info(); + // At this point there should be no regions in the + // entire heap tagged as young. + assert(check_young_list_empty(true /* check_heap */), + "young list should be empty at this point"); - if (true) { // FIXME - MetaspaceGC::compute_new_size(); - } + // Note: since we've just done a full GC, concurrent + // marking is no longer active. Therefore we need not + // re-enable reference discovery for the CM ref processor. + // That will be done at the start of the next marking cycle. + // We also know that the STW processor should no longer + // discover any new references. + assert(!ref_processor_stw()->discovery_enabled(), "Postcondition"); + assert(!ref_processor_cm()->discovery_enabled(), "Postcondition"); + ref_processor_stw()->verify_no_references_recorded(); + ref_processor_cm()->verify_no_references_recorded(); +} +void G1CollectedHeap::print_heap_after_full_collection() { + if (_hr_printer.is_active()) { + print_hrm_post_compaction(); + _hr_printer.end_gc(true /* full */, (size_t) total_collections()); + } + if (G1Log::finer()) { + g1_policy()->print_heap_transition(); + g1_policy()->print_detailed_heap_transition(true /* full */); + } + print_heap_after_gc(); #ifdef TRACESPINNING - ParallelTaskTerminator::print_termination_counts(); + ParallelTaskTerminator::print_termination_counts(); #endif +} - // Discard all rset updates - JavaThread::dirty_card_queue_set().abandon_logs(); - assert(dirty_card_queue_set().completed_buffers_num() == 0, "DCQS should be empty"); - - _young_list->reset_sampled_info(); - // At this point there should be no regions in the - // entire heap tagged as young. - assert(check_young_list_empty(true /* check_heap */), - "young list should be empty at this point"); - - // Update the number of full collections that have been completed. - increment_old_marking_cycles_completed(false /* concurrent */); - - _hrm.verify_optional(); - verify_region_sets_optional(); - - verify_after_gc(); - - // Clear the previous marking bitmap, if needed for bitmap verification. - // Note we cannot do this when we clear the next marking bitmap in - // ConcurrentMark::abort() above since VerifyDuringGC verifies the - // objects marked during a full GC against the previous bitmap. - // But we need to clear it before calling check_bitmaps below since - // the full GC has compacted objects and updated TAMS but not updated - // the prev bitmap. 
- if (G1VerifyBitmaps) { - ((CMBitMap*) concurrent_mark()->prevMarkBitMap())->clearAll(); - } - check_bitmaps("Full GC End"); +void G1CollectedHeap::do_full_collection_inner(G1FullGCScope* scope, size_t word_size) { + GCTraceTime t(GCCauseString("Full GC", gc_cause()), G1Log::fine(), true, NULL, G1MarkSweep::gc_tracer()->gc_id()); + // Pause native trimming for the duration of the GC + GCTrimNative::pause_periodic_trim(); + g1_policy()->record_full_collection_start(); - // Start a new incremental collection set for the next pause - assert(g1_policy()->collection_set() == NULL, "must be"); - g1_policy()->start_incremental_cset_building(); + print_heap_before_gc(); - clear_cset_fast_test(); + abort_concurrent_cycle(); + verify_before_full_collection(scope->is_explicit_gc()); - _allocator->init_mutator_alloc_regions(); + gc_prologue(true); + prepare_heap_for_full_collection(); - double end = os::elapsedTime(); - g1_policy()->record_full_collection_end(); + G1SerialFullCollector serial(scope, ref_processor_stw()); + serial.prepare_collection(); + serial.collect(); + serial.complete_collection(); - if (G1Log::fine()) { - g1_policy()->print_heap_transition(); - } + prepare_heap_for_mutators(scope->is_explicit_gc(), word_size); - // We must call G1MonitoringSupport::update_sizes() in the same scoping level - // as an active TraceMemoryManagerStats object (i.e. before the destructor for the - // TraceMemoryManagerStats is called) so that the G1 memory pools are updated - // before any GC notifications are raised. - g1mm()->update_sizes(); + g1_policy()->record_full_collection_end(); + gc_epilogue(true); - gc_epilogue(true); - } + // Post collection verification. + verify_after_full_collection(); - if (G1Log::finer()) { - g1_policy()->print_detailed_heap_transition(true /* full */); - } + // Post collection logging. + // We should do this after we potentially resize the heap so + // that all the COMMIT / UNCOMMIT events are generated before + // the compaction events. + print_heap_after_full_collection(); + GCTrimNative::schedule_trim(); +} - print_heap_after_gc(); - trace_heap_after_gc(gc_tracer); +bool G1CollectedHeap::do_collection(bool explicit_gc, + bool clear_all_soft_refs, + size_t word_size) { + assert_at_safepoint(true /* should_be_vm_thread */); + if (GC_locker::check_active_before_gc()) { + // Full GC was not completed. + return false; + } - post_full_gc_dump(gc_timer); + const bool do_clear_all_soft_refs = clear_all_soft_refs || + collector_policy()->should_clear_all_soft_refs(); - gc_timer->register_gc_end(); - gc_tracer->report_gc_end(gc_timer->gc_end(), gc_timer->time_partitions()); + G1FullGCScope scope(explicit_gc, do_clear_all_soft_refs); + do_full_collection_inner(&scope, word_size); - GCTrimNative::schedule_trim(); - } + // Full collection was successfully completed. return true; } @@ -2124,6 +1944,7 @@ jint G1CollectedHeap::initialize() { // Initialize the from_card cache structure of HeapRegionRemSet. HeapRegionRemSet::init_heap(max_regions()); + _g1_rem_set->initialize(max_capacity(), max_regions()); // Now expand into the initial heap size. 
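Taken together, the hunks above split the old monolithic full-GC body into a G1FullGCScope object (defined later in this patch) plus named phase helpers. A condensed sketch of the resulting control flow, with record_full_collection_start()/end(), logging and the native-trim calls omitted; all names are taken from the patch, bodies are elided:

    // Condensed, non-compilable sketch of the restructured full GC path.
    void G1CollectedHeap::do_full_collection_inner(G1FullGCScope* scope, size_t word_size) {
      abort_concurrent_cycle();                               // stop marking, discard refinement state
      verify_before_full_collection(scope->is_explicit_gc());
      gc_prologue(true);
      prepare_heap_for_full_collection();                     // retire alloc regions, tear down region sets
      G1SerialFullCollector serial(scope, ref_processor_stw());
      serial.prepare_collection();
      serial.collect();                                       // serial mark-sweep-compact
      serial.complete_collection();
      prepare_heap_for_mutators(scope->is_explicit_gc(), word_size);
      gc_epilogue(true);
      verify_after_full_collection();                         // postconditions, bitmap checks
      print_heap_after_full_collection();
    }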
if (!expand(init_byte_size)) { @@ -2318,17 +2139,7 @@ size_t G1CollectedHeap::capacity() const { } void G1CollectedHeap::reset_gc_time_stamps(HeapRegion* hr) { - assert(!hr->continuesHumongous(), "pre-condition"); hr->reset_gc_time_stamp(); - if (hr->startsHumongous()) { - uint first_index = hr->hrm_index() + 1; - uint last_index = hr->last_hc_index(); - for (uint i = first_index; i < last_index; i += 1) { - HeapRegion* chr = region_at(i); - assert(chr->continuesHumongous(), "sanity"); - chr->reset_gc_time_stamp(); - } - } } #ifndef PRODUCT @@ -2362,14 +2173,11 @@ void G1CollectedHeap::check_gc_time_stamps() { } #endif // PRODUCT -void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl, - DirtyCardQueue* into_cset_dcq, - bool concurrent, - uint worker_i) { - // Clean cards in the hot card cache - G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache(); - hot_card_cache->drain(worker_i, g1_rem_set(), into_cset_dcq); +void G1CollectedHeap::iterate_hcc_closure(CardTableEntryClosure* cl, uint worker_i) { + _cg1r->hot_card_cache()->drain(cl, worker_i); +} +void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl, uint worker_i) { DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); size_t n_completed_buffers = 0; while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) { @@ -2394,9 +2202,7 @@ class SumUsedClosure: public HeapRegionClosure { public: SumUsedClosure() : _used(0) {} bool doHeapRegion(HeapRegion* r) { - if (!r->continuesHumongous()) { - _used += r->used(); - } + _used += r->used(); return false; } size_t result() { return _used; } @@ -2646,9 +2452,9 @@ void G1CollectedHeap::collect(GCCause::Cause cause) { bool G1CollectedHeap::is_in(const void* p) const { if (_hrm.reserved().contains(p)) { // Given that we know that p is in the reserved space, - // heap_region_containing_raw() should successfully + // heap_region_containing() should successfully // return the containing region. - HeapRegion* hr = heap_region_containing_raw(p); + HeapRegion* hr = heap_region_containing(p); return hr->is_in(p); } else { return false; @@ -3249,12 +3055,19 @@ public: } bool doHeapRegion(HeapRegion* r) { + guarantee(!r->is_young() || r->rem_set()->is_complete(), + err_msg("Remembered set for Young region %u must be " + "complete, is %s", r->hrm_index(), r->rem_set()->get_state_str())); + // Humongous and old regions regions might be of any state, so can't check here. + guarantee(!r->is_free() || !r->rem_set()->is_tracked(), + err_msg("Remembered set for free region %u must be " + "untracked, is %s", r->hrm_index(), r->rem_set()->get_state_str())); if (!r->continuesHumongous()) { bool failures = false; r->verify(_vo, &failures); if (failures) { _failures = true; - } else { + } else if (!r->startsHumongous()) { VerifyObjsInRegionClosure not_dead_yet_cl(r, _vo); r->object_iterate(¬_dead_yet_cl); if (_vo != VerifyOption_G1UseNextMarking) { @@ -3635,17 +3448,30 @@ G1CollectedHeap* G1CollectedHeap::heap() { void G1CollectedHeap::gc_prologue(bool full /* Ignored */) { // always_do_update_barrier = false; assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer"); - // Fill TLAB's and such - accumulate_statistics_all_tlabs(); - ensure_parsability(true); - if (G1SummarizeRSetStats && (G1SummarizeRSetStatsPeriod > 0) && (total_collections() % G1SummarizeRSetStatsPeriod == 0)) { g1_rem_set()->print_periodic_summary_info("Before GC RS summary"); } + + // Update common counters. 
+ increment_total_collections(full /* full gc */); + if (full) { + increment_old_marking_cycles_started(); + reset_gc_time_stamp(); + } else { + increment_gc_time_stamp(); + } + // Fill TLAB's and such + accumulate_statistics_all_tlabs(); + ensure_parsability(true); } void G1CollectedHeap::gc_epilogue(bool full) { + // Update common counters. + if (full) { + // Update the number of full collections that have been completed. + increment_old_marking_cycles_completed(false /* concurrent */); + } if (G1SummarizeRSetStats && (G1SummarizeRSetStatsPeriod > 0) && @@ -3664,6 +3490,7 @@ void G1CollectedHeap::gc_epilogue(bool full) { resize_all_tlabs(); allocation_context_stats().update(full); + MemoryService::track_memory_usage(); // We have just completed a GC. Update the soft reference // policy with the new heap occupancy Universe::update_heap_info_at_gc(); @@ -3725,6 +3552,16 @@ size_t G1CollectedHeap::cards_scanned() { return g1_rem_set()->cardsScanned(); } +bool G1CollectedHeap::is_potential_eager_reclaim_candidate(HeapRegion* r) const { + // We don't nominate objects with many remembered set entries, on + // the assumption that such objects are likely still live. + HeapRegionRemSet* rem_set = r->rem_set(); + + return G1EagerReclaimHumongousObjectsWithStaleRefs ? + rem_set->occupancy_less_or_equal_than(G1RSetSparseRegionEntries) : + G1EagerReclaimHumongousObjects && rem_set->is_empty(); +} + class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure { private: size_t _total_humongous; @@ -3732,22 +3569,19 @@ class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure { DirtyCardQueue _dcq; - // We don't nominate objects with many remembered set entries, on - // the assumption that such objects are likely still live. - bool is_remset_small(HeapRegion* region) const { - HeapRegionRemSet* const rset = region->rem_set(); - return G1EagerReclaimHumongousObjectsWithStaleRefs - ? rset->occupancy_less_or_equal_than(G1RSetSparseRegionEntries) - : rset->is_empty(); - } - bool is_typeArray_region(HeapRegion* region) const { return oop(region->bottom())->is_typeArray(); } - bool humongous_region_is_candidate(G1CollectedHeap* heap, HeapRegion* region) const { + bool humongous_region_is_candidate(G1CollectedHeap* g1h, HeapRegion* region) const { assert(region->startsHumongous(), "Must start a humongous object"); + // If we do not have a complete remembered set for the region, then we can + // not be sure that we have all references to it. + if (!region->rem_set()->is_complete()) { + return false; + } + // Candidate selection must satisfy the following constraints // while concurrent marking is in progress: // @@ -3784,7 +3618,8 @@ class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure { // important use case for eager reclaim, and this special handling // may reduce needed headroom. - return is_typeArray_region(region) && is_remset_small(region); + return is_typeArray_region(region) && + g1h->is_potential_eager_reclaim_candidate(region); } public: @@ -3832,7 +3667,15 @@ class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure { assert(hrrs.n_yielded() == r->rem_set()->occupied(), err_msg("Remembered set hash maps out of sync, cur: " SIZE_FORMAT " entries, next: " SIZE_FORMAT " entries", hrrs.n_yielded(), r->rem_set()->occupied())); - r->rem_set()->clear_locked(); + // We should only clear the card based remembered set here as we will not + // implicitly rebuild anything else during eager reclaim. 
Note that at the moment + // (and probably never) we do not enter this path if there are other kind of + // remembered sets for this region. + r->rem_set()->clear_locked(true /* only_cardset */); + // Clear_locked() above sets the state to Empty. However we want to continue + // collecting remembered set entries for humongous regions that were not + // reclaimed. + r->rem_set()->set_state_complete(); } assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty."); } @@ -3911,28 +3754,6 @@ class VerifyRegionRemSetClosure : public HeapRegionClosure { } }; -#ifdef ASSERT -class VerifyCSetClosure: public HeapRegionClosure { -public: - bool doHeapRegion(HeapRegion* hr) { - // Here we check that the CSet region's RSet is ready for parallel - // iteration. The fields that we'll verify are only manipulated - // when the region is part of a CSet and is collected. Afterwards, - // we reset these fields when we clear the region's RSet (when the - // region is freed) so they are ready when the region is - // re-allocated. The only exception to this is if there's an - // evacuation failure and instead of freeing the region we leave - // it in the heap. In that case, we reset these fields during - // evacuation failure handling. - guarantee(hr->rem_set()->verify_ready_for_par_iteration(), "verification"); - - // Here's a good place to add any other checks we'd like to - // perform on CSet regions. - return false; - } -}; -#endif // ASSERT - #if TASKQUEUE_STATS void G1CollectedHeap::print_taskqueue_stats_hdr(outputStream* const st) { st->print_raw_cr("GC Task Stats"); @@ -4103,8 +3924,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { IsGCActiveMark x; gc_prologue(false); - increment_total_collections(false /* full gc */); - increment_gc_time_stamp(); if (VerifyRememberedSets) { if (!VerifySilently) { @@ -4214,11 +4033,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { } } -#ifdef ASSERT - VerifyCSetClosure cl; - collection_set_iterate(&cl); -#endif // ASSERT - setup_surviving_young_words(); // Initialize the GC alloc regions. @@ -4319,8 +4133,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS; g1_policy()->record_collection_pause_end(pause_time_ms, evacuation_info); - MemoryService::track_memory_usage(); - // In prepare_for_verify() below we'll need to scan the deferred // update buffers to bring the RSets up-to-date if // G1HRRSFlushLogBuffersOnVerify has been set. While scanning @@ -4552,9 +4364,8 @@ void G1CollectedHeap::preserve_mark_if_necessary(oop obj, markOop m) { void G1ParCopyHelper::mark_object(oop obj) { assert(!_g1->heap_region_containing(obj)->in_collection_set(), "should not mark objects in the CSet"); - // We know that the object is not moving so it's safe to read its size. 
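For readability, here is the eager-reclaim candidate test assembled from the pieces added above; this is a restatement of patch code, not new behavior. A humongous region is only nominated when its remembered set is complete (otherwise not all references to it are known), it is a typeArray (no oop fields to keep up to date), and its remembered set is empty or sparse per is_potential_eager_reclaim_candidate():

    // Restated from the hunks above (illustrative consolidation only).
    bool humongous_region_is_candidate(G1CollectedHeap* g1h, HeapRegion* region) {
      assert(region->startsHumongous(), "Must start a humongous object");
      return region->rem_set()->is_complete() &&                  // all refs to it are tracked
             oop(region->bottom())->is_typeArray() &&             // primitive arrays only
             g1h->is_potential_eager_reclaim_candidate(region);   // remset empty or sparse
    }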
- _cm->grayRoot(obj, (size_t) obj->size(), _worker_id); + _cm->mark_in_next_bitmap(_worker_id, obj); } void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) { @@ -4564,17 +4375,16 @@ void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) { assert(_g1->heap_region_containing(from_obj)->in_collection_set(), "from obj should be in the CSet"); assert(!_g1->heap_region_containing(to_obj)->in_collection_set(), "should not mark objects in the CSet"); - // The object might be in the process of being copied by another // worker so we cannot trust that its to-space image is // well-formed. So we have to read its size from its from-space // image which we know should not be changing. - _cm->grayRoot(to_obj, (size_t) from_obj->size(), _worker_id); + _cm->mark_in_next_bitmap(_worker_id, to_obj, from_obj->size()); } template void G1ParCopyHelper::do_klass_barrier(T* p, oop new_obj) { - if (_g1->heap_region_containing_raw(new_obj)->is_young()) { + if (_g1->heap_region_containing(new_obj)->is_young()) { _scanned_klass->record_modified_oops(); } } @@ -4590,8 +4400,6 @@ void G1ParCopyClosure::do_oop_work(T* p) { oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - assert(_worker_id == _par_scan_state->queue_num(), "sanity"); - const InCSetState state = _g1->in_cset_state(obj); if (state.is_in_cset()) { oop forwardee; @@ -4624,7 +4432,7 @@ void G1ParCopyClosure::do_oop_work(T* p) { } if (barrier == G1BarrierEvac) { - _par_scan_state->update_rs(_from, p, _worker_id); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } } @@ -4834,8 +4642,7 @@ public: trace_metadata, worker_id); - G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss); - _root_processor->scan_remembered_sets(&push_heap_rs_cl, + _root_processor->scan_remembered_sets(pss, weak_root_cl, worker_id); pss->end_strong_roots(); @@ -5968,30 +5775,17 @@ void G1CollectedHeap::free_region(HeapRegion* hr, _cg1r->hot_card_cache()->reset_card_counts(hr); } hr->hr_clear(par, true /* clear_space */, locked /* locked */); + _g1_policy->remset_tracker()->update_at_free(hr); free_list->add_ordered(hr); } void G1CollectedHeap::free_humongous_region(HeapRegion* hr, FreeRegionList* free_list, bool par) { - assert(hr->startsHumongous(), "this is only for starts humongous regions"); + assert(hr->isHumongous(), "this is only for humongous regions"); assert(free_list != NULL, "pre-condition"); - - size_t hr_capacity = hr->capacity(); - // We need to read this before we make the region non-humongous, - // otherwise the information will be gone. 
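With this change free_humongous_region() frees only the region it is handed; callers that want to free a whole humongous object walk the chain themselves via next_region_in_humongous(), as in the following pattern (the same loop appears below in the eager-reclaim closure and in g1MarkSweep.cpp):

    // Caller-side pattern after this change (sketch; g1h and free_list assumed in scope).
    HeapRegion* r = start_region;                            // the startsHumongous region
    do {
      HeapRegion* next = g1h->next_region_in_humongous(r);   // NULL after the last region
      g1h->free_humongous_region(r, free_list, false /* par */);
      r = next;
    } while (r != NULL);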
- uint last_index = hr->last_hc_index(); hr->clear_humongous(); free_region(hr, free_list, par); - - uint i = hr->hrm_index() + 1; - while (i < last_index) { - HeapRegion* curr_hr = region_at(i); - assert(curr_hr->continuesHumongous(), "invariant"); - curr_hr->clear_humongous(); - free_region(curr_hr, free_list, par); - i += 1; - } } void G1CollectedHeap::remove_from_old_sets(const HeapRegionSetCount& old_regions_removed, @@ -6155,9 +5949,7 @@ public: bool failures() { return _failures; } virtual bool doHeapRegion(HeapRegion* hr) { - if (hr->continuesHumongous()) return false; - - bool result = _g1h->verify_bitmaps(_caller, hr); + bool result = _g1h->verify_bitmaps(_caller, hr); if (!result) { _failures = true; } @@ -6439,11 +6231,10 @@ class G1FreeHumongousRegionClosure : public HeapRegionClosure { !r->rem_set()->is_empty()) { if (G1TraceEagerReclaimHumongousObjects) { - gclog_or_tty->print_cr("Live humongous region %u size " SIZE_FORMAT " start " PTR_FORMAT " length %u with remset " SIZE_FORMAT " code roots " SIZE_FORMAT " is marked %d reclaim candidate %d type array %d", + gclog_or_tty->print_cr("Live humongous region %u object size " SIZE_FORMAT " start " PTR_FORMAT " with remset " SIZE_FORMAT " code roots " SIZE_FORMAT " is marked %d reclaim candidate %d type array %d", region_idx, (size_t)obj->size()*HeapWordSize, p2i(r->bottom()), - r->region_num(), r->rem_set()->occupied(), r->rem_set()->strong_code_roots_list_length(), next_bitmap->isMarked(r->bottom()), @@ -6461,11 +6252,10 @@ p2i(r->bottom()))); if (G1TraceEagerReclaimHumongousObjects) { - gclog_or_tty->print_cr("Dead humongous region %u size " SIZE_FORMAT " start " PTR_FORMAT " length %u with remset " SIZE_FORMAT " code roots " SIZE_FORMAT " is marked %d reclaim candidate %d type array %d", + gclog_or_tty->print_cr("Dead humongous region %u object size " SIZE_FORMAT " start " PTR_FORMAT " with remset " SIZE_FORMAT " code roots " SIZE_FORMAT " is marked %d reclaim candidate %d type array %d", region_idx, (size_t)obj->size()*HeapWordSize, p2i(r->bottom()), - r->region_num(), r->rem_set()->occupied(), r->rem_set()->strong_code_roots_list_length(), next_bitmap->isMarked(r->bottom()), @@ -6473,14 +6263,15 @@ obj->is_typeArray() ); } - // Need to clear mark bit of the humongous object if already set. - if (next_bitmap->isMarked(r->bottom())) { - next_bitmap->clear(r->bottom()); - } - _freed_bytes += r->used(); - r->set_containing_set(NULL); - _humongous_regions_removed.increment(1u, r->capacity()); - g1h->free_humongous_region(r, _free_region_list, false); + g1h->concurrent_mark()->humongous_object_eagerly_reclaimed(r); + do { + HeapRegion* next = g1h->next_region_in_humongous(r); + _freed_bytes += r->used(); + r->set_containing_set(NULL); + _humongous_regions_removed.increment(1u, r->capacity()); + g1h->free_humongous_region(r, _free_region_list, false); + r = next; + } while (r != NULL); return false; } @@ -6698,10 +6489,8 @@ public: } bool doHeapRegion(HeapRegion* r) { - if (r->continuesHumongous()) { - return false; - } - + // After full GC, no region should have a remembered set.
+ r->rem_set()->clear(true); if (r->is_empty()) { // Add free regions to the free list r->set_free(); @@ -6777,6 +6566,7 @@ HeapRegion* G1CollectedHeap::new_mutator_alloc_region(size_t word_size, set_region_short_lived_locked(new_alloc_region); _hr_printer.alloc(new_alloc_region, G1HRPrinter::Eden, young_list_full); check_bitmaps("Mutator Region Allocation", new_alloc_region); + _g1_policy->remset_tracker()->update_at_allocate(new_alloc_region); return new_alloc_region; } } @@ -6841,6 +6631,7 @@ HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size, _hr_printer.alloc(new_alloc_region, G1HRPrinter::Old); check_bitmaps("Old Region Allocation", new_alloc_region); } + _g1_policy->remset_tracker()->update_at_allocate(new_alloc_region); bool during_im = g1_policy()->during_initial_mark_pause(); new_alloc_region->note_start_of_copying(during_im); return new_alloc_region; @@ -6883,14 +6674,10 @@ public: _old_count(), _humongous_count(), _free_count(){ } bool doHeapRegion(HeapRegion* hr) { - if (hr->continuesHumongous()) { - return false; - } - if (hr->is_young()) { // TODO - } else if (hr->startsHumongous()) { - assert(hr->containing_set() == _humongous_set, err_msg("Heap region %u is starts humongous but not in humongous set.", hr->hrm_index())); + } else if (hr->isHumongous()) { + assert(hr->containing_set() == _humongous_set, err_msg("Heap region %u is humongous but not in humongous set.", hr->hrm_index())); _humongous_count.increment(1u, hr->capacity()); } else if (hr->is_empty()) { assert(_hrm->is_free(hr), err_msg("Heap region %u is empty but not on the free list.", hr->hrm_index())); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp index d0ec5a773..c6e3c5d7b 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -65,6 +65,7 @@ class SpaceClosure; class CompactibleSpaceClosure; class Space; class G1CollectorPolicy; +class G1FullGCScope; class GenRemSet; class G1RemSet; class HeapRegionRemSetIterator; @@ -493,6 +494,9 @@ protected: // NULL if unsuccessful. HeapWord* humongous_obj_allocate(size_t word_size, AllocationContext_t context); + // Returns the number of regions the humongous object of the given word size + // requires. + // The following two methods, allocate_new_tlab() and // mem_allocate(), are the two main entry points from the runtime // into the G1's allocation routines. They have the following @@ -634,7 +638,19 @@ protected: HeapWord* satisfy_failed_allocation(size_t word_size, AllocationContext_t context, bool* succeeded); +private: + // Internal helpers used during full GC to split it up to + // increase readability. + void do_full_collection_inner(G1FullGCScope* scope, size_t word_size); + void abort_concurrent_cycle(); + void verify_before_full_collection(bool explicit_gc); + void prepare_heap_for_full_collection(); + void prepare_heap_for_mutators(bool explicit_gc, size_t word_size); + void abort_refinement(); + void verify_after_full_collection(); + void print_heap_after_full_collection(); +protected: // Attempting to expand the heap sufficiently // to support an allocation of the given "word_size". 
If // successful, perform the allocation and return the address of the @@ -652,6 +668,7 @@ protected: void verify_numa_regions(const char* desc); public: + static size_t humongous_obj_size_in_regions(size_t word_size); G1Allocator* allocator() { return _allocator; @@ -684,6 +701,9 @@ public: virtual void gc_prologue(bool full); virtual void gc_epilogue(bool full); + // Does the given region fulfill remembered set based eager reclaim candidate requirements? + bool is_potential_eager_reclaim_candidate(HeapRegion* r) const; + // Modify the reclaim candidate set and test for presence. // These are only valid for starts_humongous regions. inline void set_humongous_reclaim_candidate(uint region, bool value); @@ -1096,9 +1116,11 @@ public: // continues humongous regions too. void reset_gc_time_stamps(HeapRegion* hr); - void iterate_dirty_card_closure(CardTableEntryClosure* cl, - DirtyCardQueue* into_cset_dcq, - bool concurrent, uint worker_i); + // Apply the given closure on all cards in the Hot Card Cache, emptying it. + void iterate_hcc_closure(CardTableEntryClosure* cl, uint worker_i); + + // Apply the given closure on all cards in the Dirty Card Queue Set, emptying it. + void iterate_dirty_card_closure(CardTableEntryClosure* cl, uint worker_i); // The shared block offset table array. G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; } @@ -1248,7 +1270,6 @@ public: void prepend_to_freelist(FreeRegionList* list); void decrement_summary_bytes(size_t bytes); - // Returns "TRUE" iff "p" points into the committed areas of the heap. virtual bool is_in(const void* p) const; #ifdef ASSERT // Returns whether p is in one of the available areas of the heap. Slow but @@ -1262,6 +1283,8 @@ public: inline bool is_in_cset(oop obj); + inline bool is_in_cset(HeapWord* addr); + inline bool is_in_cset_or_humongous(const oop obj); private: @@ -1320,6 +1343,10 @@ public: // Return the region with the given index. It assumes the index is valid. inline HeapRegion* region_at(uint index) const; + // Return the next region (by index) that is part of the same + // humongous object that hr is part of. + inline HeapRegion* next_region_in_humongous(HeapRegion* hr) const; + // Calculate the region index of the given address. Given address must be // within the heap. inline uint addr_to_region(HeapWord* addr) const; @@ -1363,10 +1390,6 @@ public: // space containing a given address, or else returns NULL. virtual Space* space_containing(const void* addr) const; - // Returns the HeapRegion that contains addr. addr must not be NULL. - template - inline HeapRegion* heap_region_containing_raw(const T addr) const; - // Returns the HeapRegion that contains addr. addr must not be NULL. // If addr is within a humongous continues region, it returns its humongous start region. template @@ -1514,9 +1537,7 @@ public: // iff a) it was not allocated since the last mark and b) it // is not marked. 
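The definition of humongous_obj_size_in_regions() declared above is not part of this excerpt; based on its name and G1's region arithmetic it presumably rounds the object size up to whole regions, along these lines (hypothetical):

    // Hypothetical definition, not shown in this patch excerpt.
    size_t G1CollectedHeap::humongous_obj_size_in_regions(size_t word_size) {
      assert(word_size > 0, "pre-condition");
      // Round up to a whole number of regions.
      return (word_size + HeapRegion::GrainWords - 1) / HeapRegion::GrainWords;
    }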
bool is_obj_dead(const oop obj, const HeapRegion* hr) const { - return - !hr->obj_allocated_since_prev_marking(obj) && - !isMarkedPrev(obj); + return hr->is_obj_dead(obj, _cm->prevMarkBitMap()); } // This function returns true when an object has been diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp index ead5b757a..3be033276 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp @@ -80,6 +80,10 @@ inline AllocationContextStats& G1CollectedHeap::allocation_context_stats() { // Return the region with the given index. It assumes the index is valid. inline HeapRegion* G1CollectedHeap::region_at(uint index) const { return _hrm.at(index); } +inline HeapRegion* G1CollectedHeap::next_region_in_humongous(HeapRegion* hr) const { + return _hrm.next_region_in_humongous(hr); +} + inline uint G1CollectedHeap::addr_to_region(HeapWord* addr) const { assert(is_in_reserved(addr), err_msg("Cannot calculate region index for address " PTR_FORMAT " that is outside of the heap [" PTR_FORMAT ", " PTR_FORMAT ")", @@ -92,7 +96,7 @@ inline HeapWord* G1CollectedHeap::bottom_addr_for_region(uint index) const { } template -inline HeapRegion* G1CollectedHeap::heap_region_containing_raw(const T addr) const { +inline HeapRegion* G1CollectedHeap::heap_region_containing(const T addr) const { assert(addr != NULL, "invariant"); assert(is_in_g1_reserved((const void*) addr), err_msg("Address " PTR_FORMAT " is outside of the heap ranging from [" PTR_FORMAT " to " PTR_FORMAT ")", @@ -100,15 +104,6 @@ inline HeapRegion* G1CollectedHeap::heap_region_containing_raw(const T addr) con return _hrm.addr_to_region((HeapWord*) addr); } -template -inline HeapRegion* G1CollectedHeap::heap_region_containing(const T addr) const { - HeapRegion* hr = heap_region_containing_raw(addr); - if (hr->continuesHumongous()) { - return hr->humongous_start_region(); - } - return hr; -} - inline void G1CollectedHeap::reset_gc_time_stamp() { _gc_time_stamp = 0; OrderAccess::fence(); @@ -197,9 +192,9 @@ G1CollectedHeap::dirty_young_block(HeapWord* start, size_t word_size) { assert_heap_not_locked(); // Assign the containing region to containing_hr so that we don't - // have to keep calling heap_region_containing_raw() in the + // have to keep calling heap_region_containing in the // asserts below. - DEBUG_ONLY(HeapRegion* containing_hr = heap_region_containing_raw(start);) + DEBUG_ONLY(HeapRegion* containing_hr = heap_region_containing(start);) assert(word_size > 0, "pre-condition"); assert(containing_hr->is_in(start), "it should contain start"); assert(containing_hr->is_young(), "it should be young"); @@ -224,17 +219,20 @@ inline bool G1CollectedHeap::isMarkedNext(oop obj) const { return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj); } -// This is a fast test on whether a reference points into the -// collection set or not. Assume that the reference -// points into the heap. inline bool G1CollectedHeap::is_in_cset(oop obj) { - bool ret = _in_cset_fast_test.is_in_cset((HeapWord*)obj); + bool ret = is_in_cset((HeapWord*)obj); // let's make sure the result is consistent with what the slower // test returns assert( ret || !obj_in_cs(obj), "sanity"); assert(!ret || obj_in_cs(obj), "sanity"); return ret; } +// This is a fast test on whether a reference points into the +// collection set or not. Assume that the reference +// points into the heap. 
+inline bool G1CollectedHeap::is_in_cset(HeapWord* addr) { + return _in_cset_fast_test.is_in_cset(addr); +} bool G1CollectedHeap::is_in_cset_or_humongous(const oop obj) { return _in_cset_fast_test.is_in_cset_or_humongous((HeapWord*)obj); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp index 0acdd2b69..dc05454ad 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp @@ -85,6 +85,7 @@ G1CollectorPolicy::G1CollectorPolicy() : _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _stop_world_start(0.0), + _remset_tracker(), _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), @@ -95,6 +96,7 @@ G1CollectorPolicy::G1CollectorPolicy() : _prev_collection_pause_end_ms(0.0), _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)), _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), @@ -131,6 +133,7 @@ G1CollectorPolicy::G1CollectorPolicy() : _initiate_conc_mark_if_possible(false), _during_initial_mark_pause(false), _last_young_gc(false), + _mixed_gc_pending(false), _last_gc_was_young(false), _eden_used_bytes_before_gc(0), @@ -218,6 +221,7 @@ G1CollectorPolicy::G1CollectorPolicy() : _rs_length_diff_seq->add(rs_length_diff_defaults[index]); _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]); + _cost_scan_hcc_seq->add(0.0); _young_cards_per_entry_ratio_seq->add( young_cards_per_entry_ratio_defaults[index]); _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]); @@ -804,6 +808,7 @@ void G1CollectorPolicy::record_full_collection_start() { record_heap_size_info_at_start(true /* full */); // Release the future to-space so that it is available for compaction into. _g1->set_full_collection(); + _collectionSetChooser->clear(); } void G1CollectorPolicy::record_full_collection_end() { @@ -822,7 +827,7 @@ void G1CollectorPolicy::record_full_collection_end() { // "Nuke" the heuristics that control the young/mixed GC // transitions and make sure we start with young GCs after the Full GC. set_gcs_are_young(true); - _last_young_gc = false; + set_last_young_gc(false); clear_initiate_conc_mark_if_possible(); clear_during_initial_mark_pause(); _in_marking_window = false; @@ -837,7 +842,6 @@ void G1CollectorPolicy::record_full_collection_end() { // Reset survivors SurvRateGroup. 
_survivor_surv_rate_group->reset(); update_young_list_target_length(); - _collectionSetChooser->clear(); } void G1CollectorPolicy::record_stop_world_start() { @@ -903,7 +907,7 @@ void G1CollectorPolicy::record_concurrent_mark_cleanup_start() { } void G1CollectorPolicy::record_concurrent_mark_cleanup_completed() { - _last_young_gc = true; + set_last_young_gc(_mixed_gc_pending); _in_marking_window = false; } @@ -915,6 +919,8 @@ void G1CollectorPolicy::record_concurrent_pause() { } bool G1CollectorPolicy::about_to_start_mixed_phase() const { + guarantee(_g1->concurrent_mark()->cmThread()->during_cycle() || !_mixed_gc_pending, + "Pending mixed phase when CM is idle!"); return _g1->concurrent_mark()->cmThread()->during_cycle() || _last_young_gc; } @@ -1066,11 +1072,11 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua // This is supposed to to be the "last young GC" before we start // doing mixed GCs. Here we decide whether to start mixed GCs or not. assert(!last_pause_included_initial_mark, "The last young GC is not allowed to be an initial mark GC"); - if (next_gc_should_be_mixed("start mixed GCs", - "do not start mixed GCs")) { - set_gcs_are_young(false); - } - _last_young_gc = false; + // This has been the "last young GC" before we start doing mixed GCs. We already + // decided to start mixed GCs much earlier, so there is nothing to do except + // advancing the state. + set_gcs_are_young(false); + set_last_young_gc(false); } if (!_last_gc_was_young) { @@ -1080,6 +1086,7 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua if (!next_gc_should_be_mixed("continue mixed GCs", "do not continue mixed GCs")) { set_gcs_are_young(true); + clear_collection_set_candidates(); } } @@ -1088,10 +1095,12 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua if (update_stats) { double cost_per_card_ms = 0.0; + double cost_scan_hcc = phase_times()->average_time_ms(G1GCPhaseTimes::ScanHCC); if (_pending_cards > 0) { - cost_per_card_ms = phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) / (double) _pending_cards; + cost_per_card_ms = (phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) - cost_scan_hcc) / (double) _pending_cards; _cost_per_card_ms_seq->add(cost_per_card_ms); } + _cost_scan_hcc_seq->add(cost_scan_hcc); size_t cards_scanned = _g1->cards_scanned(); @@ -1190,8 +1199,23 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua // Note that _mmu_tracker->max_gc_time() returns the time in seconds. 
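The hunk that follows changes the refinement-control arithmetic: time spent draining the hot card cache (ScanHCC) is now measured separately, and since it is not work the concurrent refinement threads can take over, it is subtracted from both the observed Update RS time and its goal. Stripped of the ergonomics logging, the computation is equivalent to:

    // Equivalent arithmetic of the hunk below (logging omitted; sketch only).
    double goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS *
                     G1RSetUpdatingPauseTimePercent / 100.0;
    double hcc_ms  = phase_times()->average_time_ms(G1GCPhaseTimes::ScanHCC);
    goal_ms = MAX2(goal_ms - hcc_ms, 0.0);   // HCC scan time is fixed overhead
    adjust_concurrent_refinement(
        phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) - hcc_ms,
        phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS),
        goal_ms);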
double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; - adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS), - phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), update_rs_time_goal_ms); + double scan_hcc_time_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanHCC); + if (update_rs_time_goal_ms < scan_hcc_time_ms) { + ergo_verbose2(ErgoTiming, + "adjust concurrent refinement thresholds", + ergo_format_reason("Scanning the HCC expected to take longer than Update RS time goal") + ergo_format_ms("Update RS time goal") + ergo_format_ms("Scan HCC time"), + update_rs_time_goal_ms, + scan_hcc_time_ms); + + update_rs_time_goal_ms = 0; + } else { + update_rs_time_goal_ms -= scan_hcc_time_ms; + } + adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) - scan_hcc_time_ms, + phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), + update_rs_time_goal_ms); _collectionSetChooser->verify(); } @@ -1524,7 +1548,12 @@ G1CollectorPolicy::decide_on_conc_mark_initiation() { // Initiate a user requested initial mark. An initial mark must be young only // GC, so the collector state must be updated to reflect this. set_gcs_are_young(true); - _last_young_gc = false; + set_last_young_gc(false); + // We might have ended up coming here about to start a mixed phase with a collection set + // active. The following remark might change the "evacuation efficiency" of + // the regions in this set, leading to failing asserts later. + // Since the concurrent cycle will recreate the collection set anyway, simply drop it here. + clear_collection_set_candidates(); initiate_conc_mark(); ergo_verbose0(ErgoConcCycles, "initiate concurrent cycle", @@ -1593,6 +1622,10 @@ public: // before we fill them up). if (_cset_updater.should_add(r) && !_g1h->is_old_gc_alloc_region(r)) { _cset_updater.add_region(r); + } else if (r->is_old()) { + // Can clean out the remembered sets of all regions that we did not choose but + // we created the remembered set for. + r->rem_set()->clear(true); } } return false; @@ -1657,6 +1690,12 @@ G1CollectorPolicy::record_concurrent_mark_cleanup_end(int no_of_gc_threads) { _collectionSetChooser->sort_regions(); + bool mixed_gc_pending = next_gc_should_be_mixed("request mixed gcs", "request young-only gcs"); + if (!mixed_gc_pending) { + clear_collection_set_candidates(); + } + set_mixed_gc_pending(mixed_gc_pending); + double end_sec = os::elapsedTime(); double elapsed_time_ms = (end_sec - _mark_cleanup_start_sec) * 1000.0; _concurrent_mark_cleanup_times_ms->add(elapsed_time_ms); @@ -1665,6 +1704,21 @@ _mmu_tracker->add_pause(_mark_cleanup_start_sec, end_sec, true); } +class G1ClearCollectionSetCandidateRemSets : public HeapRegionClosure { + virtual bool doHeapRegion(HeapRegion* r) { + r->rem_set()->clear_locked(true /* only_cardset */); + return false; + } +}; + +void G1CollectorPolicy::clear_collection_set_candidates() { + // Clear remembered sets of remaining candidate regions and the actual candidate + // list.
+ G1ClearCollectionSetCandidateRemSets cl; + _collectionSetChooser->iterate(&cl); + _collectionSetChooser->clear(); +} + // Add the heap region at the head of the non-incremental collection set void G1CollectorPolicy::add_old_region_to_cset(HeapRegion* hr) { assert(_inc_cset_build_state == Active, "Precondition"); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp index af5d5d57a..459677783 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp @@ -28,6 +28,7 @@ #include "gc_implementation/g1/collectionSetChooser.hpp" #include "gc_implementation/g1/g1Allocator.hpp" #include "gc_implementation/g1/g1MMUTracker.hpp" +#include "gc_implementation/g1/g1RemSetTrackingPolicy.hpp" #include "memory/collectorPolicy.hpp" // A G1CollectorPolicy makes policy decisions that determine the @@ -176,6 +177,8 @@ private: NumPrevPausesForHeuristics = 10 }; + G1RemSetTrackingPolicy _remset_tracker; + G1MMUTracker* _mmu_tracker; void initialize_alignments(); @@ -236,6 +239,7 @@ private: TruncatedSeq* _rs_length_diff_seq; TruncatedSeq* _cost_per_card_ms_seq; + TruncatedSeq* _cost_scan_hcc_seq; TruncatedSeq* _young_cards_per_entry_ratio_seq; TruncatedSeq* _mixed_cards_per_entry_ratio_seq; TruncatedSeq* _cost_per_entry_ms_seq; @@ -310,6 +314,8 @@ private: volatile double _os_load; double _uncommit_start_time; public: + + G1RemSetTrackingPolicy* remset_tracker() { return &_remset_tracker; } // Accessors void set_region_eden(HeapRegion* hr, int young_index_in_cset) { hr->set_eden(); @@ -357,8 +363,12 @@ public: return get_new_prediction(_cost_per_card_ms_seq); } + double predict_scan_hcc_ms() { + return get_new_prediction(_cost_scan_hcc_seq); + } + double predict_rs_update_time_ms(size_t pending_cards) { - return (double) pending_cards * predict_cost_per_card_ms(); + return (double) pending_cards * predict_cost_per_card_ms() + predict_scan_hcc_ms(); } double predict_young_cards_per_entry_ratio() { @@ -495,6 +505,7 @@ public: jlong collection_pause_end_millis() { return _collection_pause_end_millis; } private: + void clear_collection_set_candidates(); // Statistics kept per GC stoppage, pause or full. TruncatedSeq* _recent_prev_end_times_for_all_gcs_sec; @@ -601,6 +612,7 @@ private: volatile bool _during_initial_mark_pause; bool _last_young_gc; + bool _mixed_gc_pending; // This set of variables tracks the collector efficiency, in order to // determine whether we should initiate a new marking. @@ -608,6 +620,9 @@ private: double _mark_remark_start_sec; double _mark_cleanup_start_sec; + void set_last_young_gc(bool v) { _last_young_gc = v; _mixed_gc_pending = false;} + void set_mixed_gc_pending(bool v) { _mixed_gc_pending = v; } + // Update the young list target length either by setting it to the // desired fixed value or by calculating it using G1's pause // prediction model. 
If no rs_lengths parameter is passed, predict diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.cpp index 167d19c2e..322708f66 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.cpp @@ -56,6 +56,7 @@ const char* G1ErgoVerbose::to_string(int tag) { case ErgoCSetConstruction: return "CSet Construction"; case ErgoConcCycles: return "Concurrent Cycles"; case ErgoMixedGCs: return "Mixed GCs"; + case ErgoTiming: return "Timing"; default: ShouldNotReachHere(); // Keep the Windows compiler happy diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp index b44b4090c..88792d222 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp @@ -70,6 +70,7 @@ typedef enum { ErgoCSetConstruction, ErgoConcCycles, ErgoMixedGCs, + ErgoTiming, ErgoHeuristicNum } ErgoHeuristic; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp index f3930a89d..14bbfc4a5 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp @@ -140,7 +140,7 @@ public: // explicitly and all objects in the CSet are considered // (implicitly) live. So, we won't mark them explicitly and // we'll leave them over NTAMS. - _cm->grayRoot(obj, obj_size, _worker_id, _hr); + _cm->mark_in_next_bitmap(_worker_id, obj); } _marked_bytes += (obj_size * HeapWordSize); obj->set_mark(markOopDesc::prototype()); @@ -207,20 +207,12 @@ public: during_conc_mark); _g1h->check_bitmaps("Self-Forwarding Ptr Removal", hr); - // In the common case (i.e. when there is no evacuation - // failure) we make sure that the following is done when - // the region is freed so that it is "ready-to-go" when it's - // re-allocated. However, when evacuation failure happens, a - // region will remain in the heap and might ultimately be added - // to a CSet in the future. So we have to be careful here and - // make sure the region's RSet is ready for parallel iteration - // whenever this might be required in the future. - hr->rem_set()->reset_for_par_iteration(); hr->reset_bot(); _update_rset_cl.set_region(hr); hr->object_iterate(&rspc); hr->rem_set()->clean_strong_code_roots(hr); + hr->rem_set()->clear_locked(true); hr->note_self_forwarding_removal_end(during_initial_mark, during_conc_mark, diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1FullGCScope.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1FullGCScope.cpp new file mode 100644 index 000000000..1db1ab3b9 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1FullGCScope.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/g1FullGCScope.hpp" +#include "gc_implementation/g1/g1Log.hpp" + +G1FullGCScope* G1FullGCScope::_instance = NULL; + +G1FullGCScope* G1FullGCScope::instance() { + assert(_instance != NULL, "Must be setup already"); + return _instance; +} + +G1FullGCScope::G1FullGCScope(bool explicit_gc, bool clear_soft) : + _rm(), + _explicit_gc(explicit_gc), + _g1h(G1CollectedHeap::heap()), + _svc_marker(SvcGCMarker::FULL), + _timer(), + _tracer(), + _active(), + _cpu_time(G1Log::finer(), true, gclog_or_tty), + _soft_refs(clear_soft, _g1h->collector_policy()), + _memory_stats(true, _g1h->gc_cause()), + _collector_stats(_g1h->g1mm()->full_collection_counters()) { + assert(_instance == NULL, "Only one scope at a time"); + _timer.register_gc_start(); + _tracer.report_gc_start(_g1h->gc_cause(), _timer.gc_start()); + _g1h->pre_full_gc_dump(&_timer); + _g1h->trace_heap_before_gc(&_tracer); + _instance = this; +} + +G1FullGCScope::~G1FullGCScope() { + // We must call G1MonitoringSupport::update_sizes() in the same scoping level + // as an active TraceMemoryManagerStats object (i.e. before the destructor for the + // TraceMemoryManagerStats is called) so that the G1 memory pools are updated + // before any GC notifications are raised. + _g1h->g1mm()->update_sizes(); + _g1h->trace_heap_after_gc(&_tracer); + _g1h->post_full_gc_dump(&_timer); + _timer.register_gc_end(); + _tracer.report_gc_end(_timer.gc_end(), _timer.time_partitions()); + _instance = NULL; +} + +bool G1FullGCScope::is_explicit_gc() { + return _explicit_gc; +} + +bool G1FullGCScope::should_clear_soft_refs() { + return _soft_refs.should_clear(); +} + +STWGCTimer* G1FullGCScope::timer() { + return &_timer; +} + +SerialOldTracer* G1FullGCScope::tracer() { + return &_tracer; +} \ No newline at end of file diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1FullGCScope.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1FullGCScope.hpp new file mode 100644 index 000000000..d22f307f9 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1FullGCScope.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_G1_G1FULLGCSCOPE_HPP +#define SHARE_VM_GC_G1_G1FULLGCSCOPE_HPP + +#include "gc_implementation/g1/g1CollectedHeap.hpp" +#include "gc_implementation/shared/collectorCounters.hpp" +#include "gc_implementation/shared/gcId.hpp" +#include "gc_implementation/shared/gcTrace.hpp" +#include "gc_implementation/shared/gcTraceTime.hpp" +#include "gc_implementation/shared/gcTimer.hpp" +#include "gc_implementation/shared/isGCActiveMark.hpp" +#include "gc_implementation/shared/vmGCOperations.hpp" +#include "memory/allocation.hpp" +#include "services/memoryService.hpp" + +// Class used to group scoped objects used in the Full GC together. +class G1FullGCScope : public StackObj { + ResourceMark _rm; + bool _explicit_gc; + G1CollectedHeap* _g1h; + SvcGCMarker _svc_marker; + STWGCTimer _timer; + SerialOldTracer _tracer; + IsGCActiveMark _active; + TraceCPUTime _cpu_time; + ClearedAllSoftRefs _soft_refs; + TraceCollectorStats _collector_stats; + TraceMemoryManagerStats _memory_stats; + + // Singleton instance. + static G1FullGCScope* _instance; +public: + static G1FullGCScope* instance(); + + G1FullGCScope(bool explicit_gc, bool clear_soft); + ~G1FullGCScope(); + + bool is_explicit_gc(); + bool should_clear_soft_refs(); + + STWGCTimer* timer(); + SerialOldTracer* tracer(); +}; + +#endif //SHARE_VM_GC_G1_G1FULLGCSCOPE_HPP diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp index 185b7d67e..e8310e245 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "gc_implementation/g1/concurrentG1Refine.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1GCPhaseTimes.hpp" #include "gc_implementation/g1/g1Log.hpp" @@ -270,6 +271,8 @@ G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) : _gc_par_phases[SATBFiltering] = new WorkerDataArray(max_gc_threads, "SATB Filtering (ms)", true, G1Log::LevelFinest, 3); _gc_par_phases[UpdateRS] = new WorkerDataArray(max_gc_threads, "Update RS (ms)", true, G1Log::LevelFiner, 2); + _gc_par_phases[ScanHCC] = new WorkerDataArray(max_gc_threads, "Scan HCC (ms)", true, G1Log::LevelFiner, 3); + _gc_par_phases[ScanHCC]->set_enabled(ConcurrentG1Refine::hot_card_cache_enabled()); _gc_par_phases[ScanRS] = new WorkerDataArray(max_gc_threads, "Scan RS (ms)", true, G1Log::LevelFiner, 2); _gc_par_phases[CodeRoots] = new WorkerDataArray(max_gc_threads, "Code Root Scanning (ms)", true, G1Log::LevelFiner, 2); _gc_par_phases[ObjCopy] = new WorkerDataArray(max_gc_threads, "Object Copy (ms)", true, G1Log::LevelFiner, 2); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp index 56f9cb741..5b5f8a6c7 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp @@ -57,6 +57,7 @@ class G1GCPhaseTimes : public CHeapObj { WeakCLDRoots, SATBFiltering, UpdateRS, + ScanHCC, ScanRS, CodeRoots, ObjCopy, diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1HRPrinter.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1HRPrinter.cpp index ac56309d0..751033ee8 100644 --- 
a/hotspot/src/share/vm/gc_implementation/g1/g1HRPrinter.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1HRPrinter.cpp @@ -53,7 +53,6 @@ const char* G1HRPrinter::region_type_name(RegionType type) { case Eden: return "Eden"; case Survivor: return "Survivor"; case Old: return "Old"; - case SingleHumongous: return "SingleH"; case StartsHumongous: return "StartsH"; case ContinuesHumongous: return "ContinuesH"; default: ShouldNotReachHere(); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1HRPrinter.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1HRPrinter.hpp index 425cbaca2..589031b0f 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1HRPrinter.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1HRPrinter.hpp @@ -50,7 +50,6 @@ public: Eden, Survivor, Old, - SingleHumongous, StartsHumongous, ContinuesHumongous } RegionType; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp index f1f807c67..3aeda5af8 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp @@ -26,7 +26,6 @@ #include "gc_implementation/g1/dirtyCardQueue.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1HotCardCache.hpp" -#include "gc_implementation/g1/g1RemSet.hpp" #include "runtime/atomic.hpp" G1HotCardCache::G1HotCardCache(G1CollectedHeap *g1h): @@ -80,9 +79,7 @@ jbyte* G1HotCardCache::insert(jbyte* card_ptr) { return (previous_ptr == current_ptr) ? previous_ptr : card_ptr; } -void G1HotCardCache::drain(uint worker_i, - G1RemSet* g1rs, - DirtyCardQueue* into_cset_dcq) { +void G1HotCardCache::drain(CardTableEntryClosure* cl, uint worker_i) { if (!default_use_cache()) { assert(_hot_cache == NULL, "Logic"); return; @@ -99,22 +96,8 @@ void G1HotCardCache::drain(uint worker_i, for (size_t i = start_idx; i < end_idx; i++) { jbyte* card_ptr = _hot_cache[i]; if (card_ptr != NULL) { - if (g1rs->refine_card(card_ptr, worker_i, true)) { - // The part of the heap spanned by the card contains references - // that point into the current collection set. - // We need to record the card pointer in the DirtyCardQueueSet - // that we use for such cards. - // - // The only time we care about recording cards that contain - // references that point into the collection set is during - // RSet updating while within an evacuation pause. 
- // In this case worker_i should be the id of a GC worker thread - assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); - assert(worker_i < ParallelGCThreads, - err_msg("incorrect worker id: %u", worker_i)); - - into_cset_dcq->enqueue(card_ptr); - } + bool result = cl->do_card_ptr(card_ptr, worker_i); + assert(result, "Closure should always return true"); } else { break; } diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp index b065e36ce..a5ac41b16 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp @@ -32,9 +32,10 @@ #include "runtime/thread.inline.hpp" #include "utilities/globalDefinitions.hpp" +class CardTableEntryClosure; class DirtyCardQueue; class G1CollectedHeap; -class G1RemSet; + class HeapRegion; // An evicting cache of cards that have been logged by the G1 post @@ -82,11 +83,11 @@ class G1HotCardCache: public CHeapObj { // The number of cached cards a thread claims when flushing the cache static const int ClaimChunkSize = 32; - bool default_use_cache() const { + public: + static bool default_use_cache() { return (G1ConcRSLogCacheSize > 0); } - public: G1HotCardCache(G1CollectedHeap* g1h); ~G1HotCardCache(); @@ -111,7 +112,7 @@ class G1HotCardCache: public CHeapObj { // Refine the cards that have delayed as a result of // being in the cache. - void drain(uint worker_i, G1RemSet* g1rs, DirtyCardQueue* into_cset_dcq); + void drain(CardTableEntryClosure* cl, uint worker_i); // Set up for parallel processing of the cards in the hot cache void reset_hot_cache_claimed_index() { diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp index 9ab422405..d6de7d508 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp @@ -29,6 +29,7 @@ #include "classfile/vmSymbols.hpp" #include "code/codeCache.hpp" #include "code/icBuffer.hpp" +#include "gc_implementation/g1/g1FullGCScope.hpp" #include "gc_implementation/g1/g1Log.hpp" #include "gc_implementation/g1/g1MarkSweep.hpp" #include "gc_implementation/g1/g1RootProcessor.hpp" @@ -222,6 +223,9 @@ uint G1MarkSweep::_active_workers = 0; void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp, bool clear_all_softrefs) { assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint"); + HandleMark hm; // Discard invalid handles created during gc + + COMPILER2_PRESENT(DerivedPointerTable::clear()); _active_workers = G1CollectedHeap::heap()->workers()->active_workers(); @@ -302,6 +306,9 @@ void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp, } } + // Now update the derived pointers. + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + // "free at last gc" is calculated from these. // CHF: cheating for now!!! 
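Stepping back to the G1HotCardCache::drain() rework above: instead of refining cards inline and enqueuing into-cset cards itself, drain() now just applies a caller-supplied CardTableEntryClosure to every cached card. A minimal closure satisfying the new contract might look like this (hypothetical example; the concrete closures live in the remembered set code, not in this excerpt):

    // Hypothetical closure for the new drain(CardTableEntryClosure*, uint) API.
    class ExampleCardClosure : public CardTableEntryClosure {
    public:
      virtual bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
        // ... refine or record the card here ...
        return true;  // must be true: drain() asserts the closure never aborts
      }
    };

    // Usage via the new heap entry point added in this patch:
    //   ExampleCardClosure cl;
    //   G1CollectedHeap::heap()->iterate_hcc_closure(&cl, worker_i);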
// Universe::set_heap_capacity_at_last_gc(Universe::heap()->capacity()); @@ -314,6 +321,14 @@ void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp, GenMarkSweep::_ref_processor = NULL; } +STWGCTimer* G1MarkSweep::gc_timer() { + return G1FullGCScope::instance()->timer(); +} + +SerialOldTracer* G1MarkSweep::gc_tracer() { + return G1FullGCScope::instance()->tracer(); +} + void G1MarkSweep::run_task(AbstractGangTask* task) { G1CollectedHeap::heap()->workers()->run_task(task); } @@ -567,11 +582,14 @@ protected: protected: void free_humongous_region(HeapRegion* hr) { FreeRegionList dummy_free_list("Dummy Free List for G1MarkSweep"); - assert(hr->startsHumongous(), - "Only the start of a humongous region should be freed."); - hr->set_containing_set(NULL); - _humongous_regions_removed.increment(1u, hr->capacity()); - G1CollectedHeap::heap()->free_humongous_region(hr, &dummy_free_list, false); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + do { + HeapRegion* next = g1h->next_region_in_humongous(hr); + hr->set_containing_set(NULL); + _humongous_regions_removed.increment(1u, hr->capacity()); + g1h->free_humongous_region(hr, &dummy_free_list, false); + hr = next; + } while (hr != NULL); dummy_free_list.remove_all(); } @@ -772,8 +790,8 @@ public: } else { assert(hr->is_empty(), "Should have been cleared in phase 2."); } - hr->reset_during_compaction(); } + hr->reset_during_compaction(); } else { hr->compact(); } @@ -813,6 +831,7 @@ public: } const GrowableArray* marked_huge_regions = _cps->cp_at(worker_id)->huge_regions(); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); for (GrowableArrayIterator it = marked_huge_regions->begin(); it != marked_huge_regions->end(); ++it) { @@ -820,7 +839,11 @@ public: oop obj = oop(hr->bottom()); assert(obj->is_gc_marked(), "Must be"); obj->init_mark(); - hr->reset_during_compaction(); + do { + HeapRegion* next = g1h->next_region_in_humongous(hr); + hr->reset_during_compaction(); + hr = next; + } while (hr != NULL); } } @@ -888,9 +911,6 @@ protected: HeapWord* end = hr->end(); FreeRegionList dummy_free_list("Dummy Free List for G1MarkSweep"); - assert(hr->startsHumongous(), - "Only the start of a humongous region should be freed."); - hr->set_containing_set(NULL); _humongous_regions_removed.increment(1u, hr->capacity()); @@ -916,16 +936,13 @@ public: } bool doHeapRegion(HeapRegion* hr) { if (hr->isHumongous()) { - if (hr->startsHumongous()) { - oop obj = oop(hr->bottom()); - if (obj->is_gc_marked()) { + oop obj = oop(hr->humongous_start_region()->bottom()); + if (hr->startsHumongous() && obj->is_gc_marked()) { obj->forward_to(obj); - } else { + } + if (!obj->is_gc_marked()) { free_humongous_region(hr); } - } else { - assert(hr->continuesHumongous(), "Invalid humongous."); - } } else { prepare_for_compaction(hr, hr->end()); } diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp index 0787cfe86..8ace960b3 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp @@ -69,8 +69,8 @@ class G1MarkSweep : AllStatic { static void invoke_at_safepoint(ReferenceProcessor* rp, bool clear_all_softrefs); - static STWGCTimer* gc_timer() { return GenMarkSweep::_gc_timer; } - static SerialOldTracer* gc_tracer() { return GenMarkSweep::_gc_tracer; } + static STWGCTimer* gc_timer(); + static SerialOldTracer* gc_tracer(); private: static bool _parallel_prepare_compact; diff --git 
a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.cpp index 2bdbca586..2e4c1c1ac 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.cpp @@ -28,25 +28,20 @@ #include "gc_implementation/g1/g1ParScanThreadState.hpp" G1ParCopyHelper::G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : - G1ParClosureSuper(g1, par_scan_state), _scanned_klass(NULL), + G1ScanClosureBase(g1, par_scan_state), _scanned_klass(NULL), _worker_id(par_scan_state->queue_num()), _cm(_g1->concurrent_mark()) {} -G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1) : - _g1(g1), _par_scan_state(NULL), _worker_id(UINT_MAX) { } +G1ScanClosureBase::G1ScanClosureBase(G1CollectedHeap* g1) : + _g1(g1), _par_scan_state(NULL) { } -G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : - _g1(g1), _par_scan_state(NULL), - _worker_id(UINT_MAX) { +G1ScanClosureBase::G1ScanClosureBase(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + _g1(g1), _par_scan_state(NULL) { set_par_scan_thread_state(par_scan_state); } -void G1ParClosureSuper::set_par_scan_thread_state(G1ParScanThreadState* par_scan_state) { +void G1ScanClosureBase::set_par_scan_thread_state(G1ParScanThreadState* par_scan_state) { assert(_par_scan_state == NULL, "_par_scan_state must only be set once"); assert(par_scan_state != NULL, "Must set par_scan_state to non-NULL."); _par_scan_state = par_scan_state; - _worker_id = par_scan_state->queue_num(); - - assert(_worker_id < MAX2((uint)ParallelGCThreads, 1u), - err_msg("The given worker id %u must be less than the number of threads %u", _worker_id, MAX2((uint)ParallelGCThreads, 1u))); } diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp index 4f6e655b5..b61a44724 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp @@ -25,6 +25,7 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP #define SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP +#include "gc_implementation/g1/g1InCSetState.hpp" #include "memory/iterator.hpp" #include "oops/markOop.hpp" @@ -48,36 +49,63 @@ public: void set_region(HeapRegion* from) { _from = from; } }; -class G1ParClosureSuper : public OopsInHeapRegionClosure { +class G1ScanClosureBase : public OopsInHeapRegionClosure { protected: G1CollectedHeap* _g1; G1ParScanThreadState* _par_scan_state; - uint _worker_id; + + template + inline void prefetch_and_push(T* p, oop const obj); + + template + inline void handle_non_cset_obj_common(InCSetState const state, T* p, oop const obj); + public: // Initializes the instance, leaving _par_scan_state uninitialized. Must be done // later using the set_par_scan_thread_state() method. 
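Note the two-phase setup this enables: a G1ScanClosureBase subclass can be constructed before the per-worker G1ParScanThreadState exists, and the state is bound exactly once later (the asserts in set_par_scan_thread_state() above enforce both halves). A lifecycle sketch, where G1ScanEvacuatedObjClosure is the concrete subclass declared below and worker_state() is a made-up accessor for the per-worker scan state:

    // Construct early, bind the per-worker state later (sketch).
    G1ScanEvacuatedObjClosure scanner(g1h, rp);    // _par_scan_state is still NULL
    // ... once the worker's G1ParScanThreadState has been created ...
    scanner.set_par_scan_thread_state(worker_state(worker_id));  // only legal once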
- G1ParClosureSuper(G1CollectedHeap* g1); - G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state); + G1ScanClosureBase(G1CollectedHeap* g1); + G1ScanClosureBase(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state); bool apply_to_weak_ref_discovered_field() { return true; } void set_par_scan_thread_state(G1ParScanThreadState* par_scan_state); }; -class G1ParPushHeapRSClosure : public G1ParClosureSuper { -public: - G1ParPushHeapRSClosure(G1CollectedHeap* g1, - G1ParScanThreadState* par_scan_state): - G1ParClosureSuper(g1, par_scan_state) { } +// Used during the Update RS phase to refine remaining cards in the DCQ during garbage collection. +class G1ScanObjsDuringUpdateRSClosure: public G1ScanClosureBase { + uint _worker_i; + bool _has_refs_into_cset; + + public: + G1ScanObjsDuringUpdateRSClosure(G1CollectedHeap* g1h, + G1ParScanThreadState* pss, + uint worker_i) : + G1ScanClosureBase(g1h, pss), _worker_i(worker_i), _has_refs_into_cset(false) { } + + void reset_has_refs_into_cset() { _has_refs_into_cset = false; } + bool has_refs_into_cset() const { return _has_refs_into_cset; } + + template <class T> void do_oop_nv(T* p); + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } + virtual void do_oop(oop* p) { do_oop_nv(p); } +}; + +// Used during the Scan RS phase to scan cards from the remembered set during garbage collection. +class G1ScanObjsDuringScanRSClosure : public G1ScanClosureBase { + public: + G1ScanObjsDuringScanRSClosure(G1CollectedHeap* g1, + G1ParScanThreadState* par_scan_state): + G1ScanClosureBase(g1, par_scan_state) { } template <class T> void do_oop_nv(T* p); virtual void do_oop(oop* p) { do_oop_nv(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } }; -class G1ParScanClosure : public G1ParClosureSuper { +// This closure is applied to the fields of the objects that have just been copied during evacuation. +class G1ScanEvacuatedObjClosure : public G1ScanClosureBase { public: - G1ParScanClosure(G1CollectedHeap* g1, ReferenceProcessor* rp) : - G1ParClosureSuper(g1) { + G1ScanEvacuatedObjClosure(G1CollectedHeap* g1, ReferenceProcessor* rp) : + G1ScanClosureBase(g1) { assert(_ref_processor == NULL, "sanity"); _ref_processor = rp; } @@ -88,11 +116,11 @@ public: }; // Add back base class for metadata -class G1ParCopyHelper : public G1ParClosureSuper { +class G1ParCopyHelper : public G1ScanClosureBase { protected: Klass* _scanned_klass; ConcurrentMark* _cm; - + uint _worker_id; // Cache value from par_scan_state. // Mark the object if it's not already marked. This is used to mark // objects pointed to by roots that are guaranteed not to move // during the GC (i.e., non-CSet objects). It is MT-safe.
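All of the closures above share the same HotSpot devirtualization idiom: the real logic lives in the member template do_oop_nv(), and the virtual do_oop() overloads only forward to it, so that specialized oop iterators can call the non-virtual version and have it inlined while generic code still works through the vtable. A minimal self-contained sketch of the idiom (the closure name and body are illustrative):

    // Illustrative closure showing the do_oop_nv()/do_oop() forwarding pattern.
    class ExampleOopClosure : public ExtendedOopClosure {
     public:
      template <class T> void do_oop_nv(T* p) {    // T is oop or narrowOop
        T heap_oop = oopDesc::load_heap_oop(p);
        if (!oopDesc::is_null(heap_oop)) {
          oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
          // ... process obj ...
        }
      }
      virtual void do_oop(oop* p)       { do_oop_nv(p); }
      virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
    };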
@@ -138,34 +166,6 @@ typedef G1ParCopyClosure G1ParScanAndMar typedef G1ParCopyClosure G1ParScanHeapEvacFailureClosure; -class FilterIntoCSClosure: public ExtendedOopClosure { - G1CollectedHeap* _g1; - OopClosure* _oc; - DirtyCardToOopClosure* _dcto_cl; -public: - FilterIntoCSClosure( DirtyCardToOopClosure* dcto_cl, - G1CollectedHeap* g1, - OopClosure* oc) : - _dcto_cl(dcto_cl), _g1(g1), _oc(oc) { } - - template void do_oop_nv(T* p); - virtual void do_oop(oop* p) { do_oop_nv(p); } - virtual void do_oop(narrowOop* p) { do_oop_nv(p); } - bool apply_to_weak_ref_discovered_field() { return true; } -}; - -class FilterOutOfRegionClosure: public ExtendedOopClosure { - HeapWord* _r_bottom; - HeapWord* _r_end; - OopClosure* _oc; -public: - FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc); - template void do_oop_nv(T* p); - virtual void do_oop(oop* p) { do_oop_nv(p); } - virtual void do_oop(narrowOop* p) { do_oop_nv(p); } - bool apply_to_weak_ref_discovered_field() { return true; } -}; - // Closure for iterating over object fields during concurrent marking class G1CMOopClosure : public MetadataAwareOopClosure { protected: @@ -187,8 +187,7 @@ private: ConcurrentMark* _cm; uint _worker_id; public: - G1RootRegionScanClosure(G1CollectedHeap* g1h, ConcurrentMark* cm, - uint worker_id) : + G1RootRegionScanClosure(G1CollectedHeap* g1h, ConcurrentMark* cm, uint worker_id) : _g1h(g1h), _cm(cm), _worker_id(worker_id) { } template void do_oop_nv(T* p); virtual void do_oop( oop* p) { do_oop_nv(p); } @@ -210,63 +209,32 @@ public: virtual void do_oop(narrowOop* p) { do_oop_nv(p); } }; -// A closure that returns true if it is actually applied -// to a reference - -class G1TriggerClosure : public ExtendedOopClosure { - bool _triggered; -public: - G1TriggerClosure(); - bool triggered() const { return _triggered; } - template void do_oop_nv(T* p); - virtual void do_oop(oop* p) { do_oop_nv(p); } - virtual void do_oop(narrowOop* p) { do_oop_nv(p); } -}; +class G1ConcurrentRefineOopClosure: public ExtendedOopClosure { + G1CollectedHeap* _g1; + uint _worker_i; -// A closure which uses a triggering closure to determine -// whether to apply an oop closure. 
+ public: + G1ConcurrentRefineOopClosure(G1CollectedHeap* g1h, uint worker_i) : + _g1(g1h), + _worker_i(worker_i) { + } -class G1InvokeIfNotTriggeredClosure: public ExtendedOopClosure { - G1TriggerClosure* _trigger_cl; - OopClosure* _oop_cl; -public: - G1InvokeIfNotTriggeredClosure(G1TriggerClosure* t, OopClosure* oc); template void do_oop_nv(T* p); - virtual void do_oop(oop* p) { do_oop_nv(p); } - virtual void do_oop(narrowOop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } + virtual void do_oop(oop* p) { do_oop_nv(p); } }; -class G1UpdateRSOrPushRefOopClosure: public ExtendedOopClosure { +class G1RebuildRemSetClosure : public ExtendedOopClosure { G1CollectedHeap* _g1; - G1RemSet* _g1_rem_set; - HeapRegion* _from; - G1ParPushHeapRSClosure* _push_ref_cl; - bool _record_refs_into_cset; - uint _worker_i; - -public: - G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h, - G1RemSet* rs, - G1ParPushHeapRSClosure* push_ref_cl, - bool record_refs_into_cset, - uint worker_i = 0); - - void set_from(HeapRegion* from) { - assert(from != NULL, "from region must be non-NULL"); - _from = from; - } - - bool self_forwarded(oop obj) { - markOop m = obj->mark(); - bool result = (m->is_marked() && ((oop)m->decode_pointer() == obj)); - return result; + uint _worker_id; + public: + G1RebuildRemSetClosure(G1CollectedHeap* g1, uint worker_id) : _g1(g1), _worker_id(worker_id) { } - bool apply_to_weak_ref_discovered_field() { return true; } - template void do_oop_nv(T* p); - virtual void do_oop(narrowOop* p) { do_oop_nv(p); } virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } + // This closure needs special handling for InstanceRefKlass. }; #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp index 0385f66dd..e45e5bd2d 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp @@ -35,103 +35,70 @@ #include "memory/iterator.inline.hpp" #include "runtime/prefetch.inline.hpp" -/* - * This really ought to be an inline function, but apparently the C++ - * compiler sometimes sees fit to ignore inline declarations. Sigh. - */ - +// This closure is applied to the fields of the objects that have just been copied. template -inline void FilterIntoCSClosure::do_oop_nv(T* p) { - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop) && - _g1->is_in_cset_or_humongous(oopDesc::decode_heap_oop_not_null(heap_oop))) { - _oc->do_oop(p); - } +inline void G1ScanClosureBase::prefetch_and_push(T* p, const oop obj) { + // We're not going to even bother checking whether the object is + // already forwarded or not, as this usually causes an immediate + // stall. 
We'll try to prefetch the object (for write, given that + // we might need to install the forwarding reference) and we'll + // get back to it when we pop it from the queue + Prefetch::write(obj->mark_addr(), 0); + Prefetch::read(obj->mark_addr(), (HeapWordSize*2)); + + // slightly paranoid test; I'm trying to catch potential + // problems before we go into push_on_queue to know where the + // problem is coming from + assert((obj == oopDesc::load_decode_heap_oop(p)) || + (obj->is_forwarded() && + obj->forwardee() == oopDesc::load_decode_heap_oop(p)), + "p should still be pointing to obj or to its forwardee"); + + _par_scan_state->push_on_queue(p); } template <class T> -inline void FilterOutOfRegionClosure::do_oop_nv(T* p) { - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop)) { - HeapWord* obj_hw = (HeapWord*)oopDesc::decode_heap_oop_not_null(heap_oop); - if (obj_hw < _r_bottom || obj_hw >= _r_end) { - _oc->do_oop(p); +inline void G1ScanClosureBase::handle_non_cset_obj_common(InCSetState const state, T* p, oop const obj) { + if (state.is_humongous()) { + _g1->set_humongous_is_live(obj); } - } } -// This closure is applied to the fields of the objects that have just been copied. template <class T> -inline void G1ParScanClosure::do_oop_nv(T* p) { +inline void G1ScanEvacuatedObjClosure::do_oop_nv(T* p) { T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop)) { - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - const InCSetState state = _g1->in_cset_state(obj); - if (state.is_in_cset()) { - // We're not going to even bother checking whether the object is - // already forwarded or not, as this usually causes an immediate - // stall. We'll try to prefetch the object (for write, given that - // we might need to install the forwarding reference) and we'll - // get back to it when pop it from the queue - Prefetch::write(obj->mark_addr(), 0); - Prefetch::read(obj->mark_addr(), (HeapWordSize*2)); - - // slightly paranoid test; I'm trying to catch potential - // problems before we go into push_on_queue to know where the - // problem is coming from - assert((obj == oopDesc::load_decode_heap_oop(p)) || - (obj->is_forwarded() && - obj->forwardee() == oopDesc::load_decode_heap_oop(p)), - "p should still be pointing to obj or to its forwardee"); - - _par_scan_state->push_on_queue(p); - } else { - if (state.is_humongous()) { - _g1->set_humongous_is_live(obj); - } - _par_scan_state->update_rs(_from, p, _worker_id); - } + if (oopDesc::is_null(heap_oop)) { + return; } -} - -template <class T> -inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) { - T heap_oop = oopDesc::load_heap_oop(p); - - if (!oopDesc::is_null(heap_oop)) { - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - if (_g1->is_in_cset_or_humongous(obj)) { - Prefetch::write(obj->mark_addr(), 0); - Prefetch::read(obj->mark_addr(), (HeapWordSize*2)); - // Place on the references queue - _par_scan_state->push_on_queue(p); - } else { - assert(!_g1->obj_in_cs(obj), "checking"); - } + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + const InCSetState state = _g1->in_cset_state(obj); + if (state.is_in_cset()) { + prefetch_and_push(p, obj); + } else { + handle_non_cset_obj_common(state, p, obj); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } } template <class T> inline void G1CMOopClosure::do_oop_nv(T* p) { - oop obj = oopDesc::load_decode_heap_oop(p); if (_cm->verbose_high()) { - gclog_or_tty->print_cr("[%u] we're looking at location " - "*" PTR_FORMAT " = " PTR_FORMAT, - _task->worker_id(), p2i(p),
p2i((void*) obj)); + gclog_or_tty->print_cr("[%u] we're looking at location " PTR_FORMAT "", + _task->worker_id(), p2i(p)); } - _task->deal_with_reference(obj); + _task->deal_with_reference(p); } template <class T> inline void G1RootRegionScanClosure::do_oop_nv(T* p) { T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop)) { - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - HeapRegion* hr = _g1h->heap_region_containing((HeapWord*) obj); - _cm->grayRoot(obj, obj->size(), _worker_id, hr); + if (oopDesc::is_null(heap_oop)) { + return; } + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + _cm->mark_in_next_bitmap(_worker_id, obj); } template <class T> @@ -142,84 +109,119 @@ inline void G1Mux2Closure::do_oop_nv(T* p) { } template <class T> -inline void G1TriggerClosure::do_oop_nv(T* p) { - // Record that this closure was actually applied (triggered). - _triggered = true; -} - -template <class T> -inline void G1InvokeIfNotTriggeredClosure::do_oop_nv(T* p) { - if (!_trigger_cl->triggered()) { - _oop_cl->do_oop(p); - } +inline static void check_obj_during_refinement(T* p, oop const obj) { +#ifdef ASSERT + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + // can't do because of races + // assert(obj == NULL || obj->is_oop(), "expected an oop"); + assert(check_obj_alignment(obj), "not oop aligned"); + assert(g1->is_in_reserved(obj), "must be in heap"); + + HeapRegion* from = g1->heap_region_containing(p); + + assert(from != NULL, "from region must be non-NULL"); + assert(from->is_in_reserved(p) || + (from->isHumongous() && + g1->heap_region_containing(p)->isHumongous() && + from->humongous_start_region() == g1->heap_region_containing(p)->humongous_start_region()), + err_msg("p " PTR_FORMAT " is not in the same region %u or part of the correct humongous object" + " starting at region %u.", p2i(p), from->hrm_index(), from->humongous_start_region()->hrm_index())); +#endif // ASSERT } template <class T> -inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) { - oop obj = oopDesc::load_decode_heap_oop(p); - if (obj == NULL) { +inline void G1ConcurrentRefineOopClosure::do_oop_nv(T* p) { + T o = oopDesc::load_heap_oop(p); + if (oopDesc::is_null(o)) { return; } -#ifdef ASSERT - // can't do because of races - // assert(obj == NULL || obj->is_oop(), "expected an oop"); - - // Do the safe subset of is_oop -#ifdef CHECK_UNHANDLED_OOPS - oopDesc* o = obj.obj(); -#else - oopDesc* o = obj; -#endif // CHECK_UNHANDLED_OOPS - assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned"); - assert(Universe::heap()->is_in_reserved(obj), "must be in heap"); -#endif // ASSERT + oop obj = oopDesc::decode_heap_oop_not_null(o); - assert(_from != NULL, "from region must be non-NULL"); - assert(_from->is_in_reserved(p), "p is not in from"); + check_obj_during_refinement(p, obj); - HeapRegion* to = _g1->heap_region_containing(obj); - if (_from == to) { + if (HeapRegion::is_in_same_region(p, obj)) { // Normally this closure should only be called with cross-region references. // But since Java threads are manipulating the references concurrently and we // reload the values things may have changed. + // Also this check lets slip through references from a continues humongous region + // to its humongous start region, as they are in different regions, and adds a + // remembered set entry. This is benign (apart from memory usage), as we never + // try to either evacuate or eagerly reclaim humongous arrays of j.l.O.
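HeapRegion::is_in_same_region(p, obj), relied on above, needs no lookup in the region table. Because regions are 2^LogOfHRGrainBytes bytes in size and aligned, one plausible implementation (an assumption here; it matches what later mainline HotSpot does) simply compares the address bits above the grain:

    // Sketch: two addresses are in the same region iff they agree above the
    // grain bits. Assumes HeapRegion::LogOfHRGrainBytes as in mainline HotSpot.
    static bool is_in_same_region(const void* p, const void* obj) {
      uintptr_t diff = (uintptr_t)p ^ (uintptr_t)obj;
      return (diff >> HeapRegion::LogOfHRGrainBytes) == 0;
    }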
+ return; + } + + HeapRegionRemSet* to_rem_set = _g1->heap_region_containing(obj)->rem_set(); + + assert(to_rem_set != NULL, "Need per-region 'into' remsets."); + if (to_rem_set->is_tracked()) { + to_rem_set->add_reference(p, _worker_i); + } +} + + +template +inline void G1ScanObjsDuringUpdateRSClosure::do_oop_nv(T* p) { + T o = oopDesc::load_heap_oop(p); + if (oopDesc::is_null(o)) { return; } - // The _record_refs_into_cset flag is true during the RSet - // updating part of an evacuation pause. It is false at all - // other times: - // * rebuilding the remembered sets after a full GC - // * during concurrent refinement. - // * updating the remembered sets of regions in the collection - // set in the event of an evacuation failure (when deferred - // updates are enabled). - - if (_record_refs_into_cset && to->in_collection_set()) { - // We are recording references that point into the collection - // set and this particular reference does exactly that... - // If the referenced object has already been forwarded - // to itself, we are handling an evacuation failure and - // we have already visited/tried to copy this object - // there is no need to retry. - if (!self_forwarded(obj)) { - assert(_push_ref_cl != NULL, "should not be null"); - // Push the reference in the refs queue of the G1ParScanThreadState - // instance for this worker thread. - _push_ref_cl->do_oop(p); - } - - // Deferred updates to the CSet are either discarded (in the normal case), - // or processed (if an evacuation failure occurs) at the end - // of the collection. - // See G1RemSet::cleanup_after_oops_into_collection_set_do(). + oop obj = oopDesc::decode_heap_oop_not_null(o); + check_obj_during_refinement(p, obj); + + assert(!_g1->is_in_cset((HeapWord*)p), + err_msg("Oop originates from " PTR_FORMAT " (region: %u) which is in the collection set.", p2i(p), + _g1->addr_to_region((HeapWord*)p))) + ; + const InCSetState state = _g1->in_cset_state(obj); + if (state.is_in_cset()) { + // Since the source is always from outside the collection set, here we implicitly know + // that this is a cross-region reference too. + prefetch_and_push(p, obj); + + _has_refs_into_cset = true; } else { - // We either don't care about pushing references that point into the - // collection set (i.e. we're not during an evacuation pause) _or_ - // the reference doesn't point into the collection set. Either way - // we add the reference directly to the RSet of the region containing - // the referenced object. - assert(to->rem_set() != NULL, "Need per-region 'into' remsets."); + HeapRegion* to = _g1->heap_region_containing(obj); + if (_from == to) { + // Normally this closure should only be called with cross-region references. + // But since Java threads are manipulating the references concurrently and we + // reload the values things may have changed. 
+ return; + } + handle_non_cset_obj_common(state, p, obj); to->rem_set()->add_reference(p, _worker_i); } } +template +inline void G1ScanObjsDuringScanRSClosure::do_oop_nv(T* p) { + T heap_oop = oopDesc::load_heap_oop(p); + if (oopDesc::is_null(heap_oop)) { + return; + } + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + + const InCSetState state = _g1->in_cset_state(obj); + if (state.is_in_cset()) { + prefetch_and_push(p, obj); + } else { + handle_non_cset_obj_common(state, p, obj); + } +} + +template void G1RebuildRemSetClosure::do_oop_nv(T* p) { + T heap_oop = oopDesc::load_heap_oop(p); + if (oopDesc::is_null(heap_oop)) { + return; + } + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + + if (HeapRegion::is_in_same_region(p, obj)) { + return; + } + + HeapRegion* to = _g1->heap_region_containing(obj); + HeapRegionRemSet* rem_set = to->rem_set(); + rem_set->add_reference(p, _worker_id); +} + #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_INLINE_HPP diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp index a095abaf6..73772f2cd 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp @@ -221,7 +221,7 @@ oop G1ParScanThreadState::copy_to_survivor_space(InCSetState const state, oop const old, markOop const old_mark) { const size_t word_sz = old->size(); - HeapRegion* const from_region = _g1h->heap_region_containing_raw(old); + HeapRegion* const from_region = _g1h->heap_region_containing(old); // +1 to make the -1 indexes valid... const int young_index = from_region->young_index_in_cset()+1; assert( (from_region->is_young() && young_index > 0) || @@ -293,9 +293,9 @@ oop G1ParScanThreadState::copy_to_survivor_space(InCSetState const state, if (G1StringDedup::is_enabled()) { const bool is_from_young = state.is_young(); const bool is_to_young = dest_state.is_young(); - assert(is_from_young == _g1h->heap_region_containing_raw(old)->is_young(), + assert(is_from_young == _g1h->heap_region_containing(old)->is_young(), "sanity"); - assert(is_to_young == _g1h->heap_region_containing_raw(obj)->is_young(), + assert(is_to_young == _g1h->heap_region_containing(obj)->is_young(), "sanity"); G1StringDedup::enqueue_from_evacuation(is_from_young, is_to_young, @@ -314,7 +314,7 @@ oop G1ParScanThreadState::copy_to_survivor_space(InCSetState const state, oop* old_p = set_partial_array_mask(old); push_on_queue(old_p); } else { - HeapRegion* const to_region = _g1h->heap_region_containing_raw(obj_ptr); + HeapRegion* const to_region = _g1h->heap_region_containing(obj_ptr); _scanner.set_region(to_region); obj->oop_iterate_backwards(&_scanner); } diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp index 60c00b178..7da6ee4ea 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp @@ -52,7 +52,7 @@ class G1ParScanThreadState : public CHeapObj { InCSetState _dest[InCSetState::Num]; // Local tenuring threshold. 
uint _tenuring_threshold; - G1ParScanClosure _scanner; + G1ScanEvacuatedObjClosure _scanner; size_t _alloc_buffer_waste; size_t _undo_waste; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp index b3dc22b30..3390ff5fa 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp @@ -96,7 +96,7 @@ inline void G1ParScanThreadState::do_oop_partial_array(oop* p) { // so that the heap remains parsable in case of evacuation failure. to_obj_array->set_length(end); } - _scanner.set_region(_g1h->heap_region_containing_raw(to_obj)); + _scanner.set_region(_g1h->heap_region_containing(to_obj)); // Process indexes [start,end). It will also process the header // along with the first chunk (i.e., the chunk with start == 0). // Note that at this point the length field of to_obj_array is not @@ -110,10 +110,7 @@ inline void G1ParScanThreadState::do_oop_partial_array(oop* p) { template inline void G1ParScanThreadState::deal_with_reference(T* ref_to_scan) { if (!has_partial_array_mask(ref_to_scan)) { - // Note: we can use "raw" versions of "region_containing" because - // "obj_to_scan" is definitely in the heap, and is not in a - // humongous region. - HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan); + HeapRegion* r = _g1h->heap_region_containing(ref_to_scan); do_oop_evac(ref_to_scan, r); } else { do_oop_partial_array((oop*)ref_to_scan); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.cpp new file mode 100644 index 000000000..7919882d2 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/g1RegionMarkStatsCache.inline.hpp" +#include "memory/allocation.inline.hpp" + +G1RegionMarkStatsCache::G1RegionMarkStatsCache(G1RegionMarkStats* target, uint max_regions, uint num_cache_entries) : + _num_stats(max_regions), + _target(target), + _num_cache_entries(num_cache_entries), + _cache_hits(0), + _cache_misses(0) { + + guarantee(is_power_of_2(num_cache_entries), + err_msg("Number of cache entries must be power of two, but is %u", num_cache_entries)); + _cache = NEW_C_HEAP_ARRAY(G1RegionMarkStatsCacheEntry, _num_cache_entries, mtGC); + for (uint i = 0; i < _num_cache_entries; i++) { + _cache[i].clear(); + } + _num_cache_entries_mask = _num_cache_entries - 1; +} + +G1RegionMarkStatsCache::~G1RegionMarkStatsCache() { + FREE_C_HEAP_ARRAY(G1RegionMarkStatsCacheEntry, _cache, mtGC); +} + +// Evict all remaining statistics, returning cache hits and misses. +Pair G1RegionMarkStatsCache::evict_all() { + for (uint i = 0; i < _num_cache_entries; i++) { + evict(i); + } + return Pair(_cache_hits, _cache_misses); +} + +// Reset all cache entries to their default values. +void G1RegionMarkStatsCache::reset() { + _cache_hits = 0; + _cache_misses = 0; + + for (uint i = 0; i < _num_cache_entries; i++) { + _cache[i].clear(); + } +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.hpp new file mode 100644 index 000000000..b35312f24 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.hpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_G1_G1REGIONMARKSTATSCACHE_HPP +#define SHARE_VM_GC_G1_G1REGIONMARKSTATSCACHE_HPP + +#include "memory/allocation.hpp" +#include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/pair.hpp" + +// Per-Region statistics gathered during marking. +// +// This includes +// * the number of live words gathered during marking for the area from bottom +// to ntams. This is an exact measure. +// The code corrects later for the live data between ntams and top. +struct G1RegionMarkStats { + size_t _live_words; + + // Clear all members. + void clear() { + _live_words = 0; + } + // Clear all members after a marking overflow. Nothing to do as the live words + // are updated by the atomic mark. We do not remark objects after overflow. 
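The cache declared below (after the statistics struct) is direct-mapped: each region index hashes to exactly one slot, and a collision simply evicts the previous occupant into the global statistics array with a single atomic add. A usage sketch, assuming one cache instance per marking worker; global_stats, max_regions, region_index and marked_words are illustrative names:

    // Illustrative per-worker use of G1RegionMarkStatsCache.
    G1RegionMarkStatsCache cache(global_stats, max_regions, 1024 /* power of two */);
    // Hot path during marking: repeated updates for the same region stay cached.
    cache.add_live_words(region_index, marked_words);
    // End of marking: flush the leftovers and read the hit/miss counters.
    Pair<size_t, size_t> hits_and_misses = cache.evict_all();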
+ void clear_during_overflow() { + } + + bool is_clear() const { return _live_words == 0; } +}; + +// Per-marking thread cache for the region mark statistics. +// +// Each cache is a larg'ish map of region-idx -> G1RegionMarkStats entries that cache +// currently gathered statistics; entries are evicted to the global statistics array +// on every collision. This minimizes synchronization overhead which would be required +// every time statistics change, as marking is very localized. +// The map entry number is a power of two to allow simple and fast hashing using +// logical and. +class G1RegionMarkStatsCache { +private: + // The array of statistics entries to evict to; the global array. + G1RegionMarkStats* _target; + // Number of entries in the eviction target. + uint _num_stats; + + // An entry of the statistics cache. + struct G1RegionMarkStatsCacheEntry { + uint _region_idx; + G1RegionMarkStats _stats; + + void clear() { + _region_idx = 0; + _stats.clear(); + } + + bool is_clear() const { + return _region_idx == 0 && _stats.is_clear(); + } + }; + + // The actual cache and its number of entries. + G1RegionMarkStatsCacheEntry* _cache; + uint _num_cache_entries; + + // Cache hits/miss counters. + size_t _cache_hits; + size_t _cache_misses; + + // Evict a given element of the statistics cache. + void evict(uint idx); + + size_t _num_cache_entries_mask; + + uint hash(uint idx) { + return idx & _num_cache_entries_mask; + } + + G1RegionMarkStatsCacheEntry* find_for_add(uint region_idx); +public: + G1RegionMarkStatsCache(G1RegionMarkStats* target, uint max_regions, uint num_cache_entries); + + ~G1RegionMarkStatsCache(); + + void add_live_words(uint region_idx, size_t live_words) { + G1RegionMarkStatsCacheEntry* const cur = find_for_add(region_idx); + cur->_stats._live_words += live_words; + } + + void reset(uint region_idx) { + uint const cache_idx = hash(region_idx); + G1RegionMarkStatsCacheEntry* cur = &_cache[cache_idx]; + if (cur->_region_idx == region_idx) { + _cache[cache_idx].clear(); + } + } + + // Evict all remaining statistics, returning cache hits and misses. + Pair evict_all(); + + // Reset all cache entries to their default values. + void reset(); + + size_t hits() const { return _cache_hits; } + size_t misses() const { return _cache_misses; } +}; + +#endif // SHARE_VM_GC_G1_G1REGIONMARKSTATSCACHE_HPP diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.inline.hpp new file mode 100644 index 000000000..9c4bf06a4 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RegionMarkStatsCache.inline.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_G1_G1REGIONMARKSTATSCACHE_INLINE_HPP +#define SHARE_VM_GC_G1_G1REGIONMARKSTATSCACHE_INLINE_HPP + +#include "gc_implementation/g1/g1RegionMarkStatsCache.hpp" +#include "runtime/atomic.hpp" + +inline G1RegionMarkStatsCache::G1RegionMarkStatsCacheEntry* G1RegionMarkStatsCache::find_for_add(uint region_idx) { + uint const cache_idx = hash(region_idx); + + G1RegionMarkStatsCacheEntry* cur = &_cache[cache_idx]; + if (cur->_region_idx != region_idx) { + evict(cache_idx); + cur->_region_idx = region_idx; + _cache_misses++; + } else { + _cache_hits++; + } + + return cur; +} + +inline void G1RegionMarkStatsCache::evict(uint idx) { + G1RegionMarkStatsCacheEntry* cur = &_cache[idx]; + if (cur->_stats._live_words != 0) { + Atomic::add(cur->_stats._live_words, &_target[cur->_region_idx]._live_words); + } + cur->clear(); +} + +#endif // SHARE_VM_GC_G1_G1REGIONMARKSTATSCACHE_INLINE_HPP diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp index 70c14bf7d..db9318434 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp @@ -70,29 +70,158 @@ void ct_freq_update_histo_and_reset() { } #endif +// Collects information about the overall remembered set scan progress during an evacuation. +class G1RemSetScanState : public CHeapObj { + private: + size_t _max_regions; + + // Scan progress for the remembered set of a single region. Transitions from + // Unclaimed -> Claimed -> Complete. + // At each of the transitions the thread that does the transition needs to perform + // some special action once. This is the reason for the extra "Claimed" state. + typedef jint G1RemsetIterState; + + static const G1RemsetIterState Unclaimed = 0; // The remembered set has not been scanned yet. + static const G1RemsetIterState Claimed = 1; // The remembered set is currently being scanned. + static const G1RemsetIterState Complete = 2; // The remembered set has been completely scanned. + + G1RemsetIterState volatile* _iter_states; + // The current location where the next thread should continue scanning in a region's + // remembered set. 
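The claim states above and the per-region cursors declared next combine into a small protocol that ScanRSClosure::doHeapRegion(), later in this patch, drives for every collection set region, roughly as follows (a sketch; block_size corresponds to G1RSetScanBlockSize):

    // Sketch of the per-region scan protocol over G1RemSetScanState.
    void scan_one_region(G1RemSetScanState* scan_state, HeapRegion* r, size_t block_size) {
      uint idx = r->hrm_index();
      if (scan_state->iter_is_complete(idx)) {
        return;                                 // another worker already finished
      }
      if (scan_state->claim_iter(idx)) {
        // One-time work, done only by the first claimant.
      }
      // Every worker helps: claim disjoint card blocks until the remset is exhausted.
      size_t block_start = scan_state->iter_claimed_next(idx, block_size);
      // ... scan cards [block_start, block_start + block_size), then re-claim ...
      if (scan_state->set_iter_complete(idx)) {
        // One-time completion work, e.g. scanning the strong code roots.
      }
    }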
+ size_t volatile* _iter_claims; + + public: + G1RemSetScanState() : + _max_regions(0), + _iter_states(NULL), + _iter_claims(NULL), + _scan_top(NULL) { + + } + + ~G1RemSetScanState() { + if (_iter_states != NULL) { + FREE_C_HEAP_ARRAY(G1RemsetIterState, _iter_states, mtGC); + } + if (_iter_claims != NULL) { + FREE_C_HEAP_ARRAY(size_t, _iter_claims, mtGC); + } + if (_scan_top != NULL) { + FREE_C_HEAP_ARRAY(HeapWord*, _scan_top, mtGC); + } + } + + void initialize(uint max_regions) { + assert(_iter_states == NULL, "Must not be initialized twice"); + assert(_iter_claims == NULL, "Must not be initialized twice"); + _max_regions = max_regions; + _iter_states = NEW_C_HEAP_ARRAY(G1RemsetIterState, max_regions, mtGC); + _iter_claims = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC); + _scan_top = NEW_C_HEAP_ARRAY(HeapWord*, max_regions, mtGC); + } + + void reset() { + for (uint i = 0; i < _max_regions; i++) { + _iter_states[i] = Unclaimed; + } + + G1ResetScanTopClosure cl(_scan_top); + G1CollectedHeap::heap()->heap_region_iterate(&cl); + + memset((void*)_iter_claims, 0, _max_regions * sizeof(size_t)); + } + + // Attempt to claim the remembered set of the region for iteration. Returns true + // if this call caused the transition from Unclaimed to Claimed. + inline bool claim_iter(uint region) { + assert(region < _max_regions, err_msg("Tried to access invalid region %u", region)); + if (_iter_states[region] != Unclaimed) { + return false; + } + jint res = Atomic::cmpxchg(Claimed, (jint*)(&_iter_states[region]), Unclaimed); + return (res == Unclaimed); + } + + // Try to atomically set the iteration state to "complete". Returns true for the + // thread that caused the transition. + inline bool set_iter_complete(uint region) { + if (iter_is_complete(region)) { + return false; + } + jint res = Atomic::cmpxchg(Complete, (jint*)(&_iter_states[region]), Claimed); + return (res == Claimed); + } + + // Returns true if the region's iteration is complete. + inline bool iter_is_complete(uint region) const { + assert(region < _max_regions, err_msg("Tried to access invalid region %u", region)); + return _iter_states[region] == Complete; + } + + // The current position within the remembered set of the given region. + inline size_t iter_claimed(uint region) const { + assert(region < _max_regions, err_msg("Tried to access invalid region %u", region)); + return _iter_claims[region]; + } + + // Claim the next block of cards within the remembered set of the region with the given + // step size. + inline size_t iter_claimed_next(uint region, size_t step) { + return Atomic::add(step, &_iter_claims[region]) - step; + } + + HeapWord* scan_top(uint region_idx) const { + return _scan_top[region_idx]; + } + + + // Creates a snapshot of the current _top values at the start of collection to + // filter out card marks that we do not want to scan. + class G1ResetScanTopClosure : public HeapRegionClosure { + private: + HeapWord** _scan_top; + public: + G1ResetScanTopClosure(HeapWord** scan_top) : _scan_top(scan_top) { } + + virtual bool doHeapRegion(HeapRegion* r) { + uint hrm_index = r->hrm_index(); + if (!r->in_collection_set() && r->is_old_or_humongous()) { + _scan_top[hrm_index] = r->top(); + } else { + _scan_top[hrm_index] = r->bottom(); + } + return false; + } + }; + + // For each region, contains the maximum top() value to be used during this garbage + // collection. Subsumes common checks like filtering out everything but old and + // humongous regions outside the collection set.
+ // This is valid because we are not interested in scanning stray remembered set + // entries from free or archive regions. + HeapWord** _scan_top; +}; + G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) - : _g1(g1), _conc_refine_cards(0), + : _g1(g1), + _scan_state(new G1RemSetScanState()), + _conc_refine_cards(0), _ct_bs(ct_bs), _g1p(_g1->g1_policy()), _cg1r(g1->concurrent_g1_refine()), - _cset_rs_update_cl(NULL), _cards_scanned(NULL), _total_cards_scanned(0), _prev_period_summary() { guarantee(n_workers() > 0, "There should be some workers"); - _cset_rs_update_cl = NEW_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, n_workers(), mtGC); - for (uint i = 0; i < n_workers(); i++) { - _cset_rs_update_cl[i] = NULL; - } + if (G1SummarizeRSetStats) { _prev_period_summary.initialize(this); } } G1RemSet::~G1RemSet() { - for (uint i = 0; i < n_workers(); i++) { - assert(_cset_rs_update_cl[i] == NULL, "it should be"); + if (_scan_state != NULL) { + delete _scan_state; } - FREE_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, _cset_rs_update_cl, mtGC); } void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { @@ -102,11 +231,16 @@ void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { } } +void G1RemSet::initialize(size_t capacity, uint max_regions) { + _scan_state->initialize(max_regions); +} + class ScanRSClosure : public HeapRegionClosure { + G1RemSetScanState* _scan_state; size_t _cards_done, _cards; G1CollectedHeap* _g1h; - G1ParPushHeapRSClosure* _oc; + G1ScanObjsDuringScanRSClosure* _scan_objs_on_card_cl; CodeBlobClosure* _code_root_cl; G1BlockOffsetSharedArray* _bot_shared; @@ -115,19 +249,19 @@ class ScanRSClosure : public HeapRegionClosure { double _strong_code_root_scan_time_sec; uint _worker_i; int _block_size; - bool _try_claimed; public: - ScanRSClosure(G1ParPushHeapRSClosure* oc, + ScanRSClosure(G1RemSetScanState* scan_state, + G1ScanObjsDuringScanRSClosure* scan_obj_on_card, CodeBlobClosure* code_root_cl, uint worker_i) : - _oc(oc), + _scan_state(scan_state), + _scan_objs_on_card_cl(scan_obj_on_card), _code_root_cl(code_root_cl), _strong_code_root_scan_time_sec(0.0), _cards(0), _cards_done(0), - _worker_i(worker_i), - _try_claimed(false) + _worker_i(worker_i) { _g1h = G1CollectedHeap::heap(); _bot_shared = _g1h->bot_shared(); @@ -135,38 +269,21 @@ public: _block_size = MAX2(G1RSetScanBlockSize, 1); } - void set_try_claimed() { _try_claimed = true; } - - void scanCard(size_t index, HeapRegion *r) { - // Stack allocate the DirtyCardToOopClosure instance - HeapRegionDCTOC cl(_g1h, r, _oc, - CardTableModRefBS::Precise); - - // Set the "from" region in the closure. - _oc->set_region(r); - MemRegion card_region(_bot_shared->address_for_index(index), G1BlockOffsetSharedArray::N_words); - MemRegion pre_gc_allocated(r->bottom(), r->scan_top()); + void scanCard(size_t index, HeapWord* card_start, HeapRegion *r) { + MemRegion card_region(card_start, G1BlockOffsetSharedArray::N_words); + MemRegion pre_gc_allocated(r->bottom(), _scan_state->scan_top(r->hrm_index())); MemRegion mr = pre_gc_allocated.intersection(card_region); if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) { // We make the card as "claimed" lazily (so races are possible // but they're benign), which reduces the number of duplicate // scans (the rsets of the regions in the cset can intersect). 
_ct_bs->set_card_claimed(index); + _scan_objs_on_card_cl->set_region(r); + r->oops_on_card_seq_iterate_careful(mr, _scan_objs_on_card_cl); _cards_done++; - cl.do_MemRegion(mr); } } - void printCard(HeapRegion* card_region, size_t card_index, - HeapWord* card_start) { - gclog_or_tty->print_cr("T " UINT32_FORMAT " Region [" PTR_FORMAT ", " PTR_FORMAT ") " - "RS names card %p: " - "[" PTR_FORMAT ", " PTR_FORMAT ")", - _worker_i, - card_region->bottom(), card_region->end(), - card_index, - card_start, card_start + G1BlockOffsetSharedArray::N_words); - } void scan_strong_code_roots(HeapRegion* r) { double scan_start = os::elapsedTime(); @@ -176,29 +293,30 @@ public: bool doHeapRegion(HeapRegion* r) { assert(r->in_collection_set(), "should only be called on elements of CS."); - HeapRegionRemSet* hrrs = r->rem_set(); - if (hrrs->iter_is_complete()) return false; // All done. - if (!_try_claimed && !hrrs->claim_iter()) return false; - // If we ever free the collection set concurrently, we should also - // clear the card table concurrently therefore we won't need to - // add regions of the collection set to the dirty cards region. - _g1h->push_dirty_cards_region(r); - // If we didn't return above, then - // _try_claimed || r->claim_iter() - // is true: either we're supposed to work on claimed-but-not-complete - // regions, or we successfully claimed the region. - - HeapRegionRemSetIterator iter(hrrs); + uint region_idx = r->hrm_index(); + if (_scan_state->iter_is_complete(region_idx)) { + return false; + } + if (_scan_state->claim_iter(region_idx)) { + // If we ever free the collection set concurrently, we should also + // clear the card table concurrently therefore we won't need to + // add regions of the collection set to the dirty cards region. + _g1h->push_dirty_cards_region(r); + } + + HeapRegionRemSetIterator iter(r->rem_set()); size_t card_index; // We claim cards in blocks so as to reduce contention. The block size is determined by // the G1RSetScanBlockSize parameter. - size_t jump_to_card = hrrs->iter_claimed_next(_block_size); + size_t claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size); for (size_t current_card = 0; iter.has_next(card_index); current_card++) { - if (current_card >= jump_to_card + _block_size) { - jump_to_card = hrrs->iter_claimed_next(_block_size); + if (current_card >= claimed_card_block + _block_size) { + claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size); + } + if (current_card < claimed_card_block) { + continue; } - if (current_card < jump_to_card) continue; HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index); #if 0 gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n", @@ -218,14 +336,12 @@ public: // If the card is dirty, then we will scan it during updateRS.
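The block-claim arithmetic above is easy to misread: Atomic::add() returns the post-add value, so iter_claimed_next() hands back the pre-add cursor, giving each caller a disjoint half-open range. With a block size of 8, three racing calls return 0, 8 and 16, i.e. the callers scan cards [0,8), [8,16) and [16,24) with no further synchronization. Reduced to its core (this uses the size_t overload of Atomic::add that this patch adds to atomic.hpp):

    // Stand-alone sketch of the fetch-and-add block claiming used above.
    static volatile size_t _cursor = 0;          // next unclaimed card index
    static size_t claim_block(size_t step) {
      // Atomic::add returns the new value; subtracting step yields the old one.
      return Atomic::add(step, &_cursor) - step;
    }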
if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) { - scanCard(card_index, card_region); + scanCard(card_index, card_start, card_region); } } - if (!_try_claimed) { + if (_scan_state->set_iter_complete(region_idx)) { // Scan the strong code root list attached to the current region scan_strong_code_roots(r); - - hrrs->set_iter_complete(); } return false; } @@ -238,26 +354,24 @@ public: size_t cards_looked_up() { return _cards;} }; -void G1RemSet::scanRS(G1ParPushHeapRSClosure* oc, +void G1RemSet::scanRS(G1ParScanThreadState* pss, CodeBlobClosure* code_root_cl, uint worker_i) { double rs_time_start = os::elapsedTime(); - HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i); + G1ScanObjsDuringScanRSClosure scan_cl(_g1, pss); + ScanRSClosure cl(_scan_state, &scan_cl, code_root_cl, worker_i); - ScanRSClosure scanRScl(oc, code_root_cl, worker_i); - - _g1->collection_set_iterate_from(startRegion, &scanRScl); - scanRScl.set_try_claimed(); - _g1->collection_set_iterate_from(startRegion, &scanRScl); + HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i); + _g1->collection_set_iterate_from(startRegion, &cl); double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) - - scanRScl.strong_code_root_scan_time_sec(); + - cl.strong_code_root_scan_time_sec(); assert(_cards_scanned != NULL, "invariant"); - _cards_scanned[worker_i] = scanRScl.cards_done(); + _cards_scanned[worker_i] = cl.cards_done(); _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec); - _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec()); + _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec()); } // Closure used for updating RSets and recording references that @@ -267,10 +381,12 @@ void G1RemSet::scanRS(G1ParPushHeapRSClosure* oc, class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure { G1RemSet* _g1rs; DirtyCardQueue* _into_cset_dcq; + G1ScanObjsDuringUpdateRSClosure* _update_rs_cl; public: RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h, - DirtyCardQueue* into_cset_dcq) : - _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq) + DirtyCardQueue* into_cset_dcq, + G1ScanObjsDuringUpdateRSClosure* update_rs_cl) : + _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq), _update_rs_cl(update_rs_cl) {} bool do_card_ptr(jbyte* card_ptr, uint worker_i) { // The only time we care about recording cards that @@ -278,9 +394,8 @@ public: // is during RSet updating within an evacuation pause. // In this case worker_i should be the id of a GC worker thread. assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause"); - assert(worker_i < (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "should be a GC worker"); - if (_g1rs->refine_card(card_ptr, worker_i, true)) { + if (_g1rs->refine_card_during_gc(card_ptr, _update_rs_cl)) { // 'card_ptr' contains references that point into the collection // set. 
We need to record the card in the DCQS // (G1CollectedHeap::into_cset_dirty_card_queue_set()) @@ -293,30 +408,32 @@ public: } }; -void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) { +void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, + G1ParScanThreadState* pss, + uint worker_i) { + G1ScanObjsDuringUpdateRSClosure update_rs_cl(_g1, pss, worker_i); + RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq, &update_rs_cl); G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i); - // Apply the given closure to all remaining log entries. - RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq); - - _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i); + { + // Apply the closure to the entries of the hot card cache. + G1GCParPhaseTimesTracker y(_g1p->phase_times(), G1GCPhaseTimes::ScanHCC, worker_i); + _g1->iterate_hcc_closure(&into_cset_update_rs_cl, worker_i); + } + // Apply the closure to all remaining log entries. + _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, worker_i); } void G1RemSet::cleanupHRRS() { HeapRegionRemSet::cleanup(); } -void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc, +void G1RemSet::oops_into_collection_set_do(G1ParScanThreadState* pss, CodeBlobClosure* code_root_cl, uint worker_i) { #if CARD_REPEAT_HISTO ct_freq_update_histo_and_reset(); #endif - // We cache the value of 'oc' closure into the appropriate slot in the - // _cset_rs_update_cl for this worker - assert(worker_i < n_workers(), "sanity"); - _cset_rs_update_cl[worker_i] = oc; - // A DirtyCardQueue that is used to hold cards containing references // that point into the collection set. This DCQ is associated with a // special DirtyCardQueueSet (see g1CollectedHeap.hpp). 
Under normal @@ -330,11 +447,9 @@ void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc, assert((ParallelGCThreads > 0) || worker_i == 0, "invariant"); - updateRS(&into_cset_dcq, worker_i); - scanRS(oc, code_root_cl, worker_i); + updateRS(&into_cset_dcq, pss, worker_i); + scanRS(pss, code_root_cl, worker_i); - // We now clear the cached values of _cset_rs_update_cl for this worker - _cset_rs_update_cl[worker_i] = NULL; } void G1RemSet::prepare_for_oops_into_collection_set_do() { @@ -342,6 +457,8 @@ void G1RemSet::prepare_for_oops_into_collection_set_do() { DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); dcqs.concatenate_logs(); + _scan_state->reset(); + guarantee( _cards_scanned == NULL, "invariant" ); _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers(), mtGC); for (uint i = 0; i < n_workers(); ++i) { @@ -384,74 +501,35 @@ void G1RemSet::cleanup_after_oops_into_collection_set_do() { "all buffers should be freed"); } -class ScrubRSClosure: public HeapRegionClosure { - G1CollectedHeap* _g1h; - BitMap* _region_bm; - BitMap* _card_bm; - CardTableModRefBS* _ctbs; -public: - ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) : - _g1h(G1CollectedHeap::heap()), - _region_bm(region_bm), _card_bm(card_bm), - _ctbs(_g1h->g1_barrier_set()) {} - - bool doHeapRegion(HeapRegion* r) { - if (!r->continuesHumongous()) { - r->rem_set()->scrub(_ctbs, _region_bm, _card_bm); - } - return false; - } -}; - -void G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) { - ScrubRSClosure scrub_cl(region_bm, card_bm); - _g1->heap_region_iterate(&scrub_cl); -} - -void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, - uint worker_num, HeapRegionClaimer *hrclaimer) { - ScrubRSClosure scrub_cl(region_bm, card_bm); - _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, hrclaimer); +inline void check_card_ptr(jbyte* card_ptr, CardTableModRefBS* ct_bs) { +#ifdef ASSERT + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + assert(g1->is_in_exact(ct_bs->addr_for(card_ptr)), + err_msg("Card at " PTR_FORMAT " index " SIZE_FORMAT " representing heap" + " at " PTR_FORMAT " (%u) must be in committed heap", + p2i(card_ptr), + ct_bs->index_for(ct_bs->addr_for(card_ptr)), + p2i(ct_bs->addr_for(card_ptr)), + g1->addr_to_region(ct_bs->addr_for(card_ptr)))); +#endif } -G1TriggerClosure::G1TriggerClosure() : - _triggered(false) { } - -G1InvokeIfNotTriggeredClosure::G1InvokeIfNotTriggeredClosure(G1TriggerClosure* t_cl, - OopClosure* oop_cl) : - _trigger_cl(t_cl), _oop_cl(oop_cl) { } - G1Mux2Closure::G1Mux2Closure(OopClosure *c1, OopClosure *c2) : _c1(c1), _c2(c2) { } -G1UpdateRSOrPushRefOopClosure:: -G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h, - G1RemSet* rs, - G1ParPushHeapRSClosure* push_ref_cl, - bool record_refs_into_cset, - uint worker_i) : - _g1(g1h), _g1_rem_set(rs), _from(NULL), - _record_refs_into_cset(record_refs_into_cset), - _push_ref_cl(push_ref_cl), _worker_i(worker_i) { } - // Returns true if the given card contains references that point // into the collection set, if we're checking for such references; // false otherwise. 
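The removal that follows completes this patch's split of the old boolean refine_card() into two dedicated entry points: refine_card_concurrently(), defined next and used by the concurrent refinement threads (which never need to report collection-set references), and refine_card_during_gc(), reached through RefineRecordRefsIntoCSCardTableEntryClosure earlier in this file. Condensed, the two call shapes are (not literal patch code):

    // Concurrent refinement thread: nothing to report back.
    g1_rem_set->refine_card_concurrently(card_ptr, worker_i);
    // GC worker draining log buffers during an evacuation pause: remember
    // cards that turn out to hold references into the collection set.
    if (g1_rem_set->refine_card_during_gc(card_ptr, &update_rs_cl)) {
      into_cset_dcq->enqueue(card_ptr);
    }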
-bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, - bool check_for_refs_into_cset) { - assert(_g1->is_in_exact(_ct_bs->addr_for(card_ptr)), - err_msg("Card at " PTR_FORMAT " index " SIZE_FORMAT " representing heap at " PTR_FORMAT " (%u) must be in committed heap", - p2i(card_ptr), - _ct_bs->index_for(_ct_bs->addr_for(card_ptr)), - _ct_bs->addr_for(card_ptr), - _g1->addr_to_region(_ct_bs->addr_for(card_ptr)))); +void G1RemSet::refine_card_concurrently(jbyte* card_ptr, uint worker_i) { + assert(!_g1->is_gc_active(), "Only call concurrently"); + check_card_ptr(card_ptr, _ct_bs); // If the card is no longer dirty, nothing to do. if (*card_ptr != CardTableModRefBS::dirty_card_val()) { // No need to return that this card contains refs that point // into the collection set. - return false; + return; } // Construct the region representing the card. @@ -479,7 +557,7 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, // enqueueing of the card and processing it here will have ensured // we see the up-to-date region type here. if (!r->is_old_or_humongous()) { - return false; + return; } // While we are processing RSet buffers during the collection, we @@ -493,7 +571,7 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, // however, that if evacuation fails, we have to scan any objects // that were not moved and create any missing entries. if (r->in_collection_set()) { - return false; + return; } // The result from the hot card cache insert call is either: @@ -507,14 +585,13 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache(); if (hot_card_cache->use_cache()) { - assert(!check_for_refs_into_cset, "sanity"); assert(!SafepointSynchronize::is_at_safepoint(), "sanity"); const jbyte* orig_card_ptr = card_ptr; card_ptr = hot_card_cache->insert(card_ptr); if (card_ptr == NULL) { // There was no eviction. Nothing to do. - return false; + return; } else if (card_ptr != orig_card_ptr) { // Original card was inserted and an old card was evicted. start = _ct_bs->addr_for(card_ptr); @@ -525,7 +602,7 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, // region could have been freed while in the cache. The cset is // not relevant here, since we're in concurrent phase. if (!r->is_old_or_humongous()) { - return; } } // Else we still have the original card. } @@ -534,25 +611,19 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, // in the region. The card could be stale, or the card could cover // (part of) an object at the end of the allocated space and extend // beyond the end of allocation. - HeapWord* scan_limit; - if (_g1->is_gc_active()) { - // If we're in a STW GC, then a card might be in a GC alloc region - // and extend onto a GC LAB, which may not be parsable. Stop such - // at the "scan_top" of the region. - scan_limit = r->scan_top(); - } else { - // Non-humongous objects are only allocated in the old-gen during - // GC, so if region is old then top is stable.
Humongous object + allocation sets top last; if top has not yet been set, this is + // a stale card and we'll end up with an empty intersection. If + // this is not a stale card, the synchronization between the + // enqueuing of the card and processing it here will have ensured + // we see the up-to-date top here. + HeapWord* scan_limit = r->top(); + if (scan_limit <= start) { // If the trimmed region is empty, the card must be stale. - return false; + return; } // Okay to clean and process the card now. There are still some @@ -574,36 +645,7 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, MemRegion dirty_region(start, MIN2(scan_limit, end)); assert(!dirty_region.is_empty(), "sanity"); -#if CARD_REPEAT_HISTO - init_ct_freq_table(_g1->max_capacity()); - ct_freq_note_card(_ct_bs->index_for(start)); -#endif - - G1ParPushHeapRSClosure* oops_in_heap_closure = NULL; - if (check_for_refs_into_cset) { - // ConcurrentG1RefineThreads have worker numbers larger than what - // _cset_rs_update_cl[] is set up to handle. But those threads should - // only be active outside of a collection which means that when they - // reach here they should have check_for_refs_into_cset == false. - assert((size_t)worker_i < n_workers(), "index of worker larger than _cset_rs_update_cl[].length"); - oops_in_heap_closure = _cset_rs_update_cl[worker_i]; - } - G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1, - _g1->g1_rem_set(), - oops_in_heap_closure, - check_for_refs_into_cset, - worker_i); - update_rs_oop_cl.set_from(r); - - G1TriggerClosure trigger_cl; - FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl); - G1InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl); - G1Mux2Closure mux(&invoke_cl, &update_rs_oop_cl); - - FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, - (check_for_refs_into_cset ? - (OopClosure*)&mux : - (OopClosure*)&update_rs_oop_cl)); + G1ConcurrentRefineOopClosure conc_refine_cl(_g1, worker_i); // The region for the current card may be a young region. The // current card may have been a card that was evicted from the @@ -619,10 +661,8 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, // filtering when it has been determined that there has been an actual // allocation in this region and making it safe to check the young type. - bool card_processed = - r->oops_on_card_seq_iterate_careful(dirty_region, - &filter_then_update_rs_oop_cl, - card_ptr); + bool card_processed = r->oops_on_card_seq_iterate_careful(dirty_region, &conc_refine_cl); + // If unable to process the card then we encountered an unparsable // part of the heap (e.g. a partially allocated object) while @@ -630,7 +670,6 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, // and re-enqueue, because we've already cleaned the card. Without // this we could incorrectly discard a non-stale card. if (!card_processed) { - assert(!_g1->is_gc_active(), "Unparsable heap during GC"); // The card might have gotten re-dirtied and re-enqueued while we // worked. (In fact, it's pretty likely.) if (*card_ptr != CardTableModRefBS::dirty_card_val()) { @@ -644,18 +683,56 @@ bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i, } else { _conc_refine_cards++; } +} - // This gets set to true if the card being refined has - // references that point into the collection set.
- bool has_refs_into_cset = trigger_cl.triggered(); +bool G1RemSet::refine_card_during_gc(jbyte* card_ptr, + G1ScanObjsDuringUpdateRSClosure* update_rs_cl) { + assert(_g1->is_gc_active(), "Only call during GC"); + + check_card_ptr(card_ptr, _ct_bs); + + // If the card is no longer dirty, nothing to do. This covers cards that were already + // scanned as parts of the remembered sets. + if (*card_ptr != CardTableModRefBS::dirty_card_val()) { + // No need to return that this card contains refs that point + // into the collection set. + return false; + } + + // During GC we can immediately clean the card since we will not re-enqueue stale + // cards as we know they can be disregarded. + *card_ptr = CardTableModRefBS::clean_card_val(); + + // Construct the region representing the card. + HeapWord* card_start = _ct_bs->addr_for(card_ptr); + // And find the region containing it. + HeapRegion* r = _g1->heap_region_containing(card_start); + + HeapWord* scan_limit = _scan_state->scan_top(r->hrm_index()); + + if (scan_limit <= card_start) { + // If the card starts above the area in the region containing objects to scan, skip it. + return false; + } + + // Don't use addr_for(card_ptr + 1) which can ask for + // a card beyond the heap. + HeapWord* card_end = card_start + CardTableModRefBS::card_size_in_words; + MemRegion dirty_region(card_start, MIN2(scan_limit, card_end)); + assert(!dirty_region.is_empty(), "sanity"); + +#if CARD_REPEAT_HISTO + init_ct_freq_table(_g1->max_capacity()); + ct_freq_note_card(_ct_bs->index_for(card_start)); +#endif + update_rs_cl->set_region(r); + update_rs_cl->reset_has_refs_into_cset(); - // We should only be detecting that the card contains references - // that point into the collection set if the current thread is - // a GC worker thread. - assert(!has_refs_into_cset || SafepointSynchronize::is_at_safepoint(), - "invalid result at non safepoint"); + bool card_processed = r->oops_on_card_seq_iterate_careful(dirty_region, update_rs_cl); + assert(card_processed, "must be"); + _conc_refine_cards++; - return has_refs_into_cset; + return update_rs_cl->has_refs_into_cset(); } void G1RemSet::print_periodic_summary_info(const char* header) { @@ -707,10 +784,172 @@ void G1RemSet::prepare_for_verify() { hot_card_cache->set_use_cache(false); DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set()); - updateRS(&into_cset_dcq, 0); + updateRS(&into_cset_dcq, NULL, 0); _g1->into_cset_dirty_card_queue_set().clear(); hot_card_cache->set_use_cache(use_hot_card_cache); assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); } } + +class G1RebuildRemSetTask: public AbstractGangTask { + // Closure that rebuilds the remembered set for a single region by scanning + // the objects allocated in it before the top-at-rebuild-start snapshot. + class G1RebuildRemSetHeapRegionClosure : public HeapRegionClosure { + ConcurrentMark* _cm; + G1RebuildRemSetClosure _update_cl; + + void scan_for_references(oop const obj, MemRegion mr) { + obj->oop_iterate(&_update_cl, mr); + } + + void scan_for_references(oop const obj) { + obj->oop_iterate(&_update_cl); + } + + // A humongous object is live (with respect to the scanning) if either: + // a) it is marked on the bitmap as such, or + // b) its TARS is larger than its nTAMS, i.e. it has been allocated during marking.
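For non-humongous regions, the rebuild helper below walks two distinct ranges; a compact sketch mirroring that walk (the standalone helper is hypothetical; the bitmap and closure APIs are the ones used in this patch):

    static void rebuild_ranges_example(CMBitMap* bitmap, HeapWord* bottom,
                                       HeapWord* ntams, HeapWord* tars,
                                       ExtendedOopClosure* cl) {
      // [bottom, nTAMS): only objects marked on the bitmap are live.
      HeapWord* cur = bitmap->getNextMarkedWordAddress(bottom, ntams);
      while (cur < ntams) {
        oop obj = oop(cur);
        obj->oop_iterate(cl);
        cur = bitmap->getNextMarkedWordAddress(cur + obj->size(), ntams);
      }
      // [nTAMS, TARS): allocated during marking, hence live by definition.
      for (cur = ntams; cur < tars; cur += oop(cur)->size()) {
        oop(cur)->oop_iterate(cl);
      }
      // [TARS, top): skipped; concurrent refinement keeps these cards current.
    }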
+ bool is_humongous_live(oop const humongous_obj, HeapWord* ntams, HeapWord* tars) const { + return _cm->nextMarkBitMap()->isMarked(humongous_obj) || (tars > ntams); + } + + // Rebuilds the remembered sets by scanning the objects that were allocated before + // rebuild start in the given region, applying the given closure to each of these objects. + // Uses the bitmap to get live objects in the area from [bottom, nTAMS), and all + // objects from [nTAMS, TARS). + // Returns the number of bytes marked in that region between bottom and nTAMS. + size_t rebuild_rem_set_in_region(CMBitMap* const mark_bitmap, HeapRegion* hr, HeapWord* const top_at_rebuild_start) { + size_t marked_bytes = 0; + + HeapWord* start = hr->bottom(); + HeapWord* const ntams = hr->next_top_at_mark_start(); + + if (top_at_rebuild_start <= start) { + return 0; + } + + if (hr->isHumongous()) { + oop const humongous_obj = oop(hr->humongous_start_region()->bottom()); + if (is_humongous_live(humongous_obj, ntams, top_at_rebuild_start)) { + // We need to scan both [bottom, nTAMS) and [nTAMS, top_at_rebuild_start); + // however in case of humongous objects it is sufficient to scan the encompassing + // area (top_at_rebuild_start is always larger than or equal to nTAMS) as one of the + // two areas will be zero sized. I.e. nTAMS is either the same as bottom or the same + // as top_at_rebuild_start; any other value would mean that nTAMS points somewhere + // into the object. + assert(hr->top() == hr->next_top_at_mark_start() || hr->top() == top_at_rebuild_start, + "More than one object in the humongous region?"); + scan_for_references(humongous_obj, MemRegion(start, top_at_rebuild_start)); + return ntams != start ? pointer_delta(hr->next_top_at_mark_start(), start, 1) : 0; + } else { + return 0; + } + } + + assert(start <= hr->end() && start <= ntams && + ntams <= top_at_rebuild_start && top_at_rebuild_start <= hr->end(), + err_msg("Inconsistency between bottom, nTAMS, TARS, end - " + "start: " PTR_FORMAT ", nTAMS: " PTR_FORMAT ", TARS: " PTR_FORMAT ", end: " PTR_FORMAT, + p2i(start), p2i(ntams), p2i(top_at_rebuild_start), p2i(hr->end()))); + + // Iterate live objects between bottom and nTAMS. + start = mark_bitmap->getNextMarkedWordAddress(start, ntams); + while (start < ntams) { + oop obj = oop(start); + + size_t obj_size = obj->size(); + HeapWord* obj_end = start + obj_size; + + assert(obj_end <= hr->end(), "Humongous objects must have been handled elsewhere."); + + scan_for_references(obj); + + // Add the size of this object (in words; converted to bytes on return) + // to the marked total. + marked_bytes += obj_size; + + // Find the next marked object after this one. + start = mark_bitmap->getNextMarkedWordAddress(obj_end, ntams); + } + + // Finally process live objects (all of them) between nTAMS and top_at_rebuild_start. + // Objects between top_at_rebuild_start and top are implicitly managed by concurrent refinement.
+ while (start < top_at_rebuild_start) { + oop obj = oop(start); + size_t obj_size = obj->size(); + HeapWord* obj_end = start + obj_size; + + assert(obj_end <= hr->end(), "Humongous objects must have been handled elsewhere."); + + scan_for_references(obj); + start = obj_end; + } + return marked_bytes * HeapWordSize; + } + public: + G1RebuildRemSetHeapRegionClosure(G1CollectedHeap* g1h, + ConcurrentMark* cm, + uint worker_id) : + HeapRegionClosure(), + _cm(cm), + _update_cl(g1h, worker_id) { } + + bool doHeapRegion(HeapRegion* hr) { + if (_cm->has_aborted()) { + return true; + } + uint const region_idx = hr->hrm_index(); + HeapWord* const top_at_rebuild_start = _cm->top_at_rebuild_start(region_idx); + // TODO: smaller increments to do yield checks with + size_t marked_bytes = rebuild_rem_set_in_region(_cm->nextMarkBitMap(), hr, top_at_rebuild_start); + if (marked_bytes > 0) { + hr->add_to_marked_bytes(marked_bytes); + assert(!hr->is_old() || marked_bytes == (_cm->liveness(hr->hrm_index()) * HeapWordSize), + err_msg("Marked bytes " SIZE_FORMAT " for region %u do not match liveness during mark " SIZE_FORMAT, + marked_bytes, hr->hrm_index(), _cm->liveness(hr->hrm_index()) * HeapWordSize)); + } + _cm->do_yield_check(); + // Abort state may have changed after the yield check. + return _cm->has_aborted(); + } + }; + + HeapRegionClaimer _hr_claimer; + ConcurrentMark* _cm; + + uint _worker_id_offset; + public: + G1RebuildRemSetTask(ConcurrentMark* cm, + uint n_workers, + uint worker_id_offset) : + AbstractGangTask("G1 Rebuild Remembered Set"), + _cm(cm), + _hr_claimer(n_workers), + _worker_id_offset(worker_id_offset) { + } + + void work(uint worker_id) { + SuspendibleThreadSetJoiner sts_join; + + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + G1RebuildRemSetHeapRegionClosure cl(g1h, _cm, _worker_id_offset + worker_id); + g1h->heap_region_par_iterate_chunked(&cl, worker_id, &_hr_claimer); + } +}; + +void G1RemSet::rebuild_rem_set(ConcurrentMark* cm, + FlexibleWorkGang* workers, + bool use_parallel, + uint num_workers, + uint worker_id_offset) { + G1RebuildRemSetTask cl(cm, + num_workers, + worker_id_offset); + if (use_parallel) { + workers->set_active_workers((int) num_workers); + workers->run_task(&cl); + } else { + cl.work(0); + } +} \ No newline at end of file diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp index 4a9b286a6..e9dba5b04 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp @@ -26,6 +26,7 @@ #define SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_HPP #include "gc_implementation/g1/g1RemSetSummary.hpp" +#include "gc_implementation/g1/g1OopClosures.hpp" // A G1RemSet provides ways of iterating over pointers into a selected // collection set. @@ -34,6 +35,8 @@ class G1CollectedHeap; class CardTableModRefBarrierSet; class ConcurrentG1Refine; class G1ParPushHeapRSClosure; +class G1RemSetScanState; +class CMBitMap; // A G1RemSet in which each heap region has a rem set that records the // external heap references into it. Uses a mod ref bs to track updates, @@ -41,6 +44,8 @@ class G1ParPushHeapRSClosure; class G1RemSet: public CHeapObj<mtGC> { private: + G1RemSetScanState* _scan_state; + G1RemSetSummary _prev_period_summary; protected: G1CollectedHeap* _g1; @@ -66,13 +71,12 @@ protected: size_t* _cards_scanned; size_t _total_cards_scanned; - // Used for caching the closure that is responsible for scanning - // references into the collection set.
- G1ParPushHeapRSClosure** _cset_rs_update_cl; - // Print the given summary info virtual void print_summary_info(G1RemSetSummary * summary, const char * header = NULL); public: + + void initialize(size_t capacity, uint max_regions); + // This is called to reset dual hash tables after the gc pause // is finished and the initial hash table is no longer being // scanned. @@ -81,21 +85,12 @@ public: G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs); ~G1RemSet(); - // Invoke "blk->do_oop" on all pointers into the collection set - // from objects in regions outside the collection set (having - // invoked "blk->set_region" to set the "from" region correctly - // beforehand.) - // - // Invoke code_root_cl->do_code_blob on the unmarked nmethods - // on the strong code roots list for each region in the - // collection set. + // Process all oops in the collection set from the cards in the refinement buffers and + // remembered sets using pss. + // - // The "worker_i" param is for the parallel case where the id - // of the worker thread calling this function can be helpful in - // partitioning the work to be done. It should be the same as - // the "i" passed to the calling thread's work(i) function. - // In the sequential case this param will be ignored. - void oops_into_collection_set_do(G1ParPushHeapRSClosure* blk, + // Further applies code_root_cl to the unmarked nmethods on the strong code + // roots list for each region in the collection set. + void oops_into_collection_set_do(G1ParScanThreadState* pss, CodeBlobClosure* code_root_cl, uint worker_i); @@ -107,13 +102,16 @@ public: void prepare_for_oops_into_collection_set_do(); void cleanup_after_oops_into_collection_set_do(); - void scanRS(G1ParPushHeapRSClosure* oc, + void scanRS(G1ParScanThreadState* pss, CodeBlobClosure* code_root_cl, uint worker_i); - void updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i); + G1RemSetScanState* scan_state() const { return _scan_state; } + + // Flush remaining refinement buffers into the remembered set. + void updateRS(DirtyCardQueue* into_cset_dcq, G1ParScanThreadState* pss, uint worker_i); + - CardTableModRefBS* ct_bs() { return _ct_bs; } size_t cardsScanned() { return _total_cards_scanned; } // Record, if necessary, the fact that *p (where "p" is in region "from", @@ -121,25 +119,13 @@ public: template <class T> void write_ref(HeapRegion* from, T* p); template <class T> void par_write_ref(HeapRegion* from, T* p, int tid); - // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region - // or card, respectively, such that a region or card with a corresponding - // 0 bit contains no part of any live object. Eliminates any remembered - // set entries that correspond to dead heap ranges. - void scrub(BitMap* region_bm, BitMap* card_bm); - - // Like the above, but assumes is called in parallel: "worker_num" is the - // parallel thread id of the current thread, and "claim_val" is the - // value that should be used to claim heap regions. - void scrub_par(BitMap* region_bm, BitMap* card_bm, - uint worker_num, HeapRegionClaimer *hrclaimer); - // Refine the card corresponding to "card_ptr". - // If check_for_refs_into_cset is true, a true result is returned - // if the given card contains oops that have references into the - // current collection set. - virtual bool refine_card(jbyte* card_ptr, - uint worker_i, - bool check_for_refs_into_cset); + void refine_card_concurrently(jbyte* card_ptr, + uint worker_i); + // Refine the card corresponding to "card_ptr". Returns "true" if the given card contains
Returns "true" if the given card contains + // oops that have references into the current collection set. + virtual bool refine_card_during_gc(jbyte* card_ptr, + G1ScanObjsDuringUpdateRSClosure* update_rs_cl); // Print accumulated summary info from the start of the VM. virtual void print_summary_info(); @@ -153,6 +139,14 @@ public: size_t conc_refine_cards() const { return _conc_refine_cards; } + // Rebuilds the remembered set by scanning from bottom to TARS for all regions + // using the given work gang. + void rebuild_rem_set(ConcurrentMark* cm, + FlexibleWorkGang* workers, + bool use_parallel, + uint num_workers, + uint worker_id_offset); + }; class CountNonCleanMemRegionClosure: public MemRegionClosure { diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp index 1afef2fb5..e0630f649 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp @@ -64,7 +64,7 @@ inline void G1RemSet::par_write_ref(HeapRegion* from, T* p, int tid) { assert(Universe::heap()->is_in_reserved(obj), "must be in heap"); #endif // ASSERT - assert(from == NULL || from->is_in_reserved(p), "p is not in from"); + assert(from->is_in_reserved(p) || from->startsHumongous(), "p is not in from"); HeapRegion* to = _g1->heap_region_containing(obj); if (from != to) { diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSetTrackingPolicy.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSetTrackingPolicy.cpp new file mode 100644 index 000000000..eff6815ae --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSetTrackingPolicy.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/collectionSetChooser.hpp" +#include "gc_implementation/g1/g1RemSetTrackingPolicy.hpp" +#include "gc_implementation/g1/heapRegion.inline.hpp" +#include "gc_implementation/g1/heapRegionRemSet.hpp" +#include "runtime/safepoint.hpp" + +bool G1RemSetTrackingPolicy::is_interesting_humongous_region(HeapRegion* r) const { + return r->startsHumongous() && oop(r->bottom())->is_typeArray(); +} + +bool G1RemSetTrackingPolicy::needs_scan_for_rebuild(HeapRegion* r) const { + // All non-young and non-free regions need to be scanned for references: + // young regions are rescanned at every GC anyway, and free regions + // contain no references at all. + return !(r->is_young() || r->is_free()); +} + +void G1RemSetTrackingPolicy::update_at_allocate(HeapRegion* r) { + if (r->is_young()) { + // Always collect remembered set for young regions. + r->rem_set()->set_state_complete(); + } else if (r->isHumongous()) { + // Collect remembered sets for humongous regions by default to allow eager reclaim. + r->rem_set()->set_state_complete(); + } else if (r->is_old()) { + // By default, do not create remembered set for new old regions. + r->rem_set()->set_state_empty(); + } else { + guarantee(false, err_msg("Unhandled region %u with heap region type %s", r->hrm_index(), r->get_type_str())); + } +} + +void G1RemSetTrackingPolicy::update_at_free(HeapRegion* r) { + r->rem_set()->set_state_empty(); +} + +bool G1RemSetTrackingPolicy::update_before_rebuild(HeapRegion* r, size_t live_bytes) { + assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint"); + + bool selected_for_rebuild = false; + + // Only consider updating the remembered set for old gen regions - excluding archive regions + // which never move (but are "Old" regions). + if (r->is_old_or_humongous()) { + size_t between_ntams_and_top = (r->top() - r->next_top_at_mark_start()) * HeapWordSize; + size_t total_live_bytes = live_bytes + between_ntams_and_top; + // Completely free regions after rebuild are of no interest wrt rebuilding the + // remembered set. + assert(!r->rem_set()->is_updating(), err_msg("Remembered set of region %u is updating before rebuild", r->hrm_index())); + // To be of interest for rebuilding the remembered set the following must apply: + // - The region must contain some live data. + // - We always try to update the remembered sets of humongous regions containing + // type arrays if they are empty as they might have been reset after full GC. + // - Only need to rebuild non-complete remembered sets. + // - Otherwise only add those old gen regions whose occupancy is low enough that there + // is a chance that we will ever evacuate them in the mixed GCs.
+ if ((total_live_bytes > 0) && + (is_interesting_humongous_region(r) || CollectionSetChooser::region_occupancy_low_enough_for_evac(total_live_bytes)) && + !r->rem_set()->is_tracked()) { + + r->rem_set()->set_state_updating(); + selected_for_rebuild = true; + } + } + + return selected_for_rebuild; +} + +void G1RemSetTrackingPolicy::update_after_rebuild(HeapRegion* r) { + assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint"); + + if (r->is_old_or_humongous()) { + if (r->rem_set()->is_updating()) { + r->rem_set()->set_state_complete(); + } + // We can drop the remembered sets of humongous regions whose remembered set has + // grown too large: we will never try to eagerly reclaim or move them anyway until + // the next concurrent cycle, as e.g. remembered set entries will keep being added. + if (r->isHumongous() && !G1CollectedHeap::heap()->is_potential_eager_reclaim_candidate(r)) { + r->rem_set()->clear_locked(true /* only_cardset */); + } + assert(!r->continuesHumongous() || r->rem_set()->is_empty(), "Continues humongous object remsets should be empty"); + } +} + diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSetTrackingPolicy.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSetTrackingPolicy.hpp new file mode 100644 index 000000000..dc31166d5 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSetTrackingPolicy.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_G1_G1REMSETTRACKINGPOLICY_HPP +#define SHARE_VM_GC_G1_G1REMSETTRACKINGPOLICY_HPP + +#include "gc_implementation/g1/heapRegion.hpp" +#include "gc_implementation/g1/heapRegionType.hpp" +#include "memory/allocation.hpp" + +// The remembered set tracking policy determines for a given region the state of +// the remembered set, i.e. when it should be tracked, and if/when the remembered +// set is complete. +class G1RemSetTrackingPolicy : public CHeapObj<mtGC> { +private: + // Is the given region an interesting humongous region to start remembered set tracking + // for? + bool is_interesting_humongous_region(HeapRegion* r) const; +public: + // Do we need to scan the given region to get all outgoing references for remembered + // set rebuild? + bool needs_scan_for_rebuild(HeapRegion* r) const; + // Update remembered set tracking state at allocation of the region. May be + // called at any time. The caller makes sure that the changes to the remembered + // set state are visible to other threads.
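Taken together, the hooks declared below drive a simple per-region remembered set lifecycle. An illustrative usage sketch (the driver function is hypothetical; the hook and state names follow this patch):

    void remset_lifecycle_example(G1RemSetTrackingPolicy* policy,
                                  HeapRegion* r, size_t live_bytes) {
      policy->update_at_allocate(r);   // young/humongous start complete, old starts empty
      // Remark pause: select regions worth rebuilding; their state becomes "updating".
      if (policy->update_before_rebuild(r, live_bytes)) {
        // ... concurrent rebuild scans the region ...
      }
      policy->update_after_rebuild(r); // cleanup pause: "updating" -> "complete"
      policy->update_at_free(r);       // freeing resets the state to "empty"
    }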
+ void update_at_allocate(HeapRegion* r); + // Update remembered set tracking state before we are going to rebuild remembered + // sets. Called at safepoint in the remark pause. + bool update_before_rebuild(HeapRegion* r, size_t live_bytes); + // Update remembered set tracking state after rebuild is complete, i.e. the cleanup + // pause. Called at safepoint. + void update_after_rebuild(HeapRegion* r); + // Update remembered set tracking state when the region is freed. + void update_at_free(HeapRegion* r); +}; + +#endif /* SHARE_VM_GC_G1_G1REMSETTRACKINGPOLICY_HPP */ + diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RootProcessor.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RootProcessor.cpp index 6b0f8e8bd..f51caba9b 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RootProcessor.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RootProcessor.cpp @@ -50,7 +50,7 @@ class G1CodeBlobClosure : public CodeBlobClosure { T oop_or_narrowoop = oopDesc::load_heap_oop(p); if (!oopDesc::is_null(oop_or_narrowoop)) { oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop); - HeapRegion* hr = _g1h->heap_region_containing_raw(o); + HeapRegion* hr = _g1h->heap_region_containing(o); assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in CS then evacuation failed and nm must already be in the remset"); hr->add_strong_code_root(_nm); } @@ -350,7 +350,7 @@ void G1RootProcessor::process_code_cache_roots(CodeBlobClosure* code_closure, } } -void G1RootProcessor::scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs, +void G1RootProcessor::scan_remembered_sets(G1ParScanThreadState* pss, OopClosure* scan_non_heap_weak_roots, uint worker_i) { G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times(); @@ -359,7 +359,7 @@ void G1RootProcessor::scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs, // Now scan the complement of the collection set. G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots); - _g1h->g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i); + _g1h->g1_rem_set()->oops_into_collection_set_do(pss, &scavenge_cs_nmethods, worker_i); } void G1RootProcessor::set_num_workers(int active_workers) { diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp index 8395ee2e4..7d5041caa 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp @@ -33,7 +33,7 @@ class CLDClosure; class CodeBlobClosure; class G1CollectedHeap; class G1GCPhaseTimes; -class G1ParPushHeapRSClosure; + class G1RootClosures; class Monitor; class OopClosure; @@ -127,7 +127,7 @@ public: // Apply scan_rs to all locations in the union of the remembered sets for all // regions in the collection set // (having done "set_region" to indicate the region in which the root resides), - void scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs, + void scan_remembered_sets(G1ParScanThreadState* pss, OopClosure* scan_non_heap_weak_roots, uint worker_i); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1SerialFullCollector.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1SerialFullCollector.cpp new file mode 100644 index 000000000..d3c642043 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1SerialFullCollector.cpp @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1FullGCScope.hpp" +#include "gc_implementation/g1/g1MarkSweep.hpp" +#include "gc_implementation/g1/g1RemSet.inline.hpp" +#include "gc_implementation/g1/g1SerialFullCollector.hpp" +#include "gc_implementation/g1/heapRegionRemSet.hpp" + +G1SerialFullCollector::G1SerialFullCollector(G1FullGCScope* scope, + ReferenceProcessor* reference_processor) : + _scope(scope), + _reference_processor(reference_processor), + _is_alive_mutator(_reference_processor, NULL), + _mt_discovery_mutator(_reference_processor, G1ParallelFullGC) { + // Temporarily make discovery by the STW ref processor single-threaded (non-MT) + // and clear the STW ref processor's _is_alive_non_header field. +} + +void G1SerialFullCollector::prepare_collection() { + _reference_processor->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/); + _reference_processor->setup_policy(_scope->should_clear_soft_refs()); +} + +void G1SerialFullCollector::complete_collection() { + // Enqueue any discovered reference objects that have + // not been removed from the discovered lists. + _reference_processor->enqueue_discovered_references(); + + // Iterate the heap and rebuild the remembered sets. + rebuild_remembered_sets(); +} + +void G1SerialFullCollector::collect() { + // Do the actual collection work. + G1MarkSweep::invoke_at_safepoint(_reference_processor, _scope->should_clear_soft_refs()); +} + +class PostMCRemSetClearClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + ModRefBarrierSet* _mr_bs; +public: + PostMCRemSetClearClosure(G1CollectedHeap* g1h, ModRefBarrierSet* mr_bs) : + _g1h(g1h), _mr_bs(mr_bs) {} + + bool doHeapRegion(HeapRegion* r) { + HeapRegionRemSet* hrrs = r->rem_set(); + + _g1h->reset_gc_time_stamps(r); + + if (r->continuesHumongous()) { + // We'll assert that the strong code root list and RSet are empty + assert(hrrs->strong_code_roots_list_length() == 0, "sanity"); + assert(hrrs->occupied() == 0, "RSet should be empty"); + } else { + hrrs->clear(); + } + + // You might think here that we could clear just the cards + // corresponding to the used region. But no: if we leave a dirty card + // in a region we might allocate into, then it would prevent that card + // from being enqueued, and cause it to be missed. + // Re: the performance cost: we shouldn't be doing full GC anyway!
+ _mr_bs->clear(MemRegion(r->bottom(), r->end())); + + return false; + } +}; + + +class RebuildRSOutOfRegionClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + UpdateRSOopClosure _cl; + int _worker_i; +public: + RebuildRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) : + _cl(g1->g1_rem_set(), worker_i), + _worker_i(worker_i), + _g1h(g1) + { } + + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + _cl.set_from(r); + r->oop_iterate(&_cl); + } + return false; + } +}; + +class ParRebuildRSTask: public AbstractGangTask { + G1CollectedHeap* _g1; + HeapRegionClaimer _hrclaimer; + +public: + ParRebuildRSTask(G1CollectedHeap* g1) + : AbstractGangTask("ParRebuildRSTask"), + _g1(g1), _hrclaimer(g1->workers()->active_workers()) + { } + + void work(uint worker_id) { + RebuildRSOutOfRegionClosure rebuild_rs(_g1, worker_id); + _g1->heap_region_par_iterate_chunked(&rebuild_rs, worker_id, &_hrclaimer); + } +}; + +void G1SerialFullCollector::rebuild_remembered_sets() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + // First clear the stale remembered sets. + PostMCRemSetClearClosure rs_clear(g1h, g1h->g1_barrier_set()); + g1h->heap_region_iterate(&rs_clear); + + // Rebuild remembered sets of all regions. + if (G1CollectedHeap::use_parallel_gc_threads()) { + uint n_workers = + AdaptiveSizePolicy::calc_active_workers(g1h->workers()->total_workers(), + g1h->workers()->active_workers(), + Threads::number_of_non_daemon_threads()); + assert(UseDynamicNumberOfGCThreads || + n_workers == g1h->workers()->total_workers(), + "If not dynamic should be using all the workers"); + g1h->workers()->set_active_workers(n_workers); + // Set parallel threads in the heap (_n_par_threads) only + // before a parallel phase and always reset it to 0 after + // the phase so that the number of parallel threads does + // not get carried forward to a serial phase where there + // may be code that is "possibly_parallel". + g1h->set_par_threads(n_workers); + + ParRebuildRSTask rebuild_rs_task(g1h); + + assert(UseDynamicNumberOfGCThreads || + g1h->workers()->active_workers() == g1h->workers()->total_workers(), + "Unless dynamic should use total workers"); + // Use the most recent number of active workers + assert(g1h->workers()->active_workers() > 0, + "Active workers not properly set"); + g1h->set_par_threads(g1h->workers()->active_workers()); + g1h->workers()->run_task(&rebuild_rs_task); + g1h->set_par_threads(0); + } else { + RebuildRSOutOfRegionClosure rebuild_rs_task(g1h); + g1h->heap_region_iterate(&rebuild_rs_task); + } +} diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1SerialFullCollector.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1SerialFullCollector.hpp new file mode 100644 index 000000000..a80492030 --- /dev/null +++ b/hotspot/src/share/vm/gc_implementation/g1/g1SerialFullCollector.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_G1_G1SERIALCOLLECTOR_HPP +#define SHARE_VM_GC_G1_G1SERIALCOLLECTOR_HPP + +#include "memory/allocation.hpp" + +class G1FullGCScope; +class ReferenceProcessor; + +class G1SerialFullCollector : StackObj { + G1FullGCScope* _scope; + ReferenceProcessor* _reference_processor; + ReferenceProcessorIsAliveMutator _is_alive_mutator; + ReferenceProcessorMTDiscoveryMutator _mt_discovery_mutator; + + void rebuild_remembered_sets(); + +public: + G1SerialFullCollector(G1FullGCScope* scope, ReferenceProcessor* reference_processor); + + void prepare_collection(); + void collect(); + void complete_collection(); +}; + +#endif // SHARE_VM_GC_G1_G1SERIALCOLLECTOR_HPP diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedup.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedup.cpp index 804d1e141..918fd42fa 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1StringDedup.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1StringDedup.cpp @@ -51,7 +51,7 @@ void G1StringDedup::stop() { bool G1StringDedup::is_candidate_from_mark(oop obj, uint age) { if (java_lang_String::is_instance(obj)) { - bool from_young = G1CollectedHeap::heap()->heap_region_containing_raw(obj)->is_young(); + bool from_young = G1CollectedHeap::heap()->heap_region_containing(obj)->is_young(); if (from_young && age < StringDeduplicationAgeThreshold) { // Candidate found. String is being evacuated from young to old but has not // reached the deduplication age threshold, i.e. 
has not previously been a diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp index edac4d72c..77f402741 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp @@ -274,6 +274,9 @@ product(uintx, G1MixedGCCountTarget, 8, \ "The target number of mixed GCs after a marking cycle.") \ \ + experimental(bool, G1PretouchAuxiliaryMemory, false, \ + "Pre-touch large auxiliary data structures used by the GC.") \ + \ experimental(bool, G1EagerReclaimHumongousObjects, true, \ "Try to reclaim dead large objects at every young GC.") \ \ diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp index 309392cc0..6c422ef88 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp @@ -47,35 +47,32 @@ enum G1Mark { template <G1Barrier barrier, G1Mark do_mark_object> class G1ParCopyClosure; -class G1ParScanClosure; -class G1ParPushHeapRSClosure; +class G1ScanEvacuatedObjClosure; + +class G1ScanObjsDuringUpdateRSClosure; +class G1ScanObjsDuringScanRSClosure; + -class FilterIntoCSClosure; -class FilterOutOfRegionClosure; class G1CMOopClosure; class G1RootRegionScanClosure; - +class G1RebuildRemSetClosure; // Specialized oop closures from g1RemSet.cpp class G1Mux2Closure; -class G1TriggerClosure; -class G1InvokeIfNotTriggeredClosure; -class G1UpdateRSOrPushRefOopClosure; +class G1ConcurrentRefineOopClosure; #ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES #error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined." #endif #define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \ - f(G1ParScanClosure,_nv) \ - f(G1ParPushHeapRSClosure,_nv) \ - f(FilterIntoCSClosure,_nv) \ - f(FilterOutOfRegionClosure,_nv) \ + f(G1ScanEvacuatedObjClosure,_nv) \ + f(G1ScanObjsDuringUpdateRSClosure,_nv) \ f(G1CMOopClosure,_nv) \ f(G1RootRegionScanClosure,_nv) \ f(G1Mux2Closure,_nv) \ - f(G1TriggerClosure,_nv) \ - f(G1InvokeIfNotTriggeredClosure,_nv) \ - f(G1UpdateRSOrPushRefOopClosure,_nv) + f(G1ConcurrentRefineOopClosure,_nv) \ + f(G1RebuildRemSetClosure,_nv) \ + f(G1ScanObjsDuringScanRSClosure,_nv) #ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES #error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined." diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp index 466002977..5759964eb 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp @@ -50,60 +50,6 @@ size_t HeapRegion::GrainBytes = 0; size_t HeapRegion::GrainWords = 0; size_t HeapRegion::CardsPerRegion = 0; -HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1, - HeapRegion* hr, - G1ParPushHeapRSClosure* cl, - CardTableModRefBS::PrecisionStyle precision) : - DirtyCardToOopClosure(hr, cl, precision, NULL), - _hr(hr), _rs_scan(cl), _g1(g1) { } - -FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r, - OopClosure* oc) : - _r_bottom(r->bottom()), _r_end(r->end()), _oc(oc) { } - -void HeapRegionDCTOC::walk_mem_region(MemRegion mr, - HeapWord* bottom, - HeapWord* top) { - G1CollectedHeap* g1h = _g1; - size_t oop_size; - HeapWord* cur = bottom; - - // Start filtering what we add to the remembered set.
If the object is - // not considered dead, either because it is marked (in the mark bitmap) - // or it was allocated after marking finished, then we add it. Otherwise - // we can safely ignore the object. - if (!g1h->is_obj_dead(oop(cur), _hr)) { - oop_size = oop(cur)->oop_iterate(_rs_scan, mr); - } else { - oop_size = _hr->block_size(cur); - } - - cur += oop_size; - - if (cur < top) { - oop cur_oop = oop(cur); - oop_size = _hr->block_size(cur); - HeapWord* next_obj = cur + oop_size; - while (next_obj < top) { - // Keep filtering the remembered set. - if (!g1h->is_obj_dead(cur_oop, _hr)) { - // Bottom lies entirely below top, so we can call the - // non-memRegion version of oop_iterate below. - cur_oop->oop_iterate(_rs_scan); - } - cur = next_obj; - cur_oop = oop(cur); - oop_size = _hr->block_size(cur); - next_obj = cur + oop_size; - } - - // Last object. Need to do dead-obj filtering here too. - if (!g1h->is_obj_dead(oop(cur), _hr)) { - oop(cur)->oop_iterate(_rs_scan, mr); - } - } -} - size_t HeapRegion::max_region_size() { return HeapRegionBounds::max_size(); } @@ -157,9 +103,6 @@ void HeapRegion::reset_after_compaction() { void HeapRegion::hr_clear(bool par, bool clear_space, bool locked) { assert(_humongous_start_region == NULL, "we should have already filtered out humongous regions"); - assert(_end == _orig_end, - "we should have already filtered out humongous regions"); - _in_collection_set = false; set_allocation_context(AllocationContext::system()); @@ -233,25 +176,19 @@ void HeapRegion::set_old() { _type.set_old(); } -void HeapRegion::set_startsHumongous(HeapWord* new_top, HeapWord* new_end) { +void HeapRegion::set_startsHumongous(HeapWord* obj_top, size_t fill_size) { assert(!isHumongous(), "sanity / pre-condition"); - assert(end() == _orig_end, - "Should be normal before the humongous object allocation"); assert(top() == bottom(), "should be empty"); - assert(bottom() <= new_top && new_top <= new_end, "pre-condition"); report_region_type_change(G1HeapRegionTraceType::StartsHumongous); _type.set_starts_humongous(); _humongous_start_region = this; - set_end(new_end); - _offsets.set_for_starts_humongous(new_top); + _offsets.set_for_starts_humongous(obj_top, fill_size); } void HeapRegion::set_continuesHumongous(HeapRegion* first_hr) { assert(!isHumongous(), "sanity / pre-condition"); - assert(end() == _orig_end, - "Should be normal before the humongous object allocation"); assert(top() == bottom(), "should be empty"); assert(first_hr->startsHumongous(), "pre-condition"); @@ -263,18 +200,6 @@ void HeapRegion::set_continuesHumongous(HeapRegion* first_hr) { void HeapRegion::clear_humongous() { assert(isHumongous(), "pre-condition"); - if (startsHumongous()) { - assert(top() <= end(), "pre-condition"); - set_end(_orig_end); - if (top() > end()) { - // at least one "continues humongous" region after it - set_top(end()); - } - } else { - // continues humongous - assert(end() == _orig_end, "sanity"); - } - assert(capacity() == HeapRegion::GrainBytes, "pre-condition"); _humongous_start_region = NULL; } @@ -412,120 +337,6 @@ HeapRegion::object_iterate_mem_careful(MemRegion mr, return NULL; } -// Humongous objects are allocated directly in the old-gen. Need -// special handling for concurrent processing encountering an -// in-progress allocation. 
-static bool do_oops_on_card_in_humongous(MemRegion mr, - FilterOutOfRegionClosure* cl, - HeapRegion* hr, - G1CollectedHeap* g1h) { - assert(hr->isHumongous(), "precondition"); - HeapRegion* sr = hr->humongous_start_region(); - oop obj = oop(sr->bottom()); - - // If concurrent and klass_or_null is NULL, then space has been - // allocated but the object has not yet been published by setting - // the klass. That can only happen if the card is stale. However, - // we've already set the card clean, so we must return failure, - // since the allocating thread could have performed a write to the - // card that might be missed otherwise. - if (!g1h->is_gc_active() && (obj->klass_or_null_acquire() == NULL)) { - return false; - } - - // Only filler objects follow a humongous object in the containing - // regions, and we can ignore those. So only process the one - // humongous object. - if (!g1h->is_obj_dead(obj, sr)) { - if (obj->is_objArray() || (sr->bottom() < mr.start())) { - // objArrays are always marked precisely, so limit processing - // with mr. Non-objArrays might be precisely marked, and since - // it's humongous it's worthwhile avoiding full processing. - // However, the card could be stale and only cover filler - // objects. That should be rare, so not worth checking for; - // instead let it fall out from the bounded iteration. - obj->oop_iterate(cl, mr); - } else { - // If obj is not an objArray and mr contains the start of the - // obj, then this could be an imprecise mark, and we need to - // process the entire object. - obj->oop_iterate(cl); - } - } - return true; -} - -bool HeapRegion::oops_on_card_seq_iterate_careful(MemRegion mr, - FilterOutOfRegionClosure* cl, - jbyte* card_ptr) { - assert(card_ptr != NULL, "pre-condition"); - assert(MemRegion(bottom(), end()).contains(mr), "Card region not in heap region"); - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - - - // We can only clean the card here, after we make the decision that - // the card is not young. - *card_ptr = CardTableModRefBS::clean_card_val(); - // We must complete this write before we do any of the reads below. - OrderAccess::storeload(); - - // Special handling for humongous regions. - if (isHumongous()) { - return do_oops_on_card_in_humongous(mr, cl, this, g1h); - } - - // During GC we limit mr by scan_top. So we never get here with an - // mr covering objects allocated during GC. Non-humongous objects - // are only allocated in the old-gen during GC. So the parts of the - // heap that may be examined here are always parsable; there's no - // need to use klass_or_null here to detect in-progress allocations. - - // Cache the boundaries of the memory region in some const locals - HeapWord* const start = mr.start(); - HeapWord* const end = mr.end(); - - // Find the obj that extends onto mr.start(). - // Update BOT as needed while finding start of (possibly dead) - // object containing the start of the region. - HeapWord* cur = block_start(start); - -#ifdef ASSERT - { - assert(cur <= start, - err_msg("cur: " PTR_FORMAT ", start: " PTR_FORMAT, p2i(cur), p2i(start))); - HeapWord* next = cur + block_size(cur); - assert(start < next, - err_msg("start: " PTR_FORMAT ", next: " PTR_FORMAT, p2i(start), p2i(next))); - } -#endif - - do { - oop obj = oop(cur); - assert(obj->is_oop(true), err_msg("Not an oop at " PTR_FORMAT, p2i(cur))); - assert(obj->klass_or_null() != NULL, - err_msg("Unparsable heap at " PTR_FORMAT, p2i(cur))); - - if (g1h->is_obj_dead(obj, this)) { - // Carefully step over dead object. 
- cur += block_size(cur); - } else { - // Step over live object, and process its references. - cur += obj->size(); - // Non-objArrays are usually marked imprecise at the object - // start, in which case we need to iterate over them in full. - // objArrays are precisely marked, but can still be iterated - // over in full if completely covered. - if (!obj->is_objArray() || (((HeapWord*)obj) >= start && cur <= end)) { - obj->oop_iterate(cl); - } else { - obj->oop_iterate(cl, mr); - } - } - } while (cur < end); - - return true; -} - // Code roots support void HeapRegion::add_strong_code_root(nmethod* nm) { @@ -686,8 +497,8 @@ void HeapRegion::print_on(outputStream* st) const { else st->print(" "); st->print(" TS %5d", _gc_time_stamp); - st->print(" PTAMS " PTR_FORMAT " NTAMS " PTR_FORMAT, - prev_top_at_mark_start(), next_top_at_mark_start()); + st->print(" PTAMS " PTR_FORMAT " NTAMS " PTR_FORMAT " %s ", + prev_top_at_mark_start(), next_top_at_mark_start(), rem_set()->get_state_str()); if (UseNUMA) { G1NUMA* numa = G1NUMA::numa(); if (node_index() < numa->num_active_nodes()) { @@ -775,8 +586,8 @@ public: p, (void*) _containing_obj, from->bottom(), from->end()); print_object(gclog_or_tty, _containing_obj); - gclog_or_tty->print_cr("points to obj " PTR_FORMAT " not in the heap", - (void*) obj); + HeapRegion* const to = _g1h->heap_region_containing(obj); + gclog_or_tty->print_cr("points to obj " PTR_FORMAT " in region " HR_FORMAT " remset %s", p2i(obj), HR_FORMAT_PARAMS(to), to->rem_set()->get_state_str()); } else { HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p); HeapRegion* to = _g1h->heap_region_containing((HeapWord*)obj); @@ -825,7 +636,8 @@ public: HeapRegion* to = _g1h->heap_region_containing(obj); if (from != NULL && to != NULL && from != to && - !to->isHumongous()) { + !to->isHumongous() && + to->rem_set()->is_complete()) { jbyte cv_obj = *_bs->byte_for_const(_containing_obj); jbyte cv_field = *_bs->byte_for_const(p); const jbyte dirty = CardTableModRefBS::dirty_card_val(); @@ -852,9 +664,9 @@ public: HR_FORMAT_PARAMS(from)); _containing_obj->print_on(gclog_or_tty); gclog_or_tty->print_cr("points to obj " PTR_FORMAT " " - "in region " HR_FORMAT, + "in region " HR_FORMAT " remset %s", (void*) obj, - HR_FORMAT_PARAMS(to)); + HR_FORMAT_PARAMS(to), to->rem_set()->get_state_str()); if (obj->is_oop()) { obj->print_on(gclog_or_tty); } @@ -882,32 +694,13 @@ void HeapRegion::verify(VerifyOption vo, VerifyLiveClosure vl_cl(g1, vo); VerifyRemSetClosure vr_cl(g1, vo); bool is_humongous = isHumongous(); - bool do_bot_verify = !is_young(); + size_t object_num = 0; while (p < top()) { oop obj = oop(p); size_t obj_size = block_size(p); object_num += 1; - if (is_humongous != g1->isHumongous(obj_size) && - !g1->is_obj_dead(obj, this)) { // Dead objects may have bigger block_size since they span several objects. - gclog_or_tty->print_cr("obj " PTR_FORMAT " is of %shumongous size (" - SIZE_FORMAT " words) in a %shumongous region", - p, g1->isHumongous(obj_size) ? "" : "non-", - obj_size, is_humongous ? "" : "non-"); - *failures = true; - return; - } - - // If it returns false, verify_for_object() will output the - // appropriate message. 
- if (do_bot_verify && - !g1->is_obj_dead(obj, this) && - !_offsets.verify_for_object(p, obj_size)) { - *failures = true; - return; - } - if (!g1->is_obj_dead_cond(obj, this, vo)) { if (obj->is_oop()) { Klass* klass = obj->klass(); @@ -961,7 +754,20 @@ void HeapRegion::verify(VerifyOption vo, p += obj_size; } - if (p != top()) { + if (!is_young() && !is_empty()) { + _offsets.verify(); + } + + if (is_humongous) { + oop obj = oop(this->humongous_start_region()->bottom()); + if ((HeapWord*)obj > bottom() || (HeapWord*)obj + obj->size() < bottom()) { + gclog_or_tty->print_cr("this humongous region is not part of its humongous object " PTR_FORMAT, p2i(obj)); + *failures = true; + return; + } + } + + if (!is_humongous && p != top()) { gclog_or_tty->print_cr("end of last object " PTR_FORMAT " " "does not match top " PTR_FORMAT, p, top()); *failures = true; @@ -969,7 +775,6 @@ void HeapRegion::verify(VerifyOption vo, } HeapWord* the_end = end(); - assert(p == top(), "it should still hold"); // Do some extra BOT consistency checking for addresses in the // range [top, end). BOT look-ups in this range should yield // top. No point in doing that if top == end (there's nothing there). @@ -1024,14 +829,6 @@ void HeapRegion::verify(VerifyOption vo, } } - if (is_humongous && object_num > 1) { - gclog_or_tty->print_cr("region [" PTR_FORMAT "," PTR_FORMAT "] is humongous " - "but has " SIZE_FORMAT ", objects", - bottom(), end(), object_num); - *failures = true; - return; - } - verify_strong_code_roots(vo, failures); } @@ -1085,7 +882,6 @@ void HeapRegion::verify_rem_set() const { void G1OffsetTableContigSpace::clear(bool mangle_space) { set_top(bottom()); - _scan_top = bottom(); CompactibleSpace::clear(mangle_space); reset_bot(); } @@ -1096,6 +892,7 @@ void G1OffsetTableContigSpace::set_bottom(HeapWord* new_bottom) { } void G1OffsetTableContigSpace::set_end(HeapWord* new_end) { + assert(new_end == _bottom + HeapRegion::GrainWords, "set_end should only ever be called with _bottom + HeapRegion::GrainWords"); Space::set_end(new_end); _offsets.resize(new_end - bottom()); } @@ -1117,42 +914,15 @@ HeapWord* G1OffsetTableContigSpace::cross_threshold(HeapWord* start, return _offsets.threshold(); } -HeapWord* G1OffsetTableContigSpace::scan_top() const { - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - HeapWord* local_top = top(); - OrderAccess::loadload(); - const unsigned local_time_stamp = _gc_time_stamp; - assert(local_time_stamp <= g1h->get_gc_time_stamp(), "invariant"); - if (local_time_stamp < g1h->get_gc_time_stamp()) { - return local_top; - } else { - return _scan_top; - } -} - void G1OffsetTableContigSpace::record_timestamp() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp(); if (_gc_time_stamp < curr_gc_time_stamp) { - // Setting the time stamp here tells concurrent readers to look at - // scan_top to know the maximum allowed address to look at. - - // scan_top should be bottom for all regions except for the - // retained old alloc region which should have scan_top == top - HeapWord* st = _scan_top; - guarantee(st == _bottom || st == _top, "invariant"); - _gc_time_stamp = curr_gc_time_stamp; } } -void G1OffsetTableContigSpace::record_retained_region() { - // scan_top is the maximum address where it's safe for the next gc to - // scan this region.
- _scan_top = top(); -} - void G1OffsetTableContigSpace::safe_object_iterate(ObjectClosure* blk) { object_iterate(blk); } @@ -1191,7 +961,6 @@ void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space, bool m CompactibleSpace::initialize(mr, clear_space, mangle_space); _gc_time_stamp = 0; _top = bottom(); - _scan_top = bottom(); set_saved_mark_word(NULL); reset_bot(); } diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp index 4e0afbac1..830c860e8 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp @@ -48,6 +48,16 @@ // The solution is to remove this method from the definition // of a Space. +// Each heap region is self contained. top() and end() can never +// be set beyond the end of the region. For humongous objects, +// the first region is a StartsHumongous region. If the humongous +// object is larger than a heap region, the following regions will +// be of type ContinuesHumongous. In this case the top() of the +// StartsHumongous region and all ContinuesHumongous regions except +// the last will point to their own end. For the last ContinuesHumongous +// region, top() will equal the object's top. + +class CMBitMapRO; class HeapRegionRemSet; class HeapRegionRemSetIterator; class HeapRegion; @@ -64,32 +74,6 @@ class G1RePrepareClosure; // sentinel value for hrm_index #define G1_NO_HRM_INDEX ((uint) -1) -// A dirty card to oop closure for heap regions. It -// knows how to get the G1 heap and how to use the bitmap -// in the concurrent marker used by G1 to filter remembered -// sets. - -class HeapRegionDCTOC : public DirtyCardToOopClosure { -private: - HeapRegion* _hr; - G1ParPushHeapRSClosure* _rs_scan; - G1CollectedHeap* _g1; - - // Walk the given memory region from bottom to (actual) top - // looking for objects and applying the oop closure (_cl) to - // them. The base implementation of this treats the area as - // blocks, where a block may or may not be an object. Sub- - // classes should override this to provide more accurate - // or possibly more efficient walking. - void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top); - -public: - HeapRegionDCTOC(G1CollectedHeap* g1, - HeapRegion* hr, - G1ParPushHeapRSClosure* cl, - CardTableModRefBS::PrecisionStyle precision); -}; - // The complicating factor is that BlockOffsetTable diverged // significantly, and we need functionality that is only in the G1 version. // So I copied that code, which led to an alternate G1 version of @@ -114,7 +98,6 @@ public: class G1OffsetTableContigSpace: public CompactibleSpace { friend class VMStructs; HeapWord* _top; - HeapWord* volatile _scan_top; protected: G1BlockOffsetArrayContigSpace _offsets; Mutex _par_alloc_lock; @@ -158,11 +141,9 @@ class G1OffsetTableContigSpace: public CompactibleSpace { void set_bottom(HeapWord* value); void set_end(HeapWord* value); - HeapWord* scan_top() const; void record_timestamp(); void reset_gc_time_stamp() { _gc_time_stamp = 0; } unsigned get_gc_time_stamp() { return _gc_time_stamp; } - void record_retained_region(); // See the comment above in the declaration of _pre_dummy_top for an // explanation of what it is.
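// A hedged illustration (not part of this patch) of the top() invariant in the
// new heapRegion.hpp comment above: every region of a humongous series except
// the last is fully covered by the object, so its top() is its own end(); only
// the last region's top() is the object's actual top. The helper below is
// hypothetical, for exposition only.
static void example_set_tops_for_humongous_series(HeapRegion** series, uint len, HeapWord* obj_top) {
  for (uint i = 0; i < len; i++) {
    HeapRegion* r = series[i];
    r->set_top(i + 1 < len ? r->end() : obj_top);
  }
}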
@@ -216,6 +197,13 @@ class HeapRegion: public G1OffsetTableContigSpace { void report_region_type_change(G1HeapRegionTraceType::Type to); + // Returns whether the given object address refers to a dead object, and either the + // size of the object (if live) or the size of the block (if dead) in size. + // May + // - only be called with obj < top() + // - not be called on humongous objects or archive regions + inline bool is_obj_dead_with_size(const oop obj, CMBitMapRO* prev_bitmap, size_t* size) const; + protected: // The index of this region in the heap region sequence. uint _hrm_index; @@ -294,6 +282,18 @@ class HeapRegion: public G1OffsetTableContigSpace { // the total value for the collection set. size_t _predicted_bytes_to_copy; + // Iterate over the references in a humongous object and apply the given closure + // to them. + // Humongous objects are allocated directly in the old-gen. So we need special + // handling for concurrent processing encountering an in-progress allocation. + template <class Closure, bool is_gc_active> + inline bool do_oops_on_card_in_humongous(MemRegion mr, + Closure* cl, + G1CollectedHeap* g1h); + + // Returns the block size of the given (dead, potentially having its class unloaded) object + // starting at p extending to at most the prev TAMS using the given mark bitmap. + inline size_t block_size_using_bitmap(const HeapWord* p, const CMBitMapRO* prev_bitmap) const; public: HeapRegion(uint hrm_index, G1BlockOffsetSharedArray* sharedOffsetArray, @@ -326,6 +326,14 @@ class HeapRegion: public G1OffsetTableContigSpace { ~((1 << (size_t) LogOfHRGrainBytes) - 1); } + // Returns whether a field is in the same region as the obj it points to. + template <typename T> + static bool is_in_same_region(T* p, oop obj) { + assert(p != NULL, "p can't be NULL"); + assert(obj != NULL, "obj can't be NULL"); + return (((uintptr_t) p ^ cast_from_oop<uintptr_t>(obj)) >> LogOfHRGrainBytes) == 0; + } + static size_t max_region_size(); // It sets up the heap region size (GrainBytes / GrainWords), as @@ -339,6 +347,9 @@ class HeapRegion: public G1OffsetTableContigSpace { // All allocated blocks are occupied by objects in a HeapRegion bool block_is_obj(const HeapWord* p) const; + // Returns whether the given object is dead based on TAMS and bitmap. + bool is_obj_dead(const oop obj, const CMBitMapRO* prev_bitmap) const; + // Returns the object size for all valid block starts // and the amount of unallocated words if called on top() size_t block_size(const HeapWord* p) const; @@ -368,8 +379,6 @@ class HeapRegion: public G1OffsetTableContigSpace { size_t garbage_bytes() { size_t used_at_mark_start_bytes = (prev_top_at_mark_start() - bottom()) * HeapWordSize; - assert(used_at_mark_start_bytes >= marked_bytes(), - "Can't mark more than we have."); return used_at_mark_start_bytes - marked_bytes(); } @@ -388,7 +397,6 @@ class HeapRegion: public G1OffsetTableContigSpace { void add_to_marked_bytes(size_t incr_bytes) { _next_marked_bytes = _next_marked_bytes + incr_bytes; - assert(_next_marked_bytes <= used(), "invariant" ); } void zero_marked_bytes() { @@ -420,57 +428,14 @@ class HeapRegion: public G1OffsetTableContigSpace { void set_uncommit_list(bool in) { _in_uncommit_list = in; } bool in_uncommit_list() { return _in_uncommit_list; } - // Return the number of distinct regions that are covered by this region: - // 1 if the region is not humongous, >= 1 if the region is humongous.
- uint region_num() const { - if (!isHumongous()) { - return 1U; - } else { - assert(startsHumongous(), "doesn't make sense on HC regions"); - assert(capacity() % HeapRegion::GrainBytes == 0, "sanity"); - return (uint) (capacity() >> HeapRegion::LogOfHRGrainBytes); - } - } - - // Return the index + 1 of the last HC regions that's associated - // with this HS region. - uint last_hc_index() const { - assert(startsHumongous(), "don't call this otherwise"); - return hrm_index() + region_num(); - } - - // Same as Space::is_in_reserved, but will use the original size of the region. - // The original size is different only for start humongous regions. They get - // their _end set up to be the end of the last continues region of the - // corresponding humongous object. - bool is_in_reserved_raw(const void* p) const { - return _bottom <= p && p < _orig_end; - } // Makes the current region be a "starts humongous" region, i.e., // the first region in a series of one or more contiguous regions - // that will contain a single "humongous" object. The two parameters - // are as follows: - // - // new_top : The new value of the top field of this region which - // points to the end of the humongous object that's being - // allocated. If there is more than one region in the series, top - // will lie beyond this region's original end field and on the last - // region in the series. + // that will contain a single "humongous" object. // - // new_end : The new value of the end field of this region which - // points to the end of the last region in the series. If there is - // one region in the series (namely: this one) end will be the same - // as the original end of this region. - // - // Updating top and end as described above makes this region look as - // if it spans the entire space taken up by all the regions in the - // series and an single allocation moved its top to new_top. This - // ensures that the space (capacity / allocated) taken up by all - // humongous regions can be calculated by just looking at the - // "starts humongous" regions and by ignoring the "continues - // humongous" regions. - void set_startsHumongous(HeapWord* new_top, HeapWord* new_end); + // obj_top : points to the top of the humongous object. + // fill_size : size of the filler object at the end of the region series. + void set_startsHumongous(HeapWord* obj_top, size_t fill_size); // Makes the current region be a "continues humongous' // region. first_hr is the "start humongous" region of the series @@ -556,8 +521,6 @@ class HeapRegion: public G1OffsetTableContigSpace { void set_next_dirty_cards_region(HeapRegion* hr) { _next_dirty_cards_region = hr; } bool is_on_dirty_cards_region_list() const { return get_next_dirty_cards_region() != NULL; } - HeapWord* orig_end() const { return _orig_end; } - // Reset HR stuff to default values. 
void hr_clear(bool par, bool clear_space, bool locked = false); void par_clear(); @@ -603,8 +566,8 @@ class HeapRegion: public G1OffsetTableContigSpace { bool is_marked() { return _prev_top_at_mark_start != bottom(); } void reset_during_compaction() { - assert(isHumongous() && startsHumongous(), - "should only be called for starts humongous regions"); + assert(isHumongous(), + "should only be called for humongous regions"); zero_marked_bytes(); init_top_at_mark_start(); @@ -713,9 +676,9 @@ class HeapRegion: public G1OffsetTableContigSpace { // Returns true if the card was successfully processed, false if an // unparsable part of the heap was encountered, which should only // happen when invoked concurrently with the mutator. - bool oops_on_card_seq_iterate_careful(MemRegion mr, - FilterOutOfRegionClosure* cl, - jbyte* card_ptr); + template <bool is_gc_active, class Closure> + inline bool oops_on_card_seq_iterate_careful(MemRegion mr, + Closure* cl); size_t recorded_rs_length() const { return _recorded_rs_length; } double predicted_elapsed_time_ms() const { return _predicted_elapsed_time_ms; } @@ -784,6 +747,7 @@ class HeapRegion: public G1OffsetTableContigSpace { class HeapRegionClosure : public StackObj { friend class HeapRegionManager; friend class G1CollectedHeap; + friend class CollectionSetChooser; bool _complete; void incomplete() { _complete = false; } diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp index 46e8b00af..d4dd2b9bf 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp @@ -91,15 +91,56 @@ G1OffsetTableContigSpace::block_start_const(const void* p) const { return _offsets.block_start_const(p); } +inline bool HeapRegion::is_obj_dead_with_size(const oop obj, CMBitMapRO* prev_bitmap, size_t* size) const { + HeapWord* addr = (HeapWord*) obj; + + assert(addr < top(), "must be"); + assert(!isHumongous(), "Humongous objects not handled here"); + bool obj_is_dead = is_obj_dead(obj, prev_bitmap); + + if (ClassUnloadingWithConcurrentMark && obj_is_dead) { + assert(!block_is_obj(addr), "must be"); + *size = block_size_using_bitmap(addr, prev_bitmap); + } else { + assert(block_is_obj(addr), "must be"); + *size = obj->size(); + } + return obj_is_dead; +} + inline bool HeapRegion::block_is_obj(const HeapWord* p) const { G1CollectedHeap* g1h = G1CollectedHeap::heap(); + if (!this->is_in(p)) { + assert(continuesHumongous(), "This case can only happen for humongous regions"); + return (p == humongous_start_region()->bottom()); + } if (ClassUnloadingWithConcurrentMark) { return !g1h->is_obj_dead(oop(p), this); } return p < top(); } +inline size_t HeapRegion::block_size_using_bitmap(const HeapWord* addr, const CMBitMapRO* prev_bitmap) const { + assert(ClassUnloadingWithConcurrentMark, + err_msg("All blocks should be objects if G1 Class Unloading isn't used. 
" + "HR: [" PTR_FORMAT ", " PTR_FORMAT ", " PTR_FORMAT ") " + "addr: " PTR_FORMAT, + p2i(bottom()), p2i(top()), p2i(end()), p2i(addr))); + + // Old regions' dead objects may have dead classes + // We need to find the next live object using the bitmap + HeapWord* next = prev_bitmap->getNextMarkedWordAddress(addr, prev_top_at_mark_start()); + + assert(next > addr, "must get the next live object"); + return pointer_delta(next, addr); +} + +inline bool HeapRegion::is_obj_dead(const oop obj, const CMBitMapRO* prev_bitmap) const { + assert(is_in_reserved(obj), err_msg("Object " PTR_FORMAT " must be in region", p2i(obj))); + return !obj_allocated_since_prev_marking(obj) && !prev_bitmap->isMarked((HeapWord*)obj); +} + inline size_t HeapRegion::block_size(const HeapWord *addr) const { if (addr == top()) { @@ -109,22 +150,7 @@ HeapRegion::block_size(const HeapWord *addr) const { if (block_is_obj(addr)) { return oop(addr)->size(); } - - assert(ClassUnloadingWithConcurrentMark, - err_msg("All blocks should be objects if G1 Class Unloading isn't used. " - "HR: [" PTR_FORMAT ", " PTR_FORMAT ", " PTR_FORMAT ") " - "addr: " PTR_FORMAT, - p2i(bottom()), p2i(top()), p2i(end()), p2i(addr))); - - // Old regions' dead objects may have dead classes - // We need to find the next live object in some other - // manner than getting the oop size - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - HeapWord* next = g1h->concurrent_mark()->prevMarkBitMap()-> - getNextMarkedWordAddress(addr, prev_top_at_mark_start()); - - assert(next > addr, "must get the next live object"); - return pointer_delta(next, addr); + return block_size_using_bitmap(addr, G1CollectedHeap::heap()->concurrent_mark()->prevMarkBitMap()); } inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t word_size) { @@ -146,10 +172,6 @@ inline void HeapRegion::note_end_of_marking() { _prev_top_at_mark_start = _next_top_at_mark_start; _prev_marked_bytes = _next_marked_bytes; _next_marked_bytes = 0; - - assert(_prev_marked_bytes <= - (size_t) pointer_delta(prev_top_at_mark_start(), bottom()) * - HeapWordSize, "invariant"); } inline void HeapRegion::note_start_of_copying(bool during_initial_mark) { @@ -195,4 +217,108 @@ inline void HeapRegion::note_end_of_copying(bool during_initial_mark) { } } +template +bool HeapRegion::do_oops_on_card_in_humongous(MemRegion mr, + Closure* cl, + G1CollectedHeap* g1h) { + assert(isHumongous(), "precondition"); + HeapRegion* sr = humongous_start_region(); + oop obj = oop(sr->bottom()); + + // If concurrent and klass_or_null is NULL, then space has been + // allocated but the object has not yet been published by setting + // the klass. That can only happen if the card is stale. However, + // we've already set the card clean, so we must return failure, + // since the allocating thread could have performed a write to the + // card that might be missed otherwise. + if (!is_gc_active && (obj->klass_or_null_acquire() == NULL)) { + return false; + } + + // Only filler objects follow a humongous object in the containing + // regions, and we can ignore those. So only process the one + // humongous object. + if (!g1h->is_obj_dead(obj, sr)) { + if (obj->is_objArray() || (sr->bottom() < mr.start())) { + // objArrays are always marked precisely, so limit processing + // with mr. Non-objArrays might be precisely marked, and since + // it's humongous it's worthwhile avoiding full processing. + // However, the card could be stale and only cover filler + // objects. 
That should be rare, so not worth checking for; + // instead let it fall out from the bounded iteration. + obj->oop_iterate(cl, mr); + } else { + // If obj is not an objArray and mr contains the start of the + // obj, then this could be an imprecise mark, and we need to + // process the entire object. + obj->oop_iterate(cl); + } + } + return true; +} +template <bool is_gc_active, class Closure> +bool HeapRegion::oops_on_card_seq_iterate_careful(MemRegion mr, + Closure* cl) { + + assert(MemRegion(bottom(), end()).contains(mr), "Card region not in heap region"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // Special handling for humongous regions. + if (isHumongous()) { + return do_oops_on_card_in_humongous<Closure, is_gc_active>(mr, cl, g1h); + } + + // During GC we limit mr by scan_top. So we never get here with an + // mr covering objects allocated during GC. Non-humongous objects + // are only allocated in the old-gen during GC. So the parts of the + // heap that may be examined here are always parsable; there's no + // need to use klass_or_null here to detect in-progress allocations. + + // Cache the boundaries of the memory region in some const locals + HeapWord* const start = mr.start(); + HeapWord* const end = mr.end(); + + // Find the obj that extends onto mr.start(). + // Update BOT as needed while finding start of (possibly dead) + // object containing the start of the region. + HeapWord* cur = block_start(start); + +#ifdef ASSERT + { + assert(cur <= start, + err_msg("cur: " PTR_FORMAT ", start: " PTR_FORMAT, p2i(cur), p2i(start))); + HeapWord* next = cur + block_size(cur); + assert(start < next, + err_msg("start: " PTR_FORMAT ", next: " PTR_FORMAT, p2i(start), p2i(next))); + } +#endif + + CMBitMapRO* bitmap = g1h->concurrent_mark()->prevMarkBitMap(); + do { + oop obj = oop(cur); + assert(obj->is_oop(true), err_msg("Not an oop at " PTR_FORMAT, p2i(cur))); + assert(obj->klass_or_null() != NULL, + err_msg("Unparsable heap at " PTR_FORMAT, p2i(cur))); + + size_t size; + bool is_dead = is_obj_dead_with_size(obj, bitmap, &size); + + cur += size; + if (!is_dead) { + // Process live object's references. + // Non-objArrays are usually marked imprecise at the object + // start, in which case we need to iterate over them in full. + // objArrays are precisely marked, but can still be iterated + // over in full if completely covered. + if (!obj->is_objArray() || (((HeapWord*)obj) >= start && cur <= end)) { + obj->oop_iterate(cl); + } else { + obj->oop_iterate(cl, mr); + } + } + } while (cur < end); + + return true; +} + #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.cpp index 83513b3b8..b9cf3410f 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.cpp @@ -397,63 +397,23 @@ void HeapRegionManager::par_iterate(HeapRegionClosure* blk, uint worker_id, Heap // are currently not committed. // This also (potentially) iterates over regions newly allocated during GC. This // is no problem except for some extra work.
- for (uint count = 0; count < _allocated_heapregions_length; count++) { - const uint index = (start_index + count) % _allocated_heapregions_length; - assert(0 <= index && index < _allocated_heapregions_length, "sanity"); + const uint n_regions = hrclaimer->n_regions(); + for (uint count = 0; count < n_regions; count++) { + const uint index = (start_index + count) % n_regions; + assert(0 <= index && index < n_regions, "sanity"); // Skip over unavailable regions HeapRegion* r = _regions.get_by_index(index); if (r != NULL && r->in_uncommit_list() || !_available_map.at(index)) { continue; } - // We'll ignore "continues humongous" regions (we'll process them - // when we come across their corresponding "start humongous" - // region) and regions already claimed. - if (hrclaimer->is_region_claimed(index) || r->continuesHumongous()) { + // We'll ignore regions already claimed. + if (hrclaimer->is_region_claimed(index)) { continue; } // OK, try to claim it if (!hrclaimer->claim_region(index)) { continue; } - // Success! - if (r->startsHumongous()) { - // If the region is "starts humongous" we'll iterate over its - // "continues humongous" first; in fact we'll do them - // first. The order is important. In one case, calling the - // closure on the "starts humongous" region might de-allocate - // and clear all its "continues humongous" regions and, as a - // result, we might end up processing them twice. So, we'll do - // them first (note: most closures will ignore them anyway) and - // then we'll do the "starts humongous" region. - for (uint ch_index = index + 1; ch_index < index + r->region_num(); ch_index++) { - HeapRegion* chr = _regions.get_by_index(ch_index); - - assert(chr->continuesHumongous(), "Must be humongous region"); - assert(chr->humongous_start_region() == r, - err_msg("Must work on humongous continuation of the original start region " - PTR_FORMAT ", but is " PTR_FORMAT, p2i(r), p2i(chr))); - assert(!hrclaimer->is_region_claimed(ch_index), - "Must not have been claimed yet because claiming of humongous continuation first claims the start region"); - - bool claim_result = hrclaimer->claim_region(ch_index); - // We should always be able to claim it; no one else should - // be trying to claim this region. - guarantee(claim_result, "We should always be able to claim the continuesHumongous part of the humongous object"); - - bool res2 = blk->doHeapRegion(chr); - if (res2) { - return; - } - - // Right now, this holds (i.e., no closure that actually - // does something with "continues humongous" regions - // clears them). We might have to weaken it in the future, - // but let's leave these two asserts here for extra safety. - assert(chr->continuesHumongous(), "should still be the case"); - assert(chr->humongous_start_region() == r, "sanity"); - } - } - bool res = blk->doHeapRegion(r); if (res) { return; @@ -551,11 +511,7 @@ void HeapRegionManager::verify() { // this method may be called, we have only completed allocation of the regions, // but not put into a region set. 
prev_committed = true; - if (hr->startsHumongous()) { - prev_end = hr->orig_end(); - } else { - prev_end = hr->end(); - } + prev_end = hr->end(); } for (uint i = _allocated_heapregions_length; i < max_length(); i++) { guarantee(_regions.get_by_index(i) == NULL, err_msg("invariant i: %u", i)); } diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.hpp index 3950d6280..38db9c660 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.hpp @@ -52,6 +52,10 @@ public: assert(n_workers > 0, "Need at least one worker."); _n_workers = n_workers; } + + inline uint n_regions() const { + return _n_regions; + } // Calculate the starting region for given worker so // that they do not all start from the same region. uint offset_for_worker(uint worker_id) const; @@ -188,6 +192,10 @@ public: // is valid. inline HeapRegion* at(uint index) const; + // Return the next region (by index) that is part of the same + // humongous object that hr is part of. + inline HeapRegion* next_region_in_humongous(HeapRegion* hr) const; + // If addr is within the committed space return its corresponding // HeapRegion, otherwise return NULL. inline HeapRegion* addr_to_region(HeapWord* addr) const; diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.inline.hpp index 50d0fa832..9ac7edda9 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.inline.hpp @@ -47,6 +47,18 @@ inline HeapRegion* HeapRegionManager::at(uint index) const { return hr; } +inline HeapRegion* HeapRegionManager::next_region_in_humongous(HeapRegion* hr) const { + uint index = hr->hrm_index(); + assert(is_available(index), "pre-condition"); + assert(hr->isHumongous(), "next_region_in_humongous should only be called for a humongous region."); + index++; + if (index < max_length() && is_available(index) && at(index)->continuesHumongous()) { + return at(index); + } else { + return NULL; + } +} + inline void HeapRegionManager::insert_into_free_list(HeapRegion* hr) { _free_list.add_ordered(hr); } diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp index 9e9391ba6..18b2e95da 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp @@ -38,6 +38,8 @@ PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC +const char* HeapRegionRemSet::_state_strings[] = {"Untracked", "Updating", "Complete"}; + class PerRegionTable: public CHeapObj<mtGC> { friend class OtherRegionsTable; friend class HeapRegionRemSetIterator; @@ -62,10 +64,6 @@ protected: // We need access in order to union things into the base table. BitMap* bm() { return &_bm; } - void recount_occupied() { - _occupied = (jint) bm()->count_one_bits(); - } - PerRegionTable(HeapRegion* hr) : _hr(hr), _occupied(0), @@ -102,7 +100,7 @@ protected: // If the test below fails, then this table was reused concurrently // with this operation. This is OK, since the old table was coarsened, // and adding a bit to the new table is never incorrect.
- if (loc_hr->is_in_reserved_raw(from)) { + if (loc_hr->is_in_reserved(from)) { CardIdx_t from_card = OtherRegionsTable::card_within_region(from, loc_hr); add_card_work(from_card, par); } @@ -141,13 +139,6 @@ public: add_reference_work(from, /*parallel*/ false); } - void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) { - HeapWord* hr_bot = hr()->bottom(); - size_t hr_first_card_index = ctbs->index_for(hr_bot); - bm()->set_intersection_at_offset(*card_bm, hr_first_card_index); - recount_occupied(); - } - void add_card(CardIdx_t from_card_index) { add_card_work(from_card_index, /*parallel*/ true); } @@ -448,7 +439,7 @@ void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) { } // Note that this may be a continued H region. - HeapRegion* from_hr = _g1h->heap_region_containing_raw(from); + HeapRegion* from_hr = _g1h->heap_region_containing(from); RegionIdx_t from_hrm_ind = (RegionIdx_t) from_hr->hrm_index(); // If the region is already coarsened, return. @@ -557,7 +548,7 @@ void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) { hr()->bottom(), from); } } - assert(contains_reference(from), err_msg("We just added " PTR_FORMAT " to the PRT", from)); + assert(contains_reference(from), err_msg("We just added " PTR_FORMAT " to the PRT(%d)", from, prt->contains_reference(from))); } PerRegionTable* @@ -637,74 +628,6 @@ PerRegionTable* OtherRegionsTable::delete_region_table() { return max; } - -// At present, this must be called stop-world single-threaded. -void OtherRegionsTable::scrub(CardTableModRefBS* ctbs, - BitMap* region_bm, BitMap* card_bm) { - // First eliminated garbage regions from the coarse map. - if (G1RSScrubVerbose) { - gclog_or_tty->print_cr("Scrubbing region %u:", hr()->hrm_index()); - } - - assert(_coarse_map.size() == region_bm->size(), "Precondition"); - if (G1RSScrubVerbose) { - gclog_or_tty->print(" Coarse map: before = " SIZE_FORMAT "...", - _n_coarse_entries); - } - _coarse_map.set_intersection(*region_bm); - _n_coarse_entries = _coarse_map.count_one_bits(); - if (G1RSScrubVerbose) { - gclog_or_tty->print_cr(" after = " SIZE_FORMAT ".", _n_coarse_entries); - } - - // Now do the fine-grained maps. - for (size_t i = 0; i < _max_fine_entries; i++) { - PerRegionTable* cur = _fine_grain_regions[i]; - PerRegionTable** prev = &_fine_grain_regions[i]; - while (cur != NULL) { - PerRegionTable* nxt = cur->collision_list_next(); - // If the entire region is dead, eliminate. - if (G1RSScrubVerbose) { - gclog_or_tty->print_cr(" For other region %u:", - cur->hr()->hrm_index()); - } - if (!region_bm->at((size_t) cur->hr()->hrm_index())) { - *prev = nxt; - cur->set_collision_list_next(NULL); - _n_fine_entries--; - if (G1RSScrubVerbose) { - gclog_or_tty->print_cr(" deleted via region map."); - } - unlink_from_all(cur); - PerRegionTable::free(cur); - } else { - // Do fine-grain elimination. - if (G1RSScrubVerbose) { - gclog_or_tty->print(" occ: before = %4d.", cur->occupied()); - } - cur->scrub(ctbs, card_bm); - if (G1RSScrubVerbose) { - gclog_or_tty->print_cr(" after = %4d.", cur->occupied()); - } - // Did that empty the table completely? - if (cur->occupied() == 0) { - *prev = nxt; - cur->set_collision_list_next(NULL); - _n_fine_entries--; - unlink_from_all(cur); - PerRegionTable::free(cur); - } else { - prev = cur->collision_list_next_addr(); - } - } - cur = nxt; - } - } - // Since we may have deleted a from_card_cache entry from the RS, clear - // the FCC. 
- clear_fcc(); -} - bool OtherRegionsTable::occupancy_less_or_equal_than(size_t limit) const { if (limit <= (size_t)G1RSetSparseRegionEntries) { return occ_coarse() == 0 && _first_all_fine_prts == NULL && occ_sparse() <= limit; @@ -824,7 +747,7 @@ bool OtherRegionsTable::contains_reference(OopOrNarrowOopStar from) const { } bool OtherRegionsTable::contains_reference_locked(OopOrNarrowOopStar from) const { - HeapRegion* hr = _g1h->heap_region_containing_raw(from); + HeapRegion* hr = _g1h->heap_region_containing(from); RegionIdx_t hr_ind = (RegionIdx_t) hr->hrm_index(); // Is this region in the coarse map? if (_coarse_map.at(hr_ind)) return true; @@ -856,15 +779,16 @@ OtherRegionsTable::do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task) { // This can be done by either mutator threads together with the // concurrent refinement threads or GC threads. uint HeapRegionRemSet::num_par_rem_sets() { - return MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), (uint)ParallelGCThreads); + return DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num() + MAX2(ConcGCThreads, ParallelGCThreads); } HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, HeapRegion* hr) : _bosa(bosa), _m(Mutex::leaf, FormatBuffer<128>("HeapRegionRemSet lock #%u", hr->hrm_index()), true), - _code_roots(), _other_regions(hr, &_m), _iter_state(Unclaimed), _iter_claimed(0) { - reset_for_par_iteration(); + _code_roots(), + _state(Untracked), + _other_regions(hr, &_m) { } void HeapRegionRemSet::setup_remset_size() { @@ -881,20 +805,6 @@ void HeapRegionRemSet::setup_remset_size() { guarantee(G1RSetSparseRegionEntries > 0 && G1RSetRegionEntries > 0 , "Sanity"); } -bool HeapRegionRemSet::claim_iter() { - if (_iter_state != Unclaimed) return false; - jint res = Atomic::cmpxchg(Claimed, (jint*)(&_iter_state), Unclaimed); - return (res == Unclaimed); -} - -void HeapRegionRemSet::set_iter_complete() { - _iter_state = Complete; -} - -bool HeapRegionRemSet::iter_is_complete() { - return _iter_state == Complete; -} - #ifndef PRODUCT void HeapRegionRemSet::print() { HeapRegionRemSetIterator iter(this); @@ -921,28 +831,18 @@ void HeapRegionRemSet::cleanup() { SparsePRT::cleanup_all(); } -void HeapRegionRemSet::clear() { +void HeapRegionRemSet::clear(bool only_cardset) { MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); - clear_locked(); + clear_locked(only_cardset); } -void HeapRegionRemSet::clear_locked() { - _code_roots.clear(); +void HeapRegionRemSet::clear_locked(bool only_cardset) { + if (!only_cardset) { + _code_roots.clear(); + } _other_regions.clear(); + set_state_empty(); assert(occupied_locked() == 0, "Should be clear."); - reset_for_par_iteration(); -} - -void HeapRegionRemSet::reset_for_par_iteration() { - _iter_state = Unclaimed; - _iter_claimed = 0; - // It's good to check this to make sure that the two methods are in sync. 
- assert(verify_ready_for_par_iteration(), "post-condition"); -} - -void HeapRegionRemSet::scrub(CardTableModRefBS* ctbs, - BitMap* region_bm, BitMap* card_bm) { - _other_regions.scrub(ctbs, region_bm, card_bm); -} - // Code roots support diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp index 6659dc550..cb7bc9c6a 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp @@ -178,17 +178,13 @@ public: // Returns the card index of the given within_region pointer relative to the bottom // of the given heap region. static CardIdx_t card_within_region(OopOrNarrowOopStar within_region, HeapRegion* hr); - // For now. Could "expand" some tables in the future, so that this made - // sense. + // Adds the reference from "from" to this remembered set. void add_reference(OopOrNarrowOopStar from, int tid); // Returns whether this remembered set (and all sub-sets) have an occupancy // that is less or equal than the given occupancy. bool occupancy_less_or_equal_than(size_t limit) const; - // Removes any entries shown by the given bitmaps to contain only dead - // objects. - void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm); // Returns whether this remembered set (and all sub-sets) contain no entries. bool is_empty() const; @@ -248,10 +244,6 @@ private: OtherRegionsTable _other_regions; - enum ParIterState { Unclaimed, Claimed, Complete }; - volatile ParIterState _iter_state; - volatile jlong _iter_claimed; - // Unused unless G1RecordHRRSOops is true. static const int MaxRecorded = 1000000; @@ -304,50 +296,62 @@ public: } static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); } +private: + enum RemSetState { + Untracked, + Updating, + Complete + }; + + RemSetState _state; + + static const char* _state_strings[]; + public: + + const char* get_state_str() const { return _state_strings[_state]; } + + bool is_tracked() { return _state != Untracked; } + bool is_updating() { return _state == Updating; } + bool is_complete() { return _state == Complete; } + + void set_state_empty() { + guarantee(SafepointSynchronize::is_at_safepoint() || !is_tracked(), "Should only set to Untracked during safepoint"); + if (_state == Untracked) { + return; + } + _other_regions.clear_fcc(); + _state = Untracked; + } + + void set_state_updating() { + guarantee(SafepointSynchronize::is_at_safepoint() && !is_tracked(), "Should only set to Updating from Untracked during safepoint"); + _other_regions.clear_fcc(); + _state = Updating; + } + + void set_state_complete() { + _other_regions.clear_fcc(); + _state = Complete; + } // Used in the sequential case. void add_reference(OopOrNarrowOopStar from) { - _other_regions.add_reference(from, 0); + add_reference(from, 0); } // Used in the parallel case. void add_reference(OopOrNarrowOopStar from, int tid) { + RemSetState state = _state; + if (state == Untracked) { + return; + } _other_regions.add_reference(from, tid); } - // Removes any entries shown by the given bitmaps to contain only dead - // objects. - void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm); - // The region is being reclaimed; clear its remset, and any mention of // entries for this region in other remsets. - void clear(); - void clear_locked(); - - // Attempt to claim the region.
Returns true iff this call caused an - // atomic transition from Unclaimed to Claimed. - bool claim_iter(); - // Sets the iteration state to "complete". - void set_iter_complete(); - // Returns "true" iff the region's iteration is complete. - bool iter_is_complete(); - - // Support for claiming blocks of cards during iteration - size_t iter_claimed() const { return (size_t)_iter_claimed; } - // Claim the next block of cards - size_t iter_claimed_next(size_t step) { - size_t current, next; - do { - current = iter_claimed(); - next = current + step; - } while (Atomic::cmpxchg((jlong)next, &_iter_claimed, (jlong)current) != (jlong)current); - return current; - } - void reset_for_par_iteration(); - - bool verify_ready_for_par_iteration() { - return (_iter_state == Unclaimed) && (_iter_claimed == 0); - } + void clear(bool only_cardset = false); + void clear_locked(bool only_cardset = false); // The actual # of bytes this hr_remset takes up. // Note also includes the strong code root set. diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.cpp index 881bab784..f3f52db51 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.cpp @@ -46,7 +46,6 @@ void HeapRegionSetBase::verify_region(HeapRegion* hr) { assert(hr->is_free() == regions_free(), err_msg("Wrong free state for region %u and set %s", hr->hrm_index(), name())); assert(!hr->is_free() || hr->is_empty(), err_msg("Free region %u is not empty for set %s", hr->hrm_index(), name())); assert(!hr->is_empty() || hr->is_free(), err_msg("Empty region %u is not free for set %s", hr->hrm_index(), name())); - assert(hr->rem_set()->verify_ready_for_par_iteration(), err_msg("Wrong iteration state %u", hr->hrm_index())); } #endif diff --git a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp index 8c70b6795..a14acb3e3 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp @@ -90,7 +90,7 @@ inline bool requires_marking(const void* entry, G1CollectedHeap* heap) { assert(heap->is_in_reserved(entry), err_msg("Non-heap pointer in SATB buffer: " PTR_FORMAT, p2i(entry))); - HeapRegion* region = heap->heap_region_containing_raw(entry); + HeapRegion* region = heap->heap_region_containing(entry); assert(region != NULL, err_msg("No region for " PTR_FORMAT, p2i(entry))); if (entry >= region->next_top_at_mark_start()) { return false; diff --git a/hotspot/src/share/vm/memory/allocation.hpp b/hotspot/src/share/vm/memory/allocation.hpp index 4d324b442..d72c28b59 100644 --- a/hotspot/src/share/vm/memory/allocation.hpp +++ b/hotspot/src/share/vm/memory/allocation.hpp @@ -754,4 +754,17 @@ class ArrayAllocator VALUE_OBJ_CLASS_SPEC { void free(); }; +// Uses mmaped memory for all allocations. All allocations are initially +// zero-filled. No pre-touching. 
+template <class E> +class MmapArrayAllocator : public AllStatic { + private: + static size_t size_for(size_t length); + + public: + static E* allocate_or_null(size_t length, MEMFLAGS flags); + static E* allocate(size_t length, MEMFLAGS flags); + static void free(E* addr, size_t length); +}; + #endif // SHARE_VM_MEMORY_ALLOCATION_HPP diff --git a/hotspot/src/share/vm/memory/allocation.inline.hpp b/hotspot/src/share/vm/memory/allocation.inline.hpp index 2e794a8b6..9d6e1313a 100644 --- a/hotspot/src/share/vm/memory/allocation.inline.hpp +++ b/hotspot/src/share/vm/memory/allocation.inline.hpp @@ -215,4 +215,50 @@ void ArrayAllocator<E, F>::free() { } } +template <class E> +size_t MmapArrayAllocator<E>::size_for(size_t length) { + size_t size = length * sizeof(E); + int alignment = os::vm_allocation_granularity(); + return align_size_up_(size, alignment); +} + +template <class E> +E* MmapArrayAllocator<E>::allocate_or_null(size_t length, MEMFLAGS flags) { + size_t size = size_for(length); + int alignment = os::vm_allocation_granularity(); + + char* addr = os::reserve_memory(size, NULL, alignment, flags); + if (addr == NULL) { + return NULL; + } + + if (os::commit_memory(addr, size, !ExecMem)) { + return (E*)addr; + } else { + os::release_memory(addr, size); + return NULL; + } +} + +template <class E> +E* MmapArrayAllocator<E>::allocate(size_t length, MEMFLAGS flags) { + size_t size = size_for(length); + int alignment = os::vm_allocation_granularity(); + + char* addr = os::reserve_memory(size, NULL, alignment, flags); + if (addr == NULL) { + vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "Allocator (reserve)"); + } + + os::commit_memory_or_exit(addr, size, !ExecMem, "Allocator (commit)"); + + return (E*)addr; +} + +template <class E> +void MmapArrayAllocator<E>::free(E* addr, size_t length) { + bool result = os::release_memory((char*)addr, size_for(length)); + assert(result, "Failed to release memory"); +} + #endif // SHARE_VM_MEMORY_ALLOCATION_INLINE_HPP diff --git a/hotspot/src/share/vm/memory/collectorPolicy.hpp b/hotspot/src/share/vm/memory/collectorPolicy.hpp index e0982bafc..c924c2e1d 100644 --- a/hotspot/src/share/vm/memory/collectorPolicy.hpp +++ b/hotspot/src/share/vm/memory/collectorPolicy.hpp @@ -217,6 +217,8 @@ class ClearedAllSoftRefs : public StackObj { _collector_policy->cleared_all_soft_refs(); } } + + bool should_clear() { return _clear_all_soft_refs; } }; class GenCollectorPolicy : public CollectorPolicy { diff --git a/hotspot/src/share/vm/runtime/atomic.hpp b/hotspot/src/share/vm/runtime/atomic.hpp index 015178b61..35e4a8e3c 100644 --- a/hotspot/src/share/vm/runtime/atomic.hpp +++ b/hotspot/src/share/vm/runtime/atomic.hpp @@ -57,6 +57,7 @@ class Atomic : AllStatic { // Atomically add to a location, return updated value inline static jint add (jint add_value, volatile jint* dest); + inline static size_t add (size_t add_value, volatile size_t* dest); inline static intptr_t add_ptr(intptr_t add_value, volatile intptr_t* dest); inline static void* add_ptr(intptr_t add_value, volatile void* dest); // See comment above about using jlong atomics on 32-bit platforms @@ -65,12 +66,14 @@ class Atomic : AllStatic { // Atomically increment location inline static void inc (volatile jint* dest); static void inc (volatile jshort* dest); + inline static void inc (volatile size_t* dest); inline static void inc_ptr(volatile intptr_t* dest); inline static void inc_ptr(volatile void* dest); // Atomically decrement a location inline static void dec (volatile jint* dest); static void dec (volatile jshort* dest); + inline static void dec (volatile size_t* dest);
inline static void dec_ptr(volatile intptr_t* dest); inline static void dec_ptr(volatile void* dest); diff --git a/hotspot/src/share/vm/runtime/atomic.inline.hpp b/hotspot/src/share/vm/runtime/atomic.inline.hpp index 222f29cbf..b0e17e5f9 100644 --- a/hotspot/src/share/vm/runtime/atomic.inline.hpp +++ b/hotspot/src/share/vm/runtime/atomic.inline.hpp @@ -73,4 +73,20 @@ # include "atomic_bsd_zero.inline.hpp" #endif +// size_t casts... +#if (SIZE_MAX != UINTPTR_MAX) +#error size_t is not WORD_SIZE, interesting platform, but missing implementation here +#endif + +inline size_t Atomic::add(size_t add_value, volatile size_t* dest) { + return (size_t) add_ptr((intptr_t) add_value, (volatile intptr_t*) dest); +} + +inline void Atomic::inc(volatile size_t* dest) { + inc_ptr((volatile intptr_t*) dest); +} + +inline void Atomic::dec(volatile size_t* dest) { + dec_ptr((volatile intptr_t*) dest); +} #endif // SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP diff --git a/hotspot/src/share/vm/utilities/bitMap.cpp b/hotspot/src/share/vm/utilities/bitMap.cpp index 12b4b4160..c06c8463e 100644 --- a/hotspot/src/share/vm/utilities/bitMap.cpp +++ b/hotspot/src/share/vm/utilities/bitMap.cpp @@ -78,6 +78,10 @@ void BitMap::resize(idx_t size_in_bits, bool in_resource_area) { } } +void BitMap::pretouch() { + os::pretouch_memory((char*)word_addr(0), (char*)word_addr(size())); +} + void BitMap::set_range_within_word(idx_t beg, idx_t end) { // With a valid range (beg <= end), this test ensures that end != 0, as // required by inverted_bit_mask_for_range. Also avoids an unnecessary write. diff --git a/hotspot/src/share/vm/utilities/bitMap.hpp b/hotspot/src/share/vm/utilities/bitMap.hpp index 08452bd90..d5879b83e 100644 --- a/hotspot/src/share/vm/utilities/bitMap.hpp +++ b/hotspot/src/share/vm/utilities/bitMap.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -143,10 +143,18 @@ class BitMap VALUE_OBJ_CLASS_SPEC { // use the same value for "in_resource_area".) void resize(idx_t size_in_bits, bool in_resource_area = true); + // Pretouch the entire range of memory this BitMap covers. + void pretouch(); + // Accessing idx_t size() const { return _size; } + idx_t size_in_bytes() const { return size_in_words() * BytesPerWord; } idx_t size_in_words() const { - return word_index(size() + BitsPerWord - 1); + return calc_size_in_words(size()); + } + + static idx_t calc_size_in_words(size_t size_in_bits) { + return word_index(size_in_bits + BitsPerWord - 1); } bool at(idx_t index) const { diff --git a/hotspot/test/gc/g1/Test2GbHeap.java b/hotspot/test/gc/g1/Test2GbHeap.java index 6b0cd3b8d..054df1b28 100644 --- a/hotspot/test/gc/g1/Test2GbHeap.java +++ b/hotspot/test/gc/g1/Test2GbHeap.java @@ -25,6 +25,9 @@ * @test Test2GbHeap * @bug 8031686 * @summary Regression test to ensure we can start G1 with 2gb heap. + * Skip test on 32 bit Windows: it typically does not support the many and large virtual memory reservations needed. 
+ * @requires (vm.gc == "G1" | vm.gc == "null") + * @requires !((sun.arch.data.model == "32") & (os.family == "windows")) * @key gc * @key regression * @library /testlibrary diff --git a/hotspot/test/gc/g1/TestGCLogMessages.java b/hotspot/test/gc/g1/TestGCLogMessages.java index 1a4d6532b..97232582b 100644 --- a/hotspot/test/gc/g1/TestGCLogMessages.java +++ b/hotspot/test/gc/g1/TestGCLogMessages.java @@ -23,7 +23,7 @@ /* * @test TestGCLogMessages - * @bug 8035406 8027295 8035398 8019342 8027959 8048179 8027962 + * @bug 8035406 8027295 8035398 8019342 8027959 8048179 8027962 8069330 * @summary Ensure that the PrintGCDetails output for a minor GC with G1 * includes the expected necessary messages. * @key gc @@ -53,6 +53,8 @@ public class TestGCLogMessages { }; private LogMessageWithLevel allLogMessages[] = new LogMessageWithLevel[] { + // Update RS + new LogMessageWithLevel("Scan HCC (ms)", Level.FINER), // Ext Root Scan new LogMessageWithLevel("Thread Roots (ms)", Level.FINEST), new LogMessageWithLevel("StringTable Roots (ms)", Level.FINEST), -- 2.22.0
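For readers of this backport, a few hedged usage sketches of the new APIs follow; none of the code below is part of the patch, and all example_* names are made up. First, the MmapArrayAllocator added to allocation.hpp and allocation.inline.hpp above (mtGC is one of the existing MEMFLAGS values):

#include "memory/allocation.inline.hpp"

// Reserve and commit zero-filled, page-granularity storage, use it, release it.
// allocate() exits the VM on failure; allocate_or_null() returns NULL instead.
static void example_backing_store(size_t num_words) {
  uintptr_t* words = MmapArrayAllocator<uintptr_t>::allocate(num_words, mtGC);
  // ... use words[0 .. num_words); the memory starts out zero-filled ...
  MmapArrayAllocator<uintptr_t>::free(words, num_words);
}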
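Next, a hedged sketch of the remembered-set state machine introduced in heapRegionRemSet.hpp above; the transitions shown follow the guarantees in set_state_updating() and set_state_empty():

static void example_remset_lifecycle(HeapRegionRemSet* rs, OopOrNarrowOopStar from) {
  // While Untracked, add_reference() is a no-op, so no card-set space is spent.
  rs->add_reference(from);
  // At a safepoint the tracking policy moves the remset to Updating, after
  // which incoming references are recorded ...
  rs->set_state_updating();
  rs->add_reference(from);
  // ... and to Complete once it again holds all incoming references; only
  // complete remsets are eligible for the collection set chooser.
  rs->set_state_complete();
  assert(rs->is_complete(), "invariant");
}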
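HeapRegion::is_in_same_region (added to heapRegion.hpp above) XORs the field address with the target address and shifts by the region-size log, so any intra-region pointer yields zero. A hedged sketch of the kind of filtering this enables:

template <class T>
static bool example_needs_remset_entry(T* field, oop new_val) {
  // Only references that cross a region boundary can require a
  // remembered-set entry; same-region pointers are filtered out cheaply.
  return new_val != NULL && !HeapRegion::is_in_same_region(field, new_val);
}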
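Finally, a hedged sketch of walking a humongous object's region series with the new HeapRegionManager::next_region_in_humongous, which returns NULL after the last ContinuesHumongous region:

static void example_visit_humongous(HeapRegionManager* hrm, HeapRegion* start) {
  assert(start->startsHumongous(), "pre-condition");
  // Visits the StartsHumongous region, then each ContinuesHumongous region.
  for (HeapRegion* r = start; r != NULL; r = hrm->next_region_in_humongous(r)) {
    // ... process r ...
  }
}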