
Make chunk map as global side metadata, and each space only lists its own chunks using chunk map #1304


Open · wants to merge 4 commits into master
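As a quick orientation before the diff: the sketch below shows the reworked per-space ChunkMap API in the shape it takes in this PR. It is a hedged fragment, not a standalone program; the call sites (`ChunkMap::new(space_index)`, `set_allocated`, the `Option`-returning `get`, `all_allocated_chunks`) are taken from the diff, while the wrapper function and its parameters are invented for illustration.

```rust
use crate::util::heap::chunk_map::{Chunk, ChunkMap};

// Hedged sketch of the new API; the wrapper function and its parameters are
// hypothetical, while the chunk-map calls mirror the diff below.
fn chunk_map_usage_sketch(space_index: usize, chunk: Chunk) {
    // Each space now builds a chunk map tagged with its own space index
    // (obtained in the diff via `common.descriptor.get_index()`).
    let chunk_map = ChunkMap::new(space_index);

    // A chunk is flipped between allocated and free with a boolean; the state
    // byte also records which space the chunk belongs to.
    chunk_map.set_allocated(chunk, true);

    // `get` returns `None` for chunks that belong to a different space.
    if let Some(state) = chunk_map.get(chunk) {
        assert!(state.is_allocated());
    }

    // Iteration only yields this space's chunks.
    for c in chunk_map.all_allocated_chunks() {
        let _ = c;
    }
}
```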
11 changes: 5 additions & 6 deletions src/policy/immix/immixspace.rs
@@ -252,7 +252,6 @@ impl<VM: VMBinding> ImmixSpace<VM> {
vec![
MetadataSpec::OnSide(Block::DEFRAG_STATE_TABLE),
MetadataSpec::OnSide(Block::MARK_TABLE),
MetadataSpec::OnSide(ChunkMap::ALLOC_TABLE),
*VM::VMObjectModel::LOCAL_MARK_BIT_SPEC,
*VM::VMObjectModel::LOCAL_FORWARDING_BITS_SPEC,
*VM::VMObjectModel::LOCAL_FORWARDING_POINTER_SPEC,
@@ -264,7 +263,6 @@ impl<VM: VMBinding> ImmixSpace<VM> {
MetadataSpec::OnSide(Line::MARK_TABLE),
MetadataSpec::OnSide(Block::DEFRAG_STATE_TABLE),
MetadataSpec::OnSide(Block::MARK_TABLE),
MetadataSpec::OnSide(ChunkMap::ALLOC_TABLE),
*VM::VMObjectModel::LOCAL_MARK_BIT_SPEC,
*VM::VMObjectModel::LOCAL_FORWARDING_BITS_SPEC,
*VM::VMObjectModel::LOCAL_FORWARDING_POINTER_SPEC,
@@ -299,6 +297,7 @@ impl<VM: VMBinding> ImmixSpace<VM> {
let scheduler = args.scheduler.clone();
let common =
CommonSpace::new(args.into_policy_args(true, false, Self::side_metadata_specs()));
let space_index = common.descriptor.get_index();
ImmixSpace {
pr: if common.vmrequest.is_discontiguous() {
BlockPageResource::new_discontiguous(
@@ -316,7 +315,7 @@ impl<VM: VMBinding> ImmixSpace<VM> {
)
},
common,
chunk_map: ChunkMap::new(),
chunk_map: ChunkMap::new(space_index),
line_mark_state: AtomicU8::new(Line::RESET_MARK_STATE),
line_unavail_state: AtomicU8::new(Line::RESET_MARK_STATE),
lines_consumed: AtomicUsize::new(0),
@@ -524,7 +523,7 @@ impl<VM: VMBinding> ImmixSpace<VM> {
self.defrag.notify_new_clean_block(copy);
let block = Block::from_aligned_address(block_address);
block.init(copy);
self.chunk_map.set(block.chunk(), ChunkState::Allocated);
self.chunk_map.set_allocated(block.chunk(), true);
self.lines_consumed
.fetch_add(Block::LINES, Ordering::SeqCst);
Some(block)
@@ -899,7 +898,7 @@ struct SweepChunk<VM: VMBinding> {

impl<VM: VMBinding> GCWork<VM> for SweepChunk<VM> {
fn do_work(&mut self, _worker: &mut GCWorker<VM>, mmtk: &'static MMTK<VM>) {
assert_eq!(self.space.chunk_map.get(self.chunk), ChunkState::Allocated);
assert!(self.space.chunk_map.get(self.chunk).unwrap().is_allocated());

let mut histogram = self.space.defrag.new_histogram();
let line_mark_state = if super::BLOCK_ONLY {
@@ -950,7 +949,7 @@ impl<VM: VMBinding> GCWork<VM> for SweepChunk<VM> {
probe!(mmtk, sweep_chunk, allocated_blocks);
// Set this chunk as free if there are no live blocks.
if allocated_blocks == 0 {
self.space.chunk_map.set(self.chunk, ChunkState::Free)
self.space.chunk_map.set_allocated(self.chunk, false)
}
self.space.defrag.add_completed_mark_histogram(histogram);
self.epilogue.finish_one_work_packet();
16 changes: 8 additions & 8 deletions src/policy/marksweepspace/native_ms/global.rs
@@ -288,7 +288,7 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
let vm_map = args.vm_map;
let is_discontiguous = args.vmrequest.is_discontiguous();
let local_specs = {
metadata::extract_side_metadata(&vec![
metadata::extract_side_metadata(&[
MetadataSpec::OnSide(Block::NEXT_BLOCK_TABLE),
MetadataSpec::OnSide(Block::PREV_BLOCK_TABLE),
MetadataSpec::OnSide(Block::FREE_LIST_TABLE),
@@ -300,11 +300,11 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
MetadataSpec::OnSide(Block::BLOCK_LIST_TABLE),
MetadataSpec::OnSide(Block::TLS_TABLE),
MetadataSpec::OnSide(Block::MARK_TABLE),
MetadataSpec::OnSide(ChunkMap::ALLOC_TABLE),
*VM::VMObjectModel::LOCAL_MARK_BIT_SPEC,
])
};
let common = CommonSpace::new(args.into_policy_args(false, false, local_specs));
let space_index = common.descriptor.get_index();
MarkSweepSpace {
pr: if is_discontiguous {
BlockPageResource::new_discontiguous(
@@ -322,7 +322,7 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
)
},
common,
chunk_map: ChunkMap::new(),
chunk_map: ChunkMap::new(space_index),
scheduler,
abandoned: Mutex::new(AbandonedBlockLists::new()),
abandoned_in_gc: Mutex::new(AbandonedBlockLists::new()),
@@ -402,7 +402,7 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {

pub fn record_new_block(&self, block: Block) {
block.init();
self.chunk_map.set(block.chunk(), ChunkState::Allocated);
self.chunk_map.set_allocated(block.chunk(), true);
}

pub fn prepare(&mut self, full_heap: bool) {
@@ -567,7 +567,7 @@ struct PrepareChunkMap<VM: VMBinding> {

impl<VM: VMBinding> GCWork<VM> for PrepareChunkMap<VM> {
fn do_work(&mut self, _worker: &mut GCWorker<VM>, _mmtk: &'static MMTK<VM>) {
debug_assert!(self.space.chunk_map.get(self.chunk) == ChunkState::Allocated);
debug_assert!(self.space.chunk_map.get(self.chunk).unwrap().is_allocated());
// number of allocated blocks.
let mut n_occupied_blocks = 0;
self.chunk
@@ -581,7 +581,7 @@ impl<VM: VMBinding> GCWork<VM> for PrepareChunkMap<VM> {
});
if n_occupied_blocks == 0 {
// Set this chunk as free if there are no live blocks.
self.space.chunk_map.set(self.chunk, ChunkState::Free)
self.space.chunk_map.set_allocated(self.chunk, false)
} else {
// Otherwise this chunk is occupied, and we reset the mark bit if it is on the side.
if let MetadataSpec::OnSide(side) = *VM::VMObjectModel::LOCAL_MARK_BIT_SPEC {
@@ -617,7 +617,7 @@ struct SweepChunk<VM: VMBinding> {

impl<VM: VMBinding> GCWork<VM> for SweepChunk<VM> {
fn do_work(&mut self, _worker: &mut GCWorker<VM>, _mmtk: &'static MMTK<VM>) {
assert_eq!(self.space.chunk_map.get(self.chunk), ChunkState::Allocated);
assert!(self.space.chunk_map.get(self.chunk).unwrap().is_allocated());

// number of allocated blocks.
let mut allocated_blocks = 0;
@@ -636,7 +636,7 @@ impl<VM: VMBinding> GCWork<VM> for SweepChunk<VM> {
probe!(mmtk, sweep_chunk, allocated_blocks);
// Set this chunk as free if there are no live blocks.
if allocated_blocks == 0 {
self.space.chunk_map.set(self.chunk, ChunkState::Free)
self.space.chunk_map.set_allocated(self.chunk, false);
}
self.epilogue.finish_one_work_packet();
}
133 changes: 100 additions & 33 deletions src/util/heap/chunk_map.rs
@@ -44,20 +44,68 @@ impl Chunk {
}
}

/// Chunk allocation state
#[repr(u8)]
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum ChunkState {
/// The chunk is not allocated.
Free = 0,
/// The chunk is allocated.
Allocated = 1,
/// The allocation state for a chunk in the chunk map. It includes whether each chunk is allocated or free, and the space the chunk belongs to.
/// Highest bit: 0 = free, 1 = allocated
/// Lower 4 bits: Space index (0-15)
#[repr(transparent)]
#[derive(PartialEq, Clone, Copy)]
pub struct ChunkState(u8);

impl ChunkState {
const ALLOC_BIT_MASK: u8 = 0x80;
const SPACE_INDEX_MASK: u8 = 0x0F;

/// Create a new ChunkState that represents being allocated in the given space
pub fn allocated(space_index: usize) -> ChunkState {
debug_assert!(space_index < crate::util::heap::layout::heap_parameters::MAX_SPACES);
let mut encode = space_index as u8;
encode |= Self::ALLOC_BIT_MASK;
ChunkState(encode)
}
/// Create a new ChunkState that represents being free in the given space
pub fn free(space_index: usize) -> ChunkState {
debug_assert!(space_index < crate::util::heap::layout::heap_parameters::MAX_SPACES);
ChunkState(space_index as u8)
}
/// Is the chunk free?
pub fn is_free(&self) -> bool {
self.0 & Self::ALLOC_BIT_MASK == 0
}
/// Is the chunk allocated?
pub fn is_allocated(&self) -> bool {
!self.is_free()
}
/// Get the space index of the chunk
pub fn get_space_index(&self) -> usize {
let index = (self.0 & Self::SPACE_INDEX_MASK) as usize;
debug_assert!(index < crate::util::heap::layout::heap_parameters::MAX_SPACES);
index
}
}

impl std::fmt::Debug for ChunkState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.is_free() {
write!(f, "Free({})", self.get_space_index())
} else {
write!(f, "Allocated({})", self.get_space_index())
}
}
}

/// A byte-map to record all the allocated chunks.
/// A plan can use this to maintain records for the chunks that they used, and the states of the chunks.
/// Any plan that uses the chunk map should include the `ALLOC_TABLE` spec in their local sidemetadata specs
/// Any plan that uses the chunk map should include the `ALLOC_TABLE` spec in their local sidemetadata specs.
Review comment (Collaborator), suggested change:
- /// Any plan that uses the chunk map should include the `ALLOC_TABLE` spec in their local sidemetadata specs.
+ /// Any plan that uses the chunk map should include the `ALLOC_TABLE` spec in their global sidemetadata specs.
///
/// A chunk map is created for a space (identified by the space index), and will only update or list chunks for that space.
pub struct ChunkMap {
/// The space that uses this chunk map.
space_index: usize,
/// The range of chunks that are used by the space. The range only records the lowest chunk and the highest chunk.
/// All the chunks that are used for the space are within the range, but not necessarily that all the chunks in the range
/// are used for the space. Spaces may be discontiguous, thus the range may include chunks that do not belong to the space.
/// We need to use the space index in the chunk map and the space index encoded with the chunk state to know if
/// the chunk belongs to the current space.
chunk_range: Mutex<Range<Chunk>>,
}

@@ -66,22 +114,40 @@ impl ChunkMap {
pub const ALLOC_TABLE: SideMetadataSpec =
crate::util::metadata::side_metadata::spec_defs::CHUNK_MARK;

pub fn new() -> Self {
pub fn new(space_index: usize) -> Self {
Self {
space_index,
chunk_range: Mutex::new(Chunk::ZERO..Chunk::ZERO),
}
}

/// Set chunk state
pub fn set(&self, chunk: Chunk, state: ChunkState) {
/// Set a chunk as allocated, or as free.
pub fn set_allocated(&self, chunk: Chunk, allocated: bool) {
Review comment (Collaborator): I feel this function name is confusing. set_allocated to me sounds like this function will set the chunk to the allocated state, but it can actually set it to both allocated or free.

Author (Member): The boolean argument in set_allocated(bool) should be pretty clear that you can either set it to 'allocated' or 'not allocated'. Do you have a better suggestion?

Collaborator: I'm not sure why set is not good enough? The previous one's signature is pretty much the same, except state is now a bool. Maybe @wks has a better idea. Or if he's fine with the new name, then that's also okay.

Author (Member): I felt that after changing the argument from an enum to a boolean, chunk_map.set(chunk, true) was not as clear as chunk_map.set_allocated(chunk, true), so I made the change.

Collaborator (@k-sareen, Apr 23, 2025): Fair enough 👍 I'll wait for @wks to see if he has a better suggestion.

let state = if allocated {
ChunkState::allocated(self.space_index)
} else {
ChunkState::free(self.space_index)
};
// Do nothing if the chunk is already in the expected state.
if self.get(chunk) == state {
if self.get_any(chunk) == state {
return;
}
#[cfg(debug_assertions)]
{
let old_state = self.get_any(chunk);
// If a chunk is free, any space may use it. If a chunk is not free, only the current space may update its state.
assert!(
old_state.is_free() || old_state.get_space_index() == state.get_space_index(),
"Chunk {:?}: old state {:?}, new state {:?}. Cannot set to new state.",
chunk,
old_state,
state
);
}
// Update alloc byte
unsafe { Self::ALLOC_TABLE.store::<u8>(chunk.start(), state as u8) };
unsafe { Self::ALLOC_TABLE.store::<u8>(chunk.start(), state.0) };
// If this is a newly allocated chunk, then expand the chunk range.
if state == ChunkState::Allocated {
if state.is_allocated() {
debug_assert!(!chunk.start().is_zero());
let mut range = self.chunk_range.lock();
if range.start == Chunk::ZERO {
@@ -96,20 +162,30 @@ impl ChunkMap {
}
}

/// Get chunk state
pub fn get(&self, chunk: Chunk) -> ChunkState {
/// Get chunk state. Return None if the chunk does not belong to the space.
pub fn get(&self, chunk: Chunk) -> Option<ChunkState> {
let state = self.get_any(chunk);
(state.get_space_index() == self.space_index).then_some(state)
}

/// Get chunk state, regardless of the space. This should always be private.
fn get_any(&self, chunk: Chunk) -> ChunkState {
Review comment (Collaborator): get_any is not really descriptive of what it's doing. It should be something like get_internal or get_private, I feel.

let byte = unsafe { Self::ALLOC_TABLE.load::<u8>(chunk.start()) };
match byte {
0 => ChunkState::Free,
1 => ChunkState::Allocated,
_ => unreachable!(),
}
ChunkState(byte)
}

/// A range of all chunks in the heap.
pub fn all_chunks(&self) -> impl Iterator<Item = Chunk> + '_ {
let chunk_range = self.chunk_range.lock();
RegionIterator::<Chunk>::new(chunk_range.start, chunk_range.end)
.filter(|c| self.get(*c).is_some())
}

/// A range of all chunks in the heap.
pub fn all_chunks(&self) -> RegionIterator<Chunk> {
pub fn all_allocated_chunks(&self) -> impl Iterator<Item = Chunk> + '_ {
let chunk_range = self.chunk_range.lock();
RegionIterator::<Chunk>::new(chunk_range.start, chunk_range.end)
.filter(|c| self.get(*c).is_some_and(|state| state.is_allocated()))
}
Review comments on lines +177 to 189:

Collaborator: I don't really understand why we need this. The issue here is that when a chunk is freed, we still associate a space index with it. But why is that the case? We can just have the 0x0 space descriptor associated with it (I think 0x0 is reserved for the empty space). This would mean we don't need to have two functions for iterating through chunks for a space, since we will automatically skip the free ones.

Author (Member): I am just making sure all_chunks does the same thing as before, which lists all the chunks for the space no matter whether they are allocated or free.

Collaborator (@k-sareen, Apr 23, 2025): I just did a grep of where the all_chunks function is used. Most of the time it's used as if it were all_allocated_chunks. The only times it's used as actually all chunks, regardless of allocation state, is when we are iterating through the chunks to bulk clear/set metadata bits. But I think it doesn't make sense to do that for free chunks, since that (i.e. bulk clearing/setting metadata) should happen when we either free or allocate a (new) chunk.

Collaborator: Also, I don't think the above is a good idea anyway. We shouldn't be going through free chunks when clearing/setting metadata. It should almost always be the allocated chunks. Do you have a use-case in mind where it makes sense to have a space associated with a free chunk?

Author (Member): I am not sure. I will try replacing the current use of all_chunks with all_allocated_chunks and see if it works.
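To make the alternative raised in this thread concrete, here is a small self-contained toy (not code from this PR, and not the real MMTk space-descriptor mechanism) of an encoding in which a freed chunk keeps no space association at all, so a single iterator over a space's chunks would skip free chunks automatically:

```rust
// Self-contained toy illustrating the suggestion above; it is not code from
// this PR. Encoding: 0 means "free, owned by no space"; a non-zero byte
// stores (space_index + 1) for an allocated chunk.
#[derive(Clone, Copy, PartialEq, Debug)]
struct ChunkByte(u8);

impl ChunkByte {
    const FREE: ChunkByte = ChunkByte(0);

    fn allocated(space_index: u8) -> ChunkByte {
        assert!(space_index < u8::MAX, "space index must fit in the byte after the +1 offset");
        ChunkByte(space_index + 1)
    }

    fn is_allocated(self) -> bool {
        self.0 != 0
    }

    /// A freed chunk has no owning space, so this returns `None` for it.
    fn space_index(self) -> Option<u8> {
        self.is_allocated().then(|| self.0 - 1)
    }
}

fn main() {
    let byte = ChunkByte::allocated(3);
    assert!(byte.is_allocated());
    assert_eq!(byte.space_index(), Some(3));

    // Freeing erases the space association entirely, so "all chunks of a
    // space" and "all allocated chunks of a space" become the same set.
    let freed = ChunkByte::FREE;
    assert!(!freed.is_allocated());
    assert_eq!(freed.space_index(), None);
}
```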


/// Helper function to create per-chunk processing work packets for each allocated chunk.
@@ -118,18 +194,9 @@ impl ChunkMap {
func: impl Fn(Chunk) -> Box<dyn GCWork<VM>>,
) -> Vec<Box<dyn GCWork<VM>>> {
let mut work_packets: Vec<Box<dyn GCWork<VM>>> = vec![];
for chunk in self
.all_chunks()
.filter(|c| self.get(*c) == ChunkState::Allocated)
{
for chunk in self.all_allocated_chunks() {
work_packets.push(func(chunk));
}
work_packets
}
}

impl Default for ChunkMap {
fn default() -> Self {
Self::new()
}
}
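For reference, the bit layout introduced in this file can be exercised in isolation. The following is a standalone re-implementation of the encoding for illustration only (the 0x80 allocation bit and the 4-bit space index mirror the diff above; the MAX_SPACES bound of 16 is an assumption), not the mmtk-core type itself:

```rust
// Standalone mirror of the ChunkState encoding in this PR, for illustration only.
const ALLOC_BIT_MASK: u8 = 0x80;
const SPACE_INDEX_MASK: u8 = 0x0F;
const MAX_SPACES: usize = 16; // assumed bound, matching the 4-bit index budget

fn allocated(space_index: usize) -> u8 {
    assert!(space_index < MAX_SPACES);
    space_index as u8 | ALLOC_BIT_MASK
}

fn free(space_index: usize) -> u8 {
    assert!(space_index < MAX_SPACES);
    space_index as u8
}

fn is_allocated(state: u8) -> bool {
    state & ALLOC_BIT_MASK != 0
}

fn space_index(state: u8) -> usize {
    (state & SPACE_INDEX_MASK) as usize
}

fn main() {
    // Round-trip: an allocated chunk in space 5.
    let s = allocated(5);
    assert!(is_allocated(s));
    assert_eq!(space_index(s), 5);

    // Freeing keeps the owning space's index, so a per-space chunk map can
    // still list chunks it has used even after they are freed.
    let f = free(5);
    assert!(!is_allocated(f));
    assert_eq!(space_index(f), 5);
}
```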
5 changes: 5 additions & 0 deletions src/util/metadata/side_metadata/global.rs
@@ -1345,6 +1345,11 @@ impl SideMetadataContext {
}
}

// Any plan that uses the chunk map needs to reserve the chunk map table.
// As we use either the mark sweep or (non moving) immix as the non moving space,
// and both policies use the chunk map, we just add the chunk map table globally.
ret.push(crate::util::heap::chunk_map::ChunkMap::ALLOC_TABLE);

ret.extend_from_slice(specs);
ret
}
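The hunk above means every plan's side-metadata context now reserves the chunk map table as a global spec, which is also why the chunk state byte has to carry a space index. Below is a simplified sketch of the shape of this logic; the types, names, and reduced signature are stand-ins for illustration, not the real mmtk-core function:

```rust
// Simplified sketch of how the global spec list is assembled after this change;
// `SideMetadataSpec` and `CHUNK_MARK_SPEC` stand in for the real mmtk-core items.
#[derive(Clone, Copy, Debug)]
struct SideMetadataSpec {
    name: &'static str,
}

const CHUNK_MARK_SPEC: SideMetadataSpec = SideMetadataSpec { name: "CHUNK_MARK" };

fn new_global_specs(specs: &[SideMetadataSpec]) -> Vec<SideMetadataSpec> {
    let mut ret = Vec::new();
    // ...other mandatory global specs would be pushed here...
    // The chunk map table is now reserved unconditionally, because both the
    // mark-sweep and (non-moving) immix policies rely on the chunk map.
    ret.push(CHUNK_MARK_SPEC);
    ret.extend_from_slice(specs);
    ret
}

fn main() {
    let plan_specs = [SideMetadataSpec { name: "PLAN_SPECIFIC" }];
    let all = new_global_specs(&plan_specs);
    assert!(all.iter().any(|s| s.name == "CHUNK_MARK"));
    assert_eq!(all.last().unwrap().name, "PLAN_SPECIFIC");
}
```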
4 changes: 2 additions & 2 deletions src/util/metadata/side_metadata/spec_defs.rs
@@ -60,6 +60,8 @@ define_side_metadata_specs!(
MS_ACTIVE_CHUNK = (global: true, log_num_of_bits: 3, log_bytes_in_region: LOG_BYTES_IN_CHUNK),
Review comment (Collaborator): In my branch I did a quick fix and didn't bother to coalesce MS_ACTIVE_CHUNK and CHUNK_MARK, but I think we should for this PR. It's not useful to have the same metadata under different names (they are basically the same metadata).

Collaborator: MS_ACTIVE_CHUNK only cares about chunks with malloc in them. But with this PR, we should be able to distinguish between a chunk mapped by the non-moving immix space and the malloc space (for example). I feel like MS_ACTIVE_CHUNK can be subsumed by CHUNK_MARK now.

Author (Member): It might be possible. We could simply replace MS_ACTIVE_CHUNK in malloc mark sweep with CHUNK_MARK (which might be an issue if we have other spaces that use ChunkMap). Or we can start using ChunkMap in malloc mark sweep, but this may not be an easy refactoring. Either way, I will give it a try after this PR.

// Track the index in SFT map for a chunk (only used for SFT sparse chunk map)
SFT_DENSE_CHUNK_MAP_INDEX = (global: true, log_num_of_bits: 3, log_bytes_in_region: LOG_BYTES_IN_CHUNK),
// Mark chunks (any plan that uses the chunk map should include this spec in their local sidemetadata specs)
Review comment (Collaborator), suggested change:
- // Mark chunks (any plan that uses the chunk map should include this spec in their local sidemetadata specs)
+ // Mark chunks (any plan that uses the chunk map should include this spec in their global side-metadata specs)

CHUNK_MARK = (global: true, log_num_of_bits: 3, log_bytes_in_region: crate::util::heap::chunk_map::Chunk::LOG_BYTES),
);

// This defines all LOCAL side metadata used by mmtk-core.
@@ -75,8 +77,6 @@ define_side_metadata_specs!(
IX_BLOCK_DEFRAG = (global: false, log_num_of_bits: 3, log_bytes_in_region: crate::policy::immix::block::Block::LOG_BYTES),
// Mark blocks by immix
IX_BLOCK_MARK = (global: false, log_num_of_bits: 3, log_bytes_in_region: crate::policy::immix::block::Block::LOG_BYTES),
// Mark chunks (any plan that uses the chunk map should include this spec in their local sidemetadata specs)
CHUNK_MARK = (global: false, log_num_of_bits: 3, log_bytes_in_region: crate::util::heap::chunk_map::Chunk::LOG_BYTES),
// Mark blocks by (native mimalloc) marksweep
MS_BLOCK_MARK = (global: false, log_num_of_bits: 3, log_bytes_in_region: crate::policy::marksweepspace::native_ms::Block::LOG_BYTES),
// Next block in list for native mimalloc
15 changes: 5 additions & 10 deletions src/util/object_enum.rs
@@ -5,10 +5,7 @@ use std::marker::PhantomData;
use crate::vm::VMBinding;

use super::{
heap::{
chunk_map::{ChunkMap, ChunkState},
MonotonePageResource,
},
heap::{chunk_map::ChunkMap, MonotonePageResource},
linear_scan::Region,
metadata::{side_metadata::spec_defs::VO_BIT, vo_bit},
Address, ObjectReference,
@@ -84,12 +81,10 @@ pub(crate) fn enumerate_blocks_from_chunk_map<B>(
) where
B: BlockMayHaveObjects,
{
for chunk in chunk_map.all_chunks() {
if chunk_map.get(chunk) == ChunkState::Allocated {
for block in chunk.iter_region::<B>() {
if block.may_have_objects() {
enumerator.visit_address_range(block.start(), block.end());
}
for chunk in chunk_map.all_allocated_chunks() {
for block in chunk.iter_region::<B>() {
if block.may_have_objects() {
enumerator.visit_address_range(block.start(), block.end());
}
}
}