Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Genericize RoaringBitmap and implement Roaring32 and Roaring64 #260

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ license = "MIT OR Apache-2.0"
[dependencies]
bytemuck = "1.7.3"
byteorder = "1.4.3"
retain_mut = "=0.1.7"
serde = { version = "1.0.139", optional = true }

[features]
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/benches/datasets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::path::{Path, PathBuf};
use git2::FetchOptions;
use once_cell::sync::OnceCell as SyncOnceCell;

use roaring::RoaringBitmap;
use roaring::Roaring32;

static INSTANCE: SyncOnceCell<Vec<Dataset>> = SyncOnceCell::new();

Expand Down Expand Up @@ -41,7 +41,7 @@ impl IntoIterator for Datasets {

pub struct Dataset {
pub name: String,
pub bitmaps: Vec<RoaringBitmap>,
pub bitmaps: Vec<Roaring32>,
}

fn init_datasets() -> Result<PathBuf, Box<dyn std::error::Error>> {
Expand Down Expand Up @@ -186,7 +186,7 @@ fn parse_datasets<P: AsRef<Path>>(path: P) -> Result<Vec<Dataset>, Box<dyn std::
numbers.push(n);
}

let bitmap = RoaringBitmap::from_sorted_iter(numbers.iter().copied())?;
let bitmap = Roaring32::from_sorted_iter(numbers.iter().copied())?;
numbers.clear();
bitmaps.push(bitmap);

Expand Down
94 changes: 45 additions & 49 deletions benchmarks/benches/lib.rs
Original file line number Diff line number Diff line change
@@ -1,30 +1,28 @@
use itertools::Itertools;
use std::cmp::Reverse;
use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign};

use criterion::measurement::Measurement;
use crate::datasets::Datasets;
use criterion::{
black_box, criterion_group, criterion_main, BatchSize, BenchmarkGroup, BenchmarkId, Criterion,
Throughput,
black_box, criterion_group, criterion_main, measurement::Measurement, BatchSize,
BenchmarkGroup, BenchmarkId, Criterion, Throughput,
};
use itertools::Itertools;
use roaring::{MultiOps, Roaring32, Roaring64};
use std::{
cmp::Reverse,
ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign},
};

use roaring::{MultiOps, RoaringBitmap, RoaringTreemap};

use crate::datasets::Datasets;

mod datasets;

#[allow(clippy::too_many_arguments)]
fn pairwise_binary_op_matrix(
c: &mut Criterion,
op_name: &str,
op_own_own: impl Fn(RoaringBitmap, RoaringBitmap) -> RoaringBitmap,
op_own_ref: impl Fn(RoaringBitmap, &RoaringBitmap) -> RoaringBitmap,
op_ref_own: impl Fn(&RoaringBitmap, RoaringBitmap) -> RoaringBitmap,
op_ref_ref: impl Fn(&RoaringBitmap, &RoaringBitmap) -> RoaringBitmap,
mut op_assign_owned: impl FnMut(&mut RoaringBitmap, RoaringBitmap),
mut op_assign_ref: impl FnMut(&mut RoaringBitmap, &RoaringBitmap),
op_len: impl Fn(&RoaringBitmap, &RoaringBitmap) -> u64,
op_own_own: impl Fn(Roaring32, Roaring32) -> Roaring32,
op_own_ref: impl Fn(Roaring32, &Roaring32) -> Roaring32,
op_ref_own: impl Fn(&Roaring32, Roaring32) -> Roaring32,
op_ref_ref: impl Fn(&Roaring32, &Roaring32) -> Roaring32,
mut op_assign_owned: impl FnMut(&mut Roaring32, Roaring32),
mut op_assign_ref: impl FnMut(&mut Roaring32, &Roaring32),
op_len: impl Fn(&Roaring32, &Roaring32) -> u64,
) {
let mut group = c.benchmark_group(format!("pairwise_{}", op_name));

Expand Down Expand Up @@ -120,7 +118,7 @@ fn pairwise_binary_op_matrix(
fn pairwise_binary_op<R, M: Measurement>(
group: &mut BenchmarkGroup<M>,
op_name: &str,
op: impl Fn(RoaringBitmap, RoaringBitmap) -> R,
op: impl Fn(Roaring32, Roaring32) -> R,
) {
for dataset in Datasets {
group.bench_function(BenchmarkId::new(op_name, &dataset.name), |b| {
Expand Down Expand Up @@ -152,17 +150,15 @@ fn creation(c: &mut Criterion) {
group.bench_function(BenchmarkId::new("from_sorted_iter", &dataset.name), |b| {
b.iter(|| {
for bitmap_numbers in &dataset_numbers {
black_box(
RoaringBitmap::from_sorted_iter(bitmap_numbers.iter().copied()).unwrap(),
);
black_box(Roaring32::from_sorted_iter(bitmap_numbers.iter().copied()).unwrap());
}
})
});

group.bench_function(BenchmarkId::new("collect", &dataset.name), |b| {
b.iter(|| {
for bitmap_numbers in &dataset_numbers {
black_box(bitmap_numbers.iter().copied().collect::<RoaringBitmap>());
black_box(bitmap_numbers.iter().copied().collect::<Roaring32>());
}
})
});
Expand Down Expand Up @@ -408,15 +404,15 @@ fn deserialization(c: &mut Criterion) {
group.bench_function(BenchmarkId::new("deserialize_from", &dataset.name), |b| {
b.iter(|| {
for buf in input.iter() {
black_box(RoaringBitmap::deserialize_from(buf.as_slice()).unwrap());
black_box(Roaring32::deserialize_from(buf.as_slice()).unwrap());
}
});
});

group.bench_function(BenchmarkId::new("deserialize_unchecked_from", &dataset.name), |b| {
b.iter(|| {
for buf in input.iter() {
black_box(RoaringBitmap::deserialize_unchecked_from(buf.as_slice()).unwrap());
black_box(Roaring32::deserialize_unchecked_from(buf.as_slice()).unwrap());
}
});
});
Expand Down Expand Up @@ -476,7 +472,7 @@ fn successive_and(c: &mut Criterion) {
group.bench_function(BenchmarkId::new("Multi And Owned", &dataset.name), |b| {
b.iter_batched(
|| dataset.bitmaps.clone(),
|bitmaps: Vec<RoaringBitmap>| black_box(bitmaps.intersection()),
|bitmaps: Vec<Roaring32>| black_box(bitmaps.intersection()),
BatchSize::LargeInput,
);
});
Expand All @@ -491,7 +487,7 @@ fn successive_or(c: &mut Criterion) {
for dataset in Datasets {
group.bench_function(BenchmarkId::new("Successive Or Assign Ref", &dataset.name), |b| {
b.iter(|| {
let mut output = RoaringBitmap::new();
let mut output = Roaring32::new();
for bitmap in &dataset.bitmaps {
output |= bitmap;
}
Expand All @@ -501,8 +497,8 @@ fn successive_or(c: &mut Criterion) {
group.bench_function(BenchmarkId::new("Successive Or Assign Owned", &dataset.name), |b| {
b.iter_batched(
|| dataset.bitmaps.clone(),
|bitmaps: Vec<RoaringBitmap>| {
let mut output = RoaringBitmap::new();
|bitmaps: Vec<Roaring32>| {
let mut output = Roaring32::new();
for bitmap in bitmaps {
output |= bitmap;
}
Expand All @@ -513,7 +509,7 @@ fn successive_or(c: &mut Criterion) {

group.bench_function(BenchmarkId::new("Successive Or Ref Ref", &dataset.name), |b| {
b.iter(|| {
let mut output = RoaringBitmap::new();
let mut output = Roaring32::new();
for bitmap in &dataset.bitmaps {
output = (&output) | bitmap;
}
Expand All @@ -527,7 +523,7 @@ fn successive_or(c: &mut Criterion) {
group.bench_function(BenchmarkId::new("Multi Or Owned", &dataset.name), |b| {
b.iter_batched(
|| dataset.bitmaps.clone(),
|bitmaps: Vec<RoaringBitmap>| black_box(bitmaps.union()),
|bitmaps: Vec<Roaring32>| black_box(bitmaps.union()),
BatchSize::LargeInput,
);
});
Expand All @@ -541,13 +537,13 @@ fn successive_or(c: &mut Criterion) {

fn is_empty(c: &mut Criterion) {
c.bench_function("is_empty true", |b| {
let bitmap = RoaringBitmap::new();
let bitmap = Roaring32::new();
b.iter(|| {
bitmap.is_empty();
});
});
c.bench_function("is_empty false", |b| {
let mut bitmap = RoaringBitmap::new();
let mut bitmap = Roaring32::new();
bitmap.insert(1);
b.iter(|| {
bitmap.is_empty();
Expand All @@ -558,21 +554,21 @@ fn is_empty(c: &mut Criterion) {
fn insert(c: &mut Criterion) {
c.bench_function("create & insert 1", |b| {
b.iter(|| {
let mut bitmap = RoaringBitmap::new();
let mut bitmap = Roaring32::new();
bitmap.insert(black_box(1));
});
});

c.bench_function("insert 1", |b| {
let mut bitmap = RoaringBitmap::new();
let mut bitmap = Roaring32::new();
b.iter(|| {
bitmap.insert(black_box(1));
});
});

c.bench_function("create & insert several", |b| {
b.iter(|| {
let mut bitmap = RoaringBitmap::new();
let mut bitmap = Roaring32::new();
bitmap.insert(black_box(1));
bitmap.insert(black_box(10));
bitmap.insert(black_box(100));
Expand All @@ -584,7 +580,7 @@ fn insert(c: &mut Criterion) {
});

c.bench_function("insert several", |b| {
let mut bitmap = RoaringBitmap::new();
let mut bitmap = Roaring32::new();
b.iter(|| {
bitmap.insert(black_box(1));
bitmap.insert(black_box(10));
Expand All @@ -599,7 +595,7 @@ fn insert(c: &mut Criterion) {

fn contains(c: &mut Criterion) {
c.bench_function("contains true", |b| {
let mut bitmap: RoaringBitmap = RoaringBitmap::new();
let mut bitmap: Roaring32 = Roaring32::new();
bitmap.insert(1);

b.iter(|| {
Expand All @@ -608,7 +604,7 @@ fn contains(c: &mut Criterion) {
});

c.bench_function("contains false", |b| {
let bitmap: RoaringBitmap = RoaringBitmap::new();
let bitmap: Roaring32 = Roaring32::new();

b.iter(|| {
bitmap.contains(black_box(1));
Expand All @@ -618,7 +614,7 @@ fn contains(c: &mut Criterion) {

fn remove(c: &mut Criterion) {
c.bench_function("remove 1", |b| {
let mut sub: RoaringBitmap = (0..65_536).collect();
let mut sub: Roaring32 = (0..65_536).collect();
b.iter(|| {
black_box(sub.remove(1000));
});
Expand All @@ -627,7 +623,7 @@ fn remove(c: &mut Criterion) {

fn remove_range_bitmap(c: &mut Criterion) {
c.bench_function("remove_range 1", |b| {
let mut sub: RoaringBitmap = (0..65_536).collect();
let mut sub: Roaring32 = (0..65_536).collect();
b.iter(|| {
// carefully delete part of the bitmap
// only the first iteration will actually change something
Expand All @@ -641,7 +637,7 @@ fn remove_range_bitmap(c: &mut Criterion) {
// Slower bench that creates a new bitmap on each iteration so that we can benchmark
// the bitmap-to-array conversion
b.iter(|| {
let mut sub: RoaringBitmap = (0..65_536).collect();
let mut sub: Roaring32 = (0..65_536).collect();
black_box(sub.remove_range(100..65_536));
assert_eq!(sub.len(), 100);
});
Expand All @@ -653,15 +649,15 @@ fn insert_range_bitmap(c: &mut Criterion) {
let mut group = c.benchmark_group("insert_range");
group.throughput(criterion::Throughput::Elements(size as u64));
group.bench_function(format!("from_empty_{}", size), |b| {
let bm = RoaringBitmap::new();
let bm = Roaring32::new();
b.iter_batched(
|| bm.clone(),
|mut bm| black_box(bm.insert_range(0..size)),
criterion::BatchSize::SmallInput,
)
});
group.bench_function(format!("pre_populated_{}", size), |b| {
let mut bm = RoaringBitmap::new();
let mut bm = Roaring32::new();
bm.insert_range(0..size);
b.iter_batched(
|| bm.clone(),
Expand All @@ -672,20 +668,20 @@ fn insert_range_bitmap(c: &mut Criterion) {
}
}

fn insert_range_treemap(c: &mut Criterion) {
fn insert_range_roaring64(c: &mut Criterion) {
for &size in &[1_000_u64, 10_000u64, 2 * (u32::MAX as u64)] {
let mut group = c.benchmark_group("insert_range_treemap");
let mut group = c.benchmark_group("insert_range_roaring64");
group.throughput(criterion::Throughput::Elements(size));
group.bench_function(format!("from_empty_{}", size), |b| {
let bm = RoaringTreemap::new();
let bm = Roaring64::new();
b.iter_batched(
|| bm.clone(),
|mut bm| black_box(bm.insert_range(0..size)),
criterion::BatchSize::SmallInput,
)
});
group.bench_function(format!("pre_populated_{}", size), |b| {
let mut bm = RoaringTreemap::new();
let mut bm = Roaring64::new();
bm.insert_range(0..size);
b.iter_batched(
|| bm.clone(),
Expand Down Expand Up @@ -713,7 +709,7 @@ criterion_group!(
remove,
remove_range_bitmap,
insert_range_bitmap,
insert_range_treemap,
insert_range_roaring64,
iteration,
is_empty,
serialization,
Expand Down
42 changes: 0 additions & 42 deletions src/bitmap/mod.rs

This file was deleted.

Loading