Split BitSet into two types and make it a bit nicer (#4249)

This commit is contained in:
Laurenz 2024-05-24 23:09:54 +02:00 committed by GitHub
parent 34f1a23246
commit ea4c64a799
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 95 additions and 43 deletions

7
Cargo.lock generated
View File

@ -2354,6 +2354,12 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "thin-vec"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a38c90d48152c236a3ab59271da4f4ae63d678c5d7ad6b7714d7cb9760be5e4b"
[[package]]
name = "thiserror"
version = "1.0.57"
@ -2812,6 +2818,7 @@ dependencies = [
"portable-atomic",
"rayon",
"siphasher 1.0.0",
"thin-vec",
]
[[package]]

View File

@ -104,6 +104,7 @@ syn = { version = "2", features = ["full", "extra-traits"] }
syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] }
tar = "0.4"
tempfile = "3.7.0"
thin-vec = "0.2.13"
time = { version = "0.3.20", features = ["formatting", "macros", "parsing"] }
tiny-skia = "0.11"
toml = { version = "0.8", default-features = false, features = ["parse", "display"] }

View File

@ -14,9 +14,10 @@ readme = { workspace = true }
[dependencies]
once_cell = { workspace = true }
siphasher = { workspace = true }
portable-atomic = { workspace = true }
rayon = { workspace = true }
siphasher = { workspace = true }
thin-vec = { workspace = true }
[lints]
workspace = true

View File

@ -1,57 +1,45 @@
use std::fmt::{self, Debug, Formatter};
/// Efficiently stores a set of numbers which are expected to be very small
/// (< 32/64 depending on the architecture).
///
/// Inserting a very small value is very cheap while inserting a large one may
/// be very expensive.
#[derive(Clone, PartialEq, Hash)]
pub struct BitSet {
/// Used to store values < BITS.
low: usize,
/// Used to store values >= BITS. We have the extra `Box` to keep the memory
/// size of the `BitSet` down.
#[allow(clippy::box_collection)]
hi: Option<Box<Vec<usize>>>,
}
use thin_vec::ThinVec;
/// The number of bits per chunk, i.e. the bit width of a `usize`.
const BITS: usize = usize::BITS as usize;
/// Stores a set of numbers which are expected to be rather small.
///
/// Inserting a very small value is cheap while inserting a large one may be
/// very expensive.
///
/// Unless you're managing small numbers yourself, you should likely prefer
/// [`SmallBitSet`], which has a slightly larger memory size, but does not
/// allocate for small numbers.
///
/// The bits live in `usize` chunks: a value `v` is represented by bit
/// `v % BITS` of the chunk at index `v / BITS`.
#[derive(Clone, PartialEq, Hash)]
pub struct BitSet(ThinVec<usize>);
impl BitSet {
/// Creates a new empty bit set.
pub fn new() -> Self {
Self { low: 0, hi: None }
Self(ThinVec::new())
}
/// Inserts a number into the set.
///
/// The chunk storage is grown with zeroed chunks when `value` falls beyond
/// the chunks that exist so far.
pub fn insert(&mut self, value: usize) {
if value < BITS {
self.low |= 1 << value;
} else {
let chunk = value / BITS - 1;
let chunk = value / BITS;
let within = value % BITS;
let vec = self.hi.get_or_insert_with(Default::default);
if chunk >= vec.len() {
vec.resize(chunk + 1, 0);
}
vec[chunk] |= 1 << within;
// Make sure the target chunk exists before setting the bit in it.
if chunk >= self.0.len() {
self.0.resize(chunk + 1, 0);
}
self.0[chunk] |= 1 << within;
}
/// Whether a number is present in the set.
///
/// Values that fall beyond the stored chunks are reported as absent.
pub fn contains(&self, value: usize) -> bool {
if value < BITS {
(self.low & (1 << value)) != 0
} else {
let Some(hi) = &self.hi else { return false };
let chunk = value / BITS - 1;
let chunk = value / BITS;
let within = value % BITS;
let Some(bits) = hi.get(chunk) else { return false };
// A chunk index past the end of the storage means the bit was never set.
let Some(bits) = self.0.get(chunk) else { return false };
(bits & (1 << within)) != 0
}
}
}
impl Default for BitSet {
fn default() -> Self {
@ -62,7 +50,62 @@ impl Default for BitSet {
impl Debug for BitSet {
/// Formats the set as a list of the numbers it contains, in ascending order.
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let mut list = f.debug_list();
let chunks = 1 + self.hi.as_ref().map_or(0, |v| v.len());
let chunks = self.0.len();
// Probe every value that the existing chunks are able to represent.
for v in 0..chunks * BITS {
if self.contains(v) {
list.entry(&v);
}
}
list.finish()
}
}
/// Efficiently stores a set of numbers which are expected to be very small.
/// Values `< 32/64` (depending on the architecture) are stored inline, while
/// values larger than that will lead to an allocation.
#[derive(Clone, PartialEq, Hash)]
pub struct SmallBitSet {
/// Used to store values < BITS.
low: usize,
/// Used to store values >= BITS. They are stored rebased by `BITS`, i.e. a
/// value `v` is kept in `hi` as `v - BITS`.
hi: BitSet,
}
impl SmallBitSet {
/// Creates a new empty bit set.
pub fn new() -> Self {
Self { low: 0, hi: BitSet::new() }
}
/// Inserts a number into the set.
pub fn insert(&mut self, value: usize) {
if value < BITS {
self.low |= 1 << value;
} else {
self.hi.insert(value - BITS);
}
}
/// Whether a number is present in the set.
pub fn contains(&self, value: usize) -> bool {
if value < BITS {
(self.low & (1 << value)) != 0
} else {
self.hi.contains(value - BITS)
}
}
}
impl Default for SmallBitSet {
fn default() -> Self {
Self::new()
}
}
impl Debug for SmallBitSet {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let mut list = f.debug_list();
let chunks = 1 + self.hi.0.len();
for v in 0..chunks * BITS {
if self.contains(v) {
list.entry(&v);
@ -78,7 +121,7 @@ mod tests {
#[test]
fn test_bitset() {
let mut set = BitSet::new();
let mut set = SmallBitSet::new();
assert!(!set.contains(0));
assert!(!set.contains(5));
set.insert(0);

View File

@ -10,7 +10,7 @@ mod hash;
mod pico;
mod scalar;
pub use self::bitset::BitSet;
pub use self::bitset::{BitSet, SmallBitSet};
pub use self::deferred::Deferred;
pub use self::hash::LazyHash;
pub use self::pico::PicoStr;

View File

@ -24,7 +24,7 @@ use crate::model::{Destination, EmphElem, LinkElem, StrongElem};
use crate::realize::{Behave, Behaviour};
use crate::syntax::Span;
use crate::text::UnderlineElem;
use crate::utils::{fat, BitSet, LazyHash};
use crate::utils::{fat, LazyHash, SmallBitSet};
/// A piece of document content.
///
@ -90,7 +90,7 @@ struct Inner<T: ?Sized + 'static> {
/// - If bit 0 is set, the element is prepared.
/// - If bit n is set, the element is guarded against the n-th show rule
/// recipe from the top of the style chain (counting from 1).
lifecycle: BitSet,
lifecycle: SmallBitSet,
/// The element's raw data.
elem: LazyHash<T>,
}
@ -102,7 +102,7 @@ impl Content {
inner: Arc::new(Inner {
label: None,
location: None,
lifecycle: BitSet::new(),
lifecycle: SmallBitSet::new(),
elem: elem.into(),
}),
span: Span::detached(),

View File

@ -10,7 +10,7 @@ use crate::foundations::{
};
use crate::introspection::{Locatable, TagElem};
use crate::text::TextElem;
use crate::utils::{hash128, BitSet};
use crate::utils::{hash128, SmallBitSet};
/// What to do with an element when encountering it during realization.
struct Verdict<'a> {
@ -83,7 +83,7 @@ fn verdict<'a>(
) -> Option<Verdict<'a>> {
let mut target = target;
let mut map = Styles::new();
let mut revoked = BitSet::new();
let mut revoked = SmallBitSet::new();
let mut step = None;
let mut slot;