diff --git a/crates/typst-ide/src/jump.rs b/crates/typst-ide/src/jump.rs index 5d270f04f..2dd5cf610 100644 --- a/crates/typst-ide/src/jump.rs +++ b/crates/typst-ide/src/jump.rs @@ -4,14 +4,15 @@ use typst::layout::{Frame, FrameItem, Point, Position, Size}; use typst::model::{Destination, Document, Url}; use typst::syntax::{FileId, LinkedNode, Side, Source, Span, SyntaxKind}; use typst::visualize::Geometry; +use typst::WorldExt; use crate::IdeWorld; /// Where to [jump](jump_from_click) to. #[derive(Debug, Clone, Eq, PartialEq)] pub enum Jump { - /// Jump to a position in a source file. - Source(FileId, usize), + /// Jump to a position in a file. + File(FileId, usize), /// Jump to an external URL. Url(Url), /// Jump to a point on a page. @@ -21,9 +22,8 @@ pub enum Jump { impl Jump { fn from_span(world: &dyn IdeWorld, span: Span) -> Option { let id = span.id()?; - let source = world.source(id).ok()?; - let node = source.find(span)?; - Some(Self::Source(id, node.offset())) + let offset = world.range(span)?.start; + Some(Self::File(id, offset)) } } @@ -83,7 +83,7 @@ pub fn jump_from_click( } else { node.offset() }; - return Some(Jump::Source(source.id(), pos)); + return Some(Jump::File(source.id(), pos)); } pos.x += width; @@ -194,7 +194,7 @@ mod tests { } fn cursor(cursor: usize) -> Option { - Some(Jump::Source(TestWorld::main_id(), cursor)) + Some(Jump::File(TestWorld::main_id(), cursor)) } fn pos(page: usize, x: f64, y: f64) -> Option { diff --git a/crates/typst-library/src/lib.rs b/crates/typst-library/src/lib.rs index 4db377e94..bd135cdbd 100644 --- a/crates/typst-library/src/lib.rs +++ b/crates/typst-library/src/lib.rs @@ -125,13 +125,13 @@ world_impl!(W for &W); pub trait WorldExt { /// Get the byte range for a span. /// - /// Returns `None` if the `Span` does not point into any source file. + /// Returns `None` if the `Span` does not point into any file. 
fn range(&self, span: Span) -> Option>; } -impl WorldExt for T { +impl WorldExt for T { fn range(&self, span: Span) -> Option> { - self.source(span.id()?).ok()?.range(span) + span.range().or_else(|| self.source(span.id()?).ok()?.range(span)) } } diff --git a/crates/typst-syntax/src/file.rs b/crates/typst-syntax/src/file.rs index e24fc8fb1..380ca2346 100644 --- a/crates/typst-syntax/src/file.rs +++ b/crates/typst-syntax/src/file.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::fmt::{self, Debug, Formatter}; +use std::num::NonZeroU16; use std::sync::{LazyLock, RwLock}; use crate::package::PackageSpec; @@ -25,7 +26,7 @@ type Pair = &'static (Option, VirtualPath); /// /// This type is globally interned and thus cheap to copy, compare, and hash. #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct FileId(u16); +pub struct FileId(NonZeroU16); impl FileId { /// Create a new interned file specification. @@ -48,7 +49,10 @@ impl FileId { // Create a new entry forever by leaking the pair. We can't leak more // than 2^16 pair (and typically will leak a lot less), so its not a // big deal. - let num = interner.from_id.len().try_into().expect("out of file ids"); + let num = u16::try_from(interner.from_id.len() + 1) + .and_then(NonZeroU16::try_from) + .expect("out of file ids"); + let id = FileId(num); let leaked = Box::leak(Box::new(pair)); interner.to_id.insert(leaked, id); @@ -66,7 +70,9 @@ impl FileId { #[track_caller] pub fn new_fake(path: VirtualPath) -> Self { let mut interner = INTERNER.write().unwrap(); - let num = interner.from_id.len().try_into().expect("out of file ids"); + let num = u16::try_from(interner.from_id.len() + 1) + .and_then(NonZeroU16::try_from) + .expect("out of file ids"); let id = FileId(num); let leaked = Box::leak(Box::new((None, path))); @@ -100,18 +106,18 @@ impl FileId { /// Should only be used with numbers retrieved via /// [`into_raw`](Self::into_raw). Misuse may results in panics, but no /// unsafety. 
- pub const fn from_raw(v: u16) -> Self { + pub const fn from_raw(v: NonZeroU16) -> Self { Self(v) } /// Extract the raw underlying number. - pub const fn into_raw(self) -> u16 { + pub const fn into_raw(self) -> NonZeroU16 { self.0 } /// Get the static pair. fn pair(&self) -> Pair { - INTERNER.read().unwrap().from_id[usize::from(self.0)] + INTERNER.read().unwrap().from_id[usize::from(self.0.get() - 1)] } } diff --git a/crates/typst-syntax/src/node.rs b/crates/typst-syntax/src/node.rs index a3487e45b..14ad018fb 100644 --- a/crates/typst-syntax/src/node.rs +++ b/crates/typst-syntax/src/node.rs @@ -241,7 +241,7 @@ impl SyntaxNode { return Err(Unnumberable); } - let mid = Span::new(id, (within.start + within.end) / 2).unwrap(); + let mid = Span::from_number(id, (within.start + within.end) / 2).unwrap(); match &mut self.0 { Repr::Leaf(leaf) => leaf.span = mid, Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within)?, @@ -457,7 +457,7 @@ impl InnerNode { let mut start = within.start; if range.is_none() { let end = start + stride; - self.span = Span::new(id, (start + end) / 2).unwrap(); + self.span = Span::from_number(id, (start + end) / 2).unwrap(); self.upper = within.end; start = end; } diff --git a/crates/typst-syntax/src/source.rs b/crates/typst-syntax/src/source.rs index 3454a2651..6ff94c73f 100644 --- a/crates/typst-syntax/src/source.rs +++ b/crates/typst-syntax/src/source.rs @@ -166,6 +166,8 @@ impl Source { /// Get the byte range for the given span in this file. /// /// Returns `None` if the span does not point into this source file. + /// + /// Typically, it's easier to use `WorldExt::range` instead. 
pub fn range(&self, span: Span) -> Option> { Some(self.find(span)?.range()) } diff --git a/crates/typst-syntax/src/span.rs b/crates/typst-syntax/src/span.rs index 0847ceea9..3618b8f2f 100644 --- a/crates/typst-syntax/src/span.rs +++ b/crates/typst-syntax/src/span.rs @@ -1,21 +1,37 @@ use std::fmt::{self, Debug, Formatter}; -use std::num::NonZeroU64; +use std::num::{NonZeroU16, NonZeroU64}; use std::ops::Range; use ecow::EcoString; use crate::FileId; -/// A unique identifier for a syntax node. +/// Defines a range in a file. /// -/// This is used throughout the compiler to track which source section an error -/// or element stems from. Can be [mapped back](crate::Source::range) to a byte -/// range for user facing display. +/// This is used throughout the compiler to track which source section an +/// element stems from or an error applies to. /// -/// During editing, the span values stay mostly stable, even for nodes behind an -/// insertion. This is not true for simple ranges as they would shift. Spans can -/// be used as inputs to memoized functions without hurting cache performance -/// when text is inserted somewhere in the document other than the end. +/// - The [`.id()`](Self::id) function can be used to get the `FileId` for the +/// span and, by extension, its file system path. +/// - The `WorldExt::range` function can be used to map the span to a +/// `Range`. +/// +/// This type takes up 8 bytes and is copyable and null-optimized (i.e. +/// `Option` also takes 8 bytes). +/// +/// Spans come in two flavors: Numbered spans and raw range spans. The +/// `WorldExt::range` function automatically handles both cases, yielding a +/// `Range`. +/// +/// # Numbered spans +/// Typst source files use _numbered spans._ Rather than using byte ranges, +/// which shift a lot as you type, each AST node gets a unique number. +/// +/// During editing, the span numbers stay mostly stable, even for nodes behind +/// an insertion. 
This is not true for simple ranges as they would shift. Spans +/// can be used as inputs to memoized functions without hurting cache +/// performance when text is inserted somewhere in the document other than the +/// end. /// /// Span ids are ordered in the syntax tree to enable quickly finding the node /// with some id: @@ -23,38 +39,37 @@ use crate::FileId; /// - The id of a node is always greater than any id in the subtrees of any left /// sibling and smaller than any id in the subtrees of any right sibling. /// -/// This type takes up 8 bytes and is null-optimized (i.e. `Option` also -/// takes 8 bytes). +/// # Raw range spans +/// Non-Typst files use raw ranges instead of numbered spans. The maximum +/// encodable value for start and end is 2^23 - 1. Larger values will be saturated. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] pub struct Span(NonZeroU64); impl Span { - /// The full range of numbers available for span numbering. - pub(super) const FULL: Range = 2..(1 << Self::BITS); + /// The full range of numbers available for source file span numbering. + pub(crate) const FULL: Range = 2..(1 << 47); /// The value reserved for the detached span. const DETACHED: u64 = 1; /// Data layout: - /// | 16 bits source id | 48 bits number | - const BITS: usize = 48; - - /// Create a new span from a source id and a unique number. + /// | 16 bits file id | 48 bits number | /// - /// Returns `None` if `number` is not contained in `FULL`. - pub(super) const fn new(id: FileId, number: u64) -> Option { - if number < Self::FULL.start || number >= Self::FULL.end { - return None; - } + /// Number = + /// - 1 means detached + /// - 2..2^47-1 is a numbered span + /// - 2^47..2^48-1 is a raw range span. To retrieve it, you must subtract + /// `RANGE_BASE` and then use shifting/bitmasking to extract the + /// components. 
+ const NUMBER_BITS: usize = 48; + const FILE_ID_SHIFT: usize = Self::NUMBER_BITS; + const NUMBER_MASK: u64 = (1 << Self::NUMBER_BITS) - 1; + const RANGE_BASE: u64 = Self::FULL.end; + const RANGE_PART_BITS: usize = 23; + const RANGE_PART_SHIFT: usize = Self::RANGE_PART_BITS; + const RANGE_PART_MASK: u64 = (1 << Self::RANGE_PART_BITS) - 1; - let bits = ((id.into_raw() as u64) << Self::BITS) | number; - match NonZeroU64::new(bits) { - Some(v) => Some(Self(v)), - None => unreachable!(), - } - } - - /// Create a span that does not point into any source file. + /// Create a span that does not point into any file. pub const fn detached() -> Self { match NonZeroU64::new(Self::DETACHED) { Some(v) => Self(v), @@ -62,25 +77,26 @@ impl Span { } } - /// Whether the span is detached. - pub const fn is_detached(self) -> bool { - self.0.get() == Self::DETACHED - } - - /// The id of the source file the span points into. + /// Create a new span from a file id and a number. /// - /// Returns `None` if the span is detached. - pub const fn id(self) -> Option { - if self.is_detached() { + /// Returns `None` if `number` is not contained in `FULL`. + pub(crate) const fn from_number(id: FileId, number: u64) -> Option { + if number < Self::FULL.start || number >= Self::FULL.end { return None; } - let bits = (self.0.get() >> Self::BITS) as u16; - Some(FileId::from_raw(bits)) + Some(Self::pack(id, number)) } - /// The unique number of the span within its [`Source`](crate::Source). - pub const fn number(self) -> u64 { - self.0.get() & ((1 << Self::BITS) - 1) + /// Create a new span from a raw byte range instead of a span number. + /// + /// If one of the range's parts exceeds the maximum value (2^23 - 1), it is + /// saturated. 
+ pub const fn from_range(id: FileId, range: Range) -> Self { + let max = (1 << Self::RANGE_PART_BITS) - 1; + let start = if range.start > max { max } else { range.start } as u64; + let end = if range.end > max { max } else { range.end } as u64; + let number = (start << Self::RANGE_PART_SHIFT) | end; + Self::pack(id, Self::RANGE_BASE + number) } /// Construct from a raw number. @@ -92,6 +108,51 @@ impl Span { Self(v) } + /// Pack a file ID and the low bits into a span. + const fn pack(id: FileId, low: u64) -> Self { + let bits = ((id.into_raw().get() as u64) << Self::FILE_ID_SHIFT) | low; + match NonZeroU64::new(bits) { + Some(v) => Self(v), + // The file ID is non-zero. + None => unreachable!(), + } + } + + /// Whether the span is detached. + pub const fn is_detached(self) -> bool { + self.0.get() == Self::DETACHED + } + + /// The id of the file the span points into. + /// + /// Returns `None` if the span is detached. + pub const fn id(self) -> Option { + // Detached span has only zero high bits, so it will trigger the + // `None` case. + match NonZeroU16::new((self.0.get() >> Self::FILE_ID_SHIFT) as u16) { + Some(v) => Some(FileId::from_raw(v)), + None => None, + } + } + + /// The unique number of the span within its [`Source`](crate::Source). + pub(crate) const fn number(self) -> u64 { + self.0.get() & Self::NUMBER_MASK + } + + /// Extract a raw byte range from the span, if it is a raw range span. + /// + /// Typically, you should use `WorldExt::range` instead. + pub const fn range(self) -> Option> { + let Some(number) = self.number().checked_sub(Self::RANGE_BASE) else { + return None; + }; + + let start = (number >> Self::RANGE_PART_SHIFT) as usize; + let end = (number & Self::RANGE_PART_MASK) as usize; + Some(start..end) + } + /// Extract the raw underlying number. 
pub const fn into_raw(self) -> NonZeroU64 { self.0 @@ -159,13 +220,40 @@ impl Debug for Spanned { #[cfg(test)] mod tests { + use std::num::NonZeroU16; + use std::ops::Range; + use crate::{FileId, Span}; #[test] - fn test_span_encoding() { - let id = FileId::from_raw(5); - let span = Span::new(id, 10).unwrap(); + fn test_span_detached() { + let span = Span::detached(); + assert!(span.is_detached()); + assert_eq!(span.id(), None); + assert_eq!(span.range(), None); + } + + #[test] + fn test_span_number_encoding() { + let id = FileId::from_raw(NonZeroU16::new(5).unwrap()); + let span = Span::from_number(id, 10).unwrap(); assert_eq!(span.id(), Some(id)); assert_eq!(span.number(), 10); + assert_eq!(span.range(), None); + } + + #[test] + fn test_span_range_encoding() { + let id = FileId::from_raw(NonZeroU16::new(u16::MAX).unwrap()); + let roundtrip = |range: Range| { + let span = Span::from_range(id, range.clone()); + assert_eq!(span.id(), Some(id)); + assert_eq!(span.range(), Some(range)); + }; + + roundtrip(0..0); + roundtrip(177..233); + roundtrip(0..8388607); + roundtrip(8388606..8388607); } } diff --git a/crates/typst-utils/src/pico.rs b/crates/typst-utils/src/pico.rs index dcab39b6d..7fcd33435 100644 --- a/crates/typst-utils/src/pico.rs +++ b/crates/typst-utils/src/pico.rs @@ -1,6 +1,7 @@ use std::cmp::Ordering; use std::collections::HashMap; use std::fmt::{self, Debug, Formatter}; +use std::num::NonZeroU32; use std::sync::{LazyLock, RwLock}; /// The global string interner. @@ -21,7 +22,7 @@ struct Interner { /// unnecessarily. For this reason, the user should use the [`PicoStr::resolve`] /// method to get the underlying string, such that the lookup is done only once. #[derive(Copy, Clone, Eq, PartialEq, Hash)] -pub struct PicoStr(u32); +pub struct PicoStr(NonZeroU32); impl PicoStr { /// Creates a new interned string. @@ -38,7 +39,10 @@ impl PicoStr { // Create a new entry forever by leaking the string. 
PicoStr is only // used for strings that aren't created en masse, so it is okay. - let num = interner.from_id.len().try_into().expect("out of string ids"); + let num = u32::try_from(interner.from_id.len() + 1) + .and_then(NonZeroU32::try_from) + .expect("out of string ids"); + let id = Self(num); let string = Box::leak(string.to_string().into_boxed_str()); interner.to_id.insert(string, id); @@ -48,7 +52,7 @@ impl PicoStr { /// Resolves the interned string. pub fn resolve(&self) -> &'static str { - INTERNER.read().unwrap().from_id[self.0 as usize] + INTERNER.read().unwrap().from_id[(self.0.get() - 1) as usize] } }