mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Add support for raw range spans
This commit is contained in:
parent
5625914872
commit
525154a730
@ -4,14 +4,15 @@ use typst::layout::{Frame, FrameItem, Point, Position, Size};
|
||||
use typst::model::{Destination, Document, Url};
|
||||
use typst::syntax::{FileId, LinkedNode, Side, Source, Span, SyntaxKind};
|
||||
use typst::visualize::Geometry;
|
||||
use typst::WorldExt;
|
||||
|
||||
use crate::IdeWorld;
|
||||
|
||||
/// Where to [jump](jump_from_click) to.
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub enum Jump {
|
||||
/// Jump to a position in a source file.
|
||||
Source(FileId, usize),
|
||||
/// Jump to a position in a file.
|
||||
File(FileId, usize),
|
||||
/// Jump to an external URL.
|
||||
Url(Url),
|
||||
/// Jump to a point on a page.
|
||||
@ -21,9 +22,8 @@ pub enum Jump {
|
||||
impl Jump {
|
||||
fn from_span(world: &dyn IdeWorld, span: Span) -> Option<Self> {
|
||||
let id = span.id()?;
|
||||
let source = world.source(id).ok()?;
|
||||
let node = source.find(span)?;
|
||||
Some(Self::Source(id, node.offset()))
|
||||
let offset = world.range(span)?.start;
|
||||
Some(Self::File(id, offset))
|
||||
}
|
||||
}
|
||||
|
||||
@ -83,7 +83,7 @@ pub fn jump_from_click(
|
||||
} else {
|
||||
node.offset()
|
||||
};
|
||||
return Some(Jump::Source(source.id(), pos));
|
||||
return Some(Jump::File(source.id(), pos));
|
||||
}
|
||||
|
||||
pos.x += width;
|
||||
@ -194,7 +194,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn cursor(cursor: usize) -> Option<Jump> {
|
||||
Some(Jump::Source(TestWorld::main_id(), cursor))
|
||||
Some(Jump::File(TestWorld::main_id(), cursor))
|
||||
}
|
||||
|
||||
fn pos(page: usize, x: f64, y: f64) -> Option<Position> {
|
||||
|
@ -125,13 +125,13 @@ world_impl!(W for &W);
|
||||
pub trait WorldExt {
|
||||
/// Get the byte range for a span.
|
||||
///
|
||||
/// Returns `None` if the `Span` does not point into any source file.
|
||||
/// Returns `None` if the `Span` does not point into any file.
|
||||
fn range(&self, span: Span) -> Option<Range<usize>>;
|
||||
}
|
||||
|
||||
impl<T: World> WorldExt for T {
|
||||
impl<T: World + ?Sized> WorldExt for T {
|
||||
fn range(&self, span: Span) -> Option<Range<usize>> {
|
||||
self.source(span.id()?).ok()?.range(span)
|
||||
span.range().or_else(|| self.source(span.id()?).ok()?.range(span))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::num::NonZeroU16;
|
||||
use std::sync::{LazyLock, RwLock};
|
||||
|
||||
use crate::package::PackageSpec;
|
||||
@ -25,7 +26,7 @@ type Pair = &'static (Option<PackageSpec>, VirtualPath);
|
||||
///
|
||||
/// This type is globally interned and thus cheap to copy, compare, and hash.
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
pub struct FileId(u16);
|
||||
pub struct FileId(NonZeroU16);
|
||||
|
||||
impl FileId {
|
||||
/// Create a new interned file specification.
|
||||
@ -48,7 +49,10 @@ impl FileId {
|
||||
// Create a new entry forever by leaking the pair. We can't leak more
|
||||
// than 2^16 pairs (and typically will leak a lot less), so it's not a
|
||||
// big deal.
|
||||
let num = interner.from_id.len().try_into().expect("out of file ids");
|
||||
let num = u16::try_from(interner.from_id.len() + 1)
|
||||
.and_then(NonZeroU16::try_from)
|
||||
.expect("out of file ids");
|
||||
|
||||
let id = FileId(num);
|
||||
let leaked = Box::leak(Box::new(pair));
|
||||
interner.to_id.insert(leaked, id);
|
||||
@ -66,7 +70,9 @@ impl FileId {
|
||||
#[track_caller]
|
||||
pub fn new_fake(path: VirtualPath) -> Self {
|
||||
let mut interner = INTERNER.write().unwrap();
|
||||
let num = interner.from_id.len().try_into().expect("out of file ids");
|
||||
let num = u16::try_from(interner.from_id.len() + 1)
|
||||
.and_then(NonZeroU16::try_from)
|
||||
.expect("out of file ids");
|
||||
|
||||
let id = FileId(num);
|
||||
let leaked = Box::leak(Box::new((None, path)));
|
||||
@ -100,18 +106,18 @@ impl FileId {
|
||||
/// Should only be used with numbers retrieved via
|
||||
/// [`into_raw`](Self::into_raw). Misuse may result in panics, but no
|
||||
/// unsafety.
|
||||
pub const fn from_raw(v: u16) -> Self {
|
||||
pub const fn from_raw(v: NonZeroU16) -> Self {
|
||||
Self(v)
|
||||
}
|
||||
|
||||
/// Extract the raw underlying number.
|
||||
pub const fn into_raw(self) -> u16 {
|
||||
pub const fn into_raw(self) -> NonZeroU16 {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Get the static pair.
|
||||
fn pair(&self) -> Pair {
|
||||
INTERNER.read().unwrap().from_id[usize::from(self.0)]
|
||||
INTERNER.read().unwrap().from_id[usize::from(self.0.get() - 1)]
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -241,7 +241,7 @@ impl SyntaxNode {
|
||||
return Err(Unnumberable);
|
||||
}
|
||||
|
||||
let mid = Span::new(id, (within.start + within.end) / 2).unwrap();
|
||||
let mid = Span::from_number(id, (within.start + within.end) / 2).unwrap();
|
||||
match &mut self.0 {
|
||||
Repr::Leaf(leaf) => leaf.span = mid,
|
||||
Repr::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within)?,
|
||||
@ -457,7 +457,7 @@ impl InnerNode {
|
||||
let mut start = within.start;
|
||||
if range.is_none() {
|
||||
let end = start + stride;
|
||||
self.span = Span::new(id, (start + end) / 2).unwrap();
|
||||
self.span = Span::from_number(id, (start + end) / 2).unwrap();
|
||||
self.upper = within.end;
|
||||
start = end;
|
||||
}
|
||||
|
@ -166,6 +166,8 @@ impl Source {
|
||||
/// Get the byte range for the given span in this file.
|
||||
///
|
||||
/// Returns `None` if the span does not point into this source file.
|
||||
///
|
||||
/// Typically, it's easier to use `WorldExt::range` instead.
|
||||
pub fn range(&self, span: Span) -> Option<Range<usize>> {
    // Look up the node carrying this span; if there is none, the result is
    // `None`, otherwise it is that node's byte range.
    self.find(span).map(|node| node.range())
}
|
||||
|
@ -1,21 +1,37 @@
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::num::NonZeroU64;
|
||||
use std::num::{NonZeroU16, NonZeroU64};
|
||||
use std::ops::Range;
|
||||
|
||||
use ecow::EcoString;
|
||||
|
||||
use crate::FileId;
|
||||
|
||||
/// A unique identifier for a syntax node.
|
||||
/// Defines a range in a file.
|
||||
///
|
||||
/// This is used throughout the compiler to track which source section an error
|
||||
/// or element stems from. Can be [mapped back](crate::Source::range) to a byte
|
||||
/// range for user facing display.
|
||||
/// This is used throughout the compiler to track which source section an
|
||||
/// element stems from or an error applies to.
|
||||
///
|
||||
/// During editing, the span values stay mostly stable, even for nodes behind an
|
||||
/// insertion. This is not true for simple ranges as they would shift. Spans can
|
||||
/// be used as inputs to memoized functions without hurting cache performance
|
||||
/// when text is inserted somewhere in the document other than the end.
|
||||
/// - The [`.id()`](Self::id) function can be used to get the `FileId` for the
|
||||
/// span and, by extension, its file system path.
|
||||
/// - The `WorldExt::range` function can be used to map the span to a
|
||||
/// `Range<usize>`.
|
||||
///
|
||||
/// This type takes up 8 bytes and is copyable and null-optimized (i.e.
|
||||
/// `Option<Span>` also takes 8 bytes).
|
||||
///
|
||||
/// Spans come in two flavors: Numbered spans and raw range spans. The
|
||||
/// `WorldExt::range` function automatically handles both cases, yielding a
|
||||
/// `Range<usize>`.
|
||||
///
|
||||
/// # Numbered spans
|
||||
/// Typst source files use _numbered spans._ Rather than using byte ranges,
|
||||
/// which shift a lot as you type, each AST node gets a unique number.
|
||||
///
|
||||
/// During editing, the span numbers stay mostly stable, even for nodes behind
|
||||
/// an insertion. This is not true for simple ranges as they would shift. Spans
|
||||
/// can be used as inputs to memoized functions without hurting cache
|
||||
/// performance when text is inserted somewhere in the document other than the
|
||||
/// end.
|
||||
///
|
||||
/// Span ids are ordered in the syntax tree to enable quickly finding the node
|
||||
/// with some id:
|
||||
@ -23,38 +39,37 @@ use crate::FileId;
|
||||
/// - The id of a node is always greater than any id in the subtrees of any left
|
||||
/// sibling and smaller than any id in the subtrees of any right sibling.
|
||||
///
|
||||
/// This type takes up 8 bytes and is null-optimized (i.e. `Option<Span>` also
|
||||
/// takes 8 bytes).
|
||||
/// # Raw range spans
|
||||
/// Non-Typst files use raw ranges instead of numbered spans. Start and end are
/// each stored in 23 bits, so the largest value that round-trips is 2^23 - 1.
/// Larger values are not preserved exactly.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
|
||||
pub struct Span(NonZeroU64);
|
||||
|
||||
impl Span {
|
||||
/// The full range of numbers available for span numbering.
|
||||
pub(super) const FULL: Range<u64> = 2..(1 << Self::BITS);
|
||||
/// The full range of numbers available for source file span numbering.
|
||||
pub(crate) const FULL: Range<u64> = 2..(1 << 47);
|
||||
|
||||
/// The value reserved for the detached span.
|
||||
const DETACHED: u64 = 1;
|
||||
|
||||
/// Data layout:
|
||||
/// | 16 bits source id | 48 bits number |
|
||||
const BITS: usize = 48;
|
||||
|
||||
/// Create a new span from a source id and a unique number.
|
||||
/// | 16 bits file id | 48 bits number |
|
||||
///
|
||||
/// Returns `None` if `number` is not contained in `FULL`.
|
||||
pub(super) const fn new(id: FileId, number: u64) -> Option<Self> {
|
||||
if number < Self::FULL.start || number >= Self::FULL.end {
|
||||
return None;
|
||||
}
|
||||
/// Number =
|
||||
/// - 1 means detached
|
||||
/// - 2..2^47-1 is a numbered span
|
||||
/// - 2^47..2^48-1 is a raw range span. To retrieve it, you must subtract
|
||||
/// `RANGE_BASE` and then use shifting/bitmasking to extract the
|
||||
/// components.
|
||||
const NUMBER_BITS: usize = 48;
|
||||
const FILE_ID_SHIFT: usize = Self::NUMBER_BITS;
|
||||
const NUMBER_MASK: u64 = (1 << Self::NUMBER_BITS) - 1;
|
||||
const RANGE_BASE: u64 = Self::FULL.end;
|
||||
const RANGE_PART_BITS: usize = 23;
|
||||
const RANGE_PART_SHIFT: usize = Self::RANGE_PART_BITS;
|
||||
const RANGE_PART_MASK: u64 = (1 << Self::RANGE_PART_BITS) - 1;
|
||||
|
||||
let bits = ((id.into_raw() as u64) << Self::BITS) | number;
|
||||
match NonZeroU64::new(bits) {
|
||||
Some(v) => Some(Self(v)),
|
||||
None => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a span that does not point into any source file.
|
||||
/// Create a span that does not point into any file.
|
||||
pub const fn detached() -> Self {
|
||||
match NonZeroU64::new(Self::DETACHED) {
|
||||
Some(v) => Self(v),
|
||||
@ -62,25 +77,26 @@ impl Span {
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether the span is detached.
|
||||
pub const fn is_detached(self) -> bool {
|
||||
self.0.get() == Self::DETACHED
|
||||
}
|
||||
|
||||
/// The id of the source file the span points into.
|
||||
/// Create a new span from a file id and a number.
|
||||
///
|
||||
/// Returns `None` if the span is detached.
|
||||
pub const fn id(self) -> Option<FileId> {
|
||||
if self.is_detached() {
|
||||
/// Returns `None` if `number` is not contained in `FULL`.
|
||||
pub(crate) const fn from_number(id: FileId, number: u64) -> Option<Self> {
|
||||
if number < Self::FULL.start || number >= Self::FULL.end {
|
||||
return None;
|
||||
}
|
||||
let bits = (self.0.get() >> Self::BITS) as u16;
|
||||
Some(FileId::from_raw(bits))
|
||||
Some(Self::pack(id, number))
|
||||
}
|
||||
|
||||
/// The unique number of the span within its [`Source`](crate::Source).
|
||||
pub const fn number(self) -> u64 {
|
||||
self.0.get() & ((1 << Self::BITS) - 1)
|
||||
/// Create a new span from a raw byte range instead of a span number.
|
||||
///
|
||||
/// If one of the range's parts exceeds the maximum value (2^23), it is
|
||||
/// saturated.
|
||||
pub const fn from_range(id: FileId, range: Range<usize>) -> Self {
|
||||
let max = 1 << Self::RANGE_PART_BITS;
|
||||
let start = if range.start > max { max } else { range.start } as u64;
|
||||
let end = if range.end > max { max } else { range.end } as u64;
|
||||
let number = (start << Self::RANGE_PART_SHIFT) | end;
|
||||
Self::pack(id, Self::RANGE_BASE + number)
|
||||
}
|
||||
|
||||
/// Construct from a raw number.
|
||||
@ -92,6 +108,51 @@ impl Span {
|
||||
Self(v)
|
||||
}
|
||||
|
||||
/// Pack a file ID and the low bits into a span.
|
||||
const fn pack(id: FileId, low: u64) -> Self {
|
||||
let bits = ((id.into_raw().get() as u64) << Self::FILE_ID_SHIFT) | low;
|
||||
match NonZeroU64::new(bits) {
|
||||
Some(v) => Self(v),
|
||||
// The file ID is non-zero.
|
||||
None => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether the span is detached.
|
||||
pub const fn is_detached(self) -> bool {
|
||||
self.0.get() == Self::DETACHED
|
||||
}
|
||||
|
||||
/// The id of the file the span points into.
|
||||
///
|
||||
/// Returns `None` if the span is detached.
|
||||
pub const fn id(self) -> Option<FileId> {
|
||||
// Detached span has only zero high bits, so it will trigger the
|
||||
// `None` case.
|
||||
match NonZeroU16::new((self.0.get() >> Self::FILE_ID_SHIFT) as u16) {
|
||||
Some(v) => Some(FileId::from_raw(v)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The unique number of the span within its [`Source`](crate::Source).
|
||||
pub(crate) const fn number(self) -> u64 {
|
||||
self.0.get() & Self::NUMBER_MASK
|
||||
}
|
||||
|
||||
/// Extract a raw byte range from the span, if it is a raw range span.
|
||||
///
|
||||
/// Typically, you should use `WorldExt::range` instead.
|
||||
pub const fn range(self) -> Option<Range<usize>> {
|
||||
let Some(number) = self.number().checked_sub(Self::RANGE_BASE) else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let start = (number >> Self::RANGE_PART_SHIFT) as usize;
|
||||
let end = (number & Self::RANGE_PART_MASK) as usize;
|
||||
Some(start..end)
|
||||
}
|
||||
|
||||
/// Extract the raw underlying number.
|
||||
pub const fn into_raw(self) -> NonZeroU64 {
|
||||
self.0
|
||||
@ -159,13 +220,40 @@ impl<T: Debug> Debug for Spanned<T> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::num::NonZeroU16;
|
||||
use std::ops::Range;
|
||||
|
||||
use crate::{FileId, Span};
|
||||
|
||||
#[test]
|
||||
fn test_span_encoding() {
|
||||
let id = FileId::from_raw(5);
|
||||
let span = Span::new(id, 10).unwrap();
|
||||
fn test_span_detached() {
|
||||
let span = Span::detached();
|
||||
assert!(span.is_detached());
|
||||
assert_eq!(span.id(), None);
|
||||
assert_eq!(span.range(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_span_number_encoding() {
|
||||
let id = FileId::from_raw(NonZeroU16::new(5).unwrap());
|
||||
let span = Span::from_number(id, 10).unwrap();
|
||||
assert_eq!(span.id(), Some(id));
|
||||
assert_eq!(span.number(), 10);
|
||||
assert_eq!(span.range(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_span_range_encoding() {
|
||||
let id = FileId::from_raw(NonZeroU16::new(u16::MAX).unwrap());
|
||||
let roundtrip = |range: Range<usize>| {
|
||||
let span = Span::from_range(id, range.clone());
|
||||
assert_eq!(span.id(), Some(id));
|
||||
assert_eq!(span.range(), Some(range));
|
||||
};
|
||||
|
||||
roundtrip(0..0);
|
||||
roundtrip(177..233);
|
||||
roundtrip(0..8388607);
|
||||
roundtrip(8388606..8388607);
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::num::NonZeroU32;
|
||||
use std::sync::{LazyLock, RwLock};
|
||||
|
||||
/// The global string interner.
|
||||
@ -21,7 +22,7 @@ struct Interner {
|
||||
/// unnecessarily. For this reason, the user should use the [`PicoStr::resolve`]
|
||||
/// method to get the underlying string, such that the lookup is done only once.
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
|
||||
pub struct PicoStr(u32);
|
||||
pub struct PicoStr(NonZeroU32);
|
||||
|
||||
impl PicoStr {
|
||||
/// Creates a new interned string.
|
||||
@ -38,7 +39,10 @@ impl PicoStr {
|
||||
|
||||
// Create a new entry forever by leaking the string. PicoStr is only
|
||||
// used for strings that aren't created en masse, so it is okay.
|
||||
let num = interner.from_id.len().try_into().expect("out of string ids");
|
||||
let num = u32::try_from(interner.from_id.len() + 1)
|
||||
.and_then(NonZeroU32::try_from)
|
||||
.expect("out of string ids");
|
||||
|
||||
let id = Self(num);
|
||||
let string = Box::leak(string.to_string().into_boxed_str());
|
||||
interner.to_id.insert(string, id);
|
||||
@ -48,7 +52,7 @@ impl PicoStr {
|
||||
|
||||
/// Resolves the interned string.
|
||||
pub fn resolve(&self) -> &'static str {
|
||||
INTERNER.read().unwrap().from_id[self.0 as usize]
|
||||
INTERNER.read().unwrap().from_id[(self.0.get() - 1) as usize]
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user