From 94b375ce550af1e18942c10c15d92163881a3213 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 1 Jun 2022 13:49:02 +0200 Subject: [PATCH] Incremental renumbering --- src/parse/incremental.rs | 14 +- src/source.rs | 8 +- src/syntax/mod.rs | 286 +++++++++++++++++++++++++++------------ src/syntax/span.rs | 31 ++++- tests/typeset.rs | 46 ++++--- 5 files changed, 264 insertions(+), 121 deletions(-) diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 3cafc0141..59dbebebb 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -1,7 +1,7 @@ use std::ops::Range; use std::sync::Arc; -use crate::syntax::{InnerNode, NodeKind, SyntaxNode}; +use crate::syntax::{InnerNode, NodeKind, Span, SyntaxNode}; use super::{ is_newline, parse, reparse_code_block, reparse_content_block, reparse_markup_elements, @@ -26,7 +26,9 @@ pub fn reparse( } } + let id = root.span().source(); *root = parse(src); + root.numberize(id, Span::FULL).unwrap(); 0 .. src.len() } @@ -139,7 +141,7 @@ impl Reparser<'_> { if let SearchState::Contained(pos) = search { let child = &mut node.children_mut()[pos.idx]; let prev_len = child.len(); - let prev_count = child.count(); + let prev_descendants = child.descendants(); if let Some(range) = match child { SyntaxNode::Inner(node) => { @@ -148,8 +150,8 @@ impl Reparser<'_> { SyntaxNode::Leaf(_) => None, } { let new_len = child.len(); - let new_count = child.count(); - node.update_parent(prev_len, new_len, prev_count, new_count); + let new_descendants = child.descendants(); + node.update_parent(prev_len, new_len, prev_descendants, new_descendants); return Some(range); } @@ -259,7 +261,9 @@ impl Reparser<'_> { return None; } - node.replace_children(superseded_start .. superseded_start + amount, newborns); + node.replace_children(superseded_start .. superseded_start + amount, newborns) + .ok()?; + Some(newborn_span) } } diff --git a/src/source.rs b/src/source.rs index 87e962972..61abcd921 100644 --- a/src/source.rs +++ b/src/source.rs @@ -169,7 +169,7 @@ impl SourceFile { lines.extend(Line::iter(0, 0, &src)); let mut root = parse(&src); - root.number(id, Span::MIN_NUMBER); + root.numberize(id, Span::FULL).unwrap(); Self { id, @@ -243,7 +243,7 @@ impl SourceFile { self.lines = vec![Line { byte_idx: 0, utf16_idx: 0 }]; self.lines.extend(Line::iter(0, 0, &self.src)); self.root = parse(&self.src); - self.root.number(self.id(), Span::MIN_NUMBER); + self.root.numberize(self.id(), Span::FULL).unwrap(); self.rev = self.rev.wrapping_add(1); } @@ -277,9 +277,7 @@ impl SourceFile { )); // Incrementally reparse the replaced range. - let range = reparse(&mut self.root, &self.src, replace, with.len()); - self.root.number(self.id(), Span::MIN_NUMBER); - range + reparse(&mut self.root, &self.src, replace, with.len()) } /// Get the length of the file in bytes. diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 0791a761b..92d008787 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -30,8 +30,8 @@ impl SyntaxNode { /// Returns the metadata of the node. pub fn data(&self) -> &NodeData { match self { - SyntaxNode::Inner(inner) => &inner.data, - SyntaxNode::Leaf(leaf) => leaf, + Self::Inner(inner) => &inner.data, + Self::Leaf(leaf) => leaf, } } @@ -46,10 +46,10 @@ impl SyntaxNode { } /// The number of descendants, including the node itself. - pub fn count(&self) -> usize { + pub fn descendants(&self) -> usize { match self { - SyntaxNode::Inner(inner) => inner.count(), - SyntaxNode::Leaf(_) => 1, + Self::Inner(inner) => inner.descendants(), + Self::Leaf(_) => 1, } } @@ -58,35 +58,11 @@ impl SyntaxNode { self.data().span() } - /// If the span points into this node, convert it to a byte range. - pub fn range(&self, span: Span, offset: usize) -> Option> { - match self { - SyntaxNode::Inner(inner) => inner.range(span, offset), - SyntaxNode::Leaf(leaf) => { - (span == leaf.span).then(|| offset .. offset + self.len()) - } - } - } - /// The node's children. pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { match self { - SyntaxNode::Inner(inner) => inner.children(), - SyntaxNode::Leaf(_) => [].iter(), - } - } - - /// Returns all leaf descendants of this node (may include itself). - /// - /// This method is slow and only intended for testing. - pub fn leafs(&self) -> Vec { - if match self { - SyntaxNode::Inner(inner) => inner.children().len() == 0, - SyntaxNode::Leaf(_) => true, - } { - vec![self.clone()] - } else { - self.children().flat_map(Self::leafs).collect() + Self::Inner(inner) => inner.children(), + Self::Leaf(_) => [].iter(), } } @@ -146,19 +122,51 @@ impl SyntaxNode { } } - /// Assign spans to each node. - pub fn number(&mut self, id: SourceId, from: u64) { - match self { - SyntaxNode::Inner(inner) => Arc::make_mut(inner).number(id, from), - SyntaxNode::Leaf(leaf) => leaf.number(id, from), - } - } - /// Set a synthetic node id for the node and all its descendants. pub fn synthesize(&mut self, span: Span) { match self { - SyntaxNode::Inner(inner) => Arc::make_mut(inner).synthesize(span), - SyntaxNode::Leaf(leaf) => leaf.synthesize(span), + Self::Inner(inner) => Arc::make_mut(inner).synthesize(span), + Self::Leaf(leaf) => leaf.synthesize(span), + } + } + + /// Assign spans to each node. + pub fn numberize(&mut self, id: SourceId, within: Range) -> NumberingResult { + match self { + Self::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within), + Self::Leaf(leaf) => leaf.numberize(id, within), + } + } + + /// The upper bound of assigned numbers in this subtree. + pub fn upper(&self) -> u64 { + match self { + Self::Inner(inner) => inner.upper(), + Self::Leaf(leaf) => leaf.span().number() + 1, + } + } + + /// If the span points into this node, convert it to a byte range. + pub fn range(&self, span: Span, offset: usize) -> Option> { + match self { + Self::Inner(inner) => inner.range(span, offset), + Self::Leaf(leaf) => { + (span == leaf.span).then(|| offset .. offset + self.len()) + } + } + } + + /// Returns all leaf descendants of this node (may include itself). + /// + /// This method is slow and only intended for testing. + pub fn leafs(&self) -> Vec { + if match self { + Self::Inner(inner) => inner.children.is_empty(), + Self::Leaf(_) => true, + } { + vec![self.clone()] + } else { + self.children().flat_map(Self::leafs).collect() } } } @@ -179,14 +187,16 @@ impl Debug for SyntaxNode { } /// An inner node in the untyped syntax tree. -#[derive(Clone, PartialEq, Hash)] +#[derive(Clone, Hash)] pub struct InnerNode { /// Node metadata. data: NodeData, /// The number of nodes in the whole subtree, including this node. - count: usize, + descendants: usize, /// Whether this node or any of its children are erroneous. erroneous: bool, + /// The upper bound of this node's numbering range. + upper: u64, /// This node's children, losslessly make up this node. children: Vec, } @@ -200,19 +210,20 @@ impl InnerNode { /// Creates a new node with the given kind and children. pub fn with_children(kind: NodeKind, children: Vec) -> Self { let mut len = 0; - let mut count = 1; + let mut descendants = 1; let mut erroneous = kind.is_error(); for child in &children { len += child.len(); - count += child.count(); + descendants += child.descendants(); erroneous |= child.erroneous(); } Self { data: NodeData::new(kind, len), - count, + descendants, erroneous, + upper: 0, children, } } @@ -238,8 +249,69 @@ impl InnerNode { } /// The number of descendants, including the node itself. - pub fn count(&self) -> usize { - self.count + pub fn descendants(&self) -> usize { + self.descendants + } + + /// The node's children. + pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { + self.children.iter() + } + + /// Set a synthetic node id for the node and all its descendants. + pub fn synthesize(&mut self, span: Span) { + self.data.synthesize(span); + for child in &mut self.children { + child.synthesize(span); + } + } + + /// Assign spans to this subtree or some a range of children. + pub fn numberize( + &mut self, + id: SourceId, + range: Option>, + within: Range, + ) -> NumberingResult { + let descendants = match &range { + Some(range) if range.is_empty() => return Ok(()), + Some(range) => self.children[range.clone()] + .iter() + .map(SyntaxNode::descendants) + .sum::(), + None => self.descendants, + }; + + let space = within.end - within.start; + let mut stride = space / (2 * descendants as u64); + if stride == 0 { + stride = space / self.descendants as u64; + if stride == 0 { + return Err(Unnumberable); + } + } + + let mut start = within.start; + if range.is_none() { + let end = start + stride; + self.data.numberize(id, start .. end)?; + self.upper = within.end; + start = end; + } + + let len = self.children.len(); + for child in &mut self.children[range.unwrap_or(0 .. len)] { + let end = start + child.descendants() as u64 * stride; + child.numberize(id, start .. end)?; + start = end; + } + + Ok(()) + } + + /// The maximum assigned number in this subtree. + pub fn upper(&self) -> u64 { + self.upper } /// If the span points into this node, convert it to a byte range. @@ -276,41 +348,19 @@ impl InnerNode { None } - /// The node's children. - pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { - self.children.iter() - } - - /// Assign spans to each node. - pub fn number(&mut self, id: SourceId, mut from: u64) { - self.data.number(id, from); - from += 1; - - for child in &mut self.children { - child.number(id, from); - from += child.count() as u64; - } - } - - /// Set a synthetic node id for the node and all its descendants. - pub fn synthesize(&mut self, span: Span) { - self.data.synthesize(span); - for child in &mut self.children { - child.synthesize(span); - } - } - /// The node's children, mutably. pub(crate) fn children_mut(&mut self) -> &mut [SyntaxNode] { &mut self.children } /// Replaces a range of children with some replacement. + /// + /// May have mutated the children if it returns `Err(_)`. pub(crate) fn replace_children( &mut self, - range: Range, + mut range: Range, replacement: Vec, - ) { + ) -> NumberingResult { let superseded = &self.children[range.clone()]; // Compute the new byte length. @@ -318,10 +368,10 @@ impl InnerNode { + replacement.iter().map(SyntaxNode::len).sum::() - superseded.iter().map(SyntaxNode::len).sum::(); - // Compute the new descendant count. - self.count = self.count - + replacement.iter().map(SyntaxNode::count).sum::() - - superseded.iter().map(SyntaxNode::count).sum::(); + // Compute the new number of descendants. + self.descendants = self.descendants + + replacement.iter().map(SyntaxNode::descendants).sum::() + - superseded.iter().map(SyntaxNode::descendants).sum::(); // Determine whether we're still erroneous after the replacement. That's // the case if @@ -329,11 +379,55 @@ impl InnerNode { // - or if we were erroneous before due to a non-superseded node. self.erroneous = replacement.iter().any(SyntaxNode::erroneous) || (self.erroneous - && (self.children[.. range.start].iter().any(SyntaxNode::erroneous) - || self.children[range.end ..].iter().any(SyntaxNode::erroneous))); + && (self.children[.. range.start].iter().any(SyntaxNode::erroneous)) + || self.children[range.end ..].iter().any(SyntaxNode::erroneous)); // Perform the replacement. - self.children.splice(range, replacement); + let replacement_count = replacement.len(); + self.children.splice(range.clone(), replacement); + range.end = range.start + replacement_count; + + // Renumber the new children. Retries until it works taking + // exponentially more children into account. + let max_left = range.start; + let max_right = self.children.len() - range.end; + let mut left = 0; + let mut right = 0; + loop { + let renumber = range.start - left .. range.end + right; + + // The minimum assignable number is the upper bound of the node + // right before the to-be-renumbered children (or the number after + // this inner node's span if renumbering starts at the first child). + let start_number = renumber + .start + .checked_sub(1) + .and_then(|i| self.children.get(i)) + .map_or(self.span().number() + 1, |child| child.upper()); + + // The upper bound of the is the span of the first child after the to-be-renumbered children + // or this node's upper bound. + let end_number = self + .children + .get(renumber.end) + .map_or(self.upper(), |next| next.span().number()); + + // Try to renumber within the number range. + let within = start_number .. end_number; + let id = self.span().source(); + if self.numberize(id, Some(renumber), within).is_ok() { + return Ok(()); + } + + // Doesn't even work with all children, so we give up. + if left == max_left && right == max_right { + return Err(Unnumberable); + } + + // Exponential expansion to both sides. + left = (left + 1).next_power_of_two().min(max_left); + right = (right + 1).next_power_of_two().min(max_right); + } } /// Update the length of this node given the old and new length of @@ -342,11 +436,11 @@ impl InnerNode { &mut self, prev_len: usize, new_len: usize, - prev_count: usize, - new_count: usize, + prev_descendants: usize, + new_descendants: usize, ) { self.data.len = self.data.len + new_len - prev_len; - self.count = self.count + new_count - prev_count; + self.descendants = self.descendants + new_descendants - prev_descendants; self.erroneous = self.children.iter().any(SyntaxNode::erroneous); } } @@ -374,6 +468,15 @@ impl Debug for InnerNode { } } +impl PartialEq for InnerNode { + fn eq(&self, other: &Self) -> bool { + self.data == other.data + && self.descendants == other.descendants + && self.erroneous == other.erroneous + && self.children == other.children + } +} + /// Data shared between inner and leaf nodes. #[derive(Clone, Hash)] pub struct NodeData { @@ -407,15 +510,20 @@ impl NodeData { self.span } - /// Assign spans to each node. - pub fn number(&mut self, id: SourceId, from: u64) { - self.span = Span::new(id, from); - } - /// Set a synthetic span for the node. pub fn synthesize(&mut self, span: Span) { self.span = span; } + + /// Assign a span to the node. + pub fn numberize(&mut self, id: SourceId, within: Range) -> NumberingResult { + if within.start < within.end { + self.span = Span::new(id, (within.start + within.end) / 2); + Ok(()) + } else { + Err(Unnumberable) + } + } } impl From for SyntaxNode { diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 496d699ca..d58ee3261 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -1,5 +1,6 @@ -use std::fmt::{self, Debug, Formatter}; +use std::fmt::{self, Debug, Display, Formatter}; use std::num::NonZeroU64; +use std::ops::Range; use crate::syntax::SourceId; @@ -61,12 +62,15 @@ impl Span { // Number of bits for and minimum and maximum numbers assigned to nodes. const BITS: usize = 48; const DETACHED: u64 = 1; - pub(crate) const MIN_NUMBER: u64 = 2; - pub(crate) const MAX_NUMBER: u64 = (1 << Self::BITS) - 1; + const MIN: u64 = 2; + const MAX: u64 = (1 << Self::BITS) - 1; + + // The root numbering range. + pub const FULL: Range = Self::MIN .. Self::MAX + 1; /// Create a new span from a source id and a unique number. - pub const fn new(id: SourceId, number: u64) -> Self { - assert!(number >= Self::MIN_NUMBER && number <= Self::MAX_NUMBER); + pub fn new(id: SourceId, number: u64) -> Self { + assert!(number >= Self::MIN && number <= Self::MAX, "{number}"); let bits = ((id.into_raw() as u64) << Self::BITS) | number; Self(convert(bits)) } @@ -83,7 +87,7 @@ impl Span { /// The unique number of the span within the source file. pub const fn number(self) -> u64 { - self.0.get() & Self::MAX_NUMBER + self.0.get() & ((1 << Self::BITS) - 1) } } @@ -94,3 +98,18 @@ const fn convert(v: u64) -> NonZeroU64 { None => unreachable!(), } } + +/// Result of numbering a node within an interval. +pub type NumberingResult = Result<(), Unnumberable>; + +/// Indicates that a node cannot be numbered within a given interval. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct Unnumberable; + +impl Display for Unnumberable { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.pad("cannot number within this interval") + } +} + +impl std::error::Error for Unnumberable {} diff --git a/tests/typeset.rs b/tests/typeset.rs index 6250ac77b..614449a16 100644 --- a/tests/typeset.rs +++ b/tests/typeset.rs @@ -295,11 +295,10 @@ fn test_part( println!("Syntax Tree: {:#?}", source.root()) } - test_spans(source.root()); - let (local_compare_ref, mut ref_errors) = parse_metadata(&source); let compare_ref = local_compare_ref.unwrap_or(compare_ref); + ok &= test_spans(source.root()); ok &= test_reparse(ctx.sources.get(id).src(), i, rng); let (mut frames, errors) = match typst::typeset(ctx, id) { @@ -461,21 +460,23 @@ fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { let incr_root = incr_source.root(); let ref_source = SourceFile::detached(edited_src); let ref_root = ref_source.root(); - let same = incr_root == ref_root; - if !same { + let mut ok = incr_root == ref_root; + if !ok { println!( " Subtest {i} reparse differs from clean parse when inserting '{with}' at {}-{} ❌\n", replace.start, replace.end, ); println!(" Expected reference tree:\n{ref_root:#?}\n"); println!(" Found incremental tree:\n{incr_root:#?}"); - println!("Full source ({}):\n\"{edited_src:?}\"", edited_src.len()); + println!( + " Full source ({}):\n\"{edited_src:?}\"", + edited_src.len() + ); } - test_spans(ref_root); - test_spans(incr_root); - - same + ok &= test_spans(ref_root); + ok &= test_spans(incr_root); + ok }; let mut pick = |range: Range| { @@ -506,18 +507,31 @@ fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { } /// Ensure that all spans are properly ordered (and therefore unique). -fn test_spans(root: &SyntaxNode) { - test_spans_impl(root, 0 .. u64::MAX); +#[track_caller] +fn test_spans(root: &SyntaxNode) -> bool { + test_spans_impl(root, 0 .. u64::MAX) } -fn test_spans_impl(root: &SyntaxNode, within: Range) { - assert!(within.contains(&root.span().number()), "wrong span order"); - let start = root.span().number() + 1; - let mut children = root.children().peekable(); +#[track_caller] +fn test_spans_impl(node: &SyntaxNode, within: Range) -> bool { + if !within.contains(&node.span().number()) { + eprintln!(" Node: {node:#?}"); + eprintln!( + " Wrong span order: {} not in {within:?} ❌", + node.span().number(), + ); + } + + let start = node.span().number() + 1; + let mut children = node.children().peekable(); while let Some(child) = children.next() { let end = children.peek().map_or(within.end, |next| next.span().number()); - test_spans_impl(child, start .. end); + if !test_spans_impl(child, start .. end) { + return false; + } } + + true } /// Draw all frames into one image with padding in between.