Incremental renumbering

This commit is contained in:
Laurenz 2022-06-01 13:49:02 +02:00
parent c56a8c41f0
commit 94b375ce55
5 changed files with 264 additions and 121 deletions

View File

@ -1,7 +1,7 @@
use std::ops::Range;
use std::sync::Arc;
use crate::syntax::{InnerNode, NodeKind, SyntaxNode};
use crate::syntax::{InnerNode, NodeKind, Span, SyntaxNode};
use super::{
is_newline, parse, reparse_code_block, reparse_content_block, reparse_markup_elements,
@ -26,7 +26,9 @@ pub fn reparse(
}
}
let id = root.span().source();
*root = parse(src);
root.numberize(id, Span::FULL).unwrap();
0 .. src.len()
}
@ -139,7 +141,7 @@ impl Reparser<'_> {
if let SearchState::Contained(pos) = search {
let child = &mut node.children_mut()[pos.idx];
let prev_len = child.len();
let prev_count = child.count();
let prev_descendants = child.descendants();
if let Some(range) = match child {
SyntaxNode::Inner(node) => {
@ -148,8 +150,8 @@ impl Reparser<'_> {
SyntaxNode::Leaf(_) => None,
} {
let new_len = child.len();
let new_count = child.count();
node.update_parent(prev_len, new_len, prev_count, new_count);
let new_descendants = child.descendants();
node.update_parent(prev_len, new_len, prev_descendants, new_descendants);
return Some(range);
}
@ -259,7 +261,9 @@ impl Reparser<'_> {
return None;
}
node.replace_children(superseded_start .. superseded_start + amount, newborns);
node.replace_children(superseded_start .. superseded_start + amount, newborns)
.ok()?;
Some(newborn_span)
}
}

View File

@ -169,7 +169,7 @@ impl SourceFile {
lines.extend(Line::iter(0, 0, &src));
let mut root = parse(&src);
root.number(id, Span::MIN_NUMBER);
root.numberize(id, Span::FULL).unwrap();
Self {
id,
@ -243,7 +243,7 @@ impl SourceFile {
self.lines = vec![Line { byte_idx: 0, utf16_idx: 0 }];
self.lines.extend(Line::iter(0, 0, &self.src));
self.root = parse(&self.src);
self.root.number(self.id(), Span::MIN_NUMBER);
self.root.numberize(self.id(), Span::FULL).unwrap();
self.rev = self.rev.wrapping_add(1);
}
@ -277,9 +277,7 @@ impl SourceFile {
));
// Incrementally reparse the replaced range.
let range = reparse(&mut self.root, &self.src, replace, with.len());
self.root.number(self.id(), Span::MIN_NUMBER);
range
reparse(&mut self.root, &self.src, replace, with.len())
}
/// Get the length of the file in bytes.

View File

@ -30,8 +30,8 @@ impl SyntaxNode {
/// Returns the metadata of the node.
pub fn data(&self) -> &NodeData {
match self {
SyntaxNode::Inner(inner) => &inner.data,
SyntaxNode::Leaf(leaf) => leaf,
Self::Inner(inner) => &inner.data,
Self::Leaf(leaf) => leaf,
}
}
@ -46,10 +46,10 @@ impl SyntaxNode {
}
/// The number of descendants, including the node itself.
pub fn count(&self) -> usize {
pub fn descendants(&self) -> usize {
match self {
SyntaxNode::Inner(inner) => inner.count(),
SyntaxNode::Leaf(_) => 1,
Self::Inner(inner) => inner.descendants(),
Self::Leaf(_) => 1,
}
}
@ -58,35 +58,11 @@ impl SyntaxNode {
self.data().span()
}
/// If the span points into this node, convert it to a byte range.
pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
match self {
SyntaxNode::Inner(inner) => inner.range(span, offset),
SyntaxNode::Leaf(leaf) => {
(span == leaf.span).then(|| offset .. offset + self.len())
}
}
}
/// The node's children.
pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
match self {
SyntaxNode::Inner(inner) => inner.children(),
SyntaxNode::Leaf(_) => [].iter(),
}
}
/// Returns all leaf descendants of this node (may include itself).
///
/// This method is slow and only intended for testing.
pub fn leafs(&self) -> Vec<Self> {
if match self {
SyntaxNode::Inner(inner) => inner.children().len() == 0,
SyntaxNode::Leaf(_) => true,
} {
vec![self.clone()]
} else {
self.children().flat_map(Self::leafs).collect()
Self::Inner(inner) => inner.children(),
Self::Leaf(_) => [].iter(),
}
}
@ -146,19 +122,51 @@ impl SyntaxNode {
}
}
/// Assign spans to each node.
pub fn number(&mut self, id: SourceId, from: u64) {
match self {
SyntaxNode::Inner(inner) => Arc::make_mut(inner).number(id, from),
SyntaxNode::Leaf(leaf) => leaf.number(id, from),
}
}
/// Set a synthetic node id for the node and all its descendants.
pub fn synthesize(&mut self, span: Span) {
match self {
SyntaxNode::Inner(inner) => Arc::make_mut(inner).synthesize(span),
SyntaxNode::Leaf(leaf) => leaf.synthesize(span),
Self::Inner(inner) => Arc::make_mut(inner).synthesize(span),
Self::Leaf(leaf) => leaf.synthesize(span),
}
}
/// Assign spans to each node.
pub fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult {
match self {
Self::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within),
Self::Leaf(leaf) => leaf.numberize(id, within),
}
}
/// The upper bound of assigned numbers in this subtree.
pub fn upper(&self) -> u64 {
match self {
Self::Inner(inner) => inner.upper(),
Self::Leaf(leaf) => leaf.span().number() + 1,
}
}
/// If the span points into this node, convert it to a byte range.
pub fn range(&self, span: Span, offset: usize) -> Option<Range<usize>> {
match self {
Self::Inner(inner) => inner.range(span, offset),
Self::Leaf(leaf) => {
(span == leaf.span).then(|| offset .. offset + self.len())
}
}
}
/// Returns all leaf descendants of this node (may include itself).
///
/// This method is slow and only intended for testing.
pub fn leafs(&self) -> Vec<Self> {
if match self {
Self::Inner(inner) => inner.children.is_empty(),
Self::Leaf(_) => true,
} {
vec![self.clone()]
} else {
self.children().flat_map(Self::leafs).collect()
}
}
}
@ -179,14 +187,16 @@ impl Debug for SyntaxNode {
}
/// An inner node in the untyped syntax tree.
#[derive(Clone, PartialEq, Hash)]
#[derive(Clone, Hash)]
pub struct InnerNode {
/// Node metadata.
data: NodeData,
/// The number of nodes in the whole subtree, including this node.
count: usize,
descendants: usize,
/// Whether this node or any of its children are erroneous.
erroneous: bool,
/// The upper bound of this node's numbering range.
upper: u64,
/// This node's children, losslessly make up this node.
children: Vec<SyntaxNode>,
}
@ -200,19 +210,20 @@ impl InnerNode {
/// Creates a new node with the given kind and children.
pub fn with_children(kind: NodeKind, children: Vec<SyntaxNode>) -> Self {
let mut len = 0;
let mut count = 1;
let mut descendants = 1;
let mut erroneous = kind.is_error();
for child in &children {
len += child.len();
count += child.count();
descendants += child.descendants();
erroneous |= child.erroneous();
}
Self {
data: NodeData::new(kind, len),
count,
descendants,
erroneous,
upper: 0,
children,
}
}
@ -238,8 +249,69 @@ impl InnerNode {
}
/// The number of descendants, including the node itself.
pub fn count(&self) -> usize {
self.count
pub fn descendants(&self) -> usize {
self.descendants
}
/// The node's children.
pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
self.children.iter()
}
/// Set a synthetic node id for the node and all its descendants.
pub fn synthesize(&mut self, span: Span) {
self.data.synthesize(span);
for child in &mut self.children {
child.synthesize(span);
}
}
/// Assign spans to this subtree or some a range of children.
pub fn numberize(
&mut self,
id: SourceId,
range: Option<Range<usize>>,
within: Range<u64>,
) -> NumberingResult {
let descendants = match &range {
Some(range) if range.is_empty() => return Ok(()),
Some(range) => self.children[range.clone()]
.iter()
.map(SyntaxNode::descendants)
.sum::<usize>(),
None => self.descendants,
};
let space = within.end - within.start;
let mut stride = space / (2 * descendants as u64);
if stride == 0 {
stride = space / self.descendants as u64;
if stride == 0 {
return Err(Unnumberable);
}
}
let mut start = within.start;
if range.is_none() {
let end = start + stride;
self.data.numberize(id, start .. end)?;
self.upper = within.end;
start = end;
}
let len = self.children.len();
for child in &mut self.children[range.unwrap_or(0 .. len)] {
let end = start + child.descendants() as u64 * stride;
child.numberize(id, start .. end)?;
start = end;
}
Ok(())
}
/// The maximum assigned number in this subtree.
pub fn upper(&self) -> u64 {
self.upper
}
/// If the span points into this node, convert it to a byte range.
@ -276,41 +348,19 @@ impl InnerNode {
None
}
/// The node's children.
pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> {
self.children.iter()
}
/// Assign spans to each node.
pub fn number(&mut self, id: SourceId, mut from: u64) {
self.data.number(id, from);
from += 1;
for child in &mut self.children {
child.number(id, from);
from += child.count() as u64;
}
}
/// Set a synthetic node id for the node and all its descendants.
pub fn synthesize(&mut self, span: Span) {
self.data.synthesize(span);
for child in &mut self.children {
child.synthesize(span);
}
}
/// The node's children, mutably.
pub(crate) fn children_mut(&mut self) -> &mut [SyntaxNode] {
&mut self.children
}
/// Replaces a range of children with some replacement.
///
/// May have mutated the children if it returns `Err(_)`.
pub(crate) fn replace_children(
&mut self,
range: Range<usize>,
mut range: Range<usize>,
replacement: Vec<SyntaxNode>,
) {
) -> NumberingResult {
let superseded = &self.children[range.clone()];
// Compute the new byte length.
@ -318,10 +368,10 @@ impl InnerNode {
+ replacement.iter().map(SyntaxNode::len).sum::<usize>()
- superseded.iter().map(SyntaxNode::len).sum::<usize>();
// Compute the new descendant count.
self.count = self.count
+ replacement.iter().map(SyntaxNode::count).sum::<usize>()
- superseded.iter().map(SyntaxNode::count).sum::<usize>();
// Compute the new number of descendants.
self.descendants = self.descendants
+ replacement.iter().map(SyntaxNode::descendants).sum::<usize>()
- superseded.iter().map(SyntaxNode::descendants).sum::<usize>();
// Determine whether we're still erroneous after the replacement. That's
// the case if
@ -329,11 +379,55 @@ impl InnerNode {
// - or if we were erroneous before due to a non-superseded node.
self.erroneous = replacement.iter().any(SyntaxNode::erroneous)
|| (self.erroneous
&& (self.children[.. range.start].iter().any(SyntaxNode::erroneous)
|| self.children[range.end ..].iter().any(SyntaxNode::erroneous)));
&& (self.children[.. range.start].iter().any(SyntaxNode::erroneous))
|| self.children[range.end ..].iter().any(SyntaxNode::erroneous));
// Perform the replacement.
self.children.splice(range, replacement);
let replacement_count = replacement.len();
self.children.splice(range.clone(), replacement);
range.end = range.start + replacement_count;
// Renumber the new children. Retries until it works taking
// exponentially more children into account.
let max_left = range.start;
let max_right = self.children.len() - range.end;
let mut left = 0;
let mut right = 0;
loop {
let renumber = range.start - left .. range.end + right;
// The minimum assignable number is the upper bound of the node
// right before the to-be-renumbered children (or the number after
// this inner node's span if renumbering starts at the first child).
let start_number = renumber
.start
.checked_sub(1)
.and_then(|i| self.children.get(i))
.map_or(self.span().number() + 1, |child| child.upper());
// The upper bound of the is the span of the first child after the to-be-renumbered children
// or this node's upper bound.
let end_number = self
.children
.get(renumber.end)
.map_or(self.upper(), |next| next.span().number());
// Try to renumber within the number range.
let within = start_number .. end_number;
let id = self.span().source();
if self.numberize(id, Some(renumber), within).is_ok() {
return Ok(());
}
// Doesn't even work with all children, so we give up.
if left == max_left && right == max_right {
return Err(Unnumberable);
}
// Exponential expansion to both sides.
left = (left + 1).next_power_of_two().min(max_left);
right = (right + 1).next_power_of_two().min(max_right);
}
}
/// Update the length of this node given the old and new length of
@ -342,11 +436,11 @@ impl InnerNode {
&mut self,
prev_len: usize,
new_len: usize,
prev_count: usize,
new_count: usize,
prev_descendants: usize,
new_descendants: usize,
) {
self.data.len = self.data.len + new_len - prev_len;
self.count = self.count + new_count - prev_count;
self.descendants = self.descendants + new_descendants - prev_descendants;
self.erroneous = self.children.iter().any(SyntaxNode::erroneous);
}
}
@ -374,6 +468,15 @@ impl Debug for InnerNode {
}
}
impl PartialEq for InnerNode {
fn eq(&self, other: &Self) -> bool {
self.data == other.data
&& self.descendants == other.descendants
&& self.erroneous == other.erroneous
&& self.children == other.children
}
}
/// Data shared between inner and leaf nodes.
#[derive(Clone, Hash)]
pub struct NodeData {
@ -407,15 +510,20 @@ impl NodeData {
self.span
}
/// Assign spans to each node.
pub fn number(&mut self, id: SourceId, from: u64) {
self.span = Span::new(id, from);
}
/// Set a synthetic span for the node.
pub fn synthesize(&mut self, span: Span) {
self.span = span;
}
/// Assign a span to the node.
pub fn numberize(&mut self, id: SourceId, within: Range<u64>) -> NumberingResult {
if within.start < within.end {
self.span = Span::new(id, (within.start + within.end) / 2);
Ok(())
} else {
Err(Unnumberable)
}
}
}
impl From<NodeData> for SyntaxNode {

View File

@ -1,5 +1,6 @@
use std::fmt::{self, Debug, Formatter};
use std::fmt::{self, Debug, Display, Formatter};
use std::num::NonZeroU64;
use std::ops::Range;
use crate::syntax::SourceId;
@ -61,12 +62,15 @@ impl Span {
// Number of bits for and minimum and maximum numbers assigned to nodes.
const BITS: usize = 48;
const DETACHED: u64 = 1;
pub(crate) const MIN_NUMBER: u64 = 2;
pub(crate) const MAX_NUMBER: u64 = (1 << Self::BITS) - 1;
const MIN: u64 = 2;
const MAX: u64 = (1 << Self::BITS) - 1;
// The root numbering range.
pub const FULL: Range<u64> = Self::MIN .. Self::MAX + 1;
/// Create a new span from a source id and a unique number.
pub const fn new(id: SourceId, number: u64) -> Self {
assert!(number >= Self::MIN_NUMBER && number <= Self::MAX_NUMBER);
pub fn new(id: SourceId, number: u64) -> Self {
assert!(number >= Self::MIN && number <= Self::MAX, "{number}");
let bits = ((id.into_raw() as u64) << Self::BITS) | number;
Self(convert(bits))
}
@ -83,7 +87,7 @@ impl Span {
/// The unique number of the span within the source file.
pub const fn number(self) -> u64 {
self.0.get() & Self::MAX_NUMBER
self.0.get() & ((1 << Self::BITS) - 1)
}
}
@ -94,3 +98,18 @@ const fn convert(v: u64) -> NonZeroU64 {
None => unreachable!(),
}
}
/// Result of numbering a node within an interval.
pub type NumberingResult = Result<(), Unnumberable>;
/// Indicates that a node cannot be numbered within a given interval.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Unnumberable;
impl Display for Unnumberable {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
f.pad("cannot number within this interval")
}
}
impl std::error::Error for Unnumberable {}

View File

@ -295,11 +295,10 @@ fn test_part(
println!("Syntax Tree: {:#?}", source.root())
}
test_spans(source.root());
let (local_compare_ref, mut ref_errors) = parse_metadata(&source);
let compare_ref = local_compare_ref.unwrap_or(compare_ref);
ok &= test_spans(source.root());
ok &= test_reparse(ctx.sources.get(id).src(), i, rng);
let (mut frames, errors) = match typst::typeset(ctx, id) {
@ -461,21 +460,23 @@ fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool {
let incr_root = incr_source.root();
let ref_source = SourceFile::detached(edited_src);
let ref_root = ref_source.root();
let same = incr_root == ref_root;
if !same {
let mut ok = incr_root == ref_root;
if !ok {
println!(
" Subtest {i} reparse differs from clean parse when inserting '{with}' at {}-{} ❌\n",
replace.start, replace.end,
);
println!(" Expected reference tree:\n{ref_root:#?}\n");
println!(" Found incremental tree:\n{incr_root:#?}");
println!("Full source ({}):\n\"{edited_src:?}\"", edited_src.len());
println!(
" Full source ({}):\n\"{edited_src:?}\"",
edited_src.len()
);
}
test_spans(ref_root);
test_spans(incr_root);
same
ok &= test_spans(ref_root);
ok &= test_spans(incr_root);
ok
};
let mut pick = |range: Range<usize>| {
@ -506,18 +507,31 @@ fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool {
}
/// Ensure that all spans are properly ordered (and therefore unique).
fn test_spans(root: &SyntaxNode) {
test_spans_impl(root, 0 .. u64::MAX);
#[track_caller]
fn test_spans(root: &SyntaxNode) -> bool {
test_spans_impl(root, 0 .. u64::MAX)
}
fn test_spans_impl(root: &SyntaxNode, within: Range<u64>) {
assert!(within.contains(&root.span().number()), "wrong span order");
let start = root.span().number() + 1;
let mut children = root.children().peekable();
#[track_caller]
fn test_spans_impl(node: &SyntaxNode, within: Range<u64>) -> bool {
if !within.contains(&node.span().number()) {
eprintln!(" Node: {node:#?}");
eprintln!(
" Wrong span order: {} not in {within:?} ❌",
node.span().number(),
);
}
let start = node.span().number() + 1;
let mut children = node.children().peekable();
while let Some(child) = children.next() {
let end = children.peek().map_or(within.end, |next| next.span().number());
test_spans_impl(child, start .. end);
if !test_spans_impl(child, start .. end) {
return false;
}
}
true
}
/// Draw all frames into one image with padding in between.