From 1e4cab393e55df8875c6303ebb7bde8f09f911c9 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 2 Nov 2021 12:06:22 +0100 Subject: [PATCH 01/16] Introduce incremental parsing --- src/parse/mod.rs | 14 +++++++ src/source.rs | 54 ++++++++++++++++++++++++-- src/syntax/mod.rs | 94 ++++++++++++++++++++++++++++++++++++++++++++++ src/syntax/span.rs | 11 ++++++ 4 files changed, 170 insertions(+), 3 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 10aaad234..1ab2fb15d 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -25,6 +25,20 @@ pub fn parse(src: &str) -> Rc { } } +/// Parse a block. Returns `Some` if there was only one block. +pub fn parse_block(source: &str) -> Option> { + let mut p = Parser::new(source); + block(&mut p); + if p.eof() { + match p.finish().into_iter().next() { + Some(Green::Node(node)) => Some(node), + _ => unreachable!(), + } + } else { + None + } +} + /// Parse markup. fn markup(p: &mut Parser) { markup_while(p, true, &mut |_| true) diff --git a/src/source.rs b/src/source.rs index 432688a0b..069edd29f 100644 --- a/src/source.rs +++ b/src/source.rs @@ -268,7 +268,7 @@ impl SourceFile { /// This panics if the `replace` range is out of bounds. pub fn edit(&mut self, replace: Range, with: &str) { let start = replace.start; - self.src.replace_range(replace, with); + self.src.replace_range(replace.clone(), with); // Remove invalidated line starts. let line = self.byte_to_line(start).unwrap(); @@ -283,8 +283,39 @@ impl SourceFile { self.line_starts .extend(newlines(&self.src[start ..]).map(|idx| start + idx)); - // Reparse. - self.root = parse(&self.src); + // Update the root node. 
+ #[cfg(not(feature = "parse-cache"))] + { + self.root = parse(&self.src); + } + + #[cfg(feature = "parse-cache")] + { + let insertion_span = replace.into_span(self.id); + let incremental_target = + Rc::make_mut(&mut self.root).incremental_parent(insertion_span); + + match incremental_target { + Some((child_idx, parent, offset)) => { + let child = &parent.children()[child_idx]; + let src = &self.src[offset .. offset + child.len()]; + let parse_res = match child.kind() { + NodeKind::Markup => Some(parse(src)), + _ => parse_block(src), + } + .and_then(|x| x.data().erroneous().not().then(|| x)); + + if let Some(parse_res) = parse_res { + parent.replace_child(child_idx, parse_res); + } else { + self.root = parse(&self.src); + } + } + None => { + self.root = parse(&self.src); + } + } + } } /// Provide highlighting categories for the given range of the source file. @@ -473,4 +504,21 @@ mod tests { // Test removing everything. test(TEST, 0 .. 21, "", ""); } + + #[test] + fn test_source_file_edit_2() { + #[track_caller] + fn test(prev: &str, range: Range, with: &str, after: &str) { + let mut source = SourceFile::detached(prev); + let result = SourceFile::detached(after); + dbg!(Green::from(source.root.clone())); + source.edit(range, with); + assert_eq!(source.src, result.src); + assert_eq!(source.line_starts, result.line_starts); + dbg!(Green::from(source.root)); + } + + // Test inserting at the begining. + test("abc #f()[def] ghi", 10 .. 11, "xyz", "abc #f()[dxyzf] ghi"); + } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index d9ad42a88..b0911c63d 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -127,6 +127,92 @@ impl GreenNode { pub fn children(&self) -> &[Green] { &self.children } + + /// The node's children, mutably. + pub fn children_mut(&mut self) -> &mut [Green] { + &mut self.children + } + + /// The node's metadata. + pub fn data(&self) -> &GreenData { + &self.data + } + + /// The node's type. 
+ pub fn kind(&self) -> &NodeKind { + self.data().kind() + } + + /// The node's length. + pub fn len(&self) -> usize { + self.data().len() + } + + /// Find the parent of the deepest incremental-safe node and the index of + /// the found child. + pub fn incremental_parent( + &mut self, + span: Span, + ) -> Option<(usize, &mut GreenNode, usize)> { + self.incremental_parent_internal(span, 0) + } + + fn incremental_parent_internal( + &mut self, + span: Span, + mut offset: usize, + ) -> Option<(usize, &mut GreenNode, usize)> { + let x = unsafe { &mut *(self as *mut _) }; + + for (i, child) in self.children.iter_mut().enumerate() { + match child { + Green::Token(n) => { + if offset < span.start { + // the token is strictly before the span + offset += n.len(); + } else { + // the token is within or after the span; tokens are + // never safe, so we return. + return None; + } + } + Green::Node(n) => { + let end = n.len() + offset; + if offset < span.start && end < span.start { + // the node is strictly before the span + offset += n.len(); + } else if span.start >= offset + && span.start < end + && span.end <= end + && span.end > offset + { + // the node is within the span. + if n.kind().is_incremental_safe() { + let res = + Rc::make_mut(n).incremental_parent_internal(span, offset); + if res.is_none() { + return Some((i, x, offset)); + } + } else { + return Rc::make_mut(n) + .incremental_parent_internal(span, offset); + } + } else { + // the node is overlapping or after after the span; nodes are + // never safe, so we return. + return None; + } + } + } + } + + return None; + } + + /// Replace one of the node's children. + pub fn replace_child(&mut self, index: usize, child: impl Into) { + self.children[index] = child.into(); + } } impl From for Green { @@ -653,6 +739,14 @@ impl NodeKind { matches!(self, NodeKind::Error(_, _) | NodeKind::Unknown(_)) } + /// Whether it is safe to do incremental parsing on this node. 
+ pub fn is_incremental_safe(&self) -> bool { + match self { + Self::Block | Self::Markup => true, + _ => false, + } + } + /// A human-readable name for the kind. pub fn as_str(&self) -> &'static str { match self { diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 4d5b88195..a707d3d93 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -125,6 +125,17 @@ impl Span { *self = self.join(other) } + /// Create a new span with n characters inserted inside of this span. + pub fn inserted(mut self, other: Self, n: usize) -> Self { + if !self.contains(other.start) || !self.contains(other.end) { + panic!(); + } + + let len_change = (n as isize - other.len() as isize) as usize; + self.end += len_change; + self + } + /// Test whether a position is within the span. pub fn contains(&self, pos: usize) -> bool { self.start <= pos && self.end >= pos From 7016ab0d123ba06d0bbc6ed5001fa02fbd261bfa Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Wed, 3 Nov 2021 11:03:00 +0100 Subject: [PATCH 02/16] Make stuff more elegant --- src/source.rs | 50 ++++++++++++++------------------ src/syntax/mod.rs | 71 +++++++++++++++++++++------------------------- src/syntax/span.rs | 6 ++-- 3 files changed, 57 insertions(+), 70 deletions(-) diff --git a/src/source.rs b/src/source.rs index 069edd29f..2d18ece30 100644 --- a/src/source.rs +++ b/src/source.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::io; -use std::ops::Range; +use std::ops::{Not, Range}; use std::path::{Path, PathBuf}; use std::rc::Rc; @@ -10,9 +10,9 @@ use serde::{Deserialize, Serialize}; use crate::diag::TypResult; use crate::loading::{FileHash, Loader}; -use crate::parse::{is_newline, parse, Scanner}; +use crate::parse::{is_newline, parse, parse_block, Scanner}; use crate::syntax::ast::Markup; -use crate::syntax::{self, Category, GreenNode, RedNode}; +use crate::syntax::{self, Category, GreenNode, NodeKind, RedNode, Span}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] @@ -284,37 +284,28 
@@ impl SourceFile { .extend(newlines(&self.src[start ..]).map(|idx| start + idx)); // Update the root node. - #[cfg(not(feature = "parse-cache"))] - { - self.root = parse(&self.src); - } + let insertion_span = Span::new(self.id, replace.start, replace.end); + let incremental_target = + Rc::make_mut(&mut self.root).incremental_parent(insertion_span); - #[cfg(feature = "parse-cache")] - { - let insertion_span = replace.into_span(self.id); - let incremental_target = - Rc::make_mut(&mut self.root).incremental_parent(insertion_span); - - match incremental_target { - Some((child_idx, parent, offset)) => { - let child = &parent.children()[child_idx]; - let src = &self.src[offset .. offset + child.len()]; - let parse_res = match child.kind() { - NodeKind::Markup => Some(parse(src)), - _ => parse_block(src), - } - .and_then(|x| x.data().erroneous().not().then(|| x)); - - if let Some(parse_res) = parse_res { - parent.replace_child(child_idx, parse_res); - } else { - self.root = parse(&self.src); - } + match incremental_target { + Some((child, offset)) => { + let src = &self.src[offset .. offset + child.len()]; + let parse_res = match child.kind() { + NodeKind::Markup => Some(parse(src)), + _ => parse_block(src), } - None => { + .and_then(|x| x.erroneous.not().then(|| x)); + + if let Some(parse_res) = parse_res { + *child = Rc::try_unwrap(parse_res).unwrap(); + } else { self.root = parse(&self.src); } } + None => { + self.root = parse(&self.src); + } } } @@ -405,6 +396,7 @@ impl<'a> Files<'a> for SourceStore { #[cfg(test)] mod tests { use super::*; + use crate::syntax::Green; const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl"; diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index b0911c63d..5da690ab8 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -98,7 +98,7 @@ pub struct GreenNode { /// This node's children, losslessly make up this node. children: Vec, /// Whether this node or any of its children are erroneous. 
- erroneous: bool, + pub erroneous: bool, } impl GreenNode { @@ -148,12 +148,9 @@ impl GreenNode { self.data().len() } - /// Find the parent of the deepest incremental-safe node and the index of - /// the found child. - pub fn incremental_parent( - &mut self, - span: Span, - ) -> Option<(usize, &mut GreenNode, usize)> { + /// Find the deepest incremental-safe node and its offset in the source + /// code. + pub fn incremental_parent(&mut self, span: Span) -> Option<(&mut GreenNode, usize)> { self.incremental_parent_internal(span, 0) } @@ -161,10 +158,8 @@ impl GreenNode { &mut self, span: Span, mut offset: usize, - ) -> Option<(usize, &mut GreenNode, usize)> { - let x = unsafe { &mut *(self as *mut _) }; - - for (i, child) in self.children.iter_mut().enumerate() { + ) -> Option<(&mut GreenNode, usize)> { + for child in self.children.iter_mut() { match child { Green::Token(n) => { if offset < span.start { @@ -187,15 +182,15 @@ impl GreenNode { && span.end > offset { // the node is within the span. - if n.kind().is_incremental_safe() { - let res = - Rc::make_mut(n).incremental_parent_internal(span, offset); + let safe = n.kind().is_incremental_safe(); + let mut_n = Rc::make_mut(n); + if safe { + let res = mut_n.incremental_parent_internal(span, offset); if res.is_none() { - return Some((i, x, offset)); + return Some((mut_n, offset)); } } else { - return Rc::make_mut(n) - .incremental_parent_internal(span, offset); + return mut_n.incremental_parent_internal(span, offset); } } else { // the node is overlapping or after after the span; nodes are @@ -208,11 +203,6 @@ impl GreenNode { return None; } - - /// Replace one of the node's children. - pub fn replace_child(&mut self, index: usize, child: impl Into) { - self.children[index] = child.into(); - } } impl From for Green { @@ -352,7 +342,7 @@ impl Debug for RedNode { } } -/// A borrowed wrapper for a green node with span information. +/// A borrowed wrapper for a [`GreenNode`] with span information. 
/// /// Borrowed variant of [`RedNode`]. Can be [cast](Self::cast) to an AST node. #[derive(Copy, Clone, PartialEq)] @@ -387,6 +377,26 @@ impl<'a> RedRef<'a> { Span::new(self.id, self.offset, self.offset + self.green.len()) } + /// Whether the node or its children contain an error. + pub fn erroneous(self) -> bool { + self.green.erroneous() + } + + /// The node's children. + pub fn children(self) -> Children<'a> { + let children = match &self.green { + Green::Node(node) => node.children(), + Green::Token(_) => &[], + }; + + Children { + id: self.id, + iter: children.iter(), + front: self.offset, + back: self.offset + self.len(), + } + } + /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -419,21 +429,6 @@ impl<'a> RedRef<'a> { T::from_red(self) } - /// The node's children. - pub fn children(self) -> Children<'a> { - let children = match &self.green { - Green::Node(node) => node.children(), - Green::Token(_) => &[], - }; - - Children { - id: self.id, - iter: children.iter(), - front: self.offset, - back: self.offset + self.len(), - } - } - /// Get the first child that can cast to some AST type. pub fn cast_first_child(self) -> Option { self.children().find_map(RedRef::cast) diff --git a/src/syntax/span.rs b/src/syntax/span.rs index a707d3d93..430d5f1da 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -127,12 +127,12 @@ impl Span { /// Create a new span with n characters inserted inside of this span. 
pub fn inserted(mut self, other: Self, n: usize) -> Self { - if !self.contains(other.start) || !self.contains(other.end) { + if !self.surrounds(other) { panic!(); } - let len_change = (n as isize - other.len() as isize) as usize; - self.end += len_change; + let len_change = n as isize - other.len() as isize; + self.end += len_change as usize; self } From eba7fc34effbec3bcc6d5c40d831b1e15af77c4d Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 6 Nov 2021 16:07:21 +0100 Subject: [PATCH 03/16] Incremental-safety based approach --- src/parse/mod.rs | 73 +++-- src/parse/parser.rs | 24 ++ src/source.rs | 33 +-- src/syntax/ast.rs | 4 +- src/syntax/highlight.rs | 1 + src/syntax/mod.rs | 574 ++++++++++++++++++++++++++++++++++++---- src/syntax/span.rs | 3 +- 7 files changed, 612 insertions(+), 100 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 1ab2fb15d..5d845a552 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -25,18 +25,36 @@ pub fn parse(src: &str) -> Rc { } } -/// Parse a block. Returns `Some` if there was only one block. -pub fn parse_block(source: &str) -> Option> { +/// Parse an atomic primary. Returns `Some` if all of the input was consumed. +pub fn parse_atomic(source: &str, _: bool) -> Option> { let mut p = Parser::new(source); - block(&mut p); - if p.eof() { - match p.finish().into_iter().next() { - Some(Green::Node(node)) => Some(node), - _ => unreachable!(), - } - } else { - None + primary(&mut p, true).ok()?; + p.eject() +} + +/// Parse some markup. Returns `Some` if all of the input was consumed. +pub fn parse_markup(source: &str, _: bool) -> Option> { + let mut p = Parser::new(source); + markup(&mut p); + p.eject() +} + +/// Parse some markup without the topmost node. Returns `Some` if all of the +/// input was consumed. 
+pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option> { + let mut p = Parser::new(source); + while !p.eof() { + markup_node(&mut p, &mut at_start); } + p.eject() +} + +/// Parse some code. Returns `Some` if all of the input was consumed. +pub fn parse_code(source: &str, _: bool) -> Option> { + let mut p = Parser::new(source); + p.set_mode(TokenMode::Code); + expr_list(&mut p); + p.eject() } /// Parse markup. @@ -118,7 +136,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Line-based markup that is not currently at the start of the line. NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => { - p.convert(NodeKind::Text(p.peek_src().into())); + p.convert(NodeKind::TextInLine(p.peek_src().into())) } // Hashtag + keyword / identifier. @@ -196,7 +214,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. - match p.peek().and_then(UnOp::from_token) { + match (!atomic).then(|| p.peek().and_then(UnOp::from_token)).flatten() { Some(op) => { p.eat(); let prec = op.precedence(); @@ -268,7 +286,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { } // Structures. - Some(NodeKind::LeftParen) => parenthesized(p), + Some(NodeKind::LeftParen) => parenthesized(p, atomic), Some(NodeKind::LeftBracket) => { template(p); Ok(()) @@ -329,7 +347,7 @@ fn literal(p: &mut Parser) -> bool { /// - Dictionary literal /// - Parenthesized expression /// - Parameter list of closure expression -fn parenthesized(p: &mut Parser) -> ParseResult { +fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult { let marker = p.marker(); p.start_group(Group::Paren); @@ -344,7 +362,7 @@ fn parenthesized(p: &mut Parser) -> ParseResult { } // Arrow means this is a closure's parameter list. 
- if p.at(&NodeKind::Arrow) { + if !atomic && p.at(&NodeKind::Arrow) { params(p, marker); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); @@ -507,20 +525,25 @@ fn template(p: &mut Parser) { fn block(p: &mut Parser) { p.perform(NodeKind::Block, |p| { p.start_group(Group::Brace); - while !p.eof() { - p.start_group(Group::Stmt); - if expr(p).is_ok() && !p.eof() { - p.expected_at("semicolon or line break"); - } - p.end_group(); - - // Forcefully skip over newlines since the group's contents can't. - p.eat_while(|t| matches!(t, NodeKind::Space(_))); - } + expr_list(p); p.end_group(); }); } +/// Parse a number of code expressions. +fn expr_list(p: &mut Parser) { + while !p.eof() { + p.start_group(Group::Stmt); + if expr(p).is_ok() && !p.eof() { + p.expected_at("semicolon or line break"); + } + p.end_group(); + + // Forcefully skip over newlines since the group's contents can't. + p.eat_while(|t| matches!(t, NodeKind::Space(_))); + } +} + /// Parse a function call. fn call(p: &mut Parser, callee: Marker) -> ParseResult { callee.perform(p, NodeKind::Call, |p| args(p, true, true)) diff --git a/src/parse/parser.rs b/src/parse/parser.rs index af8a7c5ca..f391c4739 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -21,6 +21,8 @@ pub struct Parser<'s> { groups: Vec, /// The children of the currently built node. children: Vec, + /// Whether the last group was terminated. + last_group_terminated: bool, } impl<'s> Parser<'s> { @@ -36,6 +38,7 @@ impl<'s> Parser<'s> { current_start: 0, groups: vec![], children: vec![], + last_group_terminated: true, } } @@ -44,6 +47,15 @@ impl<'s> Parser<'s> { self.children } + /// End the parsing process and return multiple children. + pub fn eject(self) -> Option> { + if self.eof() && self.group_success() { + Some(self.children) + } else { + None + } + } + /// Create a new marker. 
pub fn marker(&mut self) -> Marker { Marker(self.children.len()) @@ -190,6 +202,11 @@ impl<'s> Parser<'s> { self.tokens.scanner().column(index) } + /// Set the tokenizer's mode. + pub fn set_mode(&mut self, mode: TokenMode) { + self.tokens.set_mode(mode); + } + /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to @@ -225,6 +242,7 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); + self.last_group_terminated = true; let mut rescan = self.tokens.mode() != group_mode; @@ -243,6 +261,7 @@ impl<'s> Parser<'s> { rescan = false; } else if required { self.push_error(format_eco!("expected {}", end)); + self.last_group_terminated = false; } } @@ -260,6 +279,11 @@ impl<'s> Parser<'s> { } } + /// Check if the group processing was successfully terminated. + pub fn group_success(&self) -> bool { + self.last_group_terminated && self.groups.is_empty() + } + /// Low-level bump that consumes exactly one token without special trivia /// handling. fn bump(&mut self) { diff --git a/src/source.rs b/src/source.rs index 2d18ece30..f7e6cb5e6 100644 --- a/src/source.rs +++ b/src/source.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::io; -use std::ops::{Not, Range}; +use std::ops::Range; use std::path::{Path, PathBuf}; use std::rc::Rc; @@ -10,9 +10,9 @@ use serde::{Deserialize, Serialize}; use crate::diag::TypResult; use crate::loading::{FileHash, Loader}; -use crate::parse::{is_newline, parse, parse_block, Scanner}; +use crate::parse::{is_newline, parse, Scanner}; use crate::syntax::ast::Markup; -use crate::syntax::{self, Category, GreenNode, NodeKind, RedNode, Span}; +use crate::syntax::{self, Category, GreenNode, RedNode, Span}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] @@ -285,27 +285,10 @@ impl SourceFile { // Update the root node. 
let insertion_span = Span::new(self.id, replace.start, replace.end); - let incremental_target = - Rc::make_mut(&mut self.root).incremental_parent(insertion_span); - - match incremental_target { - Some((child, offset)) => { - let src = &self.src[offset .. offset + child.len()]; - let parse_res = match child.kind() { - NodeKind::Markup => Some(parse(src)), - _ => parse_block(src), - } - .and_then(|x| x.erroneous.not().then(|| x)); - - if let Some(parse_res) = parse_res { - *child = Rc::try_unwrap(parse_res).unwrap(); - } else { - self.root = parse(&self.src); - } - } - None => { - self.root = parse(&self.src); - } + let source = self.src().to_string(); + if !Rc::make_mut(&mut self.root).incremental(&source, insertion_span, with.len()) + { + self.root = parse(self.src()); } } @@ -511,6 +494,6 @@ mod tests { } // Test inserting at the begining. - test("abc #f()[def] ghi", 10 .. 11, "xyz", "abc #f()[dxyzf] ghi"); + test("abc #f()[def] ghi", 5 .. 6, "g", "abc #g()[def] ghi"); } } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index ae8ecdc99..ed74dfe51 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -65,7 +65,9 @@ impl Markup { NodeKind::Parbreak => Some(MarkupNode::Parbreak), NodeKind::Strong => Some(MarkupNode::Strong), NodeKind::Emph => Some(MarkupNode::Emph), - NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), + NodeKind::Text(s) | NodeKind::TextInLine(s) => { + Some(MarkupNode::Text(s.clone())) + } NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())), NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 85fbef12f..21af060ff 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -158,6 +158,7 @@ impl Category { NodeKind::Space(_) => None, NodeKind::Parbreak => None, NodeKind::Text(_) => None, + NodeKind::TextInLine(_) => None, NodeKind::List => None, NodeKind::Enum => None, 
NodeKind::Array => None, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 5da690ab8..0879ab7f2 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -15,6 +15,9 @@ pub use span::*; use self::ast::{MathNode, RawNode, TypedNode}; use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; +use crate::parse::{ + parse_atomic, parse_code, parse_markup, parse_markup_elements, TokenMode, +}; use crate::source::SourceId; use crate::util::EcoString; @@ -73,6 +76,43 @@ impl Green { Self::Token(data) => data.kind = kind, } } + + /// Find the innermost child that is incremental safe. + pub fn incremental_int( + &mut self, + edit: &str, + replace: Span, + replacement_len: usize, + offset: usize, + parent_mode: TokenMode, + outermost: bool, + ) -> bool { + match self { + Green::Node(n) => Rc::make_mut(n).incremental_int( + edit, + replace, + replacement_len, + offset, + parent_mode, + outermost, + ), + Green::Token(_) => false, + } + } + + /// The error messages for this node and its descendants. + pub fn errors(&self) -> Vec { + match self { + Green::Node(n) => n.errors(), + Green::Token(t) => { + if t.kind().is_error() { + vec![t.kind().clone()] + } else { + vec![] + } + } + } + } } impl Default for Green { @@ -148,60 +188,181 @@ impl GreenNode { self.data().len() } - /// Find the deepest incremental-safe node and its offset in the source - /// code. - pub fn incremental_parent(&mut self, span: Span) -> Option<(&mut GreenNode, usize)> { - self.incremental_parent_internal(span, 0) + /// The error messages for this node and its descendants. + pub fn errors(&self) -> Vec { + let mut res = self.children.iter().flat_map(|c| c.errors()).collect::>(); + + if self.kind().is_error() { + res.push(self.kind().clone()); + } + + res } - fn incremental_parent_internal( + /// Find the innermost child that is incremental safe. 
+ pub fn incremental( &mut self, - span: Span, + edit: &str, + replace: Span, + replacement_len: usize, + ) -> bool { + self.incremental_int(edit, replace, replacement_len, 0, TokenMode::Markup, true) + } + + fn incremental_int( + &mut self, + src: &str, + replace: Span, + replacement_len: usize, mut offset: usize, - ) -> Option<(&mut GreenNode, usize)> { - for child in self.children.iter_mut() { - match child { - Green::Token(n) => { - if offset < span.start { - // the token is strictly before the span - offset += n.len(); - } else { - // the token is within or after the span; tokens are - // never safe, so we return. - return None; - } + parent_mode: TokenMode, + outermost: bool, + ) -> bool { + let kind = self.kind().clone(); + let mode = kind.mode().apply(parent_mode); + eprintln!("in {:?} (mode {:?})", kind, mode); + + let mut loop_result = None; + let mut child_at_start = true; + let last = self.children.len() - 1; + for (i, child) in self.children.iter_mut().enumerate() { + let child_span = Span::new(replace.source, offset, offset + child.len()); + if child_span.surrounds(replace) { + eprintln!("found correct child"); + + // First, we try if the child has another, more specific applicable child. + if kind.incremental_safety() != IncrementalSafety::Unsafe + && child.incremental_int( + src, + replace, + replacement_len, + offset, + mode, + i == last && outermost, + ) + { + eprintln!("child was successful"); + return true; } - Green::Node(n) => { - let end = n.len() + offset; - if offset < span.start && end < span.start { - // the node is strictly before the span - offset += n.len(); - } else if span.start >= offset - && span.start < end - && span.end <= end - && span.end > offset - { - // the node is within the span. 
- let safe = n.kind().is_incremental_safe(); - let mut_n = Rc::make_mut(n); - if safe { - let res = mut_n.incremental_parent_internal(span, offset); - if res.is_none() { - return Some((mut_n, offset)); - } - } else { - return mut_n.incremental_parent_internal(span, offset); - } + + // This didn't work, so we try to replace the child at this + // level. + let (function, policy) = + if let Some(p) = child.kind().reparsing_function(mode) { + p } else { - // the node is overlapping or after after the span; nodes are - // never safe, so we return. - return None; - } + return false; + }; + loop_result = Some((i, child_span, function, policy)); + break; + } + + offset += child.len(); + child_at_start = child.kind().is_at_start(child_at_start); + } + + + // We now have a child that we can replace and a function to do so if + // the loop found any results at all. + let (child_idx, child_span, func, policy) = if let Some(loop_result) = loop_result + { + loop_result + } else { + // No child fully contains the replacement. + eprintln!("no child match"); + return false; + }; + + eprintln!("aquired function, policy {:?}", policy); + + let src_span = child_span.inserted(replace, replacement_len); + + let new_children = + if let Some(new_children) = func(&src[src_span.to_range()], child_at_start) { + new_children + } else { + eprintln!("function failed"); + return false; + }; + let child_mode = self.children[child_idx].kind().mode().apply(mode); + eprintln!("child mode {:?}", child_mode); + + // Check if the children / child has the right type. 
+ let require_single = match policy { + IncrementalSafety::AtomicPrimary | IncrementalSafety::SameKind => true, + IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => true, + _ => false, + }; + + if require_single { + eprintln!("must be a single replacement"); + if new_children.len() != 1 { + eprintln!("not a single replacement"); + return false; + } + + if match policy { + IncrementalSafety::SameKind => true, + IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => { + true + } + _ => false, + } { + if self.children[child_idx].kind() != new_children[0].kind() { + eprintln!("not the same kind"); + return false; } } } - return None; + // Do not accept unclosed nodes if the old node did not use to be at the + // right edge of the tree. + if !outermost + && new_children + .iter() + .flat_map(|x| x.errors()) + .any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _))) + { + eprintln!("unclosed node"); + return false; + } + + // Check if the neighbor invariants are still true. + if mode == TokenMode::Markup { + if child_idx > 0 { + if self.children[child_idx - 1].kind().incremental_safety() + == IncrementalSafety::EnsureRightWhitespace + && !new_children[0].kind().is_whitespace() + { + eprintln!("left whitespace missing"); + return false; + } + } + + let mut new_at_start = child_at_start; + for child in &new_children { + new_at_start = child.kind().is_at_start(new_at_start); + } + + for child in &self.children[child_idx + 1 ..] { + if child.kind().is_trivia() { + new_at_start = child.kind().is_at_start(new_at_start); + continue; + } + + match child.kind().incremental_safety() { + IncrementalSafety::EnsureAtStart if !new_at_start => return false, + IncrementalSafety::EnsureNotAtStart if new_at_start => return false, + _ => {} + } + break; + } + } + + eprintln!("... replacing"); + + self.children.splice(child_idx .. 
child_idx + 1, new_children); + true } } @@ -397,6 +558,7 @@ impl<'a> RedRef<'a> { } } + /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -593,6 +755,8 @@ pub enum NodeKind { Parbreak, /// A consecutive non-markup string. Text(EcoString), + /// A text node that cannot appear at the beginning of a source line. + TextInLine(EcoString), /// A non-breaking space: `~`. NonBreakingSpace, /// An en-dash: `--`. @@ -729,19 +893,249 @@ impl NodeKind { matches!(self, Self::LeftParen | Self::RightParen) } + /// Whether this is whitespace. + pub fn is_whitespace(&self) -> bool { + match self { + Self::Space(_) | Self::Parbreak => true, + _ => false, + } + } + + /// Whether this is trivia. + pub fn is_trivia(&self) -> bool { + match self { + _ if self.is_whitespace() => true, + Self::LineComment | Self::BlockComment => true, + _ => false, + } + } + /// Whether this is some kind of error. pub fn is_error(&self) -> bool { matches!(self, NodeKind::Error(_, _) | NodeKind::Unknown(_)) } - /// Whether it is safe to do incremental parsing on this node. - pub fn is_incremental_safe(&self) -> bool { + /// Whether this node is `at_start` given the previous value of the property. + pub fn is_at_start(&self, prev: bool) -> bool { match self { - Self::Block | Self::Markup => true, + Self::Space(n) if *n > 0 => true, + Self::Parbreak => true, + Self::LineComment | Self::BlockComment => prev, _ => false, } } + /// Whether this token appears in Markup. 
+ pub fn mode(&self) -> NodeMode { + match self { + Self::Markup + | Self::Space(_) + | Self::Parbreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Math(_) => NodeMode::Markup, + Self::Template + | Self::Block + | Self::None + | Self::Auto + | Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group + | Self::Call + | Self::LineComment + | Self::BlockComment + | Self::Error(_, _) + | Self::Minus + | Self::Eq => NodeMode::Universal, + _ => NodeMode::Code, + } + } + + pub fn reparsing_function( + &self, + parent_mode: TokenMode, + ) -> Option<(fn(&str, bool) -> Option>, IncrementalSafety)> { + let policy = self.incremental_safety(); + if policy == IncrementalSafety::Unsafe { + return None; + } + + let mode = self.mode(); + if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer { + return None; + } + + if mode != NodeMode::Markup + && parent_mode == TokenMode::Code + && policy == IncrementalSafety::AtomicPrimary + { + return Some((parse_atomic, policy)); + } + + let parser: fn(&str, bool) -> _ = match mode { + NodeMode::Code => parse_code, + NodeMode::Markup if self == &Self::Markup => parse_markup, + NodeMode::Markup => parse_markup_elements, + NodeMode::Universal if parent_mode == TokenMode::Code => parse_code, + NodeMode::Universal => parse_markup_elements, + }; + + Some((parser, policy)) + } + + /// Whether it is safe to do incremental parsing on this node. Never allow + /// non-termination errors if this is not already the last leaf node. + pub fn incremental_safety(&self) -> IncrementalSafety { + match self { + // Replacing parenthesis changes if the expression is balanced and + // is therefore not safe. 
+ Self::LeftBracket + | Self::RightBracket + | Self::LeftBrace + | Self::RightBrace + | Self::LeftParen + | Self::RightParen => IncrementalSafety::Unsafe, + + // Replacing an operator can change whether the parent is an + // operation which makes it unsafe. The star can appear in markup. + Self::Star + | Self::Comma + | Self::Semicolon + | Self::Colon + | Self::Plus + | Self::Minus + | Self::Slash + | Self::Eq + | Self::EqEq + | Self::ExclEq + | Self::Lt + | Self::LtEq + | Self::Gt + | Self::GtEq + | Self::PlusEq + | Self::HyphEq + | Self::StarEq + | Self::SlashEq + | Self::Not + | Self::And + | Self::Or + | Self::With + | Self::Dots + | Self::Arrow => IncrementalSafety::Unsafe, + + // These keywords are literals and can be safely be substituted with + // other expressions. + Self::None | Self::Auto => IncrementalSafety::AtomicPrimary, + + // These keywords change what kind of expression the parent is. + Self::Let + | Self::If + | Self::Else + | Self::For + | Self::In + | Self::While + | Self::Break + | Self::Continue + | Self::Return + | Self::Set + | Self::Import + | Self::Include + | Self::From => IncrementalSafety::Unsafe, + + // This is a backslash followed by a space. But changing it to + // anything else is fair game. + Self::Linebreak => IncrementalSafety::EnsureRightWhitespace, + + Self::Markup => IncrementalSafety::SameKind, + + Self::Space(_) => IncrementalSafety::SameKindInCode, + + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Text(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph => IncrementalSafety::Safe, + + // This is text that needs to be not `at_start`, otherwise it would + // start one of the below items. + Self::TextInLine(_) => IncrementalSafety::EnsureNotAtStart, + + // These have to be `at_start` so they must be preceeded with a + // Space(n) with n > 0 or a Parbreak. 
+ Self::Heading | Self::Enum | Self::List => IncrementalSafety::EnsureAtStart, + + // Changing the heading level, enum numbering, or list bullet + // changes the next layer. + Self::EnumNumbering(_) => IncrementalSafety::Unsafe, + + Self::Raw(_) | Self::Math(_) => IncrementalSafety::Safe, + + // These are expressions that can be replaced by other expressions. + Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group => IncrementalSafety::AtomicPrimary, + + Self::Call | Self::Unary | Self::Binary | Self::SetExpr => { + IncrementalSafety::UnsafeLayer + } + + Self::CallArgs | Self::Named | Self::Spread => IncrementalSafety::UnsafeLayer, + + // The closure is a bit magic with the let expression, and also it + // is not atomic. + Self::Closure | Self::ClosureParams => IncrementalSafety::UnsafeLayer, + + // These can appear as bodies and would trigger an error if they + // became something else. + Self::Template | Self::Block => IncrementalSafety::SameKindInCode, + + Self::ForExpr + | Self::WhileExpr + | Self::IfExpr + | Self::LetExpr + | Self::ImportExpr + | Self::IncludeExpr => IncrementalSafety::AtomicPrimary, + + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + IncrementalSafety::UnsafeLayer + } + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => IncrementalSafety::SameKind, + + Self::Error(_, _) | Self::Unknown(_) => IncrementalSafety::Unsafe, + } + } + /// A human-readable name for the kind. 
pub fn as_str(&self) -> &'static str { match self { @@ -794,7 +1188,7 @@ impl NodeKind { Self::Space(_) => "space", Self::Linebreak => "forced linebreak", Self::Parbreak => "paragraph break", - Self::Text(_) => "text", + Self::Text(_) | Self::TextInLine(_) => "text", Self::NonBreakingSpace => "non-breaking space", Self::EnDash => "en dash", Self::EmDash => "em dash", @@ -855,3 +1249,87 @@ impl Display for NodeKind { f.pad(self.as_str()) } } + +/// This enum describes what conditions a node has for being replaced by a new +/// parse result. +/// +/// Safe nodes are replaced by the new parse result from the respective mode. +/// They can be replaced by multiple tokens. If a token is inserted in Markup +/// mode and the next token would not be `at_start` there needs to be a forward +/// check for a `EnsureAtStart` node. If this fails, the parent has to be +/// reparsed. if the direct whitespace sibling of a `EnsureRightWhitespace` is +/// `Unsafe`. Similarly, if a `EnsureRightWhitespace` token is one of the last +/// tokens to be inserted, the edit is invalidated if there is no following +/// whitespace. The atomic nodes may only be replaced by other atomic nodes. The +/// unsafe layers cannot be used but allow children access, the unsafe nodes do +/// neither. +/// +/// *Procedure:* +/// 1. Check if the node is safe - if unsafe layer recurse, if unsafe, return +/// None. +/// 2. Reparse with appropriate node kind and `at_start`. +/// 3. Check whether the topmost group is terminated and the range was +/// completely consumed, otherwise return None. +/// 4. Check if the type criteria are met. +/// 5. If the node is not at the end of the tree, check if Strings etc. are +/// terminated. +/// 6. If this is markup, check the following things: +/// - The `at_start` conditions of the next non-comment and non-space(0) node +/// are met. +/// - The first node is whitespace or the previous siblings are not +/// `EnsureRightWhitespace`. +/// - If any of those fails, return None. 
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum IncrementalSafety { + /// Changing this node can never have an influence on the other nodes. + Safe, + /// This node has to be replaced with a single token of the same kind. + SameKind, + /// This node has to be replaced with a single token of the same kind if in + /// code mode. + SameKindInCode, + /// These nodes depend on being at the start of a line. Reparsing of safe + /// left neighbors has to check this invariant. Otherwise, this node is + /// safe. + EnsureAtStart, + /// These nodes depend on not being at the start of a line. Reparsing of + /// safe left neighbors has to check this invariant. Otherwise, this node is + /// safe. + EnsureNotAtStart, + /// These nodes must be followed by whitespace. + EnsureRightWhitespace, + /// Changing this node into a single atomic expression is allowed if it + /// appears in code mode, otherwise it is safe. + AtomicPrimary, + /// Changing an unsafe layer node changes what the parents or the + /// surrounding nodes would be and is therefore disallowed. Change the + /// parents or children instead. If it appears in Markup, however, it is + /// safe to change. + UnsafeLayer, + /// Changing an unsafe node or any of its children will trigger undefined + /// behavior. Change the parents instead. + Unsafe, +} + +/// This enum describes which mode a token of [`NodeKind`] can appear in. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum NodeMode { + /// The token can only appear in markup mode. + Markup, + /// The token can only appear in code mode. + Code, + /// The token can appear in either mode. Look at the parent node to decide + /// which mode it is in. + Universal, +} + +impl NodeMode { + /// Returns the new [`TokenMode`] given the old one. 
+ pub fn apply(&self, old: TokenMode) -> TokenMode { + match self { + Self::Markup => TokenMode::Markup, + Self::Code => TokenMode::Code, + Self::Universal => old, + } + } +} diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 430d5f1da..2691acc7c 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -125,7 +125,8 @@ impl Span { *self = self.join(other) } - /// Create a new span with n characters inserted inside of this span. + /// Create a new span by specifying a span in which a modification happened + /// and how many characters are now in that span. pub fn inserted(mut self, other: Self, n: usize) -> Self { if !self.surrounds(other) { panic!(); From 0663758fbb42651a08bfcd46c27b5cdeab90fb75 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 7 Nov 2021 19:43:01 +0100 Subject: [PATCH 04/16] Tests - length updates - dealing with keywords and comments --- src/parse/mod.rs | 19 ++--- src/parse/parser.rs | 9 +- src/source.rs | 102 +++++++++++++++++++--- src/syntax/mod.rs | 200 +++++++++++++++++++++++++++++--------------- 4 files changed, 234 insertions(+), 96 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 5d845a552..027773505 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -17,7 +17,7 @@ use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; /// Parse a source file. pub fn parse(src: &str) -> Rc { - let mut p = Parser::new(src); + let mut p = Parser::new(src, TokenMode::Markup); markup(&mut p); match p.finish().into_iter().next() { Some(Green::Node(node)) => node, @@ -26,23 +26,23 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic(source: &str, _: bool) -> Option> { - let mut p = Parser::new(source); +pub fn parse_atomic(src: &str, _: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Code); primary(&mut p, true).ok()?; p.eject() } /// Parse some markup. Returns `Some` if all of the input was consumed. 
-pub fn parse_markup(source: &str, _: bool) -> Option> { - let mut p = Parser::new(source); +pub fn parse_markup(src: &str, _: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Markup); markup(&mut p); p.eject() } /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. -pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option> { - let mut p = Parser::new(source); +pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Markup); while !p.eof() { markup_node(&mut p, &mut at_start); } @@ -50,9 +50,8 @@ pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option Option> { - let mut p = Parser::new(source); - p.set_mode(TokenMode::Code); +pub fn parse_code(src: &str, _: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Code); expr_list(&mut p); p.eject() } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index f391c4739..451e18f19 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -27,8 +27,8 @@ pub struct Parser<'s> { impl<'s> Parser<'s> { /// Create a new parser for the source string. - pub fn new(src: &'s str) -> Self { - let mut tokens = Tokens::new(src, TokenMode::Markup); + pub fn new(src: &'s str, mode: TokenMode) -> Self { + let mut tokens = Tokens::new(src, mode); let current = tokens.next(); Self { tokens, @@ -202,11 +202,6 @@ impl<'s> Parser<'s> { self.tokens.scanner().column(index) } - /// Set the tokenizer's mode. - pub fn set_mode(&mut self, mode: TokenMode) { - self.tokens.set_mode(mode); - } - /// Continue parsing in a group. 
/// /// When the end delimiter of the group is reached, all subsequent calls to diff --git a/src/source.rs b/src/source.rs index f7e6cb5e6..36db50ddb 100644 --- a/src/source.rs +++ b/src/source.rs @@ -128,6 +128,7 @@ pub struct SourceFile { src: String, line_starts: Vec, root: Rc, + was_incremental: bool, } impl SourceFile { @@ -141,6 +142,7 @@ impl SourceFile { root: parse(&src), src, line_starts, + was_incremental: false, } } @@ -286,12 +288,20 @@ impl SourceFile { // Update the root node. let insertion_span = Span::new(self.id, replace.start, replace.end); let source = self.src().to_string(); - if !Rc::make_mut(&mut self.root).incremental(&source, insertion_span, with.len()) - { + if Rc::make_mut(&mut self.root).incremental(&source, insertion_span, with.len()) { + self.was_incremental = true; + } else { self.root = parse(self.src()); + self.was_incremental = false; } } + /// Forces a non-incremental reparsing of the source file. + fn force_reparse(&mut self) { + self.root = parse(self.src()); + self.was_incremental = false; + } + /// Provide highlighting categories for the given range of the source file. 
pub fn highlight(&self, range: Range, mut f: F) where @@ -379,7 +389,6 @@ impl<'a> Files<'a> for SourceStore { #[cfg(test)] mod tests { use super::*; - use crate::syntax::Green; const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl"; @@ -481,19 +490,88 @@ mod tests { } #[test] - fn test_source_file_edit_2() { + fn test_incremental_parse() { #[track_caller] - fn test(prev: &str, range: Range, with: &str, after: &str) { + fn test(prev: &str, range: Range, with: &str, incr: bool) { let mut source = SourceFile::detached(prev); - let result = SourceFile::detached(after); - dbg!(Green::from(source.root.clone())); source.edit(range, with); - assert_eq!(source.src, result.src); - assert_eq!(source.line_starts, result.line_starts); - dbg!(Green::from(source.root)); + + if incr { + assert!(source.was_incremental); + let incr_tree = source.root.clone(); + source.force_reparse(); + assert_eq!(source.root, incr_tree); + } else { + assert!(!source.was_incremental); + } } - // Test inserting at the begining. - test("abc #f()[def] ghi", 5 .. 6, "g", "abc #g()[def] ghi"); + // Test simple replacements. + test("hello world", 6 .. 11, "wankers", true); + test("{(0, 1, 2)}", 5 .. 6, "11pt", true); + test("= A heading", 3 .. 3, "n evocative", true); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 16 .. 20, + "none", + true, + ); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 33 .. 42, + "[_gronk_]", + true, + ); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 34 .. 41, + "_bar_", + true, + ); + test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", true); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", true); + test("hello {x}", 6 .. 9, "#f()", false); + test( + "this is -- in my opinion -- spectacular", + 8 .. 10, + "---", + true, + ); + test("understanding `code` is complicated", 15 .. 
15, "C ", true); + test("{ let x = g() }", 10 .. 12, "f(54", true); + test( + "#let rect with (fill: eastern)", + 14 .. 29, + " (stroke: conifer", + true, + ); + test("a b c", 1 .. 1, " /* letters */", false); + + // Test the whitespace invariants. + test("hello \\ world", 7 .. 8, "a ", false); + test("hello \\ world", 7 .. 8, "\n\n", true); + test("x = y", 2 .. 2, "+ y ", true); + test("x = y", 2 .. 2, "+ y \n ", false); + test("abc\n= a heading", 3 .. 4, "\nsome more test\n\n", true); + test("abc\n= a heading", 3 .. 4, "\nnot ", false); + + // Test type invariants. + test("#for x in array {x}", 16 .. 19, "[#x]", true); + test("#let x = 1 {5}", 1 .. 4, "if", false); + test("#let x = 1 {5}", 4 .. 4, " if", false); + test("a // b c #f()", 3 .. 4, "", false); + + // this appearantly works but the assertion fails. + // test("a b c", 1 .. 1, "{[}", true); + + // Test unclosed things. + test(r#"{"hi"}"#, 4 .. 5, "c", false); + test(r"this \u{abcd}", 8 .. 9, "", true); + test(r"this \u{abcd} that", 12 .. 13, "", false); + test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", false); + + // these appearantly works but the assertion fails. + // test(r#"a ```typst hello``` b"#, 16 .. 17, "", false); + // test(r#"a ```typst hello```"#, 16 .. 17, "", true); } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 0879ab7f2..cb811266e 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -49,6 +49,15 @@ impl Green { self.data().len() } + /// Set the length of the node. + pub fn set_len(&mut self, len: usize) { + let data = match self { + Self::Node(node) => &mut Rc::make_mut(node).data, + Self::Token(data) => data, + }; + data.set_len(len); + } + /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { match self { @@ -78,15 +87,15 @@ impl Green { } /// Find the innermost child that is incremental safe. 
- pub fn incremental_int( + fn incremental_int( &mut self, edit: &str, replace: Span, replacement_len: usize, offset: usize, - parent_mode: TokenMode, + parent_mode: NodeMode, outermost: bool, - ) -> bool { + ) -> Result<(), bool> { match self { Green::Node(n) => Rc::make_mut(n).incremental_int( edit, @@ -96,7 +105,7 @@ impl Green { parent_mode, outermost, ), - Green::Token(_) => false, + Green::Token(_) => Err(false), } } @@ -202,11 +211,17 @@ impl GreenNode { /// Find the innermost child that is incremental safe. pub fn incremental( &mut self, - edit: &str, + src: &str, replace: Span, replacement_len: usize, ) -> bool { - self.incremental_int(edit, replace, replacement_len, 0, TokenMode::Markup, true) + let edit = &src[replace.inserted(replace, replacement_len).to_range()]; + if edit.contains("//") || edit.contains("/*") || edit.contains("*/") { + return false; + } + + self.incremental_int(src, replace, replacement_len, 0, NodeMode::Markup, true) + .is_ok() } fn incremental_int( @@ -215,9 +230,9 @@ impl GreenNode { replace: Span, replacement_len: usize, mut offset: usize, - parent_mode: TokenMode, + parent_mode: NodeMode, outermost: bool, - ) -> bool { + ) -> Result<(), bool> { let kind = self.kind().clone(); let mode = kind.mode().apply(parent_mode); eprintln!("in {:?} (mode {:?})", kind, mode); @@ -230,30 +245,41 @@ impl GreenNode { if child_span.surrounds(replace) { eprintln!("found correct child"); + let old_len = child.len(); // First, we try if the child has another, more specific applicable child. 
- if kind.incremental_safety() != IncrementalSafety::Unsafe - && child.incremental_int( + if !kind.incremental_safety().unsafe_interior() { + match child.incremental_int( src, replace, replacement_len, offset, - mode, + kind.mode().child_mode(), i == last && outermost, - ) - { - eprintln!("child was successful"); - return true; + ) { + Ok(_) => { + eprintln!("child success"); + let new_len = child.len(); + self.data.set_len(self.data.len() + new_len - old_len); + return Ok(()); + } + Err(b) if b => return Err(false), + _ => {} + } } // This didn't work, so we try to replace the child at this // level. - let (function, policy) = - if let Some(p) = child.kind().reparsing_function(mode) { - p - } else { - return false; - }; - loop_result = Some((i, child_span, function, policy)); + let (function, policy) = match child + .kind() + .reparsing_function(mode.child_mode().as_token_mode()) + { + Ok(p) => p, + Err(policy) => { + return Err(policy == IncrementalSafety::VeryUnsafe); + } + }; + loop_result = + Some((i, child_span, i == last && outermost, function, policy)); break; } @@ -264,14 +290,14 @@ impl GreenNode { // We now have a child that we can replace and a function to do so if // the loop found any results at all. - let (child_idx, child_span, func, policy) = if let Some(loop_result) = loop_result - { - loop_result - } else { - // No child fully contains the replacement. - eprintln!("no child match"); - return false; - }; + let (child_idx, child_span, child_outermost, func, policy) = + if let Some(loop_result) = loop_result { + loop_result + } else { + // No child fully contains the replacement. 
+ eprintln!("no child match"); + return Err(false); + }; eprintln!("aquired function, policy {:?}", policy); @@ -282,9 +308,10 @@ impl GreenNode { new_children } else { eprintln!("function failed"); - return false; + return Err(false); }; - let child_mode = self.children[child_idx].kind().mode().apply(mode); + let child_mode = + self.children[child_idx].kind().mode().child_mode().as_token_mode(); eprintln!("child mode {:?}", child_mode); // Check if the children / child has the right type. @@ -298,7 +325,7 @@ impl GreenNode { eprintln!("must be a single replacement"); if new_children.len() != 1 { eprintln!("not a single replacement"); - return false; + return Err(false); } if match policy { @@ -310,32 +337,32 @@ impl GreenNode { } { if self.children[child_idx].kind() != new_children[0].kind() { eprintln!("not the same kind"); - return false; + return Err(false); } } } // Do not accept unclosed nodes if the old node did not use to be at the // right edge of the tree. - if !outermost + if !child_outermost && new_children .iter() .flat_map(|x| x.errors()) .any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _))) { eprintln!("unclosed node"); - return false; + return Err(false); } // Check if the neighbor invariants are still true. 
- if mode == TokenMode::Markup { + if mode.as_token_mode() == TokenMode::Markup { if child_idx > 0 { if self.children[child_idx - 1].kind().incremental_safety() == IncrementalSafety::EnsureRightWhitespace && !new_children[0].kind().is_whitespace() { eprintln!("left whitespace missing"); - return false; + return Err(false); } } @@ -351,8 +378,12 @@ impl GreenNode { } match child.kind().incremental_safety() { - IncrementalSafety::EnsureAtStart if !new_at_start => return false, - IncrementalSafety::EnsureNotAtStart if new_at_start => return false, + IncrementalSafety::EnsureAtStart if !new_at_start => { + return Err(false); + } + IncrementalSafety::EnsureNotAtStart if new_at_start => { + return Err(false); + } _ => {} } break; @@ -361,8 +392,12 @@ impl GreenNode { eprintln!("... replacing"); + let old_len = self.children[child_idx].len(); + let new_len: usize = new_children.iter().map(Green::len).sum(); + self.children.splice(child_idx .. child_idx + 1, new_children); - true + self.data.set_len(self.data.len + new_len - old_len); + Ok(()) } } @@ -414,6 +449,11 @@ impl GreenData { pub fn len(&self) -> usize { self.len } + + /// Set the length of the node. 
+ pub fn set_len(&mut self, len: usize) { + self.len = len; + } } impl From for Green { @@ -939,24 +979,18 @@ impl NodeKind { | Self::Escape(_) | Self::Strong | Self::Emph + | Self::Raw(_) | Self::Math(_) => NodeMode::Markup, Self::Template | Self::Block - | Self::None - | Self::Auto | Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group + | Self::LetExpr + | Self::IfExpr + | Self::WhileExpr + | Self::ForExpr + | Self::ImportExpr | Self::Call + | Self::IncludeExpr | Self::LineComment | Self::BlockComment | Self::Error(_, _) @@ -969,22 +1003,25 @@ impl NodeKind { pub fn reparsing_function( &self, parent_mode: TokenMode, - ) -> Option<(fn(&str, bool) -> Option>, IncrementalSafety)> { + ) -> Result< + (fn(&str, bool) -> Option>, IncrementalSafety), + IncrementalSafety, + > { let policy = self.incremental_safety(); - if policy == IncrementalSafety::Unsafe { - return None; + if policy.unsafe_interior() { + return Err(policy); } let mode = self.mode(); if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer { - return None; + return Err(policy); } if mode != NodeMode::Markup && parent_mode == TokenMode::Code && policy == IncrementalSafety::AtomicPrimary { - return Some((parse_atomic, policy)); + return Ok((parse_atomic, policy)); } let parser: fn(&str, bool) -> _ = match mode { @@ -995,7 +1032,7 @@ impl NodeKind { NodeMode::Universal => parse_markup_elements, }; - Some((parser, policy)) + Ok((parser, policy)) } /// Whether it is safe to do incremental parsing on this node. Never allow @@ -1042,7 +1079,8 @@ impl NodeKind { // other expressions. Self::None | Self::Auto => IncrementalSafety::AtomicPrimary, - // These keywords change what kind of expression the parent is. + // These keywords change what kind of expression the parent is and + // how far the expression would go. 
Self::Let | Self::If | Self::Else @@ -1055,7 +1093,7 @@ impl NodeKind { | Self::Set | Self::Import | Self::Include - | Self::From => IncrementalSafety::Unsafe, + | Self::From => IncrementalSafety::VeryUnsafe, // This is a backslash followed by a space. But changing it to // anything else is fair game. @@ -1309,6 +1347,17 @@ pub enum IncrementalSafety { /// Changing an unsafe node or any of its children will trigger undefined /// behavior. Change the parents instead. Unsafe, + /// Its unsafe for two! + VeryUnsafe, +} + +impl IncrementalSafety { + pub fn unsafe_interior(&self) -> bool { + match self { + Self::Unsafe | Self::VeryUnsafe => true, + _ => false, + } + } } /// This enum describes which mode a token of [`NodeKind`] can appear in. @@ -1319,17 +1368,34 @@ pub enum NodeMode { /// The token can only appear in code mode. Code, /// The token can appear in either mode. Look at the parent node to decide - /// which mode it is in. + /// which mode it is in. After an apply, this is equivalent to Markup. Universal, } impl NodeMode { - /// Returns the new [`TokenMode`] given the old one. - pub fn apply(&self, old: TokenMode) -> TokenMode { + /// Returns a new mode considering the parent node. + pub fn apply(&self, old: Self) -> Self { match self { - Self::Markup => TokenMode::Markup, + Self::Markup => Self::Markup, + Self::Code => Self::Code, + Self::Universal if old != Self::Markup => Self::Code, + Self::Universal => Self::Universal, + } + } + + /// Return the corresponding token mode. + pub fn as_token_mode(&self) -> TokenMode { + match self { + Self::Markup | Self::Universal => TokenMode::Markup, Self::Code => TokenMode::Code, - Self::Universal => old, + } + } + + /// The mode of the children of this node. 
+ pub fn child_mode(&self) -> Self { + match self { + Self::Markup => Self::Markup, + Self::Code | Self::Universal => Self::Code, } } } From 9141cba6a9db6ae3106e39d92508cb91c390049b Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 8 Nov 2021 12:01:35 +0100 Subject: [PATCH 05/16] Deal with the effects of keywords --- src/parse/mod.rs | 43 +++++++++++++++-- src/parse/parser.rs | 6 +++ src/source.rs | 10 ++-- src/syntax/mod.rs | 111 ++++++++++++++++++++++++++++---------------- 4 files changed, 121 insertions(+), 49 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 027773505..afeb34f1d 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -29,7 +29,7 @@ pub fn parse(src: &str) -> Rc { pub fn parse_atomic(src: &str, _: bool) -> Option> { let mut p = Parser::new(src, TokenMode::Code); primary(&mut p, true).ok()?; - p.eject() + p.eject_partial() } /// Parse some markup. Returns `Some` if all of the input was consumed. @@ -49,10 +49,32 @@ pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option p.eject() } -/// Parse some code. Returns `Some` if all of the input was consumed. -pub fn parse_code(src: &str, _: bool) -> Option> { - let mut p = Parser::new(src, TokenMode::Code); - expr_list(&mut p); +/// Parse a template literal. Returns `Some` if all of the input was consumed. +pub fn parse_template(source: &str, _: bool) -> Option> { + let mut p = Parser::new(source, TokenMode::Code); + if !matches!(p.peek(), Some(NodeKind::LeftBracket)) { + return None; + } + + template(&mut p); + p.eject() +} + +/// Parse a code block. Returns `Some` if all of the input was consumed. +pub fn parse_block(source: &str, _: bool) -> Option> { + let mut p = Parser::new(source, TokenMode::Code); + if !matches!(p.peek(), Some(NodeKind::LeftBrace)) { + return None; + } + + block(&mut p); + p.eject() +} + +/// Parse a comment. Returns `Some` if all of the input was consumed. 
+pub fn parse_comment(source: &str, _: bool) -> Option> { + let mut p = Parser::new(source, TokenMode::Code); + comment(&mut p).ok()?; p.eject() } @@ -742,3 +764,14 @@ fn body(p: &mut Parser) -> ParseResult { } Ok(()) } + +/// Parse a comment. +fn comment(p: &mut Parser) -> ParseResult { + match p.peek() { + Some(NodeKind::LineComment | NodeKind::BlockComment) => { + p.eat(); + Ok(()) + } + _ => Err(()), + } +} diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 451e18f19..31c918a8f 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -95,6 +95,12 @@ impl<'s> Parser<'s> { output } + /// End the parsing process and return multiple children, even if there + /// remains stuff in the string. + pub fn eject_partial(self) -> Option> { + self.group_success().then(|| self.children) + } + /// Whether the end of the source string or group is reached. pub fn eof(&self) -> bool { self.eof diff --git a/src/source.rs b/src/source.rs index 36db50ddb..2bba86a9d 100644 --- a/src/source.rs +++ b/src/source.rs @@ -558,11 +558,15 @@ mod tests { // Test type invariants. test("#for x in array {x}", 16 .. 19, "[#x]", true); test("#let x = 1 {5}", 1 .. 4, "if", false); + test("{let x = 1 {5}}", 1 .. 4, "if", true); test("#let x = 1 {5}", 4 .. 4, " if", false); + test("{let x = 1 {5}}", 4 .. 4, " if", true); test("a // b c #f()", 3 .. 4, "", false); + test("{\nf()\n//g(a)\n}", 6 .. 8, "", true); + test("{(1, 2)}", 1 .. 1, "while ", true); // this appearantly works but the assertion fails. - // test("a b c", 1 .. 1, "{[}", true); + test("a b c", 1 .. 1, "{[}", true); // Test unclosed things. test(r#"{"hi"}"#, 4 .. 5, "c", false); @@ -571,7 +575,7 @@ mod tests { test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", false); // these appearantly works but the assertion fails. - // test(r#"a ```typst hello``` b"#, 16 .. 17, "", false); - // test(r#"a ```typst hello```"#, 16 .. 17, "", true); + test(r#"a ```typst hello``` b"#, 16 .. 
17, "", false); + test(r#"a ```typst hello```"#, 16 .. 17, "", true); } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index cb811266e..c1d7b8d33 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -16,7 +16,8 @@ use self::ast::{MathNode, RawNode, TypedNode}; use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; use crate::parse::{ - parse_atomic, parse_code, parse_markup, parse_markup_elements, TokenMode, + parse_atomic, parse_block, parse_comment, parse_markup, parse_markup_elements, + parse_template, TokenMode, }; use crate::source::SourceId; use crate::util::EcoString; @@ -95,7 +96,7 @@ impl Green { offset: usize, parent_mode: NodeMode, outermost: bool, - ) -> Result<(), bool> { + ) -> bool { match self { Green::Node(n) => Rc::make_mut(n).incremental_int( edit, @@ -105,7 +106,7 @@ impl Green { parent_mode, outermost, ), - Green::Token(_) => Err(false), + Green::Token(_) => false, } } @@ -221,7 +222,6 @@ impl GreenNode { } self.incremental_int(src, replace, replacement_len, 0, NodeMode::Markup, true) - .is_ok() } fn incremental_int( @@ -232,7 +232,7 @@ impl GreenNode { mut offset: usize, parent_mode: NodeMode, outermost: bool, - ) -> Result<(), bool> { + ) -> bool { let kind = self.kind().clone(); let mode = kind.mode().apply(parent_mode); eprintln!("in {:?} (mode {:?})", kind, mode); @@ -248,7 +248,7 @@ impl GreenNode { let old_len = child.len(); // First, we try if the child has another, more specific applicable child. 
if !kind.incremental_safety().unsafe_interior() { - match child.incremental_int( + if child.incremental_int( src, replace, replacement_len, @@ -256,14 +256,11 @@ impl GreenNode { kind.mode().child_mode(), i == last && outermost, ) { - Ok(_) => { - eprintln!("child success"); - let new_len = child.len(); - self.data.set_len(self.data.len() + new_len - old_len); - return Ok(()); - } - Err(b) if b => return Err(false), - _ => {} + eprintln!("child success"); + let new_len = child.len(); + self.data.set_len(self.data.len() + new_len - old_len); + self.erroneous = self.children.iter().any(|x| x.erroneous()); + return true; } } @@ -274,8 +271,8 @@ impl GreenNode { .reparsing_function(mode.child_mode().as_token_mode()) { Ok(p) => p, - Err(policy) => { - return Err(policy == IncrementalSafety::VeryUnsafe); + _ => { + return false; } }; loop_result = @@ -296,20 +293,33 @@ impl GreenNode { } else { // No child fully contains the replacement. eprintln!("no child match"); - return Err(false); + return false; }; eprintln!("aquired function, policy {:?}", policy); let src_span = child_span.inserted(replace, replacement_len); + let recompile_range = if policy == IncrementalSafety::AtomicPrimary { + src_span.start .. 
src.len() + } else { + src_span.to_range() + }; - let new_children = - if let Some(new_children) = func(&src[src_span.to_range()], child_at_start) { + let new_children = if let Some(new_children) = + func(&src[recompile_range], child_at_start) + { + if policy != IncrementalSafety::AtomicPrimary + || new_children.iter().map(Green::len).sum::() == src_span.len() + { new_children } else { - eprintln!("function failed"); - return Err(false); - }; + eprintln!("wrong atomic len"); + return false; + } + } else { + eprintln!("function failed"); + return false; + }; let child_mode = self.children[child_idx].kind().mode().child_mode().as_token_mode(); eprintln!("child mode {:?}", child_mode); @@ -325,7 +335,7 @@ impl GreenNode { eprintln!("must be a single replacement"); if new_children.len() != 1 { eprintln!("not a single replacement"); - return Err(false); + return false; } if match policy { @@ -337,7 +347,7 @@ impl GreenNode { } { if self.children[child_idx].kind() != new_children[0].kind() { eprintln!("not the same kind"); - return Err(false); + return false; } } } @@ -351,7 +361,7 @@ impl GreenNode { .any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _))) { eprintln!("unclosed node"); - return Err(false); + return false; } // Check if the neighbor invariants are still true. @@ -362,7 +372,7 @@ impl GreenNode { && !new_children[0].kind().is_whitespace() { eprintln!("left whitespace missing"); - return Err(false); + return false; } } @@ -379,10 +389,10 @@ impl GreenNode { match child.kind().incremental_safety() { IncrementalSafety::EnsureAtStart if !new_at_start => { - return Err(false); + return false; } IncrementalSafety::EnsureNotAtStart if new_at_start => { - return Err(false); + return false; } _ => {} } @@ -396,8 +406,9 @@ impl GreenNode { let new_len: usize = new_children.iter().map(Green::len).sum(); self.children.splice(child_idx .. 
child_idx + 1, new_children); + self.erroneous = self.children.iter().any(|x| x.erroneous()); self.data.set_len(self.data.len + new_len - old_len); - Ok(()) + true } } @@ -1008,28 +1019,41 @@ impl NodeKind { IncrementalSafety, > { let policy = self.incremental_safety(); - if policy.unsafe_interior() { + if policy.is_unsafe() { return Err(policy); } let mode = self.mode(); + let is_code = mode == NodeMode::Universal && parent_mode == TokenMode::Code + || mode == NodeMode::Code; if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer { return Err(policy); } - if mode != NodeMode::Markup - && parent_mode == TokenMode::Code - && policy == IncrementalSafety::AtomicPrimary - { + if is_code && policy == IncrementalSafety::AtomicPrimary { return Ok((parse_atomic, policy)); } + if policy == IncrementalSafety::SameKind + || (policy == IncrementalSafety::SameKindInCode && is_code) + { + let parser: fn(&str, bool) -> _ = match self { + NodeKind::Template => parse_template, + NodeKind::Block => parse_block, + NodeKind::LineComment | NodeKind::BlockComment => parse_comment, + _ => return Err(policy), + }; + + return Ok((parser, policy)); + } + let parser: fn(&str, bool) -> _ = match mode { - NodeMode::Code => parse_code, NodeMode::Markup if self == &Self::Markup => parse_markup, NodeMode::Markup => parse_markup_elements, - NodeMode::Universal if parent_mode == TokenMode::Code => parse_code, - NodeMode::Universal => parse_markup_elements, + NodeMode::Universal if parent_mode == TokenMode::Markup => { + parse_markup_elements + } + _ => return Err(policy), }; Ok((parser, policy)) @@ -1093,7 +1117,7 @@ impl NodeKind { | Self::Set | Self::Import | Self::Include - | Self::From => IncrementalSafety::VeryUnsafe, + | Self::From => IncrementalSafety::Unsafe, // This is a backslash followed by a space. But changing it to // anything else is fair game. 
@@ -1347,14 +1371,19 @@ pub enum IncrementalSafety { /// Changing an unsafe node or any of its children will trigger undefined /// behavior. Change the parents instead. Unsafe, - /// Its unsafe for two! - VeryUnsafe, } impl IncrementalSafety { pub fn unsafe_interior(&self) -> bool { match self { - Self::Unsafe | Self::VeryUnsafe => true, + Self::Unsafe => true, + _ => false, + } + } + + pub fn is_unsafe(&self) -> bool { + match self { + Self::UnsafeLayer | Self::Unsafe => true, _ => false, } } From 7a631d8b09bbffa8c7d90a1038d986876370ea7a Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 9 Nov 2021 13:07:55 +0100 Subject: [PATCH 06/16] Simplify node mode management --- src/parse/mod.rs | 2 +- src/source.rs | 16 +++--------- src/syntax/mod.rs | 66 ++++++++++++++++++----------------------------- 3 files changed, 30 insertions(+), 54 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index afeb34f1d..1f1ac2660 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -772,6 +772,6 @@ fn comment(p: &mut Parser) -> ParseResult { p.eat(); Ok(()) } - _ => Err(()), + _ => Err(ParseError), } } diff --git a/src/source.rs b/src/source.rs index 2bba86a9d..3117f5b61 100644 --- a/src/source.rs +++ b/src/source.rs @@ -286,22 +286,15 @@ impl SourceFile { .extend(newlines(&self.src[start ..]).map(|idx| start + idx)); // Update the root node. - let insertion_span = Span::new(self.id, replace.start, replace.end); - let source = self.src().to_string(); - if Rc::make_mut(&mut self.root).incremental(&source, insertion_span, with.len()) { + let span = Span::new(self.id, replace.start, replace.end); + if Rc::make_mut(&mut self.root).incremental(&self.src, span, with.len()) { self.was_incremental = true; } else { - self.root = parse(self.src()); + self.root = parse(&self.src); self.was_incremental = false; } } - /// Forces a non-incremental reparsing of the source file. 
- fn force_reparse(&mut self) { - self.root = parse(self.src()); - self.was_incremental = false; - } - /// Provide highlighting categories for the given range of the source file. pub fn highlight(&self, range: Range, mut f: F) where @@ -499,8 +492,7 @@ mod tests { if incr { assert!(source.was_incremental); let incr_tree = source.root.clone(); - source.force_reparse(); - assert_eq!(source.root, incr_tree); + assert_eq!(parse(source.src()), incr_tree); } else { assert!(!source.was_incremental); } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index c1d7b8d33..d6658fd35 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -94,7 +94,7 @@ impl Green { replace: Span, replacement_len: usize, offset: usize, - parent_mode: NodeMode, + parent_mode: TokenMode, outermost: bool, ) -> bool { match self { @@ -221,7 +221,7 @@ impl GreenNode { return false; } - self.incremental_int(src, replace, replacement_len, 0, NodeMode::Markup, true) + self.incremental_int(src, replace, replacement_len, 0, TokenMode::Markup, true) } fn incremental_int( @@ -230,11 +230,11 @@ impl GreenNode { replace: Span, replacement_len: usize, mut offset: usize, - parent_mode: NodeMode, + parent_mode: TokenMode, outermost: bool, ) -> bool { let kind = self.kind().clone(); - let mode = kind.mode().apply(parent_mode); + let mode = kind.mode().contextualize(parent_mode); eprintln!("in {:?} (mode {:?})", kind, mode); let mut loop_result = None; @@ -266,15 +266,11 @@ impl GreenNode { // This didn't work, so we try to replace the child at this // level. 
- let (function, policy) = match child - .kind() - .reparsing_function(mode.child_mode().as_token_mode()) - { - Ok(p) => p, - _ => { - return false; - } - }; + let (function, policy) = + match child.kind().reparsing_function(kind.mode().child_mode()) { + Ok(p) => p, + _ => return false, + }; loop_result = Some((i, child_span, i == last && outermost, function, policy)); break; @@ -320,8 +316,7 @@ impl GreenNode { eprintln!("function failed"); return false; }; - let child_mode = - self.children[child_idx].kind().mode().child_mode().as_token_mode(); + let child_mode = self.children[child_idx].kind().mode().child_mode(); eprintln!("child mode {:?}", child_mode); // Check if the children / child has the right type. @@ -365,7 +360,7 @@ impl GreenNode { } // Check if the neighbor invariants are still true. - if mode.as_token_mode() == TokenMode::Markup { + if mode == TokenMode::Markup { if child_idx > 0 { if self.children[child_idx - 1].kind().incremental_safety() == IncrementalSafety::EnsureRightWhitespace @@ -1023,10 +1018,10 @@ impl NodeKind { return Err(policy); } - let mode = self.mode(); - let is_code = mode == NodeMode::Universal && parent_mode == TokenMode::Code - || mode == NodeMode::Code; - if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer { + let contextualized = self.mode().contextualize(parent_mode); + let is_code = contextualized == TokenMode::Code; + + if is_code && policy == IncrementalSafety::UnsafeLayer { return Err(policy); } @@ -1047,12 +1042,9 @@ impl NodeKind { return Ok((parser, policy)); } - let parser: fn(&str, bool) -> _ = match mode { - NodeMode::Markup if self == &Self::Markup => parse_markup, - NodeMode::Markup => parse_markup_elements, - NodeMode::Universal if parent_mode == TokenMode::Markup => { - parse_markup_elements - } + let parser: fn(&str, bool) -> _ = match contextualized { + TokenMode::Markup if self == &Self::Markup => parse_markup, + TokenMode::Markup => parse_markup_elements, _ => return Err(policy), }; @@ 
-1403,28 +1395,20 @@ pub enum NodeMode { impl NodeMode { /// Returns a new mode considering the parent node. - pub fn apply(&self, old: Self) -> Self { + pub fn contextualize(&self, old: TokenMode) -> TokenMode { match self { - Self::Markup => Self::Markup, - Self::Code => Self::Code, - Self::Universal if old != Self::Markup => Self::Code, - Self::Universal => Self::Universal, - } - } - - /// Return the corresponding token mode. - pub fn as_token_mode(&self) -> TokenMode { - match self { - Self::Markup | Self::Universal => TokenMode::Markup, + Self::Markup => TokenMode::Markup, Self::Code => TokenMode::Code, + Self::Universal if old != TokenMode::Markup => TokenMode::Code, + Self::Universal => TokenMode::Markup, } } /// The mode of the children of this node. - pub fn child_mode(&self) -> Self { + pub fn child_mode(&self) -> TokenMode { match self { - Self::Markup => Self::Markup, - Self::Code | Self::Universal => Self::Code, + Self::Markup => TokenMode::Markup, + Self::Code | Self::Universal => TokenMode::Code, } } } From 91f2f97572c64d7eb25c88ad0ebb18192cf8eddf Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Tue, 9 Nov 2021 13:34:23 +0100 Subject: [PATCH 07/16] Multiple replacements, escapes --- src/source.rs | 4 +++ src/syntax/mod.rs | 79 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/src/source.rs b/src/source.rs index 3117f5b61..797e815b1 100644 --- a/src/source.rs +++ b/src/source.rs @@ -500,6 +500,8 @@ mod tests { // Test simple replacements. test("hello world", 6 .. 11, "wankers", true); + test("a d e", 1 .. 3, " b c d", true); + test("a #f() e", 1 .. 6, " b c d", false); test("{(0, 1, 2)}", 5 .. 6, "11pt", true); test("= A heading", 3 .. 3, "n evocative", true); test( @@ -546,6 +548,8 @@ mod tests { test("x = y", 2 .. 2, "+ y \n ", false); test("abc\n= a heading", 3 .. 4, "\nsome more test\n\n", true); test("abc\n= a heading", 3 .. 4, "\nnot ", false); + test("hey #myfriend", 4 .. 
4, "\\", false); + test("hey #myfriend", 4 .. 4, "\\", true); // Test type invariants. test("#for x in array {x}", 16 .. 19, "[#x]", true); diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index d6658fd35..d1ca36746 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -240,6 +240,7 @@ impl GreenNode { let mut loop_result = None; let mut child_at_start = true; let last = self.children.len() - 1; + let mut start = None; for (i, child) in self.children.iter_mut().enumerate() { let child_span = Span::new(replace.source, offset, offset + child.len()); if child_span.surrounds(replace) { @@ -271,8 +272,45 @@ impl GreenNode { Ok(p) => p, _ => return false, }; - loop_result = - Some((i, child_span, i == last && outermost, function, policy)); + loop_result = Some(( + i .. i + 1, + child_span, + i == last && outermost, + function, + policy, + )); + break; + } else if child_span.contains(replace.start) + && mode == TokenMode::Markup + && child.kind().incremental_safety().markup_safe() + { + eprintln!("found safe start"); + start = Some((i, offset)); + } else if child_span.contains(replace.end) + && mode == TokenMode::Markup + && child.kind().incremental_safety().markup_safe() + { + eprintln!("found safe end"); + if let Some((start, start_offset)) = start { + let (function, policy) = + match child.kind().reparsing_function(kind.mode().child_mode()) { + Ok(p) => p, + _ => return false, + }; + loop_result = Some(( + start .. i + 1, + Span::new(replace.source, start_offset, offset + child.len()), + i == last && outermost, + function, + policy, + )); + } + break; + } else if start.is_some() + && (mode != TokenMode::Markup + || !child.kind().incremental_safety().markup_safe()) + { + eprintln!("unsafe inbetweeen {:?}", child.kind()); break; } @@ -283,7 +321,7 @@ impl GreenNode { // We now have a child that we can replace and a function to do so if // the loop found any results at all. 
- let (child_idx, child_span, child_outermost, func, policy) = + let (child_idx_range, child_span, child_outermost, func, policy) = if let Some(loop_result) = loop_result { loop_result } else { @@ -316,7 +354,7 @@ impl GreenNode { eprintln!("function failed"); return false; }; - let child_mode = self.children[child_idx].kind().mode().child_mode(); + let child_mode = self.children[child_idx_range.start].kind().mode().child_mode(); eprintln!("child mode {:?}", child_mode); // Check if the children / child has the right type. @@ -340,7 +378,7 @@ impl GreenNode { } _ => false, } { - if self.children[child_idx].kind() != new_children[0].kind() { + if self.children[child_idx_range.start].kind() != new_children[0].kind() { eprintln!("not the same kind"); return false; } @@ -361,8 +399,8 @@ impl GreenNode { // Check if the neighbor invariants are still true. if mode == TokenMode::Markup { - if child_idx > 0 { - if self.children[child_idx - 1].kind().incremental_safety() + if child_idx_range.start > 0 { + if self.children[child_idx_range.start - 1].kind().incremental_safety() == IncrementalSafety::EnsureRightWhitespace && !new_children[0].kind().is_whitespace() { @@ -376,7 +414,7 @@ impl GreenNode { new_at_start = child.kind().is_at_start(new_at_start); } - for child in &self.children[child_idx + 1 ..] { + for child in &self.children[child_idx_range.end ..] { if child.kind().is_trivia() { new_at_start = child.kind().is_at_start(new_at_start); continue; @@ -393,14 +431,25 @@ impl GreenNode { } break; } + + if new_children.last().map(|x| x.kind().incremental_safety()) + == Some(IncrementalSafety::EnsureRightWhitespace) + && self.children.len() > child_idx_range.end + { + if !self.children[child_idx_range.end].kind().is_whitespace() { + eprintln!("right whitespace missing"); + return false; + } + } } eprintln!("... 
replacing"); - let old_len = self.children[child_idx].len(); + let old_len: usize = + self.children[child_idx_range.clone()].iter().map(Green::len).sum(); let new_len: usize = new_children.iter().map(Green::len).sum(); - self.children.splice(child_idx .. child_idx + 1, new_children); + self.children.splice(child_idx_range, new_children); self.erroneous = self.children.iter().any(|x| x.erroneous()); self.data.set_len(self.data.len + new_len - old_len); true @@ -1379,6 +1428,16 @@ impl IncrementalSafety { _ => false, } } + + pub fn markup_safe(&self) -> bool { + match self { + Self::Safe + | Self::SameKindInCode + | Self::EnsureAtStart + | Self::UnsafeLayer => true, + _ => false, + } + } } /// This enum describes which mode a token of [`NodeKind`] can appear in. From 3162c6a83a910f34d6ed7e966c11b7e7b5bd4088 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Wed, 10 Nov 2021 20:41:10 +0100 Subject: [PATCH 08/16] Comments and neighbors --- benches/oneshot.rs | 6 + src/parse/mod.rs | 15 +- src/parse/parser.rs | 9 +- src/parse/tokens.rs | 26 +++- src/source.rs | 135 ++++++++-------- src/syntax/mod.rs | 367 ++++++++++++++++++++++---------------------- 6 files changed, 303 insertions(+), 255 deletions(-) diff --git a/benches/oneshot.rs b/benches/oneshot.rs index d3e2ff8e4..c088a93c8 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -49,6 +49,11 @@ fn bench_parse(iai: &mut Iai) { iai.run(|| parse(SRC)); } +fn bench_edit(iai: &mut Iai) { + let (mut ctx, id) = context(); + iai.run(|| black_box(ctx.sources.edit(id, 1168 .. 1171, "_Uhr_"))); +} + fn bench_eval(iai: &mut Iai) { let (mut ctx, id) = context(); iai.run(|| ctx.evaluate(id).unwrap()); @@ -66,6 +71,7 @@ main!( bench_scan, bench_tokenize, bench_parse, + bench_edit, bench_eval, bench_layout ); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 1f1ac2660..f2fae5f28 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -26,14 +26,14 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. 
Returns `Some` if all of the input was consumed. -pub fn parse_atomic(src: &str, _: bool) -> Option> { +pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Code); primary(&mut p, true).ok()?; p.eject_partial() } /// Parse some markup. Returns `Some` if all of the input was consumed. -pub fn parse_markup(src: &str, _: bool) -> Option> { +pub fn parse_markup(src: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); markup(&mut p); p.eject() @@ -41,7 +41,10 @@ pub fn parse_markup(src: &str, _: bool) -> Option> { /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. -pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option> { +pub fn parse_markup_elements( + src: &str, + mut at_start: bool, +) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); while !p.eof() { markup_node(&mut p, &mut at_start); @@ -50,7 +53,7 @@ pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option } /// Parse a template literal. Returns `Some` if all of the input was consumed. -pub fn parse_template(source: &str, _: bool) -> Option> { +pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); if !matches!(p.peek(), Some(NodeKind::LeftBracket)) { return None; @@ -61,7 +64,7 @@ pub fn parse_template(source: &str, _: bool) -> Option> { } /// Parse a code block. Returns `Some` if all of the input was consumed. -pub fn parse_block(source: &str, _: bool) -> Option> { +pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); if !matches!(p.peek(), Some(NodeKind::LeftBrace)) { return None; @@ -72,7 +75,7 @@ pub fn parse_block(source: &str, _: bool) -> Option> { } /// Parse a comment. Returns `Some` if all of the input was consumed. 
-pub fn parse_comment(source: &str, _: bool) -> Option> { +pub fn parse_comment(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); comment(&mut p).ok()?; p.eject() diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 31c918a8f..a37cb9c62 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -48,9 +48,9 @@ impl<'s> Parser<'s> { } /// End the parsing process and return multiple children. - pub fn eject(self) -> Option> { + pub fn eject(self) -> Option<(Vec, bool)>{ if self.eof() && self.group_success() { - Some(self.children) + Some((self.children, self.tokens.was_unterminated())) } else { None } @@ -97,8 +97,9 @@ impl<'s> Parser<'s> { /// End the parsing process and return multiple children, even if there /// remains stuff in the string. - pub fn eject_partial(self) -> Option> { - self.group_success().then(|| self.children) + pub fn eject_partial(self) -> Option<(Vec, bool)> { + self.group_success() + .then(|| (self.children, self.tokens.was_unterminated())) } /// Whether the end of the source string or group is reached. diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 27ec046df..7be31fe18 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -13,6 +13,7 @@ use crate::util::EcoString; pub struct Tokens<'s> { s: Scanner<'s>, mode: TokenMode, + has_unterminated: bool, } /// What kind of tokens to emit. @@ -28,7 +29,11 @@ impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] pub fn new(src: &'s str, mode: TokenMode) -> Self { - Self { s: Scanner::new(src), mode } + Self { + s: Scanner::new(src), + mode, + has_unterminated: false, + } } /// Get the current token mode. @@ -63,6 +68,12 @@ impl<'s> Tokens<'s> { pub fn scanner(&self) -> Scanner<'s> { self.s } + + /// Whether the last token was unterminated. 
+ #[inline] + pub fn was_unterminated(&self) -> bool { + self.has_unterminated + } } impl<'s> Iterator for Tokens<'s> { @@ -248,6 +259,7 @@ impl<'s> Tokens<'s> { ) } } else { + self.has_unterminated = true; NodeKind::Error( ErrorPos::End, "expected closing brace".into(), @@ -346,6 +358,7 @@ impl<'s> Tokens<'s> { let remaining = backticks - found; let noun = if remaining == 1 { "backtick" } else { "backticks" }; + self.has_unterminated = true; NodeKind::Error( ErrorPos::End, if found == 0 { @@ -393,6 +406,7 @@ impl<'s> Tokens<'s> { display, })) } else { + self.has_unterminated = true; NodeKind::Error( ErrorPos::End, if !display || (!escaped && dollar) { @@ -481,18 +495,23 @@ impl<'s> Tokens<'s> { if self.s.eat_if('"') { NodeKind::Str(string) } else { + self.has_unterminated = true; NodeKind::Error(ErrorPos::End, "expected quote".into()) } } fn line_comment(&mut self) -> NodeKind { self.s.eat_until(is_newline); + if self.s.peek().is_none() { + self.has_unterminated = true; + } NodeKind::LineComment } fn block_comment(&mut self) -> NodeKind { let mut state = '_'; let mut depth = 1; + let mut terminated = false; // Find the first `*/` that does not correspond to a nested `/*`. while let Some(c) = self.s.eat() { @@ -500,6 +519,7 @@ impl<'s> Tokens<'s> { ('*', '/') => { depth -= 1; if depth == 0 { + terminated = true; break; } '_' @@ -512,6 +532,10 @@ impl<'s> Tokens<'s> { } } + if !terminated { + self.has_unterminated = true; + } + NodeKind::BlockComment } diff --git a/src/source.rs b/src/source.rs index 797e815b1..7eb1d3a7f 100644 --- a/src/source.rs +++ b/src/source.rs @@ -128,7 +128,6 @@ pub struct SourceFile { src: String, line_starts: Vec, root: Rc, - was_incremental: bool, } impl SourceFile { @@ -142,7 +141,6 @@ impl SourceFile { root: parse(&src), src, line_starts, - was_incremental: false, } } @@ -268,7 +266,7 @@ impl SourceFile { /// Edit the source file by replacing the given range. /// /// This panics if the `replace` range is out of bounds. 
- pub fn edit(&mut self, replace: Range, with: &str) { + pub fn edit(&mut self, replace: Range, with: &str) -> Range { let start = replace.start; self.src.replace_range(replace.clone(), with); @@ -287,11 +285,13 @@ impl SourceFile { // Update the root node. let span = Span::new(self.id, replace.start, replace.end); - if Rc::make_mut(&mut self.root).incremental(&self.src, span, with.len()) { - self.was_incremental = true; + if let Ok(range) = + Rc::make_mut(&mut self.root).incremental(&self.src, span, with.len()) + { + range } else { self.root = parse(&self.src); - self.was_incremental = false; + 0 .. self.src.len() } } @@ -485,93 +485,108 @@ mod tests { #[test] fn test_incremental_parse() { #[track_caller] - fn test(prev: &str, range: Range, with: &str, incr: bool) { + fn test(prev: &str, range: Range, with: &str, incr: Range) { let mut source = SourceFile::detached(prev); - source.edit(range, with); + let range = source.edit(range, with); + assert_eq!(range, incr); - if incr { - assert!(source.was_incremental); - let incr_tree = source.root.clone(); - assert_eq!(parse(source.src()), incr_tree); - } else { - assert!(!source.was_incremental); - } + let incr_tree = source.root.clone(); + assert_eq!(parse(source.src()), incr_tree); } // Test simple replacements. - test("hello world", 6 .. 11, "wankers", true); - test("a d e", 1 .. 3, " b c d", true); - test("a #f() e", 1 .. 6, " b c d", false); - test("{(0, 1, 2)}", 5 .. 6, "11pt", true); - test("= A heading", 3 .. 3, "n evocative", true); + test("hello world", 6 .. 11, "wankers", 5 .. 13); + test("a d e", 1 .. 3, " b c d", 0 .. 8); + test("a #f() e", 1 .. 6, " b c d", 0 .. 8); + test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); + test("= A heading", 3 .. 3, "n evocative", 2 .. 15); + test("your thing", 5 .. 5, "a", 4 .. 11); + test("a your thing a", 6 .. 7, "a", 2 .. 12); test( "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", - true, + 16 .. 
20, ); test( "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", - true, + 33 .. 42, ); test( "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", - true, + 34 .. 39, ); - test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", true); - test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", true); - test("hello {x}", 6 .. 9, "#f()", false); + test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 13 .. 15); + test("hello {x}", 6 .. 9, "#f()", 5 .. 10); test( "this is -- in my opinion -- spectacular", 8 .. 10, "---", - true, + 7 .. 12, ); - test("understanding `code` is complicated", 15 .. 15, "C ", true); - test("{ let x = g() }", 10 .. 12, "f(54", true); test( - "#let rect with (fill: eastern)", - 14 .. 29, - " (stroke: conifer", - true, + "understanding `code` is complicated", + 15 .. 15, + "C ", + 14 .. 22, + ); + test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15); + test( + "a #let rect with (fill: eastern)\nb", + 16 .. 31, + " (stroke: conifer", + 2 .. 34, ); - test("a b c", 1 .. 1, " /* letters */", false); // Test the whitespace invariants. - test("hello \\ world", 7 .. 8, "a ", false); - test("hello \\ world", 7 .. 8, "\n\n", true); - test("x = y", 2 .. 2, "+ y ", true); - test("x = y", 2 .. 2, "+ y \n ", false); - test("abc\n= a heading", 3 .. 4, "\nsome more test\n\n", true); - test("abc\n= a heading", 3 .. 4, "\nnot ", false); - test("hey #myfriend", 4 .. 4, "\\", false); - test("hey #myfriend", 4 .. 4, "\\", true); + test("hello \\ world", 7 .. 8, "a ", 6 .. 14); + test("hello \\ world", 7 .. 8, " a", 6 .. 14); + test("x = y", 1 .. 1, " + y", 0 .. 6); + test("x = y", 1 .. 1, " + y\n", 0 .. 10); + test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); + test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19); + test("hey #myfriend", 4 .. 
4, "\\", 0 .. 14); + test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); // Test type invariants. - test("#for x in array {x}", 16 .. 19, "[#x]", true); - test("#let x = 1 {5}", 1 .. 4, "if", false); - test("{let x = 1 {5}}", 1 .. 4, "if", true); - test("#let x = 1 {5}", 4 .. 4, " if", false); - test("{let x = 1 {5}}", 4 .. 4, " if", true); - test("a // b c #f()", 3 .. 4, "", false); - test("{\nf()\n//g(a)\n}", 6 .. 8, "", true); - test("{(1, 2)}", 1 .. 1, "while ", true); - - // this appearantly works but the assertion fails. - test("a b c", 1 .. 1, "{[}", true); + test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22); + test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15); + test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16); + test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 17); + test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18); + test("a // b c #f()", 3 .. 4, "", 0 .. 12); + test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12); + test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); + test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26); + test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14); + test("a b c", 1 .. 1, "{[}", 0 .. 5); // Test unclosed things. - test(r#"{"hi"}"#, 4 .. 5, "c", false); - test(r"this \u{abcd}", 8 .. 9, "", true); - test(r"this \u{abcd} that", 12 .. 13, "", false); - test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", false); + test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6); + test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12); + test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17); + test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); + test("a b c", 1 .. 1, " /* letters */", 0 .. 16); + test("a b c", 1 .. 1, " /* letters", 0 .. 16); + test( + "{if i==1 {a} else [b]; b()}", + 12 .. 12, + " /* letters */", + 1 .. 35, + ); + test( + "{if i==1 {a} else [b]; b()}", + 12 .. 12, + " /* letters", + 0 .. 38, + ); - // these appearantly works but the assertion fails. - test(r#"a ```typst hello``` b"#, 16 .. 17, "", false); - test(r#"a ```typst hello```"#, 16 .. 
17, "", true); + test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); + test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index d1ca36746..cfb443761 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -6,6 +6,7 @@ mod pretty; mod span; use std::fmt::{self, Debug, Display, Formatter}; +use std::ops::Range; use std::rc::Rc; pub use highlight::*; @@ -88,7 +89,7 @@ impl Green { } /// Find the innermost child that is incremental safe. - fn incremental_int( + fn incremental( &mut self, edit: &str, replace: Span, @@ -96,7 +97,7 @@ impl Green { offset: usize, parent_mode: TokenMode, outermost: bool, - ) -> bool { + ) -> Result, ()> { match self { Green::Node(n) => Rc::make_mut(n).incremental_int( edit, @@ -106,21 +107,7 @@ impl Green { parent_mode, outermost, ), - Green::Token(_) => false, - } - } - - /// The error messages for this node and its descendants. - pub fn errors(&self) -> Vec { - match self { - Green::Node(n) => n.errors(), - Green::Token(t) => { - if t.kind().is_error() { - vec![t.kind().clone()] - } else { - vec![] - } - } + Green::Token(_) => Err(()), } } } @@ -198,15 +185,23 @@ impl GreenNode { self.data().len() } - /// The error messages for this node and its descendants. 
- pub fn errors(&self) -> Vec { - let mut res = self.children.iter().flat_map(|c| c.errors()).collect::>(); + pub fn replace_child_range( + &mut self, + child_idx_range: Range, + replacement: Vec, + ) { + let old_len: usize = + self.children[child_idx_range.clone()].iter().map(Green::len).sum(); + let new_len: usize = replacement.iter().map(Green::len).sum(); - if self.kind().is_error() { - res.push(self.kind().clone()); - } + self.children.splice(child_idx_range, replacement); + self.erroneous = self.children.iter().any(|x| x.erroneous()); + self.data.set_len(self.data.len + new_len - old_len); + } - res + pub fn update_child_len(&mut self, new_len: usize, old_len: usize) { + self.data.len = self.data.len() + new_len - old_len; + self.erroneous = self.children.iter().any(|x| x.erroneous()); } /// Find the innermost child that is incremental safe. @@ -215,12 +210,7 @@ impl GreenNode { src: &str, replace: Span, replacement_len: usize, - ) -> bool { - let edit = &src[replace.inserted(replace, replacement_len).to_range()]; - if edit.contains("//") || edit.contains("/*") || edit.contains("*/") { - return false; - } - + ) -> Result, ()> { self.incremental_int(src, replace, replacement_len, 0, TokenMode::Markup, true) } @@ -232,10 +222,9 @@ impl GreenNode { mut offset: usize, parent_mode: TokenMode, outermost: bool, - ) -> bool { + ) -> Result, ()> { let kind = self.kind().clone(); let mode = kind.mode().contextualize(parent_mode); - eprintln!("in {:?} (mode {:?})", kind, mode); let mut loop_result = None; let mut child_at_start = true; @@ -243,13 +232,16 @@ impl GreenNode { let mut start = None; for (i, child) in self.children.iter_mut().enumerate() { let child_span = Span::new(replace.source, offset, offset + child.len()); - if child_span.surrounds(replace) { - eprintln!("found correct child"); - + if child_span.surrounds(replace) + && start.is_none() + && ((replace.start != child_span.end && replace.end != child_span.start) + || mode == TokenMode::Code + || i == last) 
+ { let old_len = child.len(); // First, we try if the child has another, more specific applicable child. if !kind.incremental_safety().unsafe_interior() { - if child.incremental_int( + if let Ok(range) = child.incremental( src, replace, replacement_len, @@ -257,21 +249,17 @@ impl GreenNode { kind.mode().child_mode(), i == last && outermost, ) { - eprintln!("child success"); let new_len = child.len(); - self.data.set_len(self.data.len() + new_len - old_len); - self.erroneous = self.children.iter().any(|x| x.erroneous()); - return true; + self.update_child_len(new_len, old_len); + return Ok(range); } } // This didn't work, so we try to replace the child at this // level. let (function, policy) = - match child.kind().reparsing_function(kind.mode().child_mode()) { - Ok(p) => p, - _ => return false, - }; + child.kind().reparsing_function(kind.mode().child_mode()); + let function = function?; loop_result = Some(( i .. i + 1, child_span, @@ -280,23 +268,21 @@ impl GreenNode { policy, )); break; - } else if child_span.contains(replace.start) + } else if start.is_none() + && child_span.contains(replace.start) && mode == TokenMode::Markup && child.kind().incremental_safety().markup_safe() { - eprintln!("found safe start"); start = Some((i, offset)); } else if child_span.contains(replace.end) + && (replace.end != child_span.end || i == last) && mode == TokenMode::Markup && child.kind().incremental_safety().markup_safe() { - eprintln!("found safe end"); if let Some((start, start_offset)) = start { let (function, policy) = - match child.kind().reparsing_function(kind.mode().child_mode()) { - Ok(p) => p, - _ => return false, - }; + child.kind().reparsing_function(kind.mode().child_mode()); + let function = function?; loop_result = Some(( start .. 
i + 1, Span::new(replace.source, start_offset, offset + child.len()), @@ -310,7 +296,6 @@ impl GreenNode { && (mode != TokenMode::Markup || !child.kind().incremental_safety().markup_safe()) { - eprintln!("unsafe inbetweeen {:?}", child.kind()); break; } @@ -322,15 +307,7 @@ impl GreenNode { // We now have a child that we can replace and a function to do so if // the loop found any results at all. let (child_idx_range, child_span, child_outermost, func, policy) = - if let Some(loop_result) = loop_result { - loop_result - } else { - // No child fully contains the replacement. - eprintln!("no child match"); - return false; - }; - - eprintln!("aquired function, policy {:?}", policy); + loop_result.ok_or(())?; let src_span = child_span.inserted(replace, replacement_len); let recompile_range = if policy == IncrementalSafety::AtomicPrimary { @@ -339,123 +316,139 @@ impl GreenNode { src_span.to_range() }; - let new_children = if let Some(new_children) = - func(&src[recompile_range], child_at_start) - { - if policy != IncrementalSafety::AtomicPrimary - || new_children.iter().map(Green::len).sum::() == src_span.len() - { - new_children - } else { - eprintln!("wrong atomic len"); - return false; - } - } else { - eprintln!("function failed"); - return false; - }; - let child_mode = self.children[child_idx_range.start].kind().mode().child_mode(); - eprintln!("child mode {:?}", child_mode); + let (mut new_children, unterminated) = + func(&src[recompile_range], child_at_start).ok_or(())?; - // Check if the children / child has the right type. 
- let require_single = match policy { - IncrementalSafety::AtomicPrimary | IncrementalSafety::SameKind => true, - IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => true, - _ => false, - }; + let insertion = match check_invariants( + &new_children, + self.children(), + unterminated, + child_idx_range.clone(), + child_outermost, + child_at_start, + mode, + src_span, + policy, + ) { + InvariantResult::Ok => Ok(new_children), + InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), + InvariantResult::Error => Err(()), + }?; - if require_single { - eprintln!("must be a single replacement"); - if new_children.len() != 1 { - eprintln!("not a single replacement"); - return false; - } + self.replace_child_range(child_idx_range, insertion); - if match policy { - IncrementalSafety::SameKind => true, - IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => { - true - } - _ => false, - } { - if self.children[child_idx_range.start].kind() != new_children[0].kind() { - eprintln!("not the same kind"); - return false; - } - } - } - - // Do not accept unclosed nodes if the old node did not use to be at the - // right edge of the tree. - if !child_outermost - && new_children - .iter() - .flat_map(|x| x.errors()) - .any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _))) - { - eprintln!("unclosed node"); - return false; - } - - // Check if the neighbor invariants are still true. - if mode == TokenMode::Markup { - if child_idx_range.start > 0 { - if self.children[child_idx_range.start - 1].kind().incremental_safety() - == IncrementalSafety::EnsureRightWhitespace - && !new_children[0].kind().is_whitespace() - { - eprintln!("left whitespace missing"); - return false; - } - } - - let mut new_at_start = child_at_start; - for child in &new_children { - new_at_start = child.kind().is_at_start(new_at_start); - } - - for child in &self.children[child_idx_range.end ..] 
{ - if child.kind().is_trivia() { - new_at_start = child.kind().is_at_start(new_at_start); - continue; - } - - match child.kind().incremental_safety() { - IncrementalSafety::EnsureAtStart if !new_at_start => { - return false; - } - IncrementalSafety::EnsureNotAtStart if new_at_start => { - return false; - } - _ => {} - } - break; - } - - if new_children.last().map(|x| x.kind().incremental_safety()) - == Some(IncrementalSafety::EnsureRightWhitespace) - && self.children.len() > child_idx_range.end - { - if !self.children[child_idx_range.end].kind().is_whitespace() { - eprintln!("right whitespace missing"); - return false; - } - } - } - - eprintln!("... replacing"); - - let old_len: usize = - self.children[child_idx_range.clone()].iter().map(Green::len).sum(); - let new_len: usize = new_children.iter().map(Green::len).sum(); - - self.children.splice(child_idx_range, new_children); - self.erroneous = self.children.iter().any(|x| x.erroneous()); - self.data.set_len(self.data.len + new_len - old_len); - true + Ok(src_span.to_range()) } } +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum InvariantResult { + Ok, + UseFirst, + Error, +} + +fn check_invariants( + use_children: &[Green], + old_children: &[Green], + unterminated: bool, + child_idx_range: Range, + outermost: bool, + child_at_start: bool, + mode: TokenMode, + src_span: Span, + policy: IncrementalSafety, +) -> InvariantResult { + let (new_children, ok) = if policy == IncrementalSafety::AtomicPrimary { + if use_children.iter().map(Green::len).sum::() == src_span.len() { + (use_children, InvariantResult::Ok) + } else if use_children[0].len() == src_span.len() { + (&use_children[0 .. 1], InvariantResult::UseFirst) + } else { + return InvariantResult::Error; + } + } else { + (use_children, InvariantResult::Ok) + }; + + let child_mode = old_children[child_idx_range.start].kind().mode().child_mode(); + + // Check if the children / child has the right type. 
+ let require_single = match policy { + IncrementalSafety::AtomicPrimary | IncrementalSafety::SameKind => true, + IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => true, + _ => false, + }; + + if require_single { + if new_children.len() != 1 { + return InvariantResult::Error; + } + + if match policy { + IncrementalSafety::SameKind => true, + IncrementalSafety::SameKindInCode => child_mode == TokenMode::Code, + _ => false, + } { + if old_children[child_idx_range.start].kind() != new_children[0].kind() { + return InvariantResult::Error; + } + } + } + + // Do not accept unclosed nodes if the old node did not use to be at the + // right edge of the tree. + if !outermost && unterminated { + return InvariantResult::Error; + } + + // Check if the neighbor invariants are still true. + if mode == TokenMode::Markup { + if child_idx_range.start > 0 { + if old_children[child_idx_range.start - 1].kind().incremental_safety() + == IncrementalSafety::EnsureRightWhitespace + && !new_children[0].kind().is_whitespace() + { + return InvariantResult::Error; + } + } + + let mut new_at_start = child_at_start; + for child in new_children { + new_at_start = child.kind().is_at_start(new_at_start); + } + + for child in &old_children[child_idx_range.end ..] 
{ + if child.kind().is_trivia() { + new_at_start = child.kind().is_at_start(new_at_start); + continue; + } + + match child.kind().incremental_safety() { + IncrementalSafety::EnsureAtStart if !new_at_start => { + return InvariantResult::Error; + } + IncrementalSafety::EnsureNotAtStart if new_at_start => { + return InvariantResult::Error; + } + _ => {} + } + break; + } + + if new_children.last().map(|x| x.kind().incremental_safety()) + == Some(IncrementalSafety::EnsureRightWhitespace) + && old_children.len() > child_idx_range.end + { + if !old_children[child_idx_range.end].kind().is_whitespace() { + return InvariantResult::Error; + } + } + } + + ok +} + impl From for Green { fn from(node: GreenNode) -> Self { Rc::new(node).into() @@ -1025,6 +1018,7 @@ impl NodeKind { match self { Self::Markup | Self::Space(_) + | Self::Linebreak | Self::Parbreak | Self::Text(_) | Self::TextInLine(_) @@ -1034,6 +1028,10 @@ impl NodeKind { | Self::Escape(_) | Self::Strong | Self::Emph + | Self::Heading + | Self::Enum + | Self::EnumNumbering(_) + | Self::List | Self::Raw(_) | Self::Math(_) => NodeMode::Markup, Self::Template @@ -1058,24 +1056,24 @@ impl NodeKind { pub fn reparsing_function( &self, parent_mode: TokenMode, - ) -> Result< - (fn(&str, bool) -> Option>, IncrementalSafety), + ) -> ( + Result Option<(Vec, bool)>, ()>, IncrementalSafety, - > { + ) { let policy = self.incremental_safety(); if policy.is_unsafe() { - return Err(policy); + return (Err(()), policy); } let contextualized = self.mode().contextualize(parent_mode); let is_code = contextualized == TokenMode::Code; if is_code && policy == IncrementalSafety::UnsafeLayer { - return Err(policy); + return (Err(()), policy); } if is_code && policy == IncrementalSafety::AtomicPrimary { - return Ok((parse_atomic, policy)); + return (Ok(parse_atomic), policy); } if policy == IncrementalSafety::SameKind @@ -1085,19 +1083,19 @@ impl NodeKind { NodeKind::Template => parse_template, NodeKind::Block => parse_block, 
NodeKind::LineComment | NodeKind::BlockComment => parse_comment, - _ => return Err(policy), + _ => return (Err(()), policy), }; - return Ok((parser, policy)); + return (Ok(parser), policy); } let parser: fn(&str, bool) -> _ = match contextualized { TokenMode::Markup if self == &Self::Markup => parse_markup, TokenMode::Markup => parse_markup_elements, - _ => return Err(policy), + _ => return (Err(()), policy), }; - Ok((parser, policy)) + (Ok(parser), policy) } /// Whether it is safe to do incremental parsing on this node. Never allow @@ -1434,6 +1432,8 @@ impl IncrementalSafety { Self::Safe | Self::SameKindInCode | Self::EnsureAtStart + | Self::EnsureNotAtStart + | Self::EnsureRightWhitespace | Self::UnsafeLayer => true, _ => false, } @@ -1458,8 +1458,7 @@ impl NodeMode { match self { Self::Markup => TokenMode::Markup, Self::Code => TokenMode::Code, - Self::Universal if old != TokenMode::Markup => TokenMode::Code, - Self::Universal => TokenMode::Markup, + Self::Universal => old, } } From fdb9d0743d73c278136b9254286fdc4be71c42a5 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Thu, 18 Nov 2021 16:21:45 +0100 Subject: [PATCH 09/16] Refactoring and bugfixes --- Cargo.lock | 420 +++++++++++++++++++++++++++- Cargo.toml | 17 +- benches/timed.rs | 98 +++++++ src/parse/mod.rs | 34 ++- src/parse/parser.rs | 2 +- src/source.rs | 17 +- src/syntax/incremental.rs | 515 ++++++++++++++++++++++++++++++++++ src/syntax/mod.rs | 563 ++------------------------------------ 8 files changed, 1101 insertions(+), 565 deletions(-) create mode 100644 benches/timed.rs create mode 100644 src/syntax/incremental.rs diff --git a/Cargo.lock b/Cargo.lock index df3bf74a3..98cb4d81b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,6 +38,17 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +[[package]] +name = "atty" +version = "0.2.14" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.0.1" @@ -56,6 +67,24 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1e260c3a9040a7c19a12468758f4c16f31a81a1fe087482be9570ec864bb6c" + [[package]] name = "bytemuck" version = "1.7.3" @@ -68,12 +97,32 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "cast" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" +dependencies = [ + "rustc_version", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + [[package]] name = "codespan-reporting" version = "0.11.1" @@ -99,6 +148,108 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.3.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" +dependencies = [ + "cfg-if", + "lazy_static", +] + +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + 
"itoa 0.4.8", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + [[package]] name = "data-url" version = "0.1.1" @@ -276,6 +427,21 @@ dependencies = [ "wasi", ] +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "iai" version = "0.1.1" @@ -315,12 +481,27 @@ version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" +[[package]] +name = "itoa" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" + [[package]] name = "jpeg-decoder" version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "229d53d58899083193af11e15917b5640cd40b29ff475a1fe4ef725deb02d0f2" +[[package]] +name = "js-sys" +version = "0.3.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "kurbo" version = "0.8.3" @@ -330,6 +511,12 @@ dependencies = [ "arrayvec 0.7.2", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = "0.2.112" @@ 
-351,6 +538,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + [[package]] name = "memmap2" version = "0.5.0" @@ -360,6 +553,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + [[package]] name = "miniz_oxide" version = "0.3.7" @@ -420,12 +622,28 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "pdf-writer" version = "0.4.1" @@ -433,7 +651,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36d760a6f2ac90811cba1006a298e8a7e5ce2c922bb5dc7f7000911a4a6b60f4" dependencies = [ "bitflags", - "itoa", + "itoa 0.4.8", "ryu", ] @@ -443,6 +661,34 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468" +[[package]] +name = "plotters" +version = "0.3.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c" + +[[package]] +name = "plotters-svg" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9" +dependencies = [ + "plotters-backend", +] + [[package]] name = "png" version = "0.16.8" @@ -532,6 +778,31 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rayon" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + [[package]] name = "rctree" version = "0.4.0" @@ -557,6 +828,27 @@ dependencies = [ "redox_syscall", ] +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + [[package]] name = "resvg" version = "0.19.0" @@ -590,6 +882,15 @@ dependencies = [ "xmlparser", ] +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + [[package]] name = "rustybuzz" version = "0.4.0" @@ -630,6 +931,18 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "semver" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" + [[package]] name = "serde" version = "1.0.132" @@ -639,6 +952,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.132" @@ -650,6 +973,17 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee2bb9cd061c5865d345bb02ca49fcef1391741b672b54a0bf7b679badec3142" +dependencies = [ + "itoa 1.0.1", + "ryu", + "serde", +] + [[package]] name = "simplecss" version = "0.2.1" @@ -712,6 +1046,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + 
"unicode-width", +] + [[package]] name = "thiserror" version = "1.0.30" @@ -746,6 +1089,16 @@ dependencies = [ "safe_arch", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "ttf-parser" version = "0.12.3" @@ -758,6 +1111,7 @@ version = "0.1.0" dependencies = [ "anyhow", "codespan-reporting", + "criterion", "dirs", "filedescriptor", "fxhash", @@ -892,6 +1246,70 @@ version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +[[package]] +name = "wasm-bindgen" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = 
"wasm-bindgen-shared" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" + +[[package]] +name = "web-sys" +version = "0.3.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 0bf68d74f..5c4dddcb7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,14 +20,21 @@ opt-level = 2 [dependencies] fxhash = "0.2" -image = { version = "0.23", default-features = false, features = ["png", "jpeg"] } +image = { version = "0.23", default-features = false, features = [ + "png", + "jpeg", +] } itertools = "0.10" miniz_oxide = "0.4" once_cell = "1" pdf-writer = "0.4" rustybuzz = "0.4" serde = { version = "1", features = ["derive", "rc"] } -svg2pdf = { version = "0.1", default-features = false, features = ["text", "png", "jpeg"] } +svg2pdf = { version = "0.1", default-features = false, features = [ + "text", + "png", + "jpeg", +] } ttf-parser = "0.12" typst-macros = { path = "./macros" } unicode-bidi = "0.3.5" @@ -54,6 +61,7 @@ walkdir = "2" # Dependencies updates: # - Bump ttf-parser when rustybuzz is updated # - Bump usvg and resvg in conjunction with svg2pdf +criterion = "0.3" [[bin]] name = "typst" @@ -68,3 +76,8 @@ harness = false name = "oneshot" path = "benches/oneshot.rs" harness = false + +[[bench]] +name = "timed" +path = "benches/timed.rs" +harness = false diff --git a/benches/timed.rs b/benches/timed.rs new file mode 100644 index 000000000..83820af2f --- /dev/null +++ b/benches/timed.rs @@ -0,0 +1,98 @@ +use std::path::Path; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use typst::eval::eval; +use typst::layout::layout; +use typst::loading::MemLoader; +use typst::parse::{parse, Scanner, TokenMode, 
Tokens}; +use typst::source::SourceId; +use typst::Context; + +const SRC: &str = include_str!("bench.typ"); +const FONT: &[u8] = include_bytes!("../fonts/IBMPlexSans-Regular.ttf"); + +fn context() -> (Context, SourceId) { + let loader = MemLoader::new().with(Path::new("font.ttf"), FONT).wrap(); + let mut ctx = Context::new(loader); + let id = ctx.sources.provide(Path::new("src.typ"), SRC.to_string()); + (ctx, id) +} + +fn bench_decode(c: &mut Criterion) { + c.bench_function("decode", |b| { + b.iter(|| { + // We don't use chars().count() because that has a special + // superfast implementation. + let mut count = 0; + let mut chars = black_box(SRC).chars(); + while let Some(_) = chars.next() { + count += 1; + } + count + }) + }); +} + +fn bench_scan(c: &mut Criterion) { + c.bench_function("scan", |b| { + b.iter(|| { + let mut count = 0; + let mut scanner = Scanner::new(black_box(SRC)); + while let Some(_) = scanner.eat() { + count += 1; + } + count + }) + }); +} + +fn bench_tokenize(c: &mut Criterion) { + c.bench_function("tokenize", |b| { + b.iter(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup)).count()) + }); +} + +fn bench_parse(c: &mut Criterion) { + c.bench_function("parse", |b| b.iter(|| parse(SRC))); +} + +fn bench_edit(c: &mut Criterion) { + let (mut ctx, id) = context(); + c.bench_function("edit", |b| { + b.iter(|| black_box(ctx.sources.edit(id, 1168 .. 
1171, "_Uhr_"))) + }); +} + +fn bench_eval(c: &mut Criterion) { + let (mut ctx, id) = context(); + let ast = ctx.sources.get(id).ast().unwrap(); + c.bench_function("eval", |b| b.iter(|| eval(&mut ctx, id, &ast).unwrap())); +} + +fn bench_to_tree(c: &mut Criterion) { + let (mut ctx, id) = context(); + let module = ctx.evaluate(id).unwrap(); + c.bench_function("to_tree", |b| { + b.iter(|| module.template.to_pages(ctx.style())) + }); +} + +fn bench_layout(c: &mut Criterion) { + let (mut ctx, id) = context(); + let tree = ctx.execute(id).unwrap(); + c.bench_function("layout", |b| b.iter(|| layout(&mut ctx, &tree))); +} + +criterion_group!( + benches, + bench_decode, + bench_scan, + bench_tokenize, + bench_parse, + bench_edit, + bench_eval, + bench_to_tree, + bench_layout +); +criterion_main!(benches); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index f2fae5f28..f1f1e8b6c 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -32,6 +32,13 @@ pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec, bool)> { p.eject_partial() } +/// Parse an atomic primary. Returns `Some` if all of the input was consumed. +pub fn parse_atomic_markup(src: &str, _: bool) -> Option<(Vec, bool)> { + let mut p = Parser::new(src, TokenMode::Markup); + markup_expr(&mut p); + p.eject_partial() +} + /// Parse some markup. Returns `Some` if all of the input was consumed. 
pub fn parse_markup(src: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); @@ -171,17 +178,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::While | NodeKind::For | NodeKind::Import - | NodeKind::Include => { - let stmt = matches!(token, NodeKind::Let | NodeKind::Set | NodeKind::Import); - let group = if stmt { Group::Stmt } else { Group::Expr }; - - p.start_group(group); - let res = expr_prec(p, true, 0); - if stmt && res.is_ok() && !p.eof() { - p.expected_at("semicolon or line break"); - } - p.end_group(); - } + | NodeKind::Include => markup_expr(p), // Block and template. NodeKind::LeftBrace => block(p), @@ -222,6 +219,21 @@ fn enum_node(p: &mut Parser) { }); } +/// Parse an expression within markup mode. +fn markup_expr(p: &mut Parser) { + if let Some(token) = p.peek() { + let stmt = matches!(token, NodeKind::Let | NodeKind::Set | NodeKind::Import); + let group = if stmt { Group::Stmt } else { Group::Expr }; + + p.start_group(group); + let res = expr_prec(p, true, 0); + if stmt && res.is_ok() && !p.eof() { + p.expected_at("semicolon or line break"); + } + p.end_group(); + } +} + /// Parse an expression. fn expr(p: &mut Parser) -> ParseResult { expr_prec(p, false, 0) diff --git a/src/parse/parser.rs b/src/parse/parser.rs index a37cb9c62..06cb15785 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -48,7 +48,7 @@ impl<'s> Parser<'s> { } /// End the parsing process and return multiple children. 
- pub fn eject(self) -> Option<(Vec, bool)>{ + pub fn eject(self) -> Option<(Vec, bool)> { if self.eof() && self.group_success() { Some((self.children, self.tokens.was_unterminated())) } else { diff --git a/src/source.rs b/src/source.rs index 7eb1d3a7f..aaf009e0b 100644 --- a/src/source.rs +++ b/src/source.rs @@ -12,7 +12,7 @@ use crate::diag::TypResult; use crate::loading::{FileHash, Loader}; use crate::parse::{is_newline, parse, Scanner}; use crate::syntax::ast::Markup; -use crate::syntax::{self, Category, GreenNode, RedNode, Span}; +use crate::syntax::{self, Category, GreenNode, RedNode, Reparser, Span}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] @@ -285,9 +285,8 @@ impl SourceFile { // Update the root node. let span = Span::new(self.id, replace.start, replace.end); - if let Ok(range) = - Rc::make_mut(&mut self.root).incremental(&self.src, span, with.len()) - { + let reparser = Reparser::new(&self.src, span, with.len()); + if let Ok(range) = reparser.incremental(Rc::make_mut(&mut self.root)) { range } else { self.root = parse(&self.src); @@ -502,6 +501,14 @@ mod tests { test("= A heading", 3 .. 3, "n evocative", 2 .. 15); test("your thing", 5 .. 5, "a", 4 .. 11); test("a your thing a", 6 .. 7, "a", 2 .. 12); + test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); + test("#call() abc", 7 .. 7, "[]", 0 .. 13); + // test( + // "hi\n- item\n- item 2\n - item 3", + // 10 .. 10, + // " ", + // 9 .. 33, + // ); test( "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, @@ -535,7 +542,7 @@ mod tests { "C ", 14 .. 22, ); - test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15); + test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); test( "a #let rect with (fill: eastern)\nb", 16 .. 
31, diff --git a/src/syntax/incremental.rs b/src/syntax/incremental.rs new file mode 100644 index 000000000..d7b5ca3c8 --- /dev/null +++ b/src/syntax/incremental.rs @@ -0,0 +1,515 @@ +use std::ops::Range; +use std::rc::Rc; + +use super::{Green, GreenNode, NodeKind, Span}; + +use crate::parse::{ + parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, + parse_markup_elements, parse_template, TokenMode, +}; + +pub struct Reparser<'a> { + src: &'a str, + replace_range: Span, + replace_len: usize, +} + +impl<'a> Reparser<'a> { + pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self { + Self { src, replace_range, replace_len } + } +} + +impl Reparser<'_> { + /// Find the innermost child that is incremental safe. + pub fn incremental(&self, green: &mut GreenNode) -> Result, ()> { + self.incremental_int(green, 0, TokenMode::Markup, true) + } + + fn incremental_int( + &self, + green: &mut GreenNode, + mut offset: usize, + parent_mode: TokenMode, + outermost: bool, + ) -> Result, ()> { + let kind = green.kind().clone(); + let mode = kind.mode().contextualize(parent_mode); + + let mut loop_result = None; + let mut child_at_start = true; + let last = green.children.len() - 1; + let mut start = None; + for (i, child) in green.children.iter_mut().enumerate() { + let child_span = + Span::new(self.replace_range.source, offset, offset + child.len()); + if child_span.surrounds(self.replace_range) + && start.is_none() + && ((self.replace_range.start != child_span.end + && self.replace_range.end != child_span.start) + || mode == TokenMode::Code + || i == last) + { + let old_len = child.len(); + // First, we try if the child has another, more specific applicable child. 
+ if !kind.post().unsafe_interior() { + if let Ok(range) = match child { + Green::Node(n) => self.incremental_int( + Rc::make_mut(n), + offset, + kind.mode().child_mode(), + i == last && outermost, + ), + Green::Token(_) => Err(()), + } { + let new_len = child.len(); + green.update_child_len(new_len, old_len); + return Ok(range); + } + } + + // This didn't work, so we try to self.replace_range the child at this + // level. + loop_result = + Some((i .. i + 1, child_span, i == last && outermost, child.kind())); + break; + } else if start.is_none() + && child_span.contains(self.replace_range.start) + && mode == TokenMode::Markup + && child.kind().post().markup_safe() + { + start = Some((i, offset)); + } else if child_span.contains(self.replace_range.end) + && (self.replace_range.end != child_span.end || i == last) + && mode == TokenMode::Markup + && child.kind().post().markup_safe() + { + if let Some((start, start_offset)) = start { + loop_result = Some(( + start .. i + 1, + Span::new( + self.replace_range.source, + start_offset, + offset + child.len(), + ), + i == last && outermost, + child.kind(), + )); + } + break; + } else if start.is_some() + && (mode != TokenMode::Markup || !child.kind().post().markup_safe()) + { + break; + } + + offset += child.len(); + child_at_start = child.kind().is_at_start(child_at_start); + } + + + // We now have a child that we can self.replace_range and a function to do so if + // the loop found any results at all. + let (child_idx_range, child_span, child_outermost, func, policy) = + loop_result.ok_or(()).and_then(|(a, b, c, child_kind)| { + let (func, policy) = + child_kind.reparsing_function(kind.mode().child_mode()); + Ok((a, b, c, func?, policy)) + })?; + + let src_span = child_span.inserted(self.replace_range, self.replace_len); + let recompile_range = if policy == Postcondition::AtomicPrimary { + src_span.start .. 
self.src.len() + } else { + src_span.to_range() + }; + + let (mut new_children, unterminated) = + func(&self.src[recompile_range], child_at_start).ok_or(())?; + + // Do not accept unclosed nodes if the old node did not use to be at the + // right edge of the tree. + if !child_outermost && unterminated { + return Err(()); + } + + let insertion = match check_invariants( + &new_children, + green.children(), + child_idx_range.clone(), + child_at_start, + mode, + src_span, + policy, + ) { + InvariantResult::Ok => Ok(new_children), + InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), + InvariantResult::Error => Err(()), + }?; + + green.replace_child_range(child_idx_range, insertion); + + Ok(src_span.to_range()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum InvariantResult { + Ok, + UseFirst, + Error, +} + +fn check_invariants( + use_children: &[Green], + old_children: &[Green], + child_idx_range: Range, + child_at_start: bool, + mode: TokenMode, + src_span: Span, + policy: Postcondition, +) -> InvariantResult { + let (new_children, ok) = if policy == Postcondition::AtomicPrimary { + if use_children.iter().map(Green::len).sum::() == src_span.len() { + (use_children, InvariantResult::Ok) + } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { + (&use_children[0 .. 1], InvariantResult::UseFirst) + } else { + return InvariantResult::Error; + } + } else { + (use_children, InvariantResult::Ok) + }; + + let child_mode = old_children[child_idx_range.start].kind().mode().child_mode(); + + // Check if the children / child has the right type. 
+ let same_kind = match policy { + Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), + _ => false, + }; + + if same_kind || policy == Postcondition::AtomicPrimary { + if new_children.len() != 1 { + return InvariantResult::Error; + } + + if same_kind { + if old_children[child_idx_range.start].kind() != new_children[0].kind() { + return InvariantResult::Error; + } + } + } + + // Check if the neighbor invariants are still true. + if mode == TokenMode::Markup { + if child_idx_range.start > 0 { + if old_children[child_idx_range.start - 1].kind().pre() + == Precondition::RightWhitespace + && !new_children[0].kind().is_whitespace() + { + return InvariantResult::Error; + } + } + + if new_children.last().map(|x| x.kind().pre()) + == Some(Precondition::RightWhitespace) + && old_children.len() > child_idx_range.end + { + if !old_children[child_idx_range.end].kind().is_whitespace() { + return InvariantResult::Error; + } + } + + let mut new_at_start = child_at_start; + for child in new_children { + new_at_start = child.kind().is_at_start(new_at_start); + } + + for child in &old_children[child_idx_range.end ..] 
{ + if child.kind().is_trivia() { + new_at_start = child.kind().is_at_start(new_at_start); + continue; + } + + match child.kind().pre() { + Precondition::AtStart if !new_at_start => { + return InvariantResult::Error; + } + Precondition::NotAtStart if new_at_start => { + return InvariantResult::Error; + } + _ => {} + } + break; + } + } + + ok +} + +impl NodeKind { + pub fn reparsing_function( + &self, + parent_mode: TokenMode, + ) -> ( + Result Option<(Vec, bool)>, ()>, + Postcondition, + ) { + let policy = self.post(); + let mode = self.mode().contextualize(parent_mode); + + match policy { + Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy), + Postcondition::AtomicPrimary if mode == TokenMode::Code => { + (Ok(parse_atomic), policy) + } + Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy), + Postcondition::SameKind(x) if x == None || x == Some(mode) => { + let parser: fn(&str, bool) -> _ = match self { + NodeKind::Template => parse_template, + NodeKind::Block => parse_block, + NodeKind::LineComment | NodeKind::BlockComment => parse_comment, + _ => return (Err(()), policy), + }; + + (Ok(parser), policy) + } + _ => { + let parser: fn(&str, bool) -> _ = match mode { + TokenMode::Markup if self == &Self::Markup => parse_markup, + TokenMode::Markup => parse_markup_elements, + _ => return (Err(()), policy), + }; + + (Ok(parser), policy) + } + } + } + + /// Whether it is safe to do incremental parsing on this node. Never allow + /// non-termination errors if this is not already the last leaf node. + pub fn post(&self) -> Postcondition { + match self { + // Replacing parenthesis changes if the expression is balanced and + // is therefore not safe. + Self::LeftBracket + | Self::RightBracket + | Self::LeftBrace + | Self::RightBrace + | Self::LeftParen + | Self::RightParen => Postcondition::Unsafe, + + // Replacing an operator can change whether the parent is an + // operation which makes it unsafe. The star can appear in markup. 
+ Self::Star + | Self::Comma + | Self::Semicolon + | Self::Colon + | Self::Plus + | Self::Minus + | Self::Slash + | Self::Eq + | Self::EqEq + | Self::ExclEq + | Self::Lt + | Self::LtEq + | Self::Gt + | Self::GtEq + | Self::PlusEq + | Self::HyphEq + | Self::StarEq + | Self::SlashEq + | Self::Not + | Self::And + | Self::Or + | Self::With + | Self::Dots + | Self::Arrow => Postcondition::Unsafe, + + // These keywords are literals and can safely be substituted with + // other expressions. + Self::None | Self::Auto => Postcondition::AtomicPrimary, + + // These keywords change what kind of expression the parent is and + // how far the expression would go. + Self::Let + | Self::Set + | Self::If + | Self::Else + | Self::For + | Self::In + | Self::While + | Self::Break + | Self::Continue + | Self::Return + | Self::Import + | Self::Include + | Self::From => Postcondition::Unsafe, + + Self::Markup => Postcondition::SameKind(None), + + Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Linebreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Heading + | Self::Enum + | Self::List + | Self::Raw(_) + | Self::Math(_) => Postcondition::Safe, + + // Changing the heading level, enum numbering, or list bullet + // changes the next layer. + Self::EnumNumbering(_) => Postcondition::Unsafe, + + // These are expressions that can be replaced by other expressions.
+ Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group => Postcondition::AtomicPrimary, + + Self::Call + | Self::Unary + | Self::Binary + | Self::CallArgs + | Self::Named + | Self::Spread => Postcondition::UnsafeLayer, + + // The closure is a bit magic with the let expression, and also it + // is not atomic. + Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + + // These can appear as bodies and would trigger an error if they + // became something else. + Self::Template => Postcondition::SameKind(None), + Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + + Self::ForExpr + | Self::WhileExpr + | Self::IfExpr + | Self::LetExpr + | Self::SetExpr + | Self::ImportExpr + | Self::IncludeExpr => Postcondition::AtomicPrimary, + + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + Postcondition::UnsafeLayer + } + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + + Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + } + } + + /// The appropriate precondition for the type. + pub fn pre(&self) -> Precondition { + match self { + Self::Heading | Self::Enum | Self::List => Precondition::AtStart, + Self::TextInLine(_) => Precondition::NotAtStart, + Self::Linebreak => Precondition::RightWhitespace, + _ => Precondition::None, + } + } +} + +/// This enum describes what conditions a node has for being replaced by a new +/// parse result. +/// +/// Safe nodes are replaced by the new parse result from the respective mode. +/// They can be replaced by multiple tokens. If a token is inserted in Markup +/// mode and the next token would not be `at_start` there needs to be a forward +/// check for a `EnsureAtStart` node. 
If this fails, the parent has to be +/// reparsed. The same holds if the direct whitespace sibling of an +/// `EnsureRightWhitespace` is `Unsafe`. Similarly, if an +/// `EnsureRightWhitespace` token is one of the last tokens to be inserted, the +/// edit is invalidated if there is no following whitespace. The atomic nodes +/// may only be replaced by other atomic nodes. The unsafe layers cannot be +/// used but allow children access, the unsafe nodes do neither. +/// +/// *Procedure:* +/// 1. Check if the node is safe - if unsafe layer recurse, if unsafe, return +/// None. +/// 2. Reparse with appropriate node kind and `at_start`. +/// 3. Check whether the topmost group is terminated and the range was +/// completely consumed, otherwise return None. +/// 4. Check if the type criteria are met. +/// 5. If the node is not at the end of the tree, check if Strings etc. are +/// terminated. +/// 6. If this is markup, check the following things: +/// - The `at_start` conditions of the next non-comment and non-space(0) node +/// are met. +/// - The first node is whitespace or the previous siblings are not +/// `EnsureRightWhitespace`. +/// - If any of those fails, return None. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Postcondition { + /// Changing this node can never have an influence on the other nodes. + Safe, + /// This node has to be replaced with a single token of the same kind. + SameKind(Option), + /// Changing this node into a single atomic expression is allowed if it + /// appears in code mode, otherwise it is safe. + AtomicPrimary, + /// Changing an unsafe layer node changes what the parents or the + /// surrounding nodes would be and is therefore disallowed. Change the + /// parents or children instead. If it appears in Markup, however, it is + /// safe to change. + UnsafeLayer, + /// Changing an unsafe node or any of its children will trigger undefined + /// behavior. Change the parents instead.
+ Unsafe, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Precondition { + /// These nodes depend on being at the start of a line. Reparsing of safe + /// left neighbors has to check this invariant. Otherwise, this node is + /// safe. + AtStart, + /// These nodes depend on not being at the start of a line. Reparsing of + /// safe left neighbors has to check this invariant. Otherwise, this node is + /// safe. + NotAtStart, + /// These nodes must be followed by whitespace. + RightWhitespace, + /// No additional requirements. + None, +} + +impl Postcondition { + pub fn unsafe_interior(&self) -> bool { + match self { + Self::Unsafe => true, + _ => false, + } + } + + pub fn markup_safe(&self) -> bool { + match self { + Self::Safe | Self::UnsafeLayer => true, + Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup), + _ => false, + } + } +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index cfb443761..4d0ca0261 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -2,6 +2,7 @@ pub mod ast; mod highlight; +mod incremental; mod pretty; mod span; @@ -10,16 +11,14 @@ use std::ops::Range; use std::rc::Rc; pub use highlight::*; +pub use incremental::*; pub use pretty::*; pub use span::*; use self::ast::{MathNode, RawNode, TypedNode}; use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; -use crate::parse::{ - parse_atomic, parse_block, parse_comment, parse_markup, parse_markup_elements, - parse_template, TokenMode, -}; +use crate::parse::TokenMode; use crate::source::SourceId; use crate::util::EcoString; @@ -87,29 +86,6 @@ impl Green { Self::Token(data) => data.kind = kind, } } - - /// Find the innermost child that is incremental safe. 
- fn incremental( - &mut self, - edit: &str, - replace: Span, - replacement_len: usize, - offset: usize, - parent_mode: TokenMode, - outermost: bool, - ) -> Result, ()> { - match self { - Green::Node(n) => Rc::make_mut(n).incremental_int( - edit, - replace, - replacement_len, - offset, - parent_mode, - outermost, - ), - Green::Token(_) => Err(()), - } - } } impl Default for Green { @@ -194,8 +170,22 @@ impl GreenNode { self.children[child_idx_range.clone()].iter().map(Green::len).sum(); let new_len: usize = replacement.iter().map(Green::len).sum(); + if self.erroneous { + if self.children[child_idx_range.clone()].iter().any(Green::erroneous) { + // the old range was erroneous but we do not know if anywhere + // else was so we have to iterate over the whole thing. + self.erroneous = self.children[.. child_idx_range.start] + .iter() + .any(Green::erroneous) + || self.children[child_idx_range.end ..].iter().any(Green::erroneous); + } + // in this case nothing changes so we do not have to bother. + } + + // the or assignment operator is not lazy. + self.erroneous = self.erroneous || replacement.iter().any(Green::erroneous); + self.children.splice(child_idx_range, replacement); - self.erroneous = self.children.iter().any(|x| x.erroneous()); self.data.set_len(self.data.len + new_len - old_len); } @@ -203,250 +193,6 @@ impl GreenNode { self.data.len = self.data.len() + new_len - old_len; self.erroneous = self.children.iter().any(|x| x.erroneous()); } - - /// Find the innermost child that is incremental safe. 
- pub fn incremental( - &mut self, - src: &str, - replace: Span, - replacement_len: usize, - ) -> Result, ()> { - self.incremental_int(src, replace, replacement_len, 0, TokenMode::Markup, true) - } - - fn incremental_int( - &mut self, - src: &str, - replace: Span, - replacement_len: usize, - mut offset: usize, - parent_mode: TokenMode, - outermost: bool, - ) -> Result, ()> { - let kind = self.kind().clone(); - let mode = kind.mode().contextualize(parent_mode); - - let mut loop_result = None; - let mut child_at_start = true; - let last = self.children.len() - 1; - let mut start = None; - for (i, child) in self.children.iter_mut().enumerate() { - let child_span = Span::new(replace.source, offset, offset + child.len()); - if child_span.surrounds(replace) - && start.is_none() - && ((replace.start != child_span.end && replace.end != child_span.start) - || mode == TokenMode::Code - || i == last) - { - let old_len = child.len(); - // First, we try if the child has another, more specific applicable child. - if !kind.incremental_safety().unsafe_interior() { - if let Ok(range) = child.incremental( - src, - replace, - replacement_len, - offset, - kind.mode().child_mode(), - i == last && outermost, - ) { - let new_len = child.len(); - self.update_child_len(new_len, old_len); - return Ok(range); - } - } - - // This didn't work, so we try to replace the child at this - // level. - let (function, policy) = - child.kind().reparsing_function(kind.mode().child_mode()); - let function = function?; - loop_result = Some(( - i .. 
i + 1, - child_span, - i == last && outermost, - function, - policy, - )); - break; - } else if start.is_none() - && child_span.contains(replace.start) - && mode == TokenMode::Markup - && child.kind().incremental_safety().markup_safe() - { - start = Some((i, offset)); - } else if child_span.contains(replace.end) - && (replace.end != child_span.end || i == last) - && mode == TokenMode::Markup - && child.kind().incremental_safety().markup_safe() - { - if let Some((start, start_offset)) = start { - let (function, policy) = - child.kind().reparsing_function(kind.mode().child_mode()); - let function = function?; - loop_result = Some(( - start .. i + 1, - Span::new(replace.source, start_offset, offset + child.len()), - i == last && outermost, - function, - policy, - )); - } - break; - } else if start.is_some() - && (mode != TokenMode::Markup - || !child.kind().incremental_safety().markup_safe()) - { - break; - } - - offset += child.len(); - child_at_start = child.kind().is_at_start(child_at_start); - } - - - // We now have a child that we can replace and a function to do so if - // the loop found any results at all. - let (child_idx_range, child_span, child_outermost, func, policy) = - loop_result.ok_or(())?; - - let src_span = child_span.inserted(replace, replacement_len); - let recompile_range = if policy == IncrementalSafety::AtomicPrimary { - src_span.start .. 
src.len() - } else { - src_span.to_range() - }; - - let (mut new_children, unterminated) = - func(&src[recompile_range], child_at_start).ok_or(())?; - - let insertion = match check_invariants( - &new_children, - self.children(), - unterminated, - child_idx_range.clone(), - child_outermost, - child_at_start, - mode, - src_span, - policy, - ) { - InvariantResult::Ok => Ok(new_children), - InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), - InvariantResult::Error => Err(()), - }?; - - self.replace_child_range(child_idx_range, insertion); - - Ok(src_span.to_range()) - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -enum InvariantResult { - Ok, - UseFirst, - Error, -} - -fn check_invariants( - use_children: &[Green], - old_children: &[Green], - unterminated: bool, - child_idx_range: Range, - outermost: bool, - child_at_start: bool, - mode: TokenMode, - src_span: Span, - policy: IncrementalSafety, -) -> InvariantResult { - let (new_children, ok) = if policy == IncrementalSafety::AtomicPrimary { - if use_children.iter().map(Green::len).sum::() == src_span.len() { - (use_children, InvariantResult::Ok) - } else if use_children[0].len() == src_span.len() { - (&use_children[0 .. 1], InvariantResult::UseFirst) - } else { - return InvariantResult::Error; - } - } else { - (use_children, InvariantResult::Ok) - }; - - let child_mode = old_children[child_idx_range.start].kind().mode().child_mode(); - - // Check if the children / child has the right type. 
- let require_single = match policy { - IncrementalSafety::AtomicPrimary | IncrementalSafety::SameKind => true, - IncrementalSafety::SameKindInCode if child_mode == TokenMode::Code => true, - _ => false, - }; - - if require_single { - if new_children.len() != 1 { - return InvariantResult::Error; - } - - if match policy { - IncrementalSafety::SameKind => true, - IncrementalSafety::SameKindInCode => child_mode == TokenMode::Code, - _ => false, - } { - if old_children[child_idx_range.start].kind() != new_children[0].kind() { - return InvariantResult::Error; - } - } - } - - // Do not accept unclosed nodes if the old node did not use to be at the - // right edge of the tree. - if !outermost && unterminated { - return InvariantResult::Error; - } - - // Check if the neighbor invariants are still true. - if mode == TokenMode::Markup { - if child_idx_range.start > 0 { - if old_children[child_idx_range.start - 1].kind().incremental_safety() - == IncrementalSafety::EnsureRightWhitespace - && !new_children[0].kind().is_whitespace() - { - return InvariantResult::Error; - } - } - - let mut new_at_start = child_at_start; - for child in new_children { - new_at_start = child.kind().is_at_start(new_at_start); - } - - for child in &old_children[child_idx_range.end ..] 
{ - if child.kind().is_trivia() { - new_at_start = child.kind().is_at_start(new_at_start); - continue; - } - - match child.kind().incremental_safety() { - IncrementalSafety::EnsureAtStart if !new_at_start => { - return InvariantResult::Error; - } - IncrementalSafety::EnsureNotAtStart if new_at_start => { - return InvariantResult::Error; - } - _ => {} - } - break; - } - - if new_children.last().map(|x| x.kind().incremental_safety()) - == Some(IncrementalSafety::EnsureRightWhitespace) - && old_children.len() > child_idx_range.end - { - if !old_children[child_idx_range.end].kind().is_whitespace() { - return InvariantResult::Error; - } - } - } - - ok } impl From for Green { @@ -1053,190 +799,6 @@ impl NodeKind { } } - pub fn reparsing_function( - &self, - parent_mode: TokenMode, - ) -> ( - Result Option<(Vec, bool)>, ()>, - IncrementalSafety, - ) { - let policy = self.incremental_safety(); - if policy.is_unsafe() { - return (Err(()), policy); - } - - let contextualized = self.mode().contextualize(parent_mode); - let is_code = contextualized == TokenMode::Code; - - if is_code && policy == IncrementalSafety::UnsafeLayer { - return (Err(()), policy); - } - - if is_code && policy == IncrementalSafety::AtomicPrimary { - return (Ok(parse_atomic), policy); - } - - if policy == IncrementalSafety::SameKind - || (policy == IncrementalSafety::SameKindInCode && is_code) - { - let parser: fn(&str, bool) -> _ = match self { - NodeKind::Template => parse_template, - NodeKind::Block => parse_block, - NodeKind::LineComment | NodeKind::BlockComment => parse_comment, - _ => return (Err(()), policy), - }; - - return (Ok(parser), policy); - } - - let parser: fn(&str, bool) -> _ = match contextualized { - TokenMode::Markup if self == &Self::Markup => parse_markup, - TokenMode::Markup => parse_markup_elements, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } - - /// Whether it is safe to do incremental parsing on this node. 
Never allow - /// non-termination errors if this is not already the last leaf node. - pub fn incremental_safety(&self) -> IncrementalSafety { - match self { - // Replacing parenthesis changes if the expression is balanced and - // is therefore not safe. - Self::LeftBracket - | Self::RightBracket - | Self::LeftBrace - | Self::RightBrace - | Self::LeftParen - | Self::RightParen => IncrementalSafety::Unsafe, - - // Replacing an operator can change whether the parent is an - // operation which makes it unsafe. The star can appear in markup. - Self::Star - | Self::Comma - | Self::Semicolon - | Self::Colon - | Self::Plus - | Self::Minus - | Self::Slash - | Self::Eq - | Self::EqEq - | Self::ExclEq - | Self::Lt - | Self::LtEq - | Self::Gt - | Self::GtEq - | Self::PlusEq - | Self::HyphEq - | Self::StarEq - | Self::SlashEq - | Self::Not - | Self::And - | Self::Or - | Self::With - | Self::Dots - | Self::Arrow => IncrementalSafety::Unsafe, - - // These keywords are literals and can be safely be substituted with - // other expressions. - Self::None | Self::Auto => IncrementalSafety::AtomicPrimary, - - // These keywords change what kind of expression the parent is and - // how far the expression would go. - Self::Let - | Self::If - | Self::Else - | Self::For - | Self::In - | Self::While - | Self::Break - | Self::Continue - | Self::Return - | Self::Set - | Self::Import - | Self::Include - | Self::From => IncrementalSafety::Unsafe, - - // This is a backslash followed by a space. But changing it to - // anything else is fair game. - Self::Linebreak => IncrementalSafety::EnsureRightWhitespace, - - Self::Markup => IncrementalSafety::SameKind, - - Self::Space(_) => IncrementalSafety::SameKindInCode, - - // These are all replaceable by other tokens. 
- Self::Parbreak - | Self::Text(_) - | Self::NonBreakingSpace - | Self::EnDash - | Self::EmDash - | Self::Escape(_) - | Self::Strong - | Self::Emph => IncrementalSafety::Safe, - - // This is text that needs to be not `at_start`, otherwise it would - // start one of the below items. - Self::TextInLine(_) => IncrementalSafety::EnsureNotAtStart, - - // These have to be `at_start` so they must be preceeded with a - // Space(n) with n > 0 or a Parbreak. - Self::Heading | Self::Enum | Self::List => IncrementalSafety::EnsureAtStart, - - // Changing the heading level, enum numbering, or list bullet - // changes the next layer. - Self::EnumNumbering(_) => IncrementalSafety::Unsafe, - - Self::Raw(_) | Self::Math(_) => IncrementalSafety::Safe, - - // These are expressions that can be replaced by other expressions. - Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group => IncrementalSafety::AtomicPrimary, - - Self::Call | Self::Unary | Self::Binary | Self::SetExpr => { - IncrementalSafety::UnsafeLayer - } - - Self::CallArgs | Self::Named | Self::Spread => IncrementalSafety::UnsafeLayer, - - // The closure is a bit magic with the let expression, and also it - // is not atomic. - Self::Closure | Self::ClosureParams => IncrementalSafety::UnsafeLayer, - - // These can appear as bodies and would trigger an error if they - // became something else. - Self::Template | Self::Block => IncrementalSafety::SameKindInCode, - - Self::ForExpr - | Self::WhileExpr - | Self::IfExpr - | Self::LetExpr - | Self::ImportExpr - | Self::IncludeExpr => IncrementalSafety::AtomicPrimary, - - Self::WithExpr | Self::ForPattern | Self::ImportItems => { - IncrementalSafety::UnsafeLayer - } - - // These can appear everywhere and must not change to other stuff - // because that could change the outer expression. 
- Self::LineComment | Self::BlockComment => IncrementalSafety::SameKind, - - Self::Error(_, _) | Self::Unknown(_) => IncrementalSafety::Unsafe, - } - } - /// A human-readable name for the kind. pub fn as_str(&self) -> &'static str { match self { @@ -1351,95 +913,6 @@ impl Display for NodeKind { } } -/// This enum describes what conditions a node has for being replaced by a new -/// parse result. -/// -/// Safe nodes are replaced by the new parse result from the respective mode. -/// They can be replaced by multiple tokens. If a token is inserted in Markup -/// mode and the next token would not be `at_start` there needs to be a forward -/// check for a `EnsureAtStart` node. If this fails, the parent has to be -/// reparsed. if the direct whitespace sibling of a `EnsureRightWhitespace` is -/// `Unsafe`. Similarly, if a `EnsureRightWhitespace` token is one of the last -/// tokens to be inserted, the edit is invalidated if there is no following -/// whitespace. The atomic nodes may only be replaced by other atomic nodes. The -/// unsafe layers cannot be used but allow children access, the unsafe nodes do -/// neither. -/// -/// *Procedure:* -/// 1. Check if the node is safe - if unsafe layer recurse, if unsafe, return -/// None. -/// 2. Reparse with appropriate node kind and `at_start`. -/// 3. Check whether the topmost group is terminated and the range was -/// completely consumed, otherwise return None. -/// 4. Check if the type criteria are met. -/// 5. If the node is not at the end of the tree, check if Strings etc. are -/// terminated. -/// 6. If this is markup, check the following things: -/// - The `at_start` conditions of the next non-comment and non-space(0) node -/// are met. -/// - The first node is whitespace or the previous siblings are not -/// `EnsureRightWhitespace`. -/// - If any of those fails, return None. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum IncrementalSafety { - /// Changing this node can never have an influence on the other nodes. - Safe, - /// This node has to be replaced with a single token of the same kind. - SameKind, - /// This node has to be replaced with a single token of the same kind if in - /// code mode. - SameKindInCode, - /// These nodes depend on being at the start of a line. Reparsing of safe - /// left neighbors has to check this invariant. Otherwise, this node is - /// safe. - EnsureAtStart, - /// These nodes depend on not being at the start of a line. Reparsing of - /// safe left neighbors has to check this invariant. Otherwise, this node is - /// safe. - EnsureNotAtStart, - /// These nodes must be followed by whitespace. - EnsureRightWhitespace, - /// Changing this node into a single atomic expression is allowed if it - /// appears in code mode, otherwise it is safe. - AtomicPrimary, - /// Changing an unsafe layer node changes what the parents or the - /// surrounding nodes would be and is therefore disallowed. Change the - /// parents or children instead. If it appears in Markup, however, it is - /// safe to change. - UnsafeLayer, - /// Changing an unsafe node or any of its children will trigger undefined - /// behavior. Change the parents instead. - Unsafe, -} - -impl IncrementalSafety { - pub fn unsafe_interior(&self) -> bool { - match self { - Self::Unsafe => true, - _ => false, - } - } - - pub fn is_unsafe(&self) -> bool { - match self { - Self::UnsafeLayer | Self::Unsafe => true, - _ => false, - } - } - - pub fn markup_safe(&self) -> bool { - match self { - Self::Safe - | Self::SameKindInCode - | Self::EnsureAtStart - | Self::EnsureNotAtStart - | Self::EnsureRightWhitespace - | Self::UnsafeLayer => true, - _ => false, - } - } -} - /// This enum describes which mode a token of [`NodeKind`] can appear in. 
#[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum NodeMode { From edc686d7384470068858e16f2926cf50f31b2c90 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sat, 27 Nov 2021 16:10:22 +0100 Subject: [PATCH 10/16] Make incremental parsing simpler and move it somewhere else --- src/{syntax => parse}/incremental.rs | 350 +++++++++++++++++++-------- src/parse/mod.rs | 2 + src/source.rs | 128 +--------- src/syntax/mod.rs | 77 ++---- src/syntax/span.rs | 12 - 5 files changed, 283 insertions(+), 286 deletions(-) rename src/{syntax => parse}/incremental.rs (58%) diff --git a/src/syntax/incremental.rs b/src/parse/incremental.rs similarity index 58% rename from src/syntax/incremental.rs rename to src/parse/incremental.rs index d7b5ca3c8..9c912aae4 100644 --- a/src/syntax/incremental.rs +++ b/src/parse/incremental.rs @@ -1,20 +1,28 @@ use std::ops::Range; use std::rc::Rc; -use super::{Green, GreenNode, NodeKind, Span}; +use crate::syntax::{Green, GreenNode, NodeKind, Span}; -use crate::parse::{ +use super::{ parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, parse_markup_elements, parse_template, TokenMode, }; +/// Allows partial refreshes of the [`Green`] node tree. +/// +/// This struct holds a description of a change. Its methods can be used to try +/// and apply the change to a green tree. pub struct Reparser<'a> { + /// The new source code, with the change applied. src: &'a str, + /// Which range in the old source file was changed. replace_range: Span, + /// How many characters replaced the text in `replace_range`. replace_len: usize, } impl<'a> Reparser<'a> { + /// Create a new reparser. pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self { Self { src, replace_range, replace_len } } @@ -22,11 +30,11 @@ impl<'a> Reparser<'a> { impl Reparser<'_> { /// Find the innermost child that is incremental safe.
- pub fn incremental(&self, green: &mut GreenNode) -> Result, ()> { - self.incremental_int(green, 0, TokenMode::Markup, true) + pub fn reparse(&self, green: &mut GreenNode) -> Result, ()> { + self.reparse_step(green, 0, TokenMode::Markup, true) } - fn incremental_int( + fn reparse_step( &self, green: &mut GreenNode, mut offset: usize, @@ -34,72 +42,29 @@ impl Reparser<'_> { outermost: bool, ) -> Result, ()> { let kind = green.kind().clone(); - let mode = kind.mode().contextualize(parent_mode); + let mode = kind.mode().unwrap_or(parent_mode); let mut loop_result = None; let mut child_at_start = true; - let last = green.children.len() - 1; + let last = green.children().len() - 1; let mut start = None; - for (i, child) in green.children.iter_mut().enumerate() { + + for (i, child) in green.children_mut().iter_mut().enumerate() { let child_span = Span::new(self.replace_range.source, offset, offset + child.len()); - if child_span.surrounds(self.replace_range) - && start.is_none() - && ((self.replace_range.start != child_span.end - && self.replace_range.end != child_span.start) - || mode == TokenMode::Code + + // We look for the start in the element but we only take a position + // at the right border if this is markup or the last element. + // + // This is because in Markup mode, we want to examine all nodes + // touching a replacement but in code we want to atomically replace. + if child_span.contains(self.replace_range.start) + && (mode == TokenMode::Markup + || self.replace_range.start != child_span.end + || self.replace_range.len() == 0 || i == last) - { - let old_len = child.len(); - // First, we try if the child has another, more specific applicable child. 
- if !kind.post().unsafe_interior() { - if let Ok(range) = match child { - Green::Node(n) => self.incremental_int( - Rc::make_mut(n), - offset, - kind.mode().child_mode(), - i == last && outermost, - ), - Green::Token(_) => Err(()), - } { - let new_len = child.len(); - green.update_child_len(new_len, old_len); - return Ok(range); - } - } - - // This didn't work, so we try to self.replace_range the child at this - // level. - loop_result = - Some((i .. i + 1, child_span, i == last && outermost, child.kind())); - break; - } else if start.is_none() - && child_span.contains(self.replace_range.start) - && mode == TokenMode::Markup - && child.kind().post().markup_safe() { start = Some((i, offset)); - } else if child_span.contains(self.replace_range.end) - && (self.replace_range.end != child_span.end || i == last) - && mode == TokenMode::Markup - && child.kind().post().markup_safe() - { - if let Some((start, start_offset)) = start { - loop_result = Some(( - start .. i + 1, - Span::new( - self.replace_range.source, - start_offset, - offset + child.len(), - ), - i == last && outermost, - child.kind(), - )); - } - break; - } else if start.is_some() - && (mode != TokenMode::Markup || !child.kind().post().markup_safe()) - { break; } @@ -107,17 +72,77 @@ impl Reparser<'_> { child_at_start = child.kind().is_at_start(child_at_start); } + let (start_idx, start_offset) = start.ok_or(())?; - // We now have a child that we can self.replace_range and a function to do so if - // the loop found any results at all. 
- let (child_idx_range, child_span, child_outermost, func, policy) = - loop_result.ok_or(()).and_then(|(a, b, c, child_kind)| { - let (func, policy) = - child_kind.reparsing_function(kind.mode().child_mode()); - Ok((a, b, c, func?, policy)) - })?; + for (i, child) in (green.children_mut()[start_idx ..]).iter_mut().enumerate() { + let i = i + start_idx; + let child_span = + Span::new(self.replace_range.source, offset, offset + child.len()); - let src_span = child_span.inserted(self.replace_range, self.replace_len); + // Similarly to above, the end of the edit must be in the node but + // if it is at the edge and we are in markup node, we also want its + // neighbor! + if child_span.contains(self.replace_range.end) + && (mode != TokenMode::Markup + || self.replace_range.end != child_span.end + || i == last) + { + loop_result = Some(( + start_idx .. i + 1, + Span::new( + self.replace_range.source, + start_offset, + offset + child.len(), + ), + i == last && outermost, + child.kind().clone(), + )); + break; + } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { + break; + } + + offset += child.len(); + } + + let (child_idx_range, child_span, child_outermost, child_kind) = + loop_result.ok_or(())?; + + if child_idx_range.len() == 1 { + let idx = child_idx_range.start; + let child = &mut green.children_mut()[idx]; + + let old_len = child.len(); + // First, we try if the child has another, more specific applicable child. 
+ if !child_kind.post().unsafe_interior() { + if let Ok(range) = match child { + Green::Node(n) => self.reparse_step( + Rc::make_mut(n), + start_offset, + kind.mode().unwrap_or(TokenMode::Code), + child_outermost, + ), + Green::Token(_) => Err(()), + } { + let new_len = child.len(); + green.update_child_len(new_len, old_len); + return Ok(range); + } + } + } + + debug_assert_ne!(child_idx_range.len(), 0); + + if mode == TokenMode::Code && child_idx_range.len() > 1 { + return Err(()); + } + + // We now have a child that we can replace and a function to do so. + let (func, policy) = + child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code)); + let func = func?; + + let src_span = inserted_span(child_span, self.replace_range, self.replace_len); let recompile_range = if policy == Postcondition::AtomicPrimary { src_span.start .. self.src.len() } else { @@ -181,7 +206,10 @@ fn check_invariants( (use_children, InvariantResult::Ok) }; - let child_mode = old_children[child_idx_range.start].kind().mode().child_mode(); + let child_mode = old_children[child_idx_range.start] + .kind() + .mode() + .unwrap_or(TokenMode::Code); // Check if the children / child has the right type. let same_kind = match policy { @@ -248,8 +276,22 @@ fn check_invariants( ok } +/// Create a new span by specifying a span in which a modification happened +/// and how many characters are now in that span. +fn inserted_span(mut source: Span, other: Span, n: usize) -> Span { + if !source.surrounds(other) { + panic!(); + } + + let len_change = n as i64 - other.len() as i64; + source.end = (source.end as i64 + len_change) as usize; + source +} + impl NodeKind { - pub fn reparsing_function( + /// Return the correct reparsing function given the postconditions for the + /// type. 
+ fn reparsing_function( &self, parent_mode: TokenMode, ) -> ( @@ -257,7 +299,7 @@ impl NodeKind { Postcondition, ) { let policy = self.post(); - let mode = self.mode().contextualize(parent_mode); + let mode = self.mode().unwrap_or(parent_mode); match policy { Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy), @@ -433,35 +475,10 @@ impl NodeKind { } } -/// This enum describes what conditions a node has for being replaced by a new -/// parse result. +/// The conditions that a node has to fulfill in order to be replaced. /// -/// Safe nodes are replaced by the new parse result from the respective mode. -/// They can be replaced by multiple tokens. If a token is inserted in Markup -/// mode and the next token would not be `at_start` there needs to be a forward -/// check for a `EnsureAtStart` node. If this fails, the parent has to be -/// reparsed. if the direct whitespace sibling of a `EnsureRightWhitespace` is -/// `Unsafe`. Similarly, if a `EnsureRightWhitespace` token is one of the last -/// tokens to be inserted, the edit is invalidated if there is no following -/// whitespace. The atomic nodes may only be replaced by other atomic nodes. The -/// unsafe layers cannot be used but allow children access, the unsafe nodes do -/// neither. -/// -/// *Procedure:* -/// 1. Check if the node is safe - if unsafe layer recurse, if unsafe, return -/// None. -/// 2. Reparse with appropriate node kind and `at_start`. -/// 3. Check whether the topmost group is terminated and the range was -/// completely consumed, otherwise return None. -/// 4. Check if the type criteria are met. -/// 5. If the node is not at the end of the tree, check if Strings etc. are -/// terminated. -/// 6. If this is markup, check the following things: -/// - The `at_start` conditions of the next non-comment and non-space(0) node -/// are met. -/// - The first node is whitespace or the previous siblings are not -/// `EnsureRightWhitespace`. -/// - If any of those fails, return None. 
+/// This can dictate if a node can be replaced at all and if yes, what can take +/// its place. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Postcondition { /// Changing this node can never have an influence on the other nodes. @@ -481,6 +498,11 @@ pub enum Postcondition { Unsafe, } +/// The conditions under which a node can be inserted or remain in a tree. +/// +/// These conditions all search the neighbors of the node and see if its +/// existence is plausible with them present. This can be used to encode some +/// context-free language components for incremental parsing. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Precondition { /// These nodes depend on being at the start of a line. Reparsing of safe @@ -513,3 +535,127 @@ impl Postcondition { } } } + +#[cfg(test)] +mod tests { + use crate::parse::parse; + use crate::source::SourceFile; + + use super::*; + + #[test] + fn test_incremental_parse() { + #[track_caller] + fn test(prev: &str, range: Range, with: &str, incr: Range) { + let mut source = SourceFile::detached(prev); + let range = source.edit(range, with); + assert_eq!(range, incr); + + let incr_tree = source.root(); + assert_eq!(parse(source.src()), incr_tree); + } + + // Test simple replacements. + test("hello world", 6 .. 11, "wankers", 5 .. 13); + test("a d e", 1 .. 3, " b c d", 0 .. 8); + test("a #f() e", 1 .. 6, " b c d", 0 .. 8); + test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); + test("= A heading", 3 .. 3, "n evocative", 2 .. 15); + test("your thing", 5 .. 5, "a", 4 .. 11); + test("a your thing a", 6 .. 7, "a", 2 .. 12); + test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); + test("#call() abc", 7 .. 7, "[]", 0 .. 13); + // test( + // "hi\n- item\n- item 2\n - item 3", + // 10 .. 10, + // " ", + // 9 .. 33, + // ); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 16 .. 20, + "none", + 16 .. 
20, + ); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 33 .. 42, + "[_gronk_]", + 33 .. 42, + ); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 34 .. 41, + "_bar_", + 34 .. 39, + ); + test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 13 .. 15); + test("hello {x}", 6 .. 9, "#f()", 5 .. 10); + test( + "this is -- in my opinion -- spectacular", + 8 .. 10, + "---", + 7 .. 12, + ); + test( + "understanding `code` is complicated", + 15 .. 15, + "C ", + 14 .. 22, + ); + test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); + test( + "a #let rect with (fill: eastern)\nb", + 16 .. 31, + " (stroke: conifer", + 2 .. 34, + ); + + // Test the whitespace invariants. + test("hello \\ world", 7 .. 8, "a ", 6 .. 14); + test("hello \\ world", 7 .. 8, " a", 6 .. 14); + test("x = y", 1 .. 1, " + y", 0 .. 6); + test("x = y", 1 .. 1, " + y\n", 0 .. 10); + test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); + test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19); + test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); + test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); + + // Test type invariants. + test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22); + test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15); + test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16); + test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 17); + test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18); + test("a // b c #f()", 3 .. 4, "", 0 .. 12); + test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12); + test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); + test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26); + test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14); + test("a b c", 1 .. 1, "{[}", 0 .. 5); + + // Test unclosed things. + test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6); + test(r"this \u{abcd}", 8 .. 9, "", 5 .. 
12); + test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17); + test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); + test("a b c", 1 .. 1, " /* letters */", 0 .. 16); + test("a b c", 1 .. 1, " /* letters", 0 .. 16); + test( + "{if i==1 {a} else [b]; b()}", + 12 .. 12, + " /* letters */", + 1 .. 35, + ); + test( + "{if i==1 {a} else [b]; b()}", + 12 .. 12, + " /* letters", + 0 .. 38, + ); + + test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); + test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); + } +} diff --git a/src/parse/mod.rs b/src/parse/mod.rs index f1f1e8b6c..2c421374b 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,10 +1,12 @@ //! Parsing and tokenization. +mod incremental; mod parser; mod resolve; mod scanner; mod tokens; +pub use incremental::*; pub use parser::*; pub use resolve::*; pub use scanner::*; diff --git a/src/source.rs b/src/source.rs index aaf009e0b..421412ee8 100644 --- a/src/source.rs +++ b/src/source.rs @@ -10,9 +10,9 @@ use serde::{Deserialize, Serialize}; use crate::diag::TypResult; use crate::loading::{FileHash, Loader}; -use crate::parse::{is_newline, parse, Scanner}; +use crate::parse::{is_newline, parse, Reparser, Scanner}; use crate::syntax::ast::Markup; -use crate::syntax::{self, Category, GreenNode, RedNode, Reparser, Span}; +use crate::syntax::{self, Category, GreenNode, RedNode, Span}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] @@ -286,7 +286,7 @@ impl SourceFile { // Update the root node. 
let span = Span::new(self.id, replace.start, replace.end); let reparser = Reparser::new(&self.src, span, with.len()); - if let Ok(range) = reparser.incremental(Rc::make_mut(&mut self.root)) { + if let Ok(range) = reparser.reparse(Rc::make_mut(&mut self.root)) { range } else { self.root = parse(&self.src); @@ -302,6 +302,12 @@ impl SourceFile { let red = RedNode::from_root(self.root.clone(), self.id); syntax::highlight(red.as_ref(), range, &mut f) } + + /// Obtain a reference to the source's root green node. + #[cfg(test)] + pub(crate) fn root(&self) -> Rc { + self.root.clone() + } } /// The indices at which lines start (right behind newlines). @@ -480,120 +486,4 @@ mod tests { // Test removing everything. test(TEST, 0 .. 21, "", ""); } - - #[test] - fn test_incremental_parse() { - #[track_caller] - fn test(prev: &str, range: Range, with: &str, incr: Range) { - let mut source = SourceFile::detached(prev); - let range = source.edit(range, with); - assert_eq!(range, incr); - - let incr_tree = source.root.clone(); - assert_eq!(parse(source.src()), incr_tree); - } - - // Test simple replacements. - test("hello world", 6 .. 11, "wankers", 5 .. 13); - test("a d e", 1 .. 3, " b c d", 0 .. 8); - test("a #f() e", 1 .. 6, " b c d", 0 .. 8); - test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); - test("= A heading", 3 .. 3, "n evocative", 2 .. 15); - test("your thing", 5 .. 5, "a", 4 .. 11); - test("a your thing a", 6 .. 7, "a", 2 .. 12); - test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); - test("#call() abc", 7 .. 7, "[]", 0 .. 13); - // test( - // "hi\n- item\n- item 2\n - item 3", - // 10 .. 10, - // " ", - // 9 .. 33, - // ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 16 .. 20, - "none", - 16 .. 20, - ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 33 .. 42, - "[_gronk_]", - 33 .. 
42, - ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 34 .. 41, - "_bar_", - 34 .. 39, - ); - test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9); - test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 13 .. 15); - test("hello {x}", 6 .. 9, "#f()", 5 .. 10); - test( - "this is -- in my opinion -- spectacular", - 8 .. 10, - "---", - 7 .. 12, - ); - test( - "understanding `code` is complicated", - 15 .. 15, - "C ", - 14 .. 22, - ); - test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); - test( - "a #let rect with (fill: eastern)\nb", - 16 .. 31, - " (stroke: conifer", - 2 .. 34, - ); - - // Test the whitespace invariants. - test("hello \\ world", 7 .. 8, "a ", 6 .. 14); - test("hello \\ world", 7 .. 8, " a", 6 .. 14); - test("x = y", 1 .. 1, " + y", 0 .. 6); - test("x = y", 1 .. 1, " + y\n", 0 .. 10); - test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); - test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19); - test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); - test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); - - // Test type invariants. - test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22); - test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15); - test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16); - test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 17); - test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18); - test("a // b c #f()", 3 .. 4, "", 0 .. 12); - test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12); - test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); - test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26); - test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14); - test("a b c", 1 .. 1, "{[}", 0 .. 5); - - // Test unclosed things. - test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6); - test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12); - test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17); - test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); - test("a b c", 1 .. 1, " /* letters */", 0 .. 
16); - test("a b c", 1 .. 1, " /* letters", 0 .. 16); - test( - "{if i==1 {a} else [b]; b()}", - 12 .. 12, - " /* letters */", - 1 .. 35, - ); - test( - "{if i==1 {a} else [b]; b()}", - 12 .. 12, - " /* letters", - 0 .. 38, - ); - - test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); - test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); - } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 4d0ca0261..9ab530d81 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -2,7 +2,6 @@ pub mod ast; mod highlight; -mod incremental; mod pretty; mod span; @@ -11,7 +10,6 @@ use std::ops::Range; use std::rc::Rc; pub use highlight::*; -pub use incremental::*; pub use pretty::*; pub use span::*; @@ -161,6 +159,9 @@ impl GreenNode { self.data().len() } + /// Replaces a range of children with some replacement. + /// + /// This method updates the `erroneous` and `data.len` fields. pub fn replace_child_range( &mut self, child_idx_range: Range, @@ -189,6 +190,8 @@ impl GreenNode { self.data.set_len(self.data.len + new_len - old_len); } + /// Update the length of this node given the old and new length of a + /// replaced child. pub fn update_child_len(&mut self, new_len: usize, old_len: usize) { self.data.len = self.data.len() + new_len - old_len; self.erroneous = self.children.iter().any(|x| x.erroneous()); @@ -377,22 +380,6 @@ impl<'a> RedRef<'a> { self.green.erroneous() } - /// The node's children. - pub fn children(self) -> Children<'a> { - let children = match &self.green { - Green::Node(node) => node.children(), - Green::Token(_) => &[], - }; - - Children { - id: self.id, - iter: children.iter(), - front: self.offset, - back: self.offset + self.len(), - } - } - - /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -425,6 +412,21 @@ impl<'a> RedRef<'a> { T::from_red(self) } + /// The node's children. 
+ pub fn children(self) -> Children<'a> { + let children = match &self.green { + Green::Node(node) => node.children(), + Green::Token(_) => &[], + }; + + Children { + id: self.id, + iter: children.iter(), + front: self.offset, + back: self.offset + self.len(), + } + } + /// Get the first child that can cast to some AST type. pub fn cast_first_child(self) -> Option { self.children().find_map(RedRef::cast) @@ -760,7 +762,7 @@ impl NodeKind { } /// Whether this token appears in Markup. - pub fn mode(&self) -> NodeMode { + pub fn mode(&self) -> Option { match self { Self::Markup | Self::Space(_) @@ -779,7 +781,7 @@ impl NodeKind { | Self::EnumNumbering(_) | Self::List | Self::Raw(_) - | Self::Math(_) => NodeMode::Markup, + | Self::Math(_) => Some(TokenMode::Markup), Self::Template | Self::Block | Self::Ident(_) @@ -794,8 +796,8 @@ impl NodeKind { | Self::BlockComment | Self::Error(_, _) | Self::Minus - | Self::Eq => NodeMode::Universal, - _ => NodeMode::Code, + | Self::Eq => None, + _ => Some(TokenMode::Code), } } @@ -912,34 +914,3 @@ impl Display for NodeKind { f.pad(self.as_str()) } } - -/// This enum describes which mode a token of [`NodeKind`] can appear in. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum NodeMode { - /// The token can only appear in markup mode. - Markup, - /// The token can only appear in code mode. - Code, - /// The token can appear in either mode. Look at the parent node to decide - /// which mode it is in. After an apply, this is equivalent to Markup. - Universal, -} - -impl NodeMode { - /// Returns a new mode considering the parent node. - pub fn contextualize(&self, old: TokenMode) -> TokenMode { - match self { - Self::Markup => TokenMode::Markup, - Self::Code => TokenMode::Code, - Self::Universal => old, - } - } - - /// The mode of the children of this node. 
- pub fn child_mode(&self) -> TokenMode { - match self { - Self::Markup => TokenMode::Markup, - Self::Code | Self::Universal => TokenMode::Code, - } - } -} diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 2691acc7c..4d5b88195 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -125,18 +125,6 @@ impl Span { *self = self.join(other) } - /// Create a new span by specifying a span in which a modification happened - /// and how many characters are now in that span. - pub fn inserted(mut self, other: Self, n: usize) -> Self { - if !self.surrounds(other) { - panic!(); - } - - let len_change = n as isize - other.len() as isize; - self.end += len_change as usize; - self - } - /// Test whether a position is within the span. pub fn contains(&self, pos: usize) -> bool { self.start <= pos && self.end >= pos From e05eb5fda5d1dfeef168b6fc071b20fdbcce2dcd Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 28 Nov 2021 18:18:45 +0100 Subject: [PATCH 11/16] Code Review: Parser, I can't let you do this --- Cargo.lock | 420 +---------------- Cargo.toml | 6 - benches/timed.rs | 98 ---- src/parse/incremental.rs | 975 ++++++++++++++++++--------------------- src/parse/mod.rs | 10 +- src/parse/parser.rs | 14 +- src/parse/tokens.rs | 28 +- src/source.rs | 16 +- src/syntax/mod.rs | 50 +- 9 files changed, 491 insertions(+), 1126 deletions(-) delete mode 100644 benches/timed.rs diff --git a/Cargo.lock b/Cargo.lock index 98cb4d81b..df3bf74a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,17 +38,6 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.0.1" @@ -67,24 +56,6 @@ 
version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "bstr" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - -[[package]] -name = "bumpalo" -version = "3.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f1e260c3a9040a7c19a12468758f4c16f31a81a1fe087482be9570ec864bb6c" - [[package]] name = "bytemuck" version = "1.7.3" @@ -97,32 +68,12 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" -[[package]] -name = "cast" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" -dependencies = [ - "rustc_version", -] - [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "bitflags", - "textwrap", - "unicode-width", -] - [[package]] name = "codespan-reporting" version = "0.11.1" @@ -148,108 +99,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "criterion" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10" -dependencies = [ - "atty", - "cast", - "clap", - "criterion-plot", - "csv", - "itertools", - "lazy_static", - "num-traits", - "oorandom", - 
"plotters", - "rayon", - "regex", - "serde", - "serde_cbor", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" -dependencies = [ - "cast", - "itertools", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" -dependencies = [ - "cfg-if", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "lazy_static", - "memoffset", - "scopeguard", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" -dependencies = [ - "cfg-if", - "lazy_static", -] - -[[package]] -name = "csv" -version = "1.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" -dependencies = [ - "bstr", - "csv-core", - "itoa 0.4.8", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - 
[[package]] name = "data-url" version = "0.1.1" @@ -427,21 +276,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "half" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - [[package]] name = "iai" version = "0.1.1" @@ -481,27 +315,12 @@ version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" -[[package]] -name = "itoa" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" - [[package]] name = "jpeg-decoder" version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "229d53d58899083193af11e15917b5640cd40b29ff475a1fe4ef725deb02d0f2" -[[package]] -name = "js-sys" -version = "0.3.55" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" -dependencies = [ - "wasm-bindgen", -] - [[package]] name = "kurbo" version = "0.8.3" @@ -511,12 +330,6 @@ dependencies = [ "arrayvec 0.7.2", ] -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - [[package]] name = "libc" version = "0.2.112" @@ -538,12 +351,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" -[[package]] -name = "memchr" -version = "2.4.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" - [[package]] name = "memmap2" version = "0.5.0" @@ -553,15 +360,6 @@ dependencies = [ "libc", ] -[[package]] -name = "memoffset" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" -dependencies = [ - "autocfg", -] - [[package]] name = "miniz_oxide" version = "0.3.7" @@ -622,28 +420,12 @@ dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "once_cell" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" -[[package]] -name = "oorandom" -version = "11.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" - [[package]] name = "pdf-writer" version = "0.4.1" @@ -651,7 +433,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36d760a6f2ac90811cba1006a298e8a7e5ce2c922bb5dc7f7000911a4a6b60f4" dependencies = [ "bitflags", - "itoa 0.4.8", + "itoa", "ryu", ] @@ -661,34 +443,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468" -[[package]] -name = "plotters" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = 
"plotters-backend" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c" - -[[package]] -name = "plotters-svg" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9" -dependencies = [ - "plotters-backend", -] - [[package]] name = "png" version = "0.16.8" @@ -778,31 +532,6 @@ dependencies = [ "rand_core", ] -[[package]] -name = "rayon" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "lazy_static", - "num_cpus", -] - [[package]] name = "rctree" version = "0.4.0" @@ -828,27 +557,6 @@ dependencies = [ "redox_syscall", ] -[[package]] -name = "regex" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" -dependencies = [ - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - -[[package]] -name = "regex-syntax" -version = "0.6.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" - [[package]] name = "resvg" version = "0.19.0" @@ -882,15 +590,6 @@ dependencies = [ "xmlparser", ] -[[package]] -name = 
"rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - [[package]] name = "rustybuzz" version = "0.4.0" @@ -931,18 +630,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "semver" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" - [[package]] name = "serde" version = "1.0.132" @@ -952,16 +639,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde_cbor" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" -dependencies = [ - "half", - "serde", -] - [[package]] name = "serde_derive" version = "1.0.132" @@ -973,17 +650,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_json" -version = "1.0.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee2bb9cd061c5865d345bb02ca49fcef1391741b672b54a0bf7b679badec3142" -dependencies = [ - "itoa 1.0.1", - "ryu", - "serde", -] - [[package]] name = "simplecss" version = "0.2.1" @@ -1046,15 +712,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - [[package]] name = "thiserror" version = "1.0.30" @@ -1089,16 +746,6 @@ dependencies = [ "safe_arch", ] -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "ttf-parser" version = "0.12.3" @@ -1111,7 +758,6 @@ version = "0.1.0" dependencies = [ "anyhow", "codespan-reporting", - "criterion", "dirs", "filedescriptor", "fxhash", @@ -1246,70 +892,6 @@ version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" -[[package]] -name = "wasm-bindgen" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" -dependencies = [ - "bumpalo", - "lazy_static", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" - -[[package]] -name = "web-sys" -version = "0.3.55" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 5c4dddcb7..8251a7fa6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,7 +61,6 @@ walkdir = "2" # Dependencies updates: # - Bump ttf-parser when rustybuzz is updated # - Bump usvg and resvg in conjunction with svg2pdf -criterion = "0.3" [[bin]] name = "typst" @@ -76,8 +75,3 @@ harness = false name = "oneshot" path = "benches/oneshot.rs" harness = false - -[[bench]] -name = "timed" -path = "benches/timed.rs" -harness = false diff --git a/benches/timed.rs b/benches/timed.rs deleted file mode 100644 index 83820af2f..000000000 --- a/benches/timed.rs +++ /dev/null @@ -1,98 +0,0 @@ -use std::path::Path; - -use criterion::{black_box, criterion_group, criterion_main, Criterion}; - -use typst::eval::eval; -use typst::layout::layout; -use typst::loading::MemLoader; -use typst::parse::{parse, Scanner, TokenMode, Tokens}; -use typst::source::SourceId; -use typst::Context; - -const SRC: &str = include_str!("bench.typ"); -const FONT: &[u8] = include_bytes!("../fonts/IBMPlexSans-Regular.ttf"); - -fn context() -> (Context, SourceId) { - let loader = MemLoader::new().with(Path::new("font.ttf"), FONT).wrap(); - let mut ctx = Context::new(loader); - let id = ctx.sources.provide(Path::new("src.typ"), SRC.to_string()); - (ctx, id) -} - -fn bench_decode(c: &mut Criterion) { - c.bench_function("decode", |b| { - b.iter(|| { - // We don't use chars().count() because that has a special - // superfast implementation. 
- let mut count = 0; - let mut chars = black_box(SRC).chars(); - while let Some(_) = chars.next() { - count += 1; - } - count - }) - }); -} - -fn bench_scan(c: &mut Criterion) { - c.bench_function("scan", |b| { - b.iter(|| { - let mut count = 0; - let mut scanner = Scanner::new(black_box(SRC)); - while let Some(_) = scanner.eat() { - count += 1; - } - count - }) - }); -} - -fn bench_tokenize(c: &mut Criterion) { - c.bench_function("tokenize", |b| { - b.iter(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup)).count()) - }); -} - -fn bench_parse(c: &mut Criterion) { - c.bench_function("parse", |b| b.iter(|| parse(SRC))); -} - -fn bench_edit(c: &mut Criterion) { - let (mut ctx, id) = context(); - c.bench_function("edit", |b| { - b.iter(|| black_box(ctx.sources.edit(id, 1168 .. 1171, "_Uhr_"))) - }); -} - -fn bench_eval(c: &mut Criterion) { - let (mut ctx, id) = context(); - let ast = ctx.sources.get(id).ast().unwrap(); - c.bench_function("eval", |b| b.iter(|| eval(&mut ctx, id, &ast).unwrap())); -} - -fn bench_to_tree(c: &mut Criterion) { - let (mut ctx, id) = context(); - let module = ctx.evaluate(id).unwrap(); - c.bench_function("to_tree", |b| { - b.iter(|| module.template.to_pages(ctx.style())) - }); -} - -fn bench_layout(c: &mut Criterion) { - let (mut ctx, id) = context(); - let tree = ctx.execute(id).unwrap(); - c.bench_function("layout", |b| b.iter(|| layout(&mut ctx, &tree))); -} - -criterion_group!( - benches, - bench_decode, - bench_scan, - bench_tokenize, - bench_parse, - bench_edit, - bench_eval, - bench_to_tree, - bench_layout -); -criterion_main!(benches); diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 9c912aae4..8e52c1437 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -1,480 +1,13 @@ use std::ops::Range; use std::rc::Rc; -use crate::syntax::{Green, GreenNode, NodeKind, Span}; +use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ parse_atomic, parse_atomic_markup, parse_block, 
parse_comment, parse_markup, parse_markup_elements, parse_template, TokenMode, }; -/// Allows partial refreshs of the [`Green`] node tree. -/// -/// This struct holds a description of a change. Its methods can be used to try -/// and apply the change to a green tree. -pub struct Reparser<'a> { - /// The new source code, with the change applied. - src: &'a str, - /// Which range in the old source file was changed. - replace_range: Span, - /// How many characters replaced the text in `replacement_range`. - replace_len: usize, -} - -impl<'a> Reparser<'a> { - /// Create a new reparser. - pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self { - Self { src, replace_range, replace_len } - } -} - -impl Reparser<'_> { - /// Find the innermost child that is incremental safe. - pub fn reparse(&self, green: &mut GreenNode) -> Result, ()> { - self.reparse_step(green, 0, TokenMode::Markup, true) - } - - fn reparse_step( - &self, - green: &mut GreenNode, - mut offset: usize, - parent_mode: TokenMode, - outermost: bool, - ) -> Result, ()> { - let kind = green.kind().clone(); - let mode = kind.mode().unwrap_or(parent_mode); - - let mut loop_result = None; - let mut child_at_start = true; - let last = green.children().len() - 1; - let mut start = None; - - for (i, child) in green.children_mut().iter_mut().enumerate() { - let child_span = - Span::new(self.replace_range.source, offset, offset + child.len()); - - // We look for the start in the element but we only take a position - // at the right border if this is markup or the last element. - // - // This is because in Markup mode, we want to examine all nodes - // touching a replacement but in code we want to atomically replace. 
- if child_span.contains(self.replace_range.start) - && (mode == TokenMode::Markup - || self.replace_range.start != child_span.end - || self.replace_range.len() == 0 - || i == last) - { - start = Some((i, offset)); - break; - } - - offset += child.len(); - child_at_start = child.kind().is_at_start(child_at_start); - } - - let (start_idx, start_offset) = start.ok_or(())?; - - for (i, child) in (green.children_mut()[start_idx ..]).iter_mut().enumerate() { - let i = i + start_idx; - let child_span = - Span::new(self.replace_range.source, offset, offset + child.len()); - - // Similarly to above, the end of the edit must be in the node but - // if it is at the edge and we are in markup node, we also want its - // neighbor! - if child_span.contains(self.replace_range.end) - && (mode != TokenMode::Markup - || self.replace_range.end != child_span.end - || i == last) - { - loop_result = Some(( - start_idx .. i + 1, - Span::new( - self.replace_range.source, - start_offset, - offset + child.len(), - ), - i == last && outermost, - child.kind().clone(), - )); - break; - } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { - break; - } - - offset += child.len(); - } - - let (child_idx_range, child_span, child_outermost, child_kind) = - loop_result.ok_or(())?; - - if child_idx_range.len() == 1 { - let idx = child_idx_range.start; - let child = &mut green.children_mut()[idx]; - - let old_len = child.len(); - // First, we try if the child has another, more specific applicable child. 
- if !child_kind.post().unsafe_interior() { - if let Ok(range) = match child { - Green::Node(n) => self.reparse_step( - Rc::make_mut(n), - start_offset, - kind.mode().unwrap_or(TokenMode::Code), - child_outermost, - ), - Green::Token(_) => Err(()), - } { - let new_len = child.len(); - green.update_child_len(new_len, old_len); - return Ok(range); - } - } - } - - debug_assert_ne!(child_idx_range.len(), 0); - - if mode == TokenMode::Code && child_idx_range.len() > 1 { - return Err(()); - } - - // We now have a child that we can replace and a function to do so. - let (func, policy) = - child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code)); - let func = func?; - - let src_span = inserted_span(child_span, self.replace_range, self.replace_len); - let recompile_range = if policy == Postcondition::AtomicPrimary { - src_span.start .. self.src.len() - } else { - src_span.to_range() - }; - - let (mut new_children, unterminated) = - func(&self.src[recompile_range], child_at_start).ok_or(())?; - - // Do not accept unclosed nodes if the old node did not use to be at the - // right edge of the tree. 
- if !child_outermost && unterminated { - return Err(()); - } - - let insertion = match check_invariants( - &new_children, - green.children(), - child_idx_range.clone(), - child_at_start, - mode, - src_span, - policy, - ) { - InvariantResult::Ok => Ok(new_children), - InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), - InvariantResult::Error => Err(()), - }?; - - green.replace_child_range(child_idx_range, insertion); - - Ok(src_span.to_range()) - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -enum InvariantResult { - Ok, - UseFirst, - Error, -} - -fn check_invariants( - use_children: &[Green], - old_children: &[Green], - child_idx_range: Range, - child_at_start: bool, - mode: TokenMode, - src_span: Span, - policy: Postcondition, -) -> InvariantResult { - let (new_children, ok) = if policy == Postcondition::AtomicPrimary { - if use_children.iter().map(Green::len).sum::() == src_span.len() { - (use_children, InvariantResult::Ok) - } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { - (&use_children[0 .. 1], InvariantResult::UseFirst) - } else { - return InvariantResult::Error; - } - } else { - (use_children, InvariantResult::Ok) - }; - - let child_mode = old_children[child_idx_range.start] - .kind() - .mode() - .unwrap_or(TokenMode::Code); - - // Check if the children / child has the right type. - let same_kind = match policy { - Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), - _ => false, - }; - - if same_kind || policy == Postcondition::AtomicPrimary { - if new_children.len() != 1 { - return InvariantResult::Error; - } - - if same_kind { - if old_children[child_idx_range.start].kind() != new_children[0].kind() { - return InvariantResult::Error; - } - } - } - - // Check if the neighbor invariants are still true. 
- if mode == TokenMode::Markup { - if child_idx_range.start > 0 { - if old_children[child_idx_range.start - 1].kind().pre() - == Precondition::RightWhitespace - && !new_children[0].kind().is_whitespace() - { - return InvariantResult::Error; - } - } - - if new_children.last().map(|x| x.kind().pre()) - == Some(Precondition::RightWhitespace) - && old_children.len() > child_idx_range.end - { - if !old_children[child_idx_range.end].kind().is_whitespace() { - return InvariantResult::Error; - } - } - - let mut new_at_start = child_at_start; - for child in new_children { - new_at_start = child.kind().is_at_start(new_at_start); - } - - for child in &old_children[child_idx_range.end ..] { - if child.kind().is_trivia() { - new_at_start = child.kind().is_at_start(new_at_start); - continue; - } - - match child.kind().pre() { - Precondition::AtStart if !new_at_start => { - return InvariantResult::Error; - } - Precondition::NotAtStart if new_at_start => { - return InvariantResult::Error; - } - _ => {} - } - break; - } - } - - ok -} - -/// Create a new span by specifying a span in which a modification happened -/// and how many characters are now in that span. -fn inserted_span(mut source: Span, other: Span, n: usize) -> Span { - if !source.surrounds(other) { - panic!(); - } - - let len_change = n as i64 - other.len() as i64; - source.end = (source.end as i64 + len_change) as usize; - source -} - -impl NodeKind { - /// Return the correct reparsing function given the postconditions for the - /// type. 
- fn reparsing_function( - &self, - parent_mode: TokenMode, - ) -> ( - Result Option<(Vec, bool)>, ()>, - Postcondition, - ) { - let policy = self.post(); - let mode = self.mode().unwrap_or(parent_mode); - - match policy { - Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy), - Postcondition::AtomicPrimary if mode == TokenMode::Code => { - (Ok(parse_atomic), policy) - } - Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy), - Postcondition::SameKind(x) if x == None || x == Some(mode) => { - let parser: fn(&str, bool) -> _ = match self { - NodeKind::Template => parse_template, - NodeKind::Block => parse_block, - NodeKind::LineComment | NodeKind::BlockComment => parse_comment, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } - _ => { - let parser: fn(&str, bool) -> _ = match mode { - TokenMode::Markup if self == &Self::Markup => parse_markup, - TokenMode::Markup => parse_markup_elements, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } - } - } - - /// Whether it is safe to do incremental parsing on this node. Never allow - /// non-termination errors if this is not already the last leaf node. - pub fn post(&self) -> Postcondition { - match self { - // Replacing parenthesis changes if the expression is balanced and - // is therefore not safe. - Self::LeftBracket - | Self::RightBracket - | Self::LeftBrace - | Self::RightBrace - | Self::LeftParen - | Self::RightParen => Postcondition::Unsafe, - - // Replacing an operator can change whether the parent is an - // operation which makes it unsafe. The star can appear in markup. 
- Self::Star - | Self::Comma - | Self::Semicolon - | Self::Colon - | Self::Plus - | Self::Minus - | Self::Slash - | Self::Eq - | Self::EqEq - | Self::ExclEq - | Self::Lt - | Self::LtEq - | Self::Gt - | Self::GtEq - | Self::PlusEq - | Self::HyphEq - | Self::StarEq - | Self::SlashEq - | Self::Not - | Self::And - | Self::Or - | Self::With - | Self::Dots - | Self::Arrow => Postcondition::Unsafe, - - // These keywords are literals and can be safely be substituted with - // other expressions. - Self::None | Self::Auto => Postcondition::AtomicPrimary, - - // These keywords change what kind of expression the parent is and - // how far the expression would go. - Self::Let - | Self::Set - | Self::If - | Self::Else - | Self::For - | Self::In - | Self::While - | Self::Break - | Self::Continue - | Self::Return - | Self::Import - | Self::Include - | Self::From => Postcondition::Unsafe, - - Self::Markup => Postcondition::SameKind(None), - - Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), - - // These are all replaceable by other tokens. - Self::Parbreak - | Self::Linebreak - | Self::Text(_) - | Self::TextInLine(_) - | Self::NonBreakingSpace - | Self::EnDash - | Self::EmDash - | Self::Escape(_) - | Self::Strong - | Self::Emph - | Self::Heading - | Self::Enum - | Self::List - | Self::Raw(_) - | Self::Math(_) => Postcondition::Safe, - - // Changing the heading level, enum numbering, or list bullet - // changes the next layer. - Self::EnumNumbering(_) => Postcondition::Unsafe, - - // These are expressions that can be replaced by other expressions. 
- Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group => Postcondition::AtomicPrimary, - - Self::Call - | Self::Unary - | Self::Binary - | Self::CallArgs - | Self::Named - | Self::Spread => Postcondition::UnsafeLayer, - - // The closure is a bit magic with the let expression, and also it - // is not atomic. - Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, - - // These can appear as bodies and would trigger an error if they - // became something else. - Self::Template => Postcondition::SameKind(None), - Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), - - Self::ForExpr - | Self::WhileExpr - | Self::IfExpr - | Self::LetExpr - | Self::SetExpr - | Self::ImportExpr - | Self::IncludeExpr => Postcondition::AtomicPrimary, - - Self::WithExpr | Self::ForPattern | Self::ImportItems => { - Postcondition::UnsafeLayer - } - - // These can appear everywhere and must not change to other stuff - // because that could change the outer expression. - Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), - - Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, - } - } - - /// The appropriate precondition for the type. - pub fn pre(&self) -> Precondition { - match self { - Self::Heading | Self::Enum | Self::List => Precondition::AtStart, - Self::TextInLine(_) => Precondition::NotAtStart, - Self::Linebreak => Precondition::RightWhitespace, - _ => Precondition::None, - } - } -} - /// The conditions that a node has to fulfill in order to be replaced. /// /// This can dictate if a node can be replaced at all and if yes, what can take @@ -519,6 +52,438 @@ pub enum Precondition { None, } +/// Allows partial refreshs of the [`Green`] node tree. +/// +/// This struct holds a description of a change. 
Its methods can be used to try +/// and apply the change to a green tree. +pub struct Reparser<'a> { + /// The new source code, with the change applied. + src: &'a str, + /// Which range in the old source file was changed. + replace_range: Range, + /// How many characters replaced the text in `replace_range`. + replace_len: usize, +} + +impl<'a> Reparser<'a> { + /// Create a new reparser. + pub fn new(src: &'a str, replace_range: Range, replace_len: usize) -> Self { + Self { src, replace_range, replace_len } + } +} + +impl Reparser<'_> { + /// Find the innermost child that is incremental safe. + pub fn reparse(&self, green: &mut GreenNode) -> Option> { + self.reparse_step(green, 0, TokenMode::Markup, true) + } + + fn reparse_step( + &self, + green: &mut GreenNode, + mut offset: usize, + parent_mode: TokenMode, + mut outermost: bool, + ) -> Option> { + let kind = green.kind().clone(); + let mode = kind.mode().unwrap_or(parent_mode); + + let mut child_at_start = true; + let last = green.children().len().saturating_sub(1); + let mut start = None; + + for (i, child) in green.children_mut().iter_mut().enumerate() { + let child_span = offset .. offset + child.len(); + + // We look for the start in the element but we only take a position + // at the right border if this is markup or the last element. + // + // This is because in Markup mode, we want to examine all nodes + // touching a replacement but in code we want to atomically replace. + if child_span.contains(&self.replace_range.start) + || (mode == TokenMode::Markup + && self.replace_range.start == child_span.end) + { + start = Some((i, offset)); + break; + } + + offset += child.len(); + child_at_start = child.kind().is_at_start(child_at_start); + } + + let (start_idx, start_offset) = start?; + let mut end = None; + + for (i, child) in green.children_mut().iter_mut().enumerate().skip(start_idx) { + let child_span = offset .. 
offset + child.len(); + + // Similarly to above, the end of the edit must be in the node but + // if it is at the edge and we are in markup node, we also want its + // neighbor! + if child_span.contains(&self.replace_range.end) + || self.replace_range.end == child_span.end + && (mode != TokenMode::Markup || i == last) + { + outermost &= i == last; + end = Some(i); + break; + } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { + break; + } + + offset += child.len(); + } + + let end = end?; + let child_idx_range = start_idx .. end + 1; + let child_span = start_offset .. offset + green.children()[end].len(); + let child_kind = green.children()[end].kind().clone(); + + if child_idx_range.len() == 1 { + let idx = child_idx_range.start; + let child = &mut green.children_mut()[idx]; + let prev_len = child.len(); + + // First, we try if the child has another, more specific applicable child. + if !child_kind.post().unsafe_interior() { + if let Some(range) = match child { + Green::Node(n) => self.reparse_step( + Rc::make_mut(n), + start_offset, + kind.mode().unwrap_or(TokenMode::Code), + outermost, + ), + Green::Token(_) => None, + } { + let new_len = child.len(); + green.update_child_len(new_len, prev_len); + return Some(range); + } + } + } + + debug_assert_ne!(child_idx_range.len(), 0); + + if mode == TokenMode::Code && child_idx_range.len() > 1 { + return None; + } + + // We now have a child that we can replace and a function to do so. + let func = + child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code))?; + let policy = child_kind.post(); + + let len_change = self.replace_len as isize - self.replace_range.len() as isize; + let mut src_span = child_span; + src_span.end = (src_span.end as isize + len_change) as usize; + + let recompile_range = if policy == Postcondition::AtomicPrimary { + src_span.start .. 
self.src.len() + } else { + src_span.clone() + }; + + let (mut new_children, terminated) = + func(&self.src[recompile_range], child_at_start)?; + + // Do not accept unclosed nodes if the old node did not use to be at the + // right edge of the tree. + if !outermost && !terminated { + return None; + } + + let insertion = match check_invariants( + &new_children, + green.children(), + child_idx_range.clone(), + child_at_start, + mode, + src_span.clone(), + policy, + ) { + InvariantResult::Ok => Some(new_children), + InvariantResult::UseFirst => Some(vec![std::mem::take(&mut new_children[0])]), + InvariantResult::Error => None, + }?; + + green.replace_child_range(child_idx_range, insertion); + + Some(src_span) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum InvariantResult { + Ok, + UseFirst, + Error, +} + +fn check_invariants( + use_children: &[Green], + old_children: &[Green], + child_idx_range: Range, + child_at_start: bool, + mode: TokenMode, + src_span: Range, + policy: Postcondition, +) -> InvariantResult { + let (new_children, ok) = if policy == Postcondition::AtomicPrimary { + if use_children.iter().map(Green::len).sum::() == src_span.len() { + (use_children, InvariantResult::Ok) + } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { + (&use_children[0 .. 1], InvariantResult::UseFirst) + } else { + return InvariantResult::Error; + } + } else { + (use_children, InvariantResult::Ok) + }; + + let child_mode = old_children[child_idx_range.start].kind().mode().unwrap_or(mode); + + // Check if the children / child has the right type. 
+ let same_kind = match policy { + Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), + _ => false, + }; + + if same_kind || policy == Postcondition::AtomicPrimary { + if new_children.len() != 1 { + return InvariantResult::Error; + } + + if same_kind { + if old_children[child_idx_range.start].kind() != new_children[0].kind() { + return InvariantResult::Error; + } + } + } + + // Check if the neighbor invariants are still true. + if mode == TokenMode::Markup { + if child_idx_range.start > 0 { + if old_children[child_idx_range.start - 1].kind().pre() + == Precondition::RightWhitespace + && !new_children[0].kind().is_whitespace() + { + return InvariantResult::Error; + } + } + + if new_children.last().map(|x| x.kind().pre()) + == Some(Precondition::RightWhitespace) + && old_children.len() > child_idx_range.end + { + if !old_children[child_idx_range.end].kind().is_whitespace() { + return InvariantResult::Error; + } + } + + let mut post_at_start = child_at_start; + for child in new_children { + post_at_start = child.kind().is_at_start(post_at_start); + } + + for child in &old_children[child_idx_range.end ..] { + if child.kind().is_trivia() { + post_at_start = child.kind().is_at_start(post_at_start); + continue; + } + + let pre = child.kind().pre(); + if pre == Precondition::AtStart && !post_at_start + || pre == Precondition::NotAtStart && post_at_start + { + return InvariantResult::Error; + } + break; + } + } + + ok +} + +impl NodeKind { + /// Return the correct reparsing function given the postconditions for the + /// type. 
+ fn reparsing_function( + &self, + parent_mode: TokenMode, + ) -> Option Option<(Vec, bool)>> { + let policy = self.post(); + let mode = self.mode().unwrap_or(parent_mode); + + match policy { + Postcondition::Unsafe | Postcondition::UnsafeLayer => None, + Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), + Postcondition::AtomicPrimary => Some(parse_atomic_markup), + Postcondition::SameKind(x) if x == None || x == Some(mode) => match self { + NodeKind::Template => Some(parse_template), + NodeKind::Block => Some(parse_block), + NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment), + _ => None, + }, + _ => match mode { + TokenMode::Markup if self == &Self::Markup => Some(parse_markup), + TokenMode::Markup => Some(parse_markup_elements), + _ => return None, + }, + } + } + + /// Whether it is safe to do incremental parsing on this node. Never allow + /// non-termination errors if this is not already the last leaf node. + pub fn post(&self) -> Postcondition { + match self { + // Replacing parenthesis changes if the expression is balanced and + // is therefore not safe. + Self::LeftBracket + | Self::RightBracket + | Self::LeftBrace + | Self::RightBrace + | Self::LeftParen + | Self::RightParen => Postcondition::Unsafe, + + // Replacing an operator can change whether the parent is an + // operation which makes it unsafe. The star can appear in markup. + Self::Star + | Self::Comma + | Self::Semicolon + | Self::Colon + | Self::Plus + | Self::Minus + | Self::Slash + | Self::Eq + | Self::EqEq + | Self::ExclEq + | Self::Lt + | Self::LtEq + | Self::Gt + | Self::GtEq + | Self::PlusEq + | Self::HyphEq + | Self::StarEq + | Self::SlashEq + | Self::Not + | Self::And + | Self::Or + | Self::With + | Self::Dots + | Self::Arrow => Postcondition::Unsafe, + + // These keywords change what kind of expression the parent is and + // how far the expression would go. 
+ Self::Let + | Self::Set + | Self::If + | Self::Else + | Self::For + | Self::In + | Self::While + | Self::Break + | Self::Continue + | Self::Return + | Self::Import + | Self::Include + | Self::From => Postcondition::Unsafe, + + // Changing the heading level, enum numbering, or list bullet + // changes the next layer. + Self::EnumNumbering(_) => Postcondition::Unsafe, + + Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + + // These are complex expressions which may screw with their + // environments. + Self::Call + | Self::Unary + | Self::Binary + | Self::CallArgs + | Self::Named + | Self::Spread => Postcondition::UnsafeLayer, + + // The closure is a bit magic with the let expression, and also it + // is not atomic. + Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + + // Missing these creates errors for the parents. + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + Postcondition::UnsafeLayer + } + + // Only markup is expected at the points where it does occur. + Self::Markup => Postcondition::SameKind(None), + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + + // These can appear as bodies and would trigger an error if they + // became something else. + Self::Template => Postcondition::SameKind(None), + Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + + // Whitespace in code mode has to remain whitespace or else the type + // of things would change. + Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + + // These are expressions that can be replaced by other expressions. 
+ Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group + | Self::None + | Self::Auto => Postcondition::AtomicPrimary, + + // More complex, but still an expression. + Self::ForExpr + | Self::WhileExpr + | Self::IfExpr + | Self::LetExpr + | Self::SetExpr + | Self::ImportExpr + | Self::IncludeExpr => Postcondition::AtomicPrimary, + + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Linebreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Heading + | Self::Enum + | Self::List + | Self::Raw(_) + | Self::Math(_) => Postcondition::Safe, + } + } + + /// The appropriate precondition for the type. + pub fn pre(&self) -> Precondition { + match self { + Self::Heading | Self::Enum | Self::List => Precondition::AtStart, + Self::TextInLine(_) => Precondition::NotAtStart, + Self::Linebreak => Precondition::RightWhitespace, + _ => Precondition::None, + } + } +} + impl Postcondition { pub fn unsafe_interior(&self) -> bool { match self { @@ -544,6 +509,7 @@ mod tests { use super::*; #[test] + #[rustfmt::skip] fn test_incremental_parse() { #[track_caller] fn test(prev: &str, range: Range, with: &str, incr: Range) { @@ -551,12 +517,14 @@ mod tests { let range = source.edit(range, with); assert_eq!(range, incr); - let incr_tree = source.root(); + let incr_tree = source.root().clone(); assert_eq!(parse(source.src()), incr_tree); } // Test simple replacements. - test("hello world", 6 .. 11, "wankers", 5 .. 13); + test("hello world", 6 .. 11, "walkers", 5 .. 13); + test("some content", 0..12, "", 0..0); + test("", 0..0, "do it", 0..5); test("a d e", 1 .. 3, " b c d", 0 .. 8); test("a #f() e", 1 .. 6, " b c d", 0 .. 8); test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 
9); @@ -564,53 +532,18 @@ mod tests { test("your thing", 5 .. 5, "a", 4 .. 11); test("a your thing a", 6 .. 7, "a", 2 .. 12); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); - test("#call() abc", 7 .. 7, "[]", 0 .. 13); - // test( - // "hi\n- item\n- item 2\n - item 3", - // 10 .. 10, - // " ", - // 9 .. 33, - // ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 16 .. 20, - "none", - 16 .. 20, - ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 33 .. 42, - "[_gronk_]", - 33 .. 42, - ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 34 .. 41, - "_bar_", - 34 .. 39, - ); + test("#call() abc", 7 .. 7, "[]", 0 .. 10); + // test("hi\n- item\n- item 2\n - item 3", 10 .. 10, " ", 9 .. 33); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39); test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9); - test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 13 .. 15); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32); test("hello {x}", 6 .. 9, "#f()", 5 .. 10); - test( - "this is -- in my opinion -- spectacular", - 8 .. 10, - "---", - 7 .. 12, - ); - test( - "understanding `code` is complicated", - 15 .. 15, - "C ", - 14 .. 22, - ); + test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12); + test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); - test( - "a #let rect with (fill: eastern)\nb", - 16 .. 
31, - " (stroke: conifer", - 2 .. 34, - ); + test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); // Test the whitespace invariants. test("hello \\ world", 7 .. 8, "a ", 6 .. 14); @@ -642,18 +575,8 @@ mod tests { test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); test("a b c", 1 .. 1, " /* letters */", 0 .. 16); test("a b c", 1 .. 1, " /* letters", 0 .. 16); - test( - "{if i==1 {a} else [b]; b()}", - 12 .. 12, - " /* letters */", - 1 .. 35, - ); - test( - "{if i==1 {a} else [b]; b()}", - 12 .. 12, - " /* letters", - 0 .. 38, - ); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 2c421374b..2c5afb6b3 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -64,7 +64,7 @@ pub fn parse_markup_elements( /// Parse a template literal. Returns `Some` if all of the input was consumed. pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); - if !matches!(p.peek(), Some(NodeKind::LeftBracket)) { + if !p.at(&NodeKind::LeftBracket) { return None; } @@ -75,7 +75,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { /// Parse a code block. Returns `Some` if all of the input was consumed. pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); - if !matches!(p.peek(), Some(NodeKind::LeftBrace)) { + if !p.at(&NodeKind::LeftBrace) { return None; } @@ -252,14 +252,14 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. 
- match (!atomic).then(|| p.peek().and_then(UnOp::from_token)).flatten() { - Some(op) => { + match p.peek().and_then(UnOp::from_token) { + Some(op) if !atomic => { p.eat(); let prec = op.precedence(); expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); } - None => primary(p, atomic)?, + _ => primary(p, atomic)?, }; loop { diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 06cb15785..ade9b5df5 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -22,7 +22,7 @@ pub struct Parser<'s> { /// The children of the currently built node. children: Vec, /// Whether the last group was terminated. - last_group_terminated: bool, + last_terminated: bool, } impl<'s> Parser<'s> { @@ -38,7 +38,7 @@ impl<'s> Parser<'s> { current_start: 0, groups: vec![], children: vec![], - last_group_terminated: true, + last_terminated: true, } } @@ -50,7 +50,7 @@ impl<'s> Parser<'s> { /// End the parsing process and return multiple children. pub fn eject(self) -> Option<(Vec, bool)> { if self.eof() && self.group_success() { - Some((self.children, self.tokens.was_unterminated())) + Some((self.children, self.tokens.was_terminated())) } else { None } @@ -99,7 +99,7 @@ impl<'s> Parser<'s> { /// remains stuff in the string. pub fn eject_partial(self) -> Option<(Vec, bool)> { self.group_success() - .then(|| (self.children, self.tokens.was_unterminated())) + .then(|| (self.children, self.tokens.was_terminated())) } /// Whether the end of the source string or group is reached. 
@@ -244,7 +244,7 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - self.last_group_terminated = true; + self.last_terminated = true; let mut rescan = self.tokens.mode() != group_mode; @@ -263,7 +263,7 @@ impl<'s> Parser<'s> { rescan = false; } else if required { self.push_error(format_eco!("expected {}", end)); - self.last_group_terminated = false; + self.last_terminated = false; } } @@ -283,7 +283,7 @@ impl<'s> Parser<'s> { /// Check if the group processing was successfully terminated. pub fn group_success(&self) -> bool { - self.last_group_terminated && self.groups.is_empty() + self.last_terminated && self.groups.is_empty() } /// Low-level bump that consumes exactly one token without special trivia diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 7be31fe18..836e8cf17 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -13,7 +13,7 @@ use crate::util::EcoString; pub struct Tokens<'s> { s: Scanner<'s>, mode: TokenMode, - has_unterminated: bool, + was_terminated: bool, } /// What kind of tokens to emit. @@ -32,7 +32,7 @@ impl<'s> Tokens<'s> { Self { s: Scanner::new(src), mode, - has_unterminated: false, + was_terminated: true, } } @@ -69,10 +69,10 @@ impl<'s> Tokens<'s> { self.s } - /// Whether the last token was unterminated. + /// Whether the last token was terminated. 
#[inline] - pub fn was_unterminated(&self) -> bool { - self.has_unterminated + pub fn was_terminated(&self) -> bool { + self.was_terminated } } @@ -259,7 +259,7 @@ impl<'s> Tokens<'s> { ) } } else { - self.has_unterminated = true; + self.was_terminated = false; NodeKind::Error( ErrorPos::End, "expected closing brace".into(), @@ -358,7 +358,7 @@ impl<'s> Tokens<'s> { let remaining = backticks - found; let noun = if remaining == 1 { "backtick" } else { "backticks" }; - self.has_unterminated = true; + self.was_terminated = false; NodeKind::Error( ErrorPos::End, if found == 0 { @@ -406,7 +406,7 @@ impl<'s> Tokens<'s> { display, })) } else { - self.has_unterminated = true; + self.was_terminated = false; NodeKind::Error( ErrorPos::End, if !display || (!escaped && dollar) { @@ -495,7 +495,7 @@ impl<'s> Tokens<'s> { if self.s.eat_if('"') { NodeKind::Str(string) } else { - self.has_unterminated = true; + self.was_terminated = false; NodeKind::Error(ErrorPos::End, "expected quote".into()) } } @@ -503,7 +503,7 @@ impl<'s> Tokens<'s> { fn line_comment(&mut self) -> NodeKind { self.s.eat_until(is_newline); if self.s.peek().is_none() { - self.has_unterminated = true; + self.was_terminated = false; } NodeKind::LineComment } @@ -511,7 +511,7 @@ impl<'s> Tokens<'s> { fn block_comment(&mut self) -> NodeKind { let mut state = '_'; let mut depth = 1; - let mut terminated = false; + self.was_terminated = false; // Find the first `*/` that does not correspond to a nested `/*`. 
while let Some(c) = self.s.eat() { @@ -519,7 +519,7 @@ impl<'s> Tokens<'s> { ('*', '/') => { depth -= 1; if depth == 0 { - terminated = true; + self.was_terminated = true; break; } '_' @@ -532,10 +532,6 @@ impl<'s> Tokens<'s> { } } - if !terminated { - self.has_unterminated = true; - } - NodeKind::BlockComment } diff --git a/src/source.rs b/src/source.rs index 421412ee8..6cca9f751 100644 --- a/src/source.rs +++ b/src/source.rs @@ -12,7 +12,7 @@ use crate::diag::TypResult; use crate::loading::{FileHash, Loader}; use crate::parse::{is_newline, parse, Reparser, Scanner}; use crate::syntax::ast::Markup; -use crate::syntax::{self, Category, GreenNode, RedNode, Span}; +use crate::syntax::{self, Category, GreenNode, RedNode}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] @@ -265,7 +265,8 @@ impl SourceFile { /// Edit the source file by replacing the given range. /// - /// This panics if the `replace` range is out of bounds. + /// Returns the range of the section in the new source that was ultimately + /// reparsed. The method panics if the `replace` range is out of bounds. pub fn edit(&mut self, replace: Range, with: &str) -> Range { let start = replace.start; self.src.replace_range(replace.clone(), with); @@ -284,9 +285,8 @@ impl SourceFile { .extend(newlines(&self.src[start ..]).map(|idx| start + idx)); // Update the root node. - let span = Span::new(self.id, replace.start, replace.end); - let reparser = Reparser::new(&self.src, span, with.len()); - if let Ok(range) = reparser.reparse(Rc::make_mut(&mut self.root)) { + let reparser = Reparser::new(&self.src, replace, with.len()); + if let Some(range) = reparser.reparse(Rc::make_mut(&mut self.root)) { range } else { self.root = parse(&self.src); @@ -302,12 +302,6 @@ impl SourceFile { let red = RedNode::from_root(self.root.clone(), self.id); syntax::highlight(red.as_ref(), range, &mut f) } - - /// Obtain a reference to the source's root green node. 
- #[cfg(test)] - pub(crate) fn root(&self) -> Rc { - self.root.clone() - } } /// The indices at which lines start (right behind newlines). diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 9ab530d81..b72e58431 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -48,15 +48,6 @@ impl Green { self.data().len() } - /// Set the length of the node. - pub fn set_len(&mut self, len: usize) { - let data = match self { - Self::Node(node) => &mut Rc::make_mut(node).data, - Self::Token(data) => data, - }; - data.set_len(len); - } - /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { match self { @@ -139,11 +130,6 @@ impl GreenNode { &self.children } - /// The node's children, mutably. - pub fn children_mut(&mut self) -> &mut [Green] { - &mut self.children - } - /// The node's metadata. pub fn data(&self) -> &GreenData { &self.data @@ -159,10 +145,15 @@ impl GreenNode { self.data().len() } + /// The node's children, mutably. + pub(crate) fn children_mut(&mut self) -> &mut [Green] { + &mut self.children + } + /// Replaces a range of children with some replacement. /// /// This method updates the `erroneous` and `data.len` fields. - pub fn replace_child_range( + pub(crate) fn replace_child_range( &mut self, child_idx_range: Range, replacement: Vec, @@ -187,12 +178,12 @@ impl GreenNode { self.erroneous = self.erroneous || replacement.iter().any(Green::erroneous); self.children.splice(child_idx_range, replacement); - self.data.set_len(self.data.len + new_len - old_len); + self.data.len = self.data.len + new_len - old_len; } /// Update the length of this node given the old and new length of a /// replaced child. 
- pub fn update_child_len(&mut self, new_len: usize, old_len: usize) { + pub(crate) fn update_child_len(&mut self, new_len: usize, old_len: usize) { self.data.len = self.data.len() + new_len - old_len; self.erroneous = self.children.iter().any(|x| x.erroneous()); } @@ -246,11 +237,6 @@ impl GreenData { pub fn len(&self) -> usize { self.len } - - /// Set the length of the node. - pub fn set_len(&mut self, len: usize) { - self.len = len; - } } impl From for Green { @@ -261,7 +247,7 @@ impl From for Green { impl Debug for GreenData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", self.kind, self.len) + write!(f, "{:?}: {}", &self.kind, self.len) } } @@ -375,11 +361,6 @@ impl<'a> RedRef<'a> { Span::new(self.id, self.offset, self.offset + self.green.len()) } - /// Whether the node or its children contain an error. - pub fn erroneous(self) -> bool { - self.green.erroneous() - } - /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -731,19 +712,12 @@ impl NodeKind { /// Whether this is whitespace. pub fn is_whitespace(&self) -> bool { - match self { - Self::Space(_) | Self::Parbreak => true, - _ => false, - } + matches!(self, Self::Space(_) | Self::Parbreak) } /// Whether this is trivia. pub fn is_trivia(&self) -> bool { - match self { - _ if self.is_whitespace() => true, - Self::LineComment | Self::BlockComment => true, - _ => false, - } + self.is_whitespace() || matches!(self, Self::LineComment | Self::BlockComment) } /// Whether this is some kind of error. 
@@ -765,7 +739,6 @@ impl NodeKind { pub fn mode(&self) -> Option { match self { Self::Markup - | Self::Space(_) | Self::Linebreak | Self::Parbreak | Self::Text(_) @@ -783,6 +756,7 @@ impl NodeKind { | Self::Raw(_) | Self::Math(_) => Some(TokenMode::Markup), Self::Template + | Self::Space(_) | Self::Block | Self::Ident(_) | Self::LetExpr From 12f7335ac365759a8c6bc942ef6830c23a4176fc Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 28 Nov 2021 22:32:20 +0100 Subject: [PATCH 12/16] Clarity and bugfix Fixes a bug where validation would wrongly reject an atomic primary reparse due to trailing whitespace. Co-Authored-By: Martin --- src/parse/incremental.rs | 289 ++++++++++++++++++--------------------- 1 file changed, 136 insertions(+), 153 deletions(-) diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 8e52c1437..cc100a4c3 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -85,13 +85,14 @@ impl Reparser<'_> { parent_mode: TokenMode, mut outermost: bool, ) -> Option> { - let kind = green.kind().clone(); - let mode = kind.mode().unwrap_or(parent_mode); + let mode = green.kind().mode().unwrap_or(parent_mode); + let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); + let child_count = green.children().len(); - let mut child_at_start = true; - let last = green.children().len().saturating_sub(1); - let mut start = None; + let mut first = None; + let mut at_start = true; + // Find the the first child in the range of children to reparse. for (i, child) in green.children_mut().iter_mut().enumerate() { let child_span = offset .. 
offset + child.len(); @@ -104,18 +105,19 @@ impl Reparser<'_> { || (mode == TokenMode::Markup && self.replace_range.start == child_span.end) { - start = Some((i, offset)); + first = Some((i, offset)); break; } offset += child.len(); - child_at_start = child.kind().is_at_start(child_at_start); + at_start = child.kind().is_at_start(at_start); } - let (start_idx, start_offset) = start?; - let mut end = None; + let (first_idx, first_start) = first?; + let mut last = None; - for (i, child) in green.children_mut().iter_mut().enumerate().skip(start_idx) { + // Find the the last child in the range of children to reparse. + for (i, child) in green.children_mut().iter_mut().enumerate().skip(first_idx) { let child_span = offset .. offset + child.len(); // Similarly to above, the end of the edit must be in the node but @@ -123,35 +125,35 @@ impl Reparser<'_> { // neighbor! if child_span.contains(&self.replace_range.end) || self.replace_range.end == child_span.end - && (mode != TokenMode::Markup || i == last) + && (mode != TokenMode::Markup || i + 1 == child_count) { - outermost &= i == last; - end = Some(i); + outermost &= i + 1 == child_count; + last = Some((i, offset + child.len())); break; - } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { + } else if mode != TokenMode::Markup || !child.kind().post().safe_in_markup() { break; } offset += child.len(); } - let end = end?; - let child_idx_range = start_idx .. end + 1; - let child_span = start_offset .. offset + green.children()[end].len(); - let child_kind = green.children()[end].kind().clone(); + let (last_idx, last_end) = last?; + let children_range = first_idx .. last_idx + 1; + let children_span = first_start .. last_end; + let last_kind = green.children()[last_idx].kind().clone(); - if child_idx_range.len() == 1 { - let idx = child_idx_range.start; - let child = &mut green.children_mut()[idx]; + // First, we try if the child itself has another, more specific + // applicable child. 
+ if children_range.len() == 1 { + let child = &mut green.children_mut()[children_range.start]; let prev_len = child.len(); - // First, we try if the child has another, more specific applicable child. - if !child_kind.post().unsafe_interior() { + if last_kind.post() != Postcondition::Unsafe { if let Some(range) = match child { - Green::Node(n) => self.reparse_step( - Rc::make_mut(n), - start_offset, - kind.mode().unwrap_or(TokenMode::Code), + Green::Node(node) => self.reparse_step( + Rc::make_mut(node), + first_start, + child_mode, outermost, ), Green::Token(_) => None, @@ -163,159 +165,147 @@ impl Reparser<'_> { } } - debug_assert_ne!(child_idx_range.len(), 0); - - if mode == TokenMode::Code && child_idx_range.len() > 1 { + // We only replace multiple children in markup mode. + if children_range.len() > 1 && mode == TokenMode::Code { return None; } // We now have a child that we can replace and a function to do so. - let func = - child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code))?; - let policy = child_kind.post(); + let func = last_kind.reparsing_func(child_mode)?; + let post = last_kind.post(); - let len_change = self.replace_len as isize - self.replace_range.len() as isize; - let mut src_span = child_span; - src_span.end = (src_span.end as isize + len_change) as usize; + // The span of the to-be-reparsed children in the new source. + let replace_span = children_span.start + .. children_span.end + self.replace_len - self.replace_range.len(); - let recompile_range = if policy == Postcondition::AtomicPrimary { - src_span.start .. self.src.len() + // For atomic primaries we need to pass in the whole remaining string to + // check whether the parser would eat more stuff illicitly. + let reparse_span = if post == Postcondition::AtomicPrimary { + replace_span.start .. self.src.len() } else { - src_span.clone() + replace_span.clone() }; - let (mut new_children, terminated) = - func(&self.src[recompile_range], child_at_start)?; + // Do the reparsing! 
+ let (mut newborns, terminated) = func(&self.src[reparse_span], at_start)?; - // Do not accept unclosed nodes if the old node did not use to be at the - // right edge of the tree. + // Make sure that atomic primaries ate only what they were supposed to. + if post == Postcondition::AtomicPrimary { + let len = replace_span.len(); + if newborns.len() > 1 && newborns[0].len() == len { + newborns.truncate(1); + } else if newborns.iter().map(Green::len).sum::() != len { + return None; + } + } + + // Do not accept unclosed nodes if the old node wasn't at the right edge + // of the tree. if !outermost && !terminated { return None; } - let insertion = match check_invariants( - &new_children, + // If all post- and preconditions match, we are good to go! + if validate( green.children(), - child_idx_range.clone(), - child_at_start, + children_range.clone(), + at_start, + &newborns, mode, - src_span.clone(), - policy, + post, ) { - InvariantResult::Ok => Some(new_children), - InvariantResult::UseFirst => Some(vec![std::mem::take(&mut new_children[0])]), - InvariantResult::Error => None, - }?; - - green.replace_child_range(child_idx_range, insertion); - - Some(src_span) - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -enum InvariantResult { - Ok, - UseFirst, - Error, -} - -fn check_invariants( - use_children: &[Green], - old_children: &[Green], - child_idx_range: Range, - child_at_start: bool, - mode: TokenMode, - src_span: Range, - policy: Postcondition, -) -> InvariantResult { - let (new_children, ok) = if policy == Postcondition::AtomicPrimary { - if use_children.iter().map(Green::len).sum::() == src_span.len() { - (use_children, InvariantResult::Ok) - } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { - (&use_children[0 .. 
1], InvariantResult::UseFirst) + green.replace_child_range(children_range, newborns); + Some(replace_span) } else { - return InvariantResult::Error; + None } - } else { - (use_children, InvariantResult::Ok) - }; + } +} - let child_mode = old_children[child_idx_range.start].kind().mode().unwrap_or(mode); +/// Validate that a node replacement is allowed by post- and preconditions. +fn validate( + prev_children: &[Green], + children_range: Range, + mut at_start: bool, + newborns: &[Green], + mode: TokenMode, + post: Postcondition, +) -> bool { + // Atomic primaries must only generate one new child. + if post == Postcondition::AtomicPrimary && newborns.len() != 1 { + return false; + } - // Check if the children / child has the right type. - let same_kind = match policy { - Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), - _ => false, - }; - - if same_kind || policy == Postcondition::AtomicPrimary { - if new_children.len() != 1 { - return InvariantResult::Error; - } - - if same_kind { - if old_children[child_idx_range.start].kind() != new_children[0].kind() { - return InvariantResult::Error; - } + // Same kind in mode `inside` must generate only one child and that child + // must be of the same kind as previously. + if let Postcondition::SameKind(inside) = post { + let prev_kind = prev_children[children_range.start].kind(); + let prev_mode = prev_kind.mode().unwrap_or(mode); + if inside.map_or(true, |m| m == prev_mode) + && (newborns.len() != 1 || prev_kind != newborns[0].kind()) + { + return false; } } - // Check if the neighbor invariants are still true. - if mode == TokenMode::Markup { - if child_idx_range.start > 0 { - if old_children[child_idx_range.start - 1].kind().pre() - == Precondition::RightWhitespace - && !new_children[0].kind().is_whitespace() - { - return InvariantResult::Error; - } - } + // Neighbor invariants are only relevant in markup mode. 
+ if mode == TokenMode::Code { + return true; + } - if new_children.last().map(|x| x.kind().pre()) - == Some(Precondition::RightWhitespace) - && old_children.len() > child_idx_range.end - { - if !old_children[child_idx_range.end].kind().is_whitespace() { - return InvariantResult::Error; - } - } + // Ensure that a possible right-whitespace precondition of a node before the + // replacement range is satisfied. + if children_range.start > 0 + && prev_children[children_range.start - 1].kind().pre() + == Precondition::RightWhitespace + && !newborns[0].kind().is_whitespace() + { + return false; + } - let mut post_at_start = child_at_start; - for child in new_children { - post_at_start = child.kind().is_at_start(post_at_start); - } + // Ensure that a possible right-whitespace precondition of a new node at the + // end of the replacement range is satisfied. + if newborns.last().map(|x| x.kind().pre()) == Some(Precondition::RightWhitespace) + && children_range.end < prev_children.len() + && !prev_children[children_range.end].kind().is_whitespace() + { + return false; + } - for child in &old_children[child_idx_range.end ..] { - if child.kind().is_trivia() { - post_at_start = child.kind().is_at_start(post_at_start); - continue; - } + // Compute the at_start state behind the new children. + for child in newborns { + at_start = child.kind().is_at_start(at_start); + } + // Ensure that a possible at-start or not-at-start precondition of + // a node after the replacement range is satisfied. + for child in &prev_children[children_range.end ..] 
{ + if !child.kind().is_trivia() { let pre = child.kind().pre(); - if pre == Precondition::AtStart && !post_at_start - || pre == Precondition::NotAtStart && post_at_start + if (pre == Precondition::AtStart && !at_start) + || (pre == Precondition::NotAtStart && at_start) { - return InvariantResult::Error; + return false; } + break; } + + at_start = child.kind().is_at_start(at_start); } - ok + true } impl NodeKind { /// Return the correct reparsing function given the postconditions for the /// type. - fn reparsing_function( + fn reparsing_func( &self, parent_mode: TokenMode, ) -> Option Option<(Vec, bool)>> { - let policy = self.post(); let mode = self.mode().unwrap_or(parent_mode); - - match policy { + match self.post() { Postcondition::Unsafe | Postcondition::UnsafeLayer => None, Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), Postcondition::AtomicPrimary => Some(parse_atomic_markup), @@ -393,6 +383,7 @@ impl NodeKind { // changes the next layer. Self::EnumNumbering(_) => Postcondition::Unsafe, + // This can be anything, so we don't make any promises. Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, // These are complex expressions which may screw with their @@ -485,17 +476,11 @@ impl NodeKind { } impl Postcondition { - pub fn unsafe_interior(&self) -> bool { - match self { - Self::Unsafe => true, - _ => false, - } - } - - pub fn markup_safe(&self) -> bool { + /// Whether a node with this condition can be reparsed in markup mode. 
+ pub fn safe_in_markup(&self) -> bool { match self { Self::Safe | Self::UnsafeLayer => true, - Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup), + Self::SameKind(mode) => mode.map_or(false, |m| m != TokenMode::Markup), _ => false, } } @@ -503,22 +488,19 @@ impl Postcondition { #[cfg(test)] mod tests { + use super::*; use crate::parse::parse; use crate::source::SourceFile; - use super::*; - #[test] #[rustfmt::skip] fn test_incremental_parse() { #[track_caller] - fn test(prev: &str, range: Range, with: &str, incr: Range) { + fn test(prev: &str, range: Range, with: &str, goal: Range) { let mut source = SourceFile::detached(prev); let range = source.edit(range, with); - assert_eq!(range, incr); - - let incr_tree = source.root().clone(); - assert_eq!(parse(source.src()), incr_tree); + assert_eq!(range, goal); + assert_eq!(parse(source.src()), *source.root()); } // Test simple replacements. @@ -542,7 +524,7 @@ mod tests { test("hello {x}", 6 .. 9, "#f()", 5 .. 10); test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12); test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); - test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); + test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15); test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); // Test the whitespace invariants. @@ -578,6 +560,7 @@ mod tests { test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35); test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); + // Test raw tokens. test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 
18); } From 289122e83c085668e56e52225c2dcfd9417d6262 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 29 Nov 2021 12:06:41 +0100 Subject: [PATCH 13/16] Deal with offside rule and remove RightWhitespace --- src/parse/incremental.rs | 92 ++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index cc100a4c3..0e2d196c4 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -5,7 +5,7 @@ use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, - parse_markup_elements, parse_template, TokenMode, + parse_markup_elements, parse_template, Scanner, TokenMode, }; /// The conditions that a node has to fulfill in order to be replaced. @@ -40,14 +40,13 @@ pub enum Postcondition { pub enum Precondition { /// These nodes depend on being at the start of a line. Reparsing of safe /// left neighbors has to check this invariant. Otherwise, this node is - /// safe. + /// safe. Additionally, the indentation of the first right non-trivia, + /// non-whitespace sibling must not be greater than the current indentation. AtStart, /// These nodes depend on not being at the start of a line. Reparsing of /// safe left neighbors has to check this invariant. Otherwise, this node is /// safe. NotAtStart, - /// These nodes must be followed by whitespace. - RightWhitespace, /// No additional requirements. None, } @@ -213,6 +212,8 @@ impl Reparser<'_> { &newborns, mode, post, + replace_span.clone(), + self.src, ) { green.replace_child_range(children_range, newborns); Some(replace_span) @@ -230,6 +231,8 @@ fn validate( newborns: &[Green], mode: TokenMode, post: Postcondition, + replace_span: Range, + src: &str, ) -> bool { // Atomic primaries must only generate one new child. 
if post == Postcondition::AtomicPrimary && newborns.len() != 1 { @@ -253,23 +256,37 @@ fn validate( return true; } - // Ensure that a possible right-whitespace precondition of a node before the - // replacement range is satisfied. - if children_range.start > 0 - && prev_children[children_range.start - 1].kind().pre() - == Precondition::RightWhitespace - && !newborns[0].kind().is_whitespace() - { - return false; - } + // Check if there are any `AtStart` predecessors which require a certain + // indentation. + let s = Scanner::new(src); + let mut prev_pos = replace_span.start; + for child in (&prev_children[.. children_range.start]).iter().rev() { + prev_pos -= child.len(); + if !child.kind().is_trivia() { + if child.kind().pre() == Precondition::AtStart { + let left_col = s.column(prev_pos); - // Ensure that a possible right-whitespace precondition of a new node at the - // end of the replacement range is satisfied. - if newborns.last().map(|x| x.kind().pre()) == Some(Precondition::RightWhitespace) - && children_range.end < prev_children.len() - && !prev_children[children_range.end].kind().is_whitespace() - { - return false; + // Search for the first non-trivia newborn. + let mut new_pos = replace_span.start; + let mut child_col = None; + for child in newborns { + if !child.kind().is_trivia() { + child_col = Some(s.column(new_pos)); + break; + } + + new_pos += child.len(); + } + + if let Some(child_col) = child_col { + if child_col > left_col { + return false; + } + } + } + + break; + } } // Compute the at_start state behind the new children. @@ -294,6 +311,37 @@ fn validate( at_start = child.kind().is_at_start(at_start); } + // We have to check whether the last non-trivia newborn is `AtStart` and + // verify the indent of its right neighbors in order to make sure its + // indentation requirements are fulfilled. 
+ let mut child_pos = replace_span.end; + let mut child_col = None; + for child in newborns.iter().rev() { + child_pos -= child.len(); + + if !child.kind().is_trivia() { + if child.kind().pre() == Precondition::AtStart { + child_col = Some(s.column(child_pos)); + } + break; + } + } + + if let Some(child_col) = child_col { + let mut right_pos = replace_span.end; + for child in &prev_children[children_range.end ..] { + if !child.kind().is_trivia() { + if s.column(right_pos) > child_col { + return false; + } + + break; + } + + right_pos += child.len(); + } + } + true } @@ -469,7 +517,6 @@ impl NodeKind { match self { Self::Heading | Self::Enum | Self::List => Precondition::AtStart, Self::TextInLine(_) => Precondition::NotAtStart, - Self::Linebreak => Precondition::RightWhitespace, _ => Precondition::None, } } @@ -515,7 +562,8 @@ mod tests { test("a your thing a", 6 .. 7, "a", 2 .. 12); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); test("#call() abc", 7 .. 7, "[]", 0 .. 10); - // test("hi\n- item\n- item 2\n - item 3", 10 .. 10, " ", 9 .. 33); + test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35); + test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 0 .. 32); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 
39); From 5f114e18eb76a1937941b2ea64842b908c9ad89e Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 2 Jan 2022 00:46:19 +0100 Subject: [PATCH 14/16] Added a test framework for incremental parsing Fix several errors: - Indented markup is now reparsed right - All end group errors will now fail a reparse - Rightmost errors will always fail a reparse --- src/parse/incremental.rs | 69 ++++++++++++++---- src/parse/mod.rs | 55 +++++++++----- src/parse/parser.rs | 54 ++++++++++++-- src/parse/tokens.rs | 16 ++-- src/syntax/ast.rs | 2 +- src/syntax/highlight.rs | 2 +- src/syntax/mod.rs | 29 +++++++- tests/typ/code/block.typ | 2 +- tests/typ/code/let.typ | 2 +- tests/typeset.rs | 153 +++++++++++++++++++++++++++++++++++++-- 10 files changed, 322 insertions(+), 62 deletions(-) diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 0e2d196c4..1ee37a511 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -47,6 +47,10 @@ pub enum Precondition { /// safe left neighbors has to check this invariant. Otherwise, this node is /// safe. NotAtStart, + /// These nodes could end up somewhere else up the tree if the parse was + /// happening from scratch. The parse result has to be checked for such + /// nodes. They are safe to add if followed up by other nodes. + NotAtEnd, /// No additional requirements. None, } @@ -88,6 +92,12 @@ impl Reparser<'_> { let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); let child_count = green.children().len(); + // Save the current indent if this is a markup node. + let indent = match green.kind() { + NodeKind::Markup(n) => *n, + _ => 0, + }; + let mut first = None; let mut at_start = true; @@ -170,12 +180,29 @@ impl Reparser<'_> { } // We now have a child that we can replace and a function to do so. 
- let func = last_kind.reparsing_func(child_mode)?; + let func = last_kind.reparsing_func(child_mode, indent)?; let post = last_kind.post(); + let mut column = if mode == TokenMode::Markup { + // In this case, we want to pass the indentation to the function. + Scanner::new(self.src).column(children_span.start) + } else { + 0 + }; + + // If this is a markup node, we want to save its indent instead to pass + // the right indent argument. + if children_range.len() == 1 { + let child = &mut green.children_mut()[children_range.start]; + if let NodeKind::Markup(n) = child.kind() { + column = *n; + } + } + // The span of the to-be-reparsed children in the new source. let replace_span = children_span.start - .. children_span.end + self.replace_len - self.replace_range.len(); + .. + children_span.end + self.replace_len - self.replace_range.len(); // For atomic primaries we need to pass in the whole remaining string to // check whether the parser would eat more stuff illicitly. @@ -186,7 +213,7 @@ impl Reparser<'_> { }; // Do the reparsing! - let (mut newborns, terminated) = func(&self.src[reparse_span], at_start)?; + let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?; // Make sure that atomic primaries ate only what they were supposed to. if post == Postcondition::AtomicPrimary { @@ -311,6 +338,14 @@ fn validate( at_start = child.kind().is_at_start(at_start); } + // Verify that the last of the newborns is not `NotAtEnd`. + if newborns + .last() + .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd) + { + return false; + } + // We have to check whether the last non-trivia newborn is `AtStart` and // verify the indent of its right neighbors in order to make sure its // indentation requirements are fulfilled. 
@@ -351,21 +386,22 @@ impl NodeKind { fn reparsing_func( &self, parent_mode: TokenMode, - ) -> Option Option<(Vec, bool)>> { + indent: usize, + ) -> Option Option<(Vec, bool)>> { let mode = self.mode().unwrap_or(parent_mode); match self.post() { Postcondition::Unsafe | Postcondition::UnsafeLayer => None, Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), Postcondition::AtomicPrimary => Some(parse_atomic_markup), Postcondition::SameKind(x) if x == None || x == Some(mode) => match self { + NodeKind::Markup(_) => Some(parse_markup), NodeKind::Template => Some(parse_template), NodeKind::Block => Some(parse_block), NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment), _ => None, }, _ => match mode { - TokenMode::Markup if self == &Self::Markup => Some(parse_markup), - TokenMode::Markup => Some(parse_markup_elements), + TokenMode::Markup if indent == 0 => Some(parse_markup_elements), _ => return None, }, } @@ -452,8 +488,9 @@ impl NodeKind { Postcondition::UnsafeLayer } - // Only markup is expected at the points where it does occur. - Self::Markup => Postcondition::SameKind(None), + // Only markup is expected at the points where it does occur. The + // indentation must be preserved as well, also for the children. + Self::Markup(_) => Postcondition::SameKind(None), // These can appear everywhere and must not change to other stuff // because that could change the outer expression. @@ -493,6 +530,10 @@ impl NodeKind { | Self::ImportExpr | Self::IncludeExpr => Postcondition::AtomicPrimary, + // This element always has to remain in the same column so better + // reparse the whole parent. + Self::Raw(_) => Postcondition::Unsafe, + // These are all replaceable by other tokens. 
Self::Parbreak | Self::Linebreak @@ -507,7 +548,6 @@ impl NodeKind { | Self::Heading | Self::Enum | Self::List - | Self::Raw(_) | Self::Math(_) => Postcondition::Safe, } } @@ -517,6 +557,7 @@ impl NodeKind { match self { Self::Heading | Self::Enum | Self::List => Precondition::AtStart, Self::TextInLine(_) => Precondition::NotAtStart, + Self::Error(_, _) => Precondition::NotAtEnd, _ => Precondition::None, } } @@ -557,12 +598,12 @@ mod tests { test("a d e", 1 .. 3, " b c d", 0 .. 8); test("a #f() e", 1 .. 6, " b c d", 0 .. 8); test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); - test("= A heading", 3 .. 3, "n evocative", 2 .. 15); + test("= A heading", 3 .. 3, "n evocative", 2 .. 22); test("your thing", 5 .. 5, "a", 4 .. 11); test("a your thing a", 6 .. 7, "a", 2 .. 12); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); test("#call() abc", 7 .. 7, "[]", 0 .. 10); - test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35); + test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 3 .. 34); test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 0 .. 32); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); @@ -571,7 +612,7 @@ mod tests { test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32); test("hello {x}", 6 .. 9, "#f()", 5 .. 10); test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12); - test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); + test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37); test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15); test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); @@ -596,7 +637,7 @@ mod tests { test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 
26); test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14); - test("a b c", 1 .. 1, "{[}", 0 .. 5); + test("a b c", 1 .. 1, "{[}", 0 .. 8); // Test unclosed things. test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6); @@ -610,6 +651,6 @@ mod tests { // Test raw tokens. test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); - test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); + test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18); } } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 2c5afb6b3..f48267300 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -16,6 +16,7 @@ use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; +use crate::util::EcoString; /// Parse a source file. pub fn parse(src: &str) -> Rc { @@ -28,23 +29,27 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Code); primary(&mut p, true).ok()?; p.eject_partial() } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic_markup(src: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); markup_expr(&mut p); p.eject_partial() } /// Parse some markup. Returns `Some` if all of the input was consumed. 
-pub fn parse_markup(src: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); - markup(&mut p); + if column == 0 { + markup(&mut p); + } else { + markup_indented(&mut p, column); + } p.eject() } @@ -53,8 +58,10 @@ pub fn parse_markup(src: &str, _: bool) -> Option<(Vec, bool)> { pub fn parse_markup_elements( src: &str, mut at_start: bool, + column: usize, ) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); + p.offset(column); while !p.eof() { markup_node(&mut p, &mut at_start); } @@ -62,7 +69,7 @@ pub fn parse_markup_elements( } /// Parse a template literal. Returns `Some` if all of the input was consumed. -pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); if !p.at(&NodeKind::LeftBracket) { return None; @@ -73,7 +80,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { } /// Parse a code block. Returns `Some` if all of the input was consumed. -pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); if !p.at(&NodeKind::LeftBrace) { return None; @@ -84,7 +91,7 @@ pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { } /// Parse a comment. Returns `Some` if all of the input was consumed. -pub fn parse_comment(source: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); comment(&mut p).ok()?; p.eject() @@ -92,7 +99,7 @@ pub fn parse_comment(source: &str, _: bool) -> Option<(Vec, bool)> { /// Parse markup. 
fn markup(p: &mut Parser) { - markup_while(p, true, &mut |_| true) + markup_while(p, true, 0, &mut |_| true) } /// Parse markup that stays right of the given column. @@ -103,8 +110,8 @@ fn markup_indented(p: &mut Parser, column: usize) { _ => false, }); - markup_while(p, false, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, + markup_while(p, false, column, &mut |p| match p.peek() { + Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column, _ => true, }) } @@ -113,11 +120,11 @@ fn markup_indented(p: &mut Parser, column: usize) { /// /// If `at_start` is true, things like headings that may only appear at the /// beginning of a line or template are allowed. -fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) +fn markup_while(p: &mut Parser, mut at_start: bool, column: usize, f: &mut F) where F: FnMut(&mut Parser) -> bool, { - p.perform(NodeKind::Markup, |p| { + p.perform(NodeKind::Markup(column), |p| { while !p.eof() && f(p) { markup_node(p, &mut at_start); } @@ -205,20 +212,32 @@ fn heading(p: &mut Parser) { /// Parse a single list item. fn list_node(p: &mut Parser) { - p.perform(NodeKind::List, |p| { - p.eat_assert(&NodeKind::Minus); + let marker = p.marker(); + let src: EcoString = p.peek_src().into(); + p.eat_assert(&NodeKind::Minus); + + if p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::List); + } else { + marker.convert(p, NodeKind::TextInLine(src)); + } } /// Parse a single enum item. 
fn enum_node(p: &mut Parser) { - p.perform(NodeKind::Enum, |p| { - p.eat(); + let marker = p.marker(); + let src: EcoString = p.peek_src().into(); + p.eat(); + + if p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::Enum); + } else { + marker.convert(p, NodeKind::TextInLine(src)); + } } /// Parse an expression within markup mode. diff --git a/src/parse/parser.rs b/src/parse/parser.rs index ade9b5df5..b31f69d3b 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -21,8 +21,12 @@ pub struct Parser<'s> { groups: Vec, /// The children of the currently built node. children: Vec, - /// Whether the last group was terminated. - last_terminated: bool, + /// Is `Some` if there is an unterminated group at the last position where + /// groups were terminated. + last_unterminated: Option, + /// Offset the indentation. This can be used if the parser is processing a + /// subslice of the source and there was leading indent. + column_offset: usize, } impl<'s> Parser<'s> { @@ -38,7 +42,8 @@ impl<'s> Parser<'s> { current_start: 0, groups: vec![], children: vec![], - last_terminated: true, + last_unterminated: None, + column_offset: 0, } } @@ -102,6 +107,11 @@ impl<'s> Parser<'s> { .then(|| (self.children, self.tokens.was_terminated())) } + /// Set an indentation offset. + pub fn offset(&mut self, columns: usize) { + self.column_offset = columns; + } + /// Whether the end of the source string or group is reached. pub fn eof(&self) -> bool { self.eof @@ -206,6 +216,12 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { + self.tokens.scanner().column(index) + self.column_offset + } + + /// Determine the column index for the given byte index while ignoring the + /// offset. 
+ pub fn clean_column(&self, index: usize) -> usize { self.tokens.scanner().column(index) } @@ -244,7 +260,11 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - self.last_terminated = true; + if let Some(n) = self.last_unterminated { + if n != self.prev_end() { + self.last_unterminated = None; + } + } let mut rescan = self.tokens.mode() != group_mode; @@ -262,8 +282,14 @@ impl<'s> Parser<'s> { self.eat(); rescan = false; } else if required { + // FIXME The error has to be inserted before any space rolls + // around because the rescan will set the cursor back in front + // of the space and reconsume it. Supressing the rescan is not + // an option since additional rescans (e.g. for statements) can + // be triggered directly afterwards, without processing any + // other token. self.push_error(format_eco!("expected {}", end)); - self.last_terminated = false; + self.last_unterminated = Some(self.prev_end()); } } @@ -283,13 +309,21 @@ impl<'s> Parser<'s> { /// Check if the group processing was successfully terminated. pub fn group_success(&self) -> bool { - self.last_terminated && self.groups.is_empty() + self.last_unterminated.is_none() && self.groups.is_empty() } /// Low-level bump that consumes exactly one token without special trivia /// handling. fn bump(&mut self) { let kind = self.current.take().unwrap(); + if match kind { + NodeKind::Space(n) if n > 0 => true, + NodeKind::Parbreak => true, + _ => false, + } { + self.column_offset = 0; + } + let len = self.tokens.index() - self.current_start; self.children.push(GreenData::new(kind, len).into()); self.current_start = self.tokens.index(); @@ -346,6 +380,13 @@ impl Parser<'_> { /// Push an error into the children list. pub fn push_error(&mut self, msg: impl Into) { let error = NodeKind::Error(ErrorPos::Full, msg.into()); + for i in (0 .. 
self.children.len()).rev() { + if Self::is_trivia_ext(self.children[i].kind(), false) { + self.children.remove(i); + } else { + break; + } + } self.children.push(GreenData::new(error, 0).into()); } @@ -445,6 +486,7 @@ impl Marker { } /// A logical group of tokens, e.g. `[...]`. +#[derive(Debug)] struct GroupEntry { /// The kind of group this is. This decides which tokens will end the group. /// For example, a [`Group::Paren`] will be ended by diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 836e8cf17..3a0ad1ade 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -293,10 +293,8 @@ impl<'s> Tokens<'s> { } else { NodeKind::EnDash } - } else if self.s.check_or(true, char::is_whitespace) { - NodeKind::Minus } else { - NodeKind::Text('-'.into()) + NodeKind::Minus } } @@ -312,11 +310,7 @@ impl<'s> Tokens<'s> { None }; - if self.s.check_or(true, char::is_whitespace) { - NodeKind::EnumNumbering(number) - } else { - NodeKind::Text(self.s.eaten_from(start).into()) - } + NodeKind::EnumNumbering(number) } fn raw(&mut self) -> NodeKind { @@ -742,12 +736,12 @@ mod tests { fn test_tokenize_text() { // Test basic text. t!(Markup[" /"]: "hello" => Text("hello")); - t!(Markup[" /"]: "hello-world" => Text("hello"), Text("-"), Text("world")); + t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world")); // Test code symbols in text. t!(Markup[" /"]: "a():\"b" => Text("a():\"b")); t!(Markup[" /"]: ";:,|/+" => Text(";:,|"), Text("/+")); - t!(Markup[" /"]: "=-a" => Text("="), Text("-"), Text("a")); + t!(Markup[" /"]: "=-a" => Text("="), Minus, Text("a")); t!(Markup[" "]: "#123" => Text("#"), Text("123")); // Test text ends. @@ -804,7 +798,7 @@ mod tests { t!(Markup["a1/"]: "- " => Minus, Space(0)); t!(Markup[" "]: "." => EnumNumbering(None)); t!(Markup[" "]: "1." => EnumNumbering(Some(1))); - t!(Markup[" "]: "1.a" => Text("1."), Text("a")); + t!(Markup[" "]: "1.a" => EnumNumbering(Some(1)), Text("a")); t!(Markup[" /"]: "a1." 
=> Text("a1.")); } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index ed74dfe51..bea4ef000 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -53,7 +53,7 @@ macro_rules! node { node! { /// The syntactical root capable of representing a full parsed document. - Markup + Markup: NodeKind::Markup(_) } impl Markup { diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 21af060ff..9f7365a81 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -154,7 +154,7 @@ impl Category { NodeKind::Str(_) => Some(Category::String), NodeKind::Error(_, _) => Some(Category::Invalid), NodeKind::Unknown(_) => Some(Category::Invalid), - NodeKind::Markup => None, + NodeKind::Markup(_) => None, NodeKind::Space(_) => None, NodeKind::Parbreak => None, NodeKind::Text(_) => None, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index b72e58431..388d0bb0c 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -64,6 +64,14 @@ impl Green { } } + /// Whether the node is a leaf node in the green tree. + pub fn is_leaf(&self) -> bool { + match self { + Green::Node(n) => n.children().is_empty(), + Green::Token(_) => true, + } + } + /// Change the type of the node. pub fn convert(&mut self, kind: NodeKind) { match self { @@ -361,6 +369,11 @@ impl<'a> RedRef<'a> { Span::new(self.id, self.offset, self.offset + self.green.len()) } + /// Whether the node is a leaf node. + pub fn is_leaf(self) -> bool { + self.green.is_leaf() + } + /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -385,6 +398,14 @@ impl<'a> RedRef<'a> { } } + /// Perform a depth-first search starting at this node. + pub fn all_children(&self) -> Vec { + let mut res = vec![self.clone()]; + res.extend(self.children().flat_map(|child| child.all_children().into_iter())); + + res + } + /// Convert the node to a typed AST node. 
pub fn cast(self) -> Option where @@ -562,8 +583,8 @@ pub enum NodeKind { Include, /// The `from` keyword. From, - /// Template markup. - Markup, + /// Template markup of which all lines must start in some column. + Markup(usize), /// One or more whitespace characters. Space(usize), /// A forced line break: `\`. @@ -738,7 +759,7 @@ impl NodeKind { /// Whether this token appears in Markup. pub fn mode(&self) -> Option { match self { - Self::Markup + Self::Markup(_) | Self::Linebreak | Self::Parbreak | Self::Text(_) @@ -823,7 +844,7 @@ impl NodeKind { Self::Import => "keyword `import`", Self::Include => "keyword `include`", Self::From => "keyword `from`", - Self::Markup => "markup", + Self::Markup(_) => "markup", Self::Space(_) => "space", Self::Linebreak => "forced linebreak", Self::Parbreak => "paragraph break", diff --git a/tests/typ/code/block.typ b/tests/typ/code/block.typ index 45ee92045..5939ba9c5 100644 --- a/tests/typ/code/block.typ +++ b/tests/typ/code/block.typ @@ -129,7 +129,7 @@ } --- -// Error: 2:1 expected closing brace +// Error: 2 expected closing brace { --- diff --git a/tests/typ/code/let.typ b/tests/typ/code/let.typ index 7fd6e0da7..a95d651aa 100644 --- a/tests/typ/code/let.typ +++ b/tests/typ/code/let.typ @@ -57,7 +57,7 @@ Three // Terminated by semicolon even though we are in a paren group. 
// Error: 18 expected expression -// Error: 19 expected closing paren +// Error: 18 expected closing paren #let v5 = (1, 2 + ; Five --- diff --git a/tests/typeset.rs b/tests/typeset.rs index 164ccc913..f23de5cd9 100644 --- a/tests/typeset.rs +++ b/tests/typeset.rs @@ -19,8 +19,8 @@ use typst::image::{Image, RasterImage, Svg}; use typst::library::{PageNode, TextNode}; use typst::loading::FsLoader; use typst::parse::Scanner; -use typst::source::SourceFile; -use typst::syntax::Span; +use typst::source::{SourceFile, SourceId}; +use typst::syntax::{RedNode, Span}; use typst::Context; #[cfg(feature = "layout-cache")] @@ -186,6 +186,7 @@ fn test( let mut line = 0; let mut compare_ref = true; let mut compare_ever = false; + let mut rng = LinearShift::new(); let parts: Vec<_> = src.split("\n---").collect(); for (i, &part) in parts.iter().enumerate() { @@ -202,8 +203,16 @@ fn test( } } } else { - let (part_ok, compare_here, part_frames) = - test_part(ctx, src_path, part.into(), i, compare_ref, line, debug); + let (part_ok, compare_here, part_frames) = test_part( + ctx, + src_path, + part.into(), + i, + compare_ref, + line, + debug, + &mut rng, + ); ok &= part_ok; compare_ever |= compare_here; frames.extend(part_frames); @@ -252,14 +261,16 @@ fn test_part( compare_ref: bool, line: usize, debug: bool, + rng: &mut LinearShift, ) -> (bool, bool, Vec>) { + let mut ok = test_reparse(&src, i, rng); + let id = ctx.sources.provide(src_path, src); let source = ctx.sources.get(id); let (local_compare_ref, mut ref_errors) = parse_metadata(&source); let compare_ref = local_compare_ref.unwrap_or(compare_ref); - let mut ok = true; let (frames, mut errors) = match ctx.evaluate(id) { Ok(module) => { let tree = module.into_root(); @@ -366,6 +377,108 @@ fn test_incremental( ok } +/// Pseudorandomly edit the source file and test whether a reparse produces the +/// same result as a clean parse. 
+/// +/// The method will first inject 10 strings once every 400 source characters +/// and then select 5 leaf node boundries to inject an additional, randomly +/// chosen string from the injection list. +fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { + let supplements = [ + "[", + ")", + "#rect()", + "a word", + ", a: 1", + "10.0", + ":", + "if i == 0 {true}", + "for", + "* hello *", + "//", + "/*", + "\\u{12e4}", + "```typst", + " ", + "trees", + "\\", + "$ a $", + "2.", + "-", + "5", + ]; + + let mut ok = true; + + let apply = |replace: std::ops::Range, with| { + let mut incr_source = SourceFile::detached(src); + + incr_source.edit(replace.clone(), with); + let edited_src = incr_source.src(); + + let ref_source = SourceFile::detached(edited_src); + let incr_root = incr_source.root(); + let ref_root = ref_source.root(); + if incr_root != ref_root { + println!( + " Subtest {} reparse differs from clean parse when inserting '{}' at {}-{} ❌", + i, with, replace.start, replace.end, + ); + println!( + "\n Expected reference tree:\n{:#?}\n\n Found incremental tree:\n{:#?}", + ref_root, incr_root + ); + println!("Full source ({}):\n\"{}\"", edited_src.len(), edited_src); + false + } else { + true + } + }; + + let mut in_range = |range: std::ops::Range| { + let full = rng.next().unwrap() as f64 / u64::MAX as f64; + (range.start as f64 + full * (range.end as f64 - range.start as f64)).floor() + as usize + }; + + let insertions = (src.len() as f64 / 400.0).ceil() as usize; + + for _ in 0 .. insertions { + let supplement = supplements[in_range(0 .. supplements.len())]; + let start = in_range(0 .. src.len()); + let end = in_range(start .. src.len()); + + if !src.is_char_boundary(start) || !src.is_char_boundary(end) { + continue; + } + + if !apply(start .. 
end, supplement) { + println!("original tree: {:#?}", SourceFile::detached(src).root()); + + ok = false; + } + } + + let red = RedNode::from_root( + SourceFile::detached(src).root().clone(), + SourceId::from_raw(0), + ); + + let leafs: Vec<_> = red + .as_ref() + .all_children() + .into_iter() + .filter(|red| red.is_leaf()) + .collect(); + + let leaf_start = leafs[in_range(0 .. leafs.len())].span().start; + let supplement = supplements[in_range(0 .. supplements.len())]; + + ok &= apply(leaf_start .. leaf_start, supplement); + + ok +} + fn parse_metadata(source: &SourceFile) -> (Option, Vec) { let mut compare_ref = None; let mut errors = vec![]; @@ -823,3 +936,33 @@ where FileDescriptor::redirect_stdio(&stdout, Stdout).unwrap(); result } + +/// This is an Linear-feedback shift register using XOR as its shifting +/// function. It can be used as PRNG. +struct LinearShift(u64); + +impl LinearShift { + /// Initialize the shift register with a pre-set seed. + pub fn new() -> Self { + Self(0xACE5) + } +} + +impl Iterator for LinearShift { + type Item = u64; + + /// Apply the shift. + fn next(&mut self) -> Option { + self.0 ^= self.0 >> 3; + self.0 ^= self.0 << 14; + self.0 ^= self.0 >> 28; + self.0 ^= self.0 << 36; + self.0 ^= self.0 >> 52; + Some(self.0) + } + + /// The iterator is endless but will repeat eventually. 
+ fn size_hint(&self) -> (usize, Option) { + (usize::MAX, None) + } +} From 98c96ba1cb8a46e327de313118e4ce1a84795ae9 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 2 Jan 2022 14:46:08 +0100 Subject: [PATCH 15/16] Fix parser / space / error bug --- src/parse/incremental.rs | 1 + src/parse/parser.rs | 16 ++-------------- src/parse/tokens.rs | 1 + tests/typ/code/let.typ | 1 + tests/typeset.rs | 12 ++++++++++-- 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 1ee37a511..5cb016d2c 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -623,6 +623,7 @@ mod tests { test("x = y", 1 .. 1, " + y\n", 0 .. 10); test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19); + test("#let x = (1, 2 + ; Five\r\n\r", 19..22, "2.", 18..22); test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); diff --git a/src/parse/parser.rs b/src/parse/parser.rs index b31f69d3b..f36155d5d 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -282,12 +282,6 @@ impl<'s> Parser<'s> { self.eat(); rescan = false; } else if required { - // FIXME The error has to be inserted before any space rolls - // around because the rescan will set the cursor back in front - // of the space and reconsume it. Supressing the rescan is not - // an option since additional rescans (e.g. for statements) can - // be triggered directly afterwards, without processing any - // other token. self.push_error(format_eco!("expected {}", end)); self.last_unterminated = Some(self.prev_end()); } @@ -380,14 +374,8 @@ impl Parser<'_> { /// Push an error into the children list. pub fn push_error(&mut self, msg: impl Into) { let error = NodeKind::Error(ErrorPos::Full, msg.into()); - for i in (0 .. 
self.children.len()).rev() { - if Self::is_trivia_ext(self.children[i].kind(), false) { - self.children.remove(i); - } else { - break; - } - } - self.children.push(GreenData::new(error, 0).into()); + let idx = self.trivia_start(); + self.children.insert(idx.0, GreenData::new(error, 0).into()); } /// Eat the current token and add an error that it is unexpected. diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 3a0ad1ade..7dfca2bf4 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -727,6 +727,7 @@ mod tests { t!(Both["a1/"]: " \n" => Space(1)); t!(Both["a1/"]: " \n " => Space(1)); t!(Both["a1/"]: "\r\n" => Space(1)); + t!(Both["a1/"]: "\r\n\r" => Space(2)); t!(Both["a1/"]: " \n\t \n " => Space(2)); t!(Both["a1/"]: "\n\r" => Space(2)); t!(Both["a1/"]: " \r\r\n \x0D" => Space(3)); diff --git a/tests/typ/code/let.typ b/tests/typ/code/let.typ index a95d651aa..d4765ea5d 100644 --- a/tests/typ/code/let.typ +++ b/tests/typ/code/let.typ @@ -59,6 +59,7 @@ Three // Error: 18 expected expression // Error: 18 expected closing paren #let v5 = (1, 2 + ; Five + ^^^^^ + \r\n --- // Error: 13 expected body diff --git a/tests/typeset.rs b/tests/typeset.rs index f23de5cd9..aa3bcf9d5 100644 --- a/tests/typeset.rs +++ b/tests/typeset.rs @@ -412,6 +412,15 @@ fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { let apply = |replace: std::ops::Range, with| { let mut incr_source = SourceFile::detached(src); + if incr_source.root().len() != src.len() { + println!( + " Subtest {} tree length {} does not match string length {} ❌", + i, + incr_source.root().len(), + src.len(), + ); + return false; + } incr_source.edit(replace.clone(), with); let edited_src = incr_source.src(); @@ -428,7 +437,7 @@ fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { "\n Expected reference tree:\n{:#?}\n\n Found incremental tree:\n{:#?}", ref_root, incr_root ); - println!("Full source ({}):\n\"{}\"", edited_src.len(), edited_src); + println!("Full source 
({}):\n\"{:?}\"", edited_src.len(), edited_src); false } else { true @@ -454,7 +463,6 @@ fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { if !apply(start .. end, supplement) { println!("original tree: {:#?}", SourceFile::detached(src).root()); - ok = false; } } From c994cfa7d814e3909682b19322867ed5c676c453 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 3 Jan 2022 23:18:21 +0100 Subject: [PATCH 16/16] Code Review: Your parsers were so preoccupied with whether they could --- Cargo.toml | 11 +- src/parse/incremental.rs | 255 +++++++++++++++++++++------------------ src/parse/mod.rs | 147 +++++++++++++--------- src/parse/parser.rs | 81 ++++++------- src/parse/scanner.rs | 23 +++- src/parse/tokens.rs | 28 ++--- src/source.rs | 17 ++- src/syntax/mod.rs | 61 ++++------ tests/typ/code/let.typ | 1 - tests/typeset.rs | 57 +++------ 10 files changed, 344 insertions(+), 337 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8251a7fa6..0bf68d74f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,21 +20,14 @@ opt-level = 2 [dependencies] fxhash = "0.2" -image = { version = "0.23", default-features = false, features = [ - "png", - "jpeg", -] } +image = { version = "0.23", default-features = false, features = ["png", "jpeg"] } itertools = "0.10" miniz_oxide = "0.4" once_cell = "1" pdf-writer = "0.4" rustybuzz = "0.4" serde = { version = "1", features = ["derive", "rc"] } -svg2pdf = { version = "0.1", default-features = false, features = [ - "text", - "png", - "jpeg", -] } +svg2pdf = { version = "0.1", default-features = false, features = ["text", "png", "jpeg"] } ttf-parser = "0.12" typst-macros = { path = "./macros" } unicode-bidi = "0.3.5" diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 5cb016d2c..4c82f158b 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -4,8 +4,8 @@ use std::rc::Rc; use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ - parse_atomic, parse_atomic_markup, parse_block, 
parse_comment, parse_markup, - parse_markup_elements, parse_template, Scanner, TokenMode, + is_newline, parse, parse_atomic, parse_atomic_markup, parse_block, parse_comment, + parse_markup, parse_markup_elements, parse_template, Scanner, TokenMode, }; /// The conditions that a node has to fulfill in order to be replaced. @@ -13,21 +13,21 @@ use super::{ /// This can dictate if a node can be replaced at all and if yes, what can take /// its place. #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Postcondition { +pub enum SuccessionRule { /// Changing this node can never have an influence on the other nodes. Safe, /// This node has to be replaced with a single token of the same kind. SameKind(Option), - /// Changing this node into a single atomic expression is allowed if it - /// appears in code mode, otherwise it is safe. + /// In code mode, this node can only be changed into a single atomic + /// expression, otherwise it is safe. AtomicPrimary, - /// Changing an unsafe layer node changes what the parents or the - /// surrounding nodes would be and is therefore disallowed. Change the + /// Changing an unsafe layer node in code mode changes what the parents or + /// the surrounding nodes would be and is therefore disallowed. Change the /// parents or children instead. If it appears in Markup, however, it is /// safe to change. UnsafeLayer, - /// Changing an unsafe node or any of its children will trigger undefined - /// behavior. Change the parents instead. + /// Changing an unsafe node or any of its children is not allowed. Change + /// the parents instead. Unsafe, } @@ -37,11 +37,12 @@ pub enum Postcondition { /// existence is plausible with them present. This can be used to encode some /// context-free language components for incremental parsing. #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Precondition { +pub enum NeighbourRule { /// These nodes depend on being at the start of a line. 
Reparsing of safe - /// left neighbors has to check this invariant. Otherwise, this node is - /// safe. Additionally, the indentation of the first right non-trivia, - /// non-whitespace sibling must not be greater than the current indentation. + /// left neighbors has to check this invariant. Additionally, when + /// exchanging the right sibling or inserting such a node the indentation of + /// the first right non-trivia, non-whitespace sibling must not be greater + /// than the current indentation. AtStart, /// These nodes depend on not being at the start of a line. Reparsing of /// safe left neighbors has to check this invariant. Otherwise, this node is @@ -77,8 +78,12 @@ impl<'a> Reparser<'a> { impl Reparser<'_> { /// Find the innermost child that is incremental safe. - pub fn reparse(&self, green: &mut GreenNode) -> Option> { - self.reparse_step(green, 0, TokenMode::Markup, true) + pub fn reparse(&self, green: &mut Rc) -> Range { + self.reparse_step(Rc::make_mut(green), 0, TokenMode::Markup, true) + .unwrap_or_else(|| { + *green = parse(self.src); + 0 .. self.src.len() + }) } fn reparse_step( @@ -90,7 +95,7 @@ impl Reparser<'_> { ) -> Option> { let mode = green.kind().mode().unwrap_or(parent_mode); let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); - let child_count = green.children().len(); + let original_count = green.children().len(); // Save the current indent if this is a markup node. let indent = match green.kind() { @@ -134,12 +139,14 @@ impl Reparser<'_> { // neighbor! 
if child_span.contains(&self.replace_range.end) || self.replace_range.end == child_span.end - && (mode != TokenMode::Markup || i + 1 == child_count) + && (mode != TokenMode::Markup || i + 1 == original_count) { - outermost &= i + 1 == child_count; + outermost &= i + 1 == original_count; last = Some((i, offset + child.len())); break; - } else if mode != TokenMode::Markup || !child.kind().post().safe_in_markup() { + } else if mode != TokenMode::Markup + || !child.kind().succession_rule().safe_in_markup() + { break; } @@ -147,17 +154,17 @@ impl Reparser<'_> { } let (last_idx, last_end) = last?; - let children_range = first_idx .. last_idx + 1; - let children_span = first_start .. last_end; + let superseded_range = first_idx .. last_idx + 1; + let superseded_span = first_start .. last_end; let last_kind = green.children()[last_idx].kind().clone(); // First, we try if the child itself has another, more specific // applicable child. - if children_range.len() == 1 { - let child = &mut green.children_mut()[children_range.start]; + if superseded_range.len() == 1 { + let child = &mut green.children_mut()[superseded_range.start]; let prev_len = child.len(); - if last_kind.post() != Postcondition::Unsafe { + if last_kind.succession_rule() != SuccessionRule::Unsafe { if let Some(range) = match child { Green::Node(node) => self.reparse_step( Rc::make_mut(node), @@ -168,56 +175,64 @@ impl Reparser<'_> { Green::Token(_) => None, } { let new_len = child.len(); - green.update_child_len(new_len, prev_len); + green.update_parent(new_len, prev_len); return Some(range); } } } // We only replace multiple children in markup mode. - if children_range.len() > 1 && mode == TokenMode::Code { + if superseded_range.len() > 1 && mode == TokenMode::Code { return None; } // We now have a child that we can replace and a function to do so. 
let func = last_kind.reparsing_func(child_mode, indent)?; - let post = last_kind.post(); + let succession = last_kind.succession_rule(); - let mut column = if mode == TokenMode::Markup { - // In this case, we want to pass the indentation to the function. - Scanner::new(self.src).column(children_span.start) - } else { - 0 - }; + let mut markup_min_column = 0; // If this is a markup node, we want to save its indent instead to pass // the right indent argument. - if children_range.len() == 1 { - let child = &mut green.children_mut()[children_range.start]; + if superseded_range.len() == 1 { + let child = &mut green.children_mut()[superseded_range.start]; if let NodeKind::Markup(n) = child.kind() { - column = *n; + markup_min_column = *n; } } // The span of the to-be-reparsed children in the new source. - let replace_span = children_span.start + let newborn_span = superseded_span.start .. - children_span.end + self.replace_len - self.replace_range.len(); + superseded_span.end + self.replace_len - self.replace_range.len(); // For atomic primaries we need to pass in the whole remaining string to // check whether the parser would eat more stuff illicitly. - let reparse_span = if post == Postcondition::AtomicPrimary { - replace_span.start .. self.src.len() + let reparse_span = if succession == SuccessionRule::AtomicPrimary { + newborn_span.start .. self.src.len() } else { - replace_span.clone() + newborn_span.clone() }; + let mut prefix = ""; + for (i, c) in self.src[.. reparse_span.start].char_indices().rev() { + if is_newline(c) { + break; + } + prefix = &self.src[i .. reparse_span.start]; + } + // Do the reparsing! - let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?; + let (mut newborns, terminated) = func( + &prefix, + &self.src[reparse_span.clone()], + at_start, + markup_min_column, + )?; // Make sure that atomic primaries ate only what they were supposed to. 
- if post == Postcondition::AtomicPrimary { - let len = replace_span.len(); + if succession == SuccessionRule::AtomicPrimary { + let len = newborn_span.len(); if newborns.len() > 1 && newborns[0].len() == len { newborns.truncate(1); } else if newborns.iter().map(Green::len).sum::() != len { @@ -234,16 +249,16 @@ impl Reparser<'_> { // If all post- and preconditions match, we are good to go! if validate( green.children(), - children_range.clone(), + superseded_range.clone(), at_start, &newborns, mode, - post, - replace_span.clone(), + succession, + newborn_span.clone(), self.src, ) { - green.replace_child_range(children_range, newborns); - Some(replace_span) + green.replace_children(superseded_range, newborns); + Some(newborn_span) } else { None } @@ -252,27 +267,27 @@ impl Reparser<'_> { /// Validate that a node replacement is allowed by post- and preconditions. fn validate( - prev_children: &[Green], - children_range: Range, + superseded: &[Green], + superseded_range: Range, mut at_start: bool, newborns: &[Green], mode: TokenMode, - post: Postcondition, - replace_span: Range, + post: SuccessionRule, + newborn_span: Range, src: &str, ) -> bool { // Atomic primaries must only generate one new child. - if post == Postcondition::AtomicPrimary && newborns.len() != 1 { + if post == SuccessionRule::AtomicPrimary && newborns.len() != 1 { return false; } // Same kind in mode `inside` must generate only one child and that child // must be of the same kind as previously. 
- if let Postcondition::SameKind(inside) = post { - let prev_kind = prev_children[children_range.start].kind(); - let prev_mode = prev_kind.mode().unwrap_or(mode); - if inside.map_or(true, |m| m == prev_mode) - && (newborns.len() != 1 || prev_kind != newborns[0].kind()) + if let SuccessionRule::SameKind(inside) = post { + let superseded_kind = superseded[superseded_range.start].kind(); + let superseded_mode = superseded_kind.mode().unwrap_or(mode); + if inside.map_or(true, |m| m == superseded_mode) + && (newborns.len() != 1 || superseded_kind != newborns[0].kind()) { return false; } @@ -286,15 +301,15 @@ fn validate( // Check if there are any `AtStart` predecessors which require a certain // indentation. let s = Scanner::new(src); - let mut prev_pos = replace_span.start; - for child in (&prev_children[.. children_range.start]).iter().rev() { + let mut prev_pos = newborn_span.start; + for child in (&superseded[.. superseded_range.start]).iter().rev() { prev_pos -= child.len(); if !child.kind().is_trivia() { - if child.kind().pre() == Precondition::AtStart { + if child.kind().neighbour_rule() == NeighbourRule::AtStart { let left_col = s.column(prev_pos); // Search for the first non-trivia newborn. - let mut new_pos = replace_span.start; + let mut new_pos = newborn_span.start; let mut child_col = None; for child in newborns { if !child.kind().is_trivia() { @@ -323,15 +338,15 @@ fn validate( // Ensure that a possible at-start or not-at-start precondition of // a node after the replacement range is satisfied. - for child in &prev_children[children_range.end ..] { - if !child.kind().is_trivia() { - let pre = child.kind().pre(); - if (pre == Precondition::AtStart && !at_start) - || (pre == Precondition::NotAtStart && at_start) - { - return false; - } + for child in &superseded[superseded_range.end ..] 
{ + let neighbour_rule = child.kind().neighbour_rule(); + if (neighbour_rule == NeighbourRule::AtStart && !at_start) + || (neighbour_rule == NeighbourRule::NotAtStart && at_start) + { + return false; + } + if !child.kind().is_trivia() { break; } @@ -339,42 +354,40 @@ fn validate( } // Verify that the last of the newborns is not `NotAtEnd`. - if newborns - .last() - .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd) - { + if newborns.last().map_or(false, |child| { + child.kind().neighbour_rule() == NeighbourRule::NotAtEnd + }) { return false; } // We have to check whether the last non-trivia newborn is `AtStart` and // verify the indent of its right neighbors in order to make sure its // indentation requirements are fulfilled. - let mut child_pos = replace_span.end; - let mut child_col = None; + let mut child_pos = newborn_span.end; for child in newborns.iter().rev() { child_pos -= child.len(); - if !child.kind().is_trivia() { - if child.kind().pre() == Precondition::AtStart { - child_col = Some(s.column(child_pos)); - } - break; + if child.kind().is_trivia() { + continue; } - } - if let Some(child_col) = child_col { - let mut right_pos = replace_span.end; - for child in &prev_children[children_range.end ..] { - if !child.kind().is_trivia() { + if child.kind().neighbour_rule() == NeighbourRule::AtStart { + let child_col = s.column(child_pos); + + let mut right_pos = newborn_span.end; + for child in &superseded[superseded_range.end ..] 
{ + if child.kind().is_trivia() { + right_pos += child.len(); + continue; + } + if s.column(right_pos) > child_col { return false; } - break; } - - right_pos += child.len(); } + break; } true @@ -387,13 +400,15 @@ impl NodeKind { &self, parent_mode: TokenMode, indent: usize, - ) -> Option Option<(Vec, bool)>> { + ) -> Option Option<(Vec, bool)>> { let mode = self.mode().unwrap_or(parent_mode); - match self.post() { - Postcondition::Unsafe | Postcondition::UnsafeLayer => None, - Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), - Postcondition::AtomicPrimary => Some(parse_atomic_markup), - Postcondition::SameKind(x) if x == None || x == Some(mode) => match self { + match self.succession_rule() { + SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None, + SuccessionRule::AtomicPrimary if mode == TokenMode::Code => { + Some(parse_atomic) + } + SuccessionRule::AtomicPrimary => Some(parse_atomic_markup), + SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self { NodeKind::Markup(_) => Some(parse_markup), NodeKind::Template => Some(parse_template), NodeKind::Block => Some(parse_block), @@ -409,7 +424,7 @@ impl NodeKind { /// Whether it is safe to do incremental parsing on this node. Never allow /// non-termination errors if this is not already the last leaf node. - pub fn post(&self) -> Postcondition { + pub fn succession_rule(&self) -> SuccessionRule { match self { // Replacing parenthesis changes if the expression is balanced and // is therefore not safe. @@ -418,7 +433,7 @@ impl NodeKind { | Self::LeftBrace | Self::RightBrace | Self::LeftParen - | Self::RightParen => Postcondition::Unsafe, + | Self::RightParen => SuccessionRule::Unsafe, // Replacing an operator can change whether the parent is an // operation which makes it unsafe. The star can appear in markup. 
@@ -445,7 +460,7 @@ impl NodeKind { | Self::Or | Self::With | Self::Dots - | Self::Arrow => Postcondition::Unsafe, + | Self::Arrow => SuccessionRule::Unsafe, // These keywords change what kind of expression the parent is and // how far the expression would go. @@ -461,14 +476,14 @@ impl NodeKind { | Self::Return | Self::Import | Self::Include - | Self::From => Postcondition::Unsafe, + | Self::From => SuccessionRule::Unsafe, // Changing the heading level, enum numbering, or list bullet // changes the next layer. - Self::EnumNumbering(_) => Postcondition::Unsafe, + Self::EnumNumbering(_) => SuccessionRule::Unsafe, // This can be anything, so we don't make any promises. - Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + Self::Error(_, _) | Self::Unknown(_) => SuccessionRule::Unsafe, // These are complex expressions which may screw with their // environments. @@ -477,33 +492,33 @@ impl NodeKind { | Self::Binary | Self::CallArgs | Self::Named - | Self::Spread => Postcondition::UnsafeLayer, + | Self::Spread => SuccessionRule::UnsafeLayer, // The closure is a bit magic with the let expression, and also it // is not atomic. - Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + Self::Closure | Self::ClosureParams => SuccessionRule::UnsafeLayer, // Missing these creates errors for the parents. Self::WithExpr | Self::ForPattern | Self::ImportItems => { - Postcondition::UnsafeLayer + SuccessionRule::UnsafeLayer } // Only markup is expected at the points where it does occur. The // indentation must be preserved as well, also for the children. - Self::Markup(_) => Postcondition::SameKind(None), + Self::Markup(_) => SuccessionRule::SameKind(None), // These can appear everywhere and must not change to other stuff // because that could change the outer expression. 
- Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + Self::LineComment | Self::BlockComment => SuccessionRule::SameKind(None), // These can appear as bodies and would trigger an error if they // became something else. - Self::Template => Postcondition::SameKind(None), - Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + Self::Template => SuccessionRule::SameKind(None), + Self::Block => SuccessionRule::SameKind(Some(TokenMode::Code)), // Whitespace in code mode has to remain whitespace or else the type // of things would change. - Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + Self::Space(_) => SuccessionRule::SameKind(Some(TokenMode::Code)), // These are expressions that can be replaced by other expressions. Self::Ident(_) @@ -519,7 +534,7 @@ impl NodeKind { | Self::Dict | Self::Group | Self::None - | Self::Auto => Postcondition::AtomicPrimary, + | Self::Auto => SuccessionRule::AtomicPrimary, // More complex, but still an expression. Self::ForExpr @@ -528,11 +543,11 @@ impl NodeKind { | Self::LetExpr | Self::SetExpr | Self::ImportExpr - | Self::IncludeExpr => Postcondition::AtomicPrimary, + | Self::IncludeExpr => SuccessionRule::AtomicPrimary, // This element always has to remain in the same column so better // reparse the whole parent. - Self::Raw(_) => Postcondition::Unsafe, + Self::Raw(_) => SuccessionRule::Unsafe, // These are all replaceable by other tokens. Self::Parbreak @@ -548,22 +563,22 @@ impl NodeKind { | Self::Heading | Self::Enum | Self::List - | Self::Math(_) => Postcondition::Safe, + | Self::Math(_) => SuccessionRule::Safe, } } /// The appropriate precondition for the type. 
- pub fn pre(&self) -> Precondition { + pub fn neighbour_rule(&self) -> NeighbourRule { match self { - Self::Heading | Self::Enum | Self::List => Precondition::AtStart, - Self::TextInLine(_) => Precondition::NotAtStart, - Self::Error(_, _) => Precondition::NotAtEnd, - _ => Precondition::None, + Self::Heading | Self::Enum | Self::List => NeighbourRule::AtStart, + Self::TextInLine(_) => NeighbourRule::NotAtStart, + Self::Error(_, _) => NeighbourRule::NotAtEnd, + _ => NeighbourRule::None, } } } -impl Postcondition { +impl SuccessionRule { /// Whether a node with this condition can be reparsed in markup mode. pub fn safe_in_markup(&self) -> bool { match self { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index f48267300..a97526453 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -29,72 +29,102 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Code); +pub fn parse_atomic( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); primary(&mut p, true).ok()?; - p.eject_partial() + p.consume_unterminated() } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Markup); +pub fn parse_atomic_markup( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); markup_expr(&mut p); - p.eject_partial() + p.consume_unterminated() } /// Parse some markup. Returns `Some` if all of the input was consumed. 
-pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Markup); - if column == 0 { +pub fn parse_markup( + prefix: &str, + src: &str, + _: bool, + min_column: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); + if min_column == 0 { markup(&mut p); } else { - markup_indented(&mut p, column); + markup_indented(&mut p, min_column); } - p.eject() + p.consume() } /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. pub fn parse_markup_elements( + prefix: &str, src: &str, mut at_start: bool, - column: usize, + _: usize, ) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Markup); - p.offset(column); + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); while !p.eof() { markup_node(&mut p, &mut at_start); } - p.eject() + p.consume() } /// Parse a template literal. Returns `Some` if all of the input was consumed. -pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(source, TokenMode::Code); +pub fn parse_template( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBracket) { return None; } template(&mut p); - p.eject() + p.consume() } /// Parse a code block. Returns `Some` if all of the input was consumed. -pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(source, TokenMode::Code); +pub fn parse_block( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBrace) { return None; } block(&mut p); - p.eject() + p.consume() } /// Parse a comment. Returns `Some` if all of the input was consumed. 
-pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(source, TokenMode::Code); +pub fn parse_comment( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); comment(&mut p).ok()?; - p.eject() + p.consume() } /// Parse markup. @@ -111,7 +141,7 @@ fn markup_indented(p: &mut Parser, column: usize) { }); markup_while(p, false, column, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column, + Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, _ => true, }) } @@ -170,14 +200,9 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { p.eat(); } - NodeKind::Eq if *at_start => heading(p), - NodeKind::Minus if *at_start => list_node(p), - NodeKind::EnumNumbering(_) if *at_start => enum_node(p), - - // Line-based markup that is not currently at the start of the line. - NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => { - p.convert(NodeKind::TextInLine(p.peek_src().into())) - } + NodeKind::Eq => heading(p, *at_start), + NodeKind::Minus => list_node(p, *at_start), + NodeKind::EnumNumbering(_) => enum_node(p, *at_start), // Hashtag + keyword / identifier. NodeKind::Ident(_) @@ -201,42 +226,49 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { } /// Parse a heading. -fn heading(p: &mut Parser) { - p.perform(NodeKind::Heading, |p| { - p.eat_assert(&NodeKind::Eq); - while p.eat_if(&NodeKind::Eq) {} +fn heading(p: &mut Parser, at_start: bool) { + let marker = p.marker(); + let current_start = p.current_start(); + p.eat_assert(&NodeKind::Eq); + while p.eat_if(&NodeKind::Eq) {} + + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::Heading); + } else { + let text = p.get(current_start .. 
p.prev_end()).into(); + marker.convert(p, NodeKind::TextInLine(text)); + } } /// Parse a single list item. -fn list_node(p: &mut Parser) { +fn list_node(p: &mut Parser, at_start: bool) { let marker = p.marker(); - let src: EcoString = p.peek_src().into(); + let text: EcoString = p.peek_src().into(); p.eat_assert(&NodeKind::Minus); - if p.peek().map_or(true, |kind| kind.is_whitespace()) { + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); marker.end(p, NodeKind::List); } else { - marker.convert(p, NodeKind::TextInLine(src)); + marker.convert(p, NodeKind::TextInLine(text)); } } /// Parse a single enum item. -fn enum_node(p: &mut Parser) { +fn enum_node(p: &mut Parser, at_start: bool) { let marker = p.marker(); - let src: EcoString = p.peek_src().into(); + let text: EcoString = p.peek_src().into(); p.eat(); - if p.peek().map_or(true, |kind| kind.is_whitespace()) { + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); marker.end(p, NodeKind::Enum); } else { - marker.convert(p, NodeKind::TextInLine(src)); + marker.convert(p, NodeKind::TextInLine(text)); } } @@ -582,23 +614,18 @@ fn template(p: &mut Parser) { fn block(p: &mut Parser) { p.perform(NodeKind::Block, |p| { p.start_group(Group::Brace); - expr_list(p); - p.end_group(); - }); -} + while !p.eof() { + p.start_group(Group::Stmt); + if expr(p).is_ok() && !p.eof() { + p.expected_at("semicolon or line break"); + } + p.end_group(); -/// Parse a number of code expressions. -fn expr_list(p: &mut Parser) { - while !p.eof() { - p.start_group(Group::Stmt); - if expr(p).is_ok() && !p.eof() { - p.expected_at("semicolon or line break"); + // Forcefully skip over newlines since the group's contents can't. + p.eat_while(|t| matches!(t, NodeKind::Space(_))); } p.end_group(); - - // Forcefully skip over newlines since the group's contents can't. 
- p.eat_while(|t| matches!(t, NodeKind::Space(_))); - } + }); } /// Parse a function call. diff --git a/src/parse/parser.rs b/src/parse/parser.rs index f36155d5d..4e5b277d2 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,8 @@ +use core::slice::SliceIndex; use std::fmt::{self, Display, Formatter}; use std::mem; -use super::{TokenMode, Tokens}; +use super::{Scanner, TokenMode, Tokens}; use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -24,8 +25,7 @@ pub struct Parser<'s> { /// Is `Some` if there is an unterminated group at the last position where /// groups were terminated. last_unterminated: Option, - /// Offset the indentation. This can be used if the parser is processing a - /// subslice of the source and there was leading indent. + /// Offsets the indentation on the first line of the source. column_offset: usize, } @@ -47,18 +47,31 @@ impl<'s> Parser<'s> { } } + /// Create a new parser for the source string that is prefixed by some text + /// that does not need to be parsed but taken into account for column + /// calculation. + pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { + let mut p = Self::new(src, mode); + p.column_offset = Scanner::new(prefix).column(prefix.len()); + p + } + /// End the parsing process and return the last child. pub fn finish(self) -> Vec { self.children } - /// End the parsing process and return multiple children. - pub fn eject(self) -> Option<(Vec, bool)> { - if self.eof() && self.group_success() { - Some((self.children, self.tokens.was_terminated())) - } else { - None - } + /// End the parsing process and return multiple children and whether the + /// last token was terminated. 
+ pub fn consume(self) -> Option<(Vec, bool)> { + (self.eof() && self.terminated()) + .then(|| (self.children, self.tokens.terminated())) + } + + /// End the parsing process and return multiple children and whether the + /// last token was terminated, even if there remains stuff in the string. + pub fn consume_unterminated(self) -> Option<(Vec, bool)> { + self.terminated().then(|| (self.children, self.tokens.terminated())) } /// Create a new marker. @@ -100,18 +113,6 @@ impl<'s> Parser<'s> { output } - /// End the parsing process and return multiple children, even if there - /// remains stuff in the string. - pub fn eject_partial(self) -> Option<(Vec, bool)> { - self.group_success() - .then(|| (self.children, self.tokens.was_terminated())) - } - - /// Set an indentation offset. - pub fn offset(&mut self, columns: usize) { - self.column_offset = columns; - } - /// Whether the end of the source string or group is reached. pub fn eof(&self) -> bool { self.eof @@ -199,6 +200,14 @@ impl<'s> Parser<'s> { self.tokens.scanner().get(self.current_start() .. self.current_end()) } + /// Obtain a range of the source code. + pub fn get(&self, index: I) -> &'s str + where + I: SliceIndex, + { + self.tokens.scanner().get(index) + } + /// The byte index at which the last non-trivia token ended. pub fn prev_end(&self) -> usize { self.prev_end @@ -216,13 +225,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.tokens.scanner().column(index) + self.column_offset - } - - /// Determine the column index for the given byte index while ignoring the - /// offset. - pub fn clean_column(&self, index: usize) -> usize { - self.tokens.scanner().column(index) + self.tokens.scanner().column_offset(index, self.column_offset) } /// Continue parsing in a group. 
@@ -260,10 +263,8 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - if let Some(n) = self.last_unterminated { - if n != self.prev_end() { - self.last_unterminated = None; - } + if self.last_unterminated != Some(self.prev_end()) { + self.last_unterminated = None; } let mut rescan = self.tokens.mode() != group_mode; @@ -301,23 +302,15 @@ impl<'s> Parser<'s> { } } - /// Check if the group processing was successfully terminated. - pub fn group_success(&self) -> bool { - self.last_unterminated.is_none() && self.groups.is_empty() + /// Checks if all groups were correctly terminated. + pub fn terminated(&self) -> bool { + self.groups.is_empty() && self.last_unterminated.is_none() } /// Low-level bump that consumes exactly one token without special trivia /// handling. fn bump(&mut self) { let kind = self.current.take().unwrap(); - if match kind { - NodeKind::Space(n) if n > 0 => true, - NodeKind::Parbreak => true, - _ => false, - } { - self.column_offset = 0; - } - let len = self.tokens.index() - self.current_start; self.children.push(GreenData::new(kind, len).into()); self.current_start = self.tokens.index(); diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index c735be407..6db891323 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -162,11 +162,26 @@ impl<'s> Scanner<'s> { /// The column index of a given index in the source string. #[inline] pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() + self.column_offset(index, 0) + } + + /// The column index of a given index in the source string when an offset is + /// applied to the first line of the string. + #[inline] + pub fn column_offset(&self, index: usize, offset: usize) -> usize { + let mut apply_offset = false; + let res = self.src[.. 
index] + .char_indices() .rev() - .take_while(|&c| !is_newline(c)) - .count() + .take_while(|&(_, c)| !is_newline(c)) + .inspect(|&(i, _)| { + if i == 0 { + apply_offset = true + } + }) + .count(); + + if apply_offset { res + offset } else { res } } } diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 7dfca2bf4..69c4d2dee 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -13,7 +13,7 @@ use crate::util::EcoString; pub struct Tokens<'s> { s: Scanner<'s>, mode: TokenMode, - was_terminated: bool, + terminated: bool, } /// What kind of tokens to emit. @@ -32,7 +32,7 @@ impl<'s> Tokens<'s> { Self { s: Scanner::new(src), mode, - was_terminated: true, + terminated: true, } } @@ -71,8 +71,8 @@ impl<'s> Tokens<'s> { /// Whether the last token was terminated. #[inline] - pub fn was_terminated(&self) -> bool { - self.was_terminated + pub fn terminated(&self) -> bool { + self.terminated } } @@ -128,9 +128,7 @@ impl<'s> Tokens<'s> { '`' => self.raw(), '$' => self.math(), '-' => self.hyph(), - '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => { - NodeKind::Eq - } + '=' => NodeKind::Eq, c if c == '.' || c.is_ascii_digit() => self.numbering(start, c), // Plain text. 
@@ -259,7 +257,7 @@ impl<'s> Tokens<'s> { ) } } else { - self.was_terminated = false; + self.terminated = false; NodeKind::Error( ErrorPos::End, "expected closing brace".into(), @@ -352,7 +350,7 @@ impl<'s> Tokens<'s> { let remaining = backticks - found; let noun = if remaining == 1 { "backtick" } else { "backticks" }; - self.was_terminated = false; + self.terminated = false; NodeKind::Error( ErrorPos::End, if found == 0 { @@ -400,7 +398,7 @@ impl<'s> Tokens<'s> { display, })) } else { - self.was_terminated = false; + self.terminated = false; NodeKind::Error( ErrorPos::End, if !display || (!escaped && dollar) { @@ -489,7 +487,7 @@ impl<'s> Tokens<'s> { if self.s.eat_if('"') { NodeKind::Str(string) } else { - self.was_terminated = false; + self.terminated = false; NodeKind::Error(ErrorPos::End, "expected quote".into()) } } @@ -497,7 +495,7 @@ impl<'s> Tokens<'s> { fn line_comment(&mut self) -> NodeKind { self.s.eat_until(is_newline); if self.s.peek().is_none() { - self.was_terminated = false; + self.terminated = false; } NodeKind::LineComment } @@ -505,7 +503,7 @@ impl<'s> Tokens<'s> { fn block_comment(&mut self) -> NodeKind { let mut state = '_'; let mut depth = 1; - self.was_terminated = false; + self.terminated = false; // Find the first `*/` that does not correspond to a nested `/*`. while let Some(c) = self.s.eat() { @@ -513,7 +511,7 @@ impl<'s> Tokens<'s> { ('*', '/') => { depth -= 1; if depth == 0 { - self.was_terminated = true; + self.terminated = true; break; } '_' @@ -742,7 +740,7 @@ mod tests { // Test code symbols in text. t!(Markup[" /"]: "a():\"b" => Text("a():\"b")); t!(Markup[" /"]: ";:,|/+" => Text(";:,|"), Text("/+")); - t!(Markup[" /"]: "=-a" => Text("="), Minus, Text("a")); + t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a")); t!(Markup[" "]: "#123" => Text("#"), Text("123")); // Test text ends. 
diff --git a/src/source.rs b/src/source.rs index 6cca9f751..7afeaa8a3 100644 --- a/src/source.rs +++ b/src/source.rs @@ -154,9 +154,14 @@ impl SourceFile { &self.root } + /// The root red node of the file's untyped red tree. + pub fn red(&self) -> RedNode { + RedNode::from_root(self.root.clone(), self.id) + } + /// The root node of the file's typed abstract syntax tree. pub fn ast(&self) -> TypResult { - let red = RedNode::from_root(self.root.clone(), self.id); + let red = self.red(); let errors = red.errors(); if errors.is_empty() { Ok(red.cast().unwrap()) @@ -284,14 +289,8 @@ impl SourceFile { self.line_starts .extend(newlines(&self.src[start ..]).map(|idx| start + idx)); - // Update the root node. - let reparser = Reparser::new(&self.src, replace, with.len()); - if let Some(range) = reparser.reparse(Rc::make_mut(&mut self.root)) { - range - } else { - self.root = parse(&self.src); - 0 .. self.src.len() - } + // Incrementally reparse the replaced range. + Reparser::new(&self.src, replace, with.len()).reparse(&mut self.root) } /// Provide highlighting categories for the given range of the source file. diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 388d0bb0c..3a0f3a5e0 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -108,7 +108,7 @@ pub struct GreenNode { /// This node's children, losslessly make up this node. children: Vec, /// Whether this node or any of its children are erroneous. - pub erroneous: bool, + erroneous: bool, } impl GreenNode { @@ -139,7 +139,7 @@ impl GreenNode { } /// The node's metadata. - pub fn data(&self) -> &GreenData { + fn data(&self) -> &GreenData { &self.data } @@ -159,41 +159,29 @@ impl GreenNode { } /// Replaces a range of children with some replacement. - /// - /// This method updates the `erroneous` and `data.len` fields. 
- pub(crate) fn replace_child_range( + pub(crate) fn replace_children( &mut self, - child_idx_range: Range, + range: Range, replacement: Vec, ) { - let old_len: usize = - self.children[child_idx_range.clone()].iter().map(Green::len).sum(); - let new_len: usize = replacement.iter().map(Green::len).sum(); + let superseded = &self.children[range.clone()]; + let superseded_len: usize = superseded.iter().map(Green::len).sum(); + let replacement_len: usize = replacement.iter().map(Green::len).sum(); - if self.erroneous { - if self.children[child_idx_range.clone()].iter().any(Green::erroneous) { - // the old range was erroneous but we do not know if anywhere - // else was so we have to iterate over the whole thing. - self.erroneous = self.children[.. child_idx_range.start] - .iter() - .any(Green::erroneous) - || self.children[child_idx_range.end ..].iter().any(Green::erroneous); - } - // in this case nothing changes so we do not have to bother. - } + // If we're erroneous, but not due to the superseded range, then we will + // still be erroneous after the replacement. + let still_erroneous = self.erroneous && !superseded.iter().any(Green::erroneous); - // the or assignment operator is not lazy. - self.erroneous = self.erroneous || replacement.iter().any(Green::erroneous); - - self.children.splice(child_idx_range, replacement); - self.data.len = self.data.len + new_len - old_len; + self.children.splice(range, replacement); + self.data.len = self.data.len + replacement_len - superseded_len; + self.erroneous = still_erroneous || self.children.iter().any(Green::erroneous); } - /// Update the length of this node given the old and new length of a - /// replaced child. - pub(crate) fn update_child_len(&mut self, new_len: usize, old_len: usize) { + /// Update the length of this node given the old and new length of + /// replaced children. 
+ pub(crate) fn update_parent(&mut self, new_len: usize, old_len: usize) { self.data.len = self.data.len() + new_len - old_len; - self.erroneous = self.children.iter().any(|x| x.erroneous()); + self.erroneous = self.children.iter().any(Green::erroneous); } } @@ -255,7 +243,7 @@ impl From for Green { impl Debug for GreenData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", &self.kind, self.len) + write!(f, "{:?}: {}", self.kind, self.len) } } @@ -398,12 +386,13 @@ impl<'a> RedRef<'a> { } } - /// Perform a depth-first search starting at this node. - pub fn all_children(&self) -> Vec { - let mut res = vec![self.clone()]; - res.extend(self.children().flat_map(|child| child.all_children().into_iter())); - - res + /// Returns all leaf descendants of this node (may include itself). + pub fn leafs(self) -> Vec { + if self.is_leaf() { + vec![self] + } else { + self.children().flat_map(Self::leafs).collect() + } } /// Convert the node to a typed AST node. diff --git a/tests/typ/code/let.typ b/tests/typ/code/let.typ index d4765ea5d..a95d651aa 100644 --- a/tests/typ/code/let.typ +++ b/tests/typ/code/let.typ @@ -59,7 +59,6 @@ Three // Error: 18 expected expression // Error: 18 expected closing paren #let v5 = (1, 2 + ; Five - ^^^^^ + \r\n --- // Error: 13 expected body diff --git a/tests/typeset.rs b/tests/typeset.rs index aa3bcf9d5..b1296886a 100644 --- a/tests/typeset.rs +++ b/tests/typeset.rs @@ -1,6 +1,7 @@ use std::env; use std::ffi::OsStr; use std::fs; +use std::ops::Range; use std::path::Path; use std::rc::Rc; @@ -19,8 +20,8 @@ use typst::image::{Image, RasterImage, Svg}; use typst::library::{PageNode, TextNode}; use typst::loading::FsLoader; use typst::parse::Scanner; -use typst::source::{SourceFile, SourceId}; -use typst::syntax::{RedNode, Span}; +use typst::source::SourceFile; +use typst::syntax::Span; use typst::Context; #[cfg(feature = "layout-cache")] @@ -263,13 +264,12 @@ fn test_part( debug: bool, rng: &mut LinearShift, ) -> (bool, 
bool, Vec>) { - let mut ok = test_reparse(&src, i, rng); - let id = ctx.sources.provide(src_path, src); let source = ctx.sources.get(id); let (local_compare_ref, mut ref_errors) = parse_metadata(&source); let compare_ref = local_compare_ref.unwrap_or(compare_ref); + let mut ok = test_reparse(ctx.sources.get(id).src(), i, rng); let (frames, mut errors) = match ctx.evaluate(id) { Ok(module) => { @@ -444,43 +444,31 @@ fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { } }; - let mut in_range = |range: std::ops::Range| { - let full = rng.next().unwrap() as f64 / u64::MAX as f64; - (range.start as f64 + full * (range.end as f64 - range.start as f64)).floor() - as usize + let mut pick = |range: Range| { + let ratio = rng.next(); + (range.start as f64 + ratio * (range.end - range.start) as f64).floor() as usize }; let insertions = (src.len() as f64 / 400.0).ceil() as usize; for _ in 0 .. insertions { - let supplement = supplements[in_range(0 .. supplements.len())]; - let start = in_range(0 .. src.len()); - let end = in_range(start .. src.len()); + let supplement = supplements[pick(0 .. supplements.len())]; + let start = pick(0 .. src.len()); + let end = pick(start .. src.len()); if !src.is_char_boundary(start) || !src.is_char_boundary(end) { continue; } - if !apply(start .. end, supplement) { - println!("original tree: {:#?}", SourceFile::detached(src).root()); - ok = false; - } + ok &= apply(start .. end, supplement); } - let red = RedNode::from_root( - SourceFile::detached(src).root().clone(), - SourceId::from_raw(0), - ); + let red = SourceFile::detached(src).red(); - let leafs: Vec<_> = red - .as_ref() - .all_children() - .into_iter() - .filter(|red| red.is_leaf()) - .collect(); + let leafs = red.as_ref().leafs(); - let leaf_start = leafs[in_range(0 .. leafs.len())].span().start; - let supplement = supplements[in_range(0 .. supplements.len())]; + let leaf_start = leafs[pick(0 .. leafs.len())].span().start; + let supplement = supplements[pick(0 .. 
supplements.len())]; ok &= apply(leaf_start .. leaf_start, supplement); @@ -954,23 +942,14 @@ impl LinearShift { pub fn new() -> Self { Self(0xACE5) } -} -impl Iterator for LinearShift { - type Item = u64; - - /// Apply the shift. - fn next(&mut self) -> Option { + /// Return a pseudo-random number between `0.0` and `1.0`. + pub fn next(&mut self) -> f64 { self.0 ^= self.0 >> 3; self.0 ^= self.0 << 14; self.0 ^= self.0 >> 28; self.0 ^= self.0 << 36; self.0 ^= self.0 >> 52; - Some(self.0) - } - - /// The iterator is endless but will repeat eventually. - fn size_hint(&self) -> (usize, Option) { - (usize::MAX, None) + self.0 as f64 / u64::MAX as f64 } }