diff --git a/benches/oneshot.rs b/benches/oneshot.rs index d3e2ff8e4..c088a93c8 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -49,6 +49,11 @@ fn bench_parse(iai: &mut Iai) { iai.run(|| parse(SRC)); } +fn bench_edit(iai: &mut Iai) { + let (mut ctx, id) = context(); + iai.run(|| black_box(ctx.sources.edit(id, 1168 .. 1171, "_Uhr_"))); +} + fn bench_eval(iai: &mut Iai) { let (mut ctx, id) = context(); iai.run(|| ctx.evaluate(id).unwrap()); @@ -66,6 +71,7 @@ main!( bench_scan, bench_tokenize, bench_parse, + bench_edit, bench_eval, bench_layout ); diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs new file mode 100644 index 000000000..4c82f158b --- /dev/null +++ b/src/parse/incremental.rs @@ -0,0 +1,672 @@ +use std::ops::Range; +use std::rc::Rc; + +use crate::syntax::{Green, GreenNode, NodeKind}; + +use super::{ + is_newline, parse, parse_atomic, parse_atomic_markup, parse_block, parse_comment, + parse_markup, parse_markup_elements, parse_template, Scanner, TokenMode, +}; + +/// The conditions that a node has to fulfill in order to be replaced. +/// +/// This can dictate if a node can be replaced at all and if yes, what can take +/// its place. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum SuccessionRule { + /// Changing this node can never have an influence on the other nodes. + Safe, + /// This node has to be replaced with a single token of the same kind. + SameKind(Option<TokenMode>), + /// In code mode, this node can only be changed into a single atomic + /// expression, otherwise it is safe. + AtomicPrimary, + /// Changing an unsafe layer node in code mode changes what the parents or + /// the surrounding nodes would be and is therefore disallowed. Change the + /// parents or children instead. If it appears in Markup, however, it is + /// safe to change. + UnsafeLayer, + /// Changing an unsafe node or any of its children is not allowed. Change + /// the parents instead. 
+ Unsafe, +} + +/// The conditions under which a node can be inserted or remain in a tree. +/// +/// These conditions all search the neighbors of the node and see if its +/// existence is plausible with them present. This can be used to encode some +/// context-free language components for incremental parsing. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum NeighbourRule { + /// These nodes depend on being at the start of a line. Reparsing of safe + /// left neighbors has to check this invariant. Additionally, when + /// exchanging the right sibling or inserting such a node the indentation of + /// the first right non-trivia, non-whitespace sibling must not be greater + /// than the current indentation. + AtStart, + /// These nodes depend on not being at the start of a line. Reparsing of + /// safe left neighbors has to check this invariant. Otherwise, this node is + /// safe. + NotAtStart, + /// These nodes could end up somewhere else up the tree if the parse was + /// happening from scratch. The parse result has to be checked for such + /// nodes. They are safe to add if followed up by other nodes. + NotAtEnd, + /// No additional requirements. + None, +} + +/// Allows partial refreshes of the [`Green`] node tree. +/// +/// This struct holds a description of a change. Its methods can be used to try +/// and apply the change to a green tree. +pub struct Reparser<'a> { + /// The new source code, with the change applied. + src: &'a str, + /// Which range in the old source file was changed. + replace_range: Range<usize>, + /// How many characters replaced the text in `replace_range`. + replace_len: usize, +} + +impl<'a> Reparser<'a> { + /// Create a new reparser. + pub fn new(src: &'a str, replace_range: Range<usize>, replace_len: usize) -> Self { + Self { src, replace_range, replace_len } + } +} + +impl Reparser<'_> { + /// Find the innermost child that is incremental safe. 
+ pub fn reparse(&self, green: &mut Rc<GreenNode>) -> Range<usize> { + self.reparse_step(Rc::make_mut(green), 0, TokenMode::Markup, true) + .unwrap_or_else(|| { + *green = parse(self.src); + 0 .. self.src.len() + }) + } + + fn reparse_step( + &self, + green: &mut GreenNode, + mut offset: usize, + parent_mode: TokenMode, + mut outermost: bool, + ) -> Option<Range<usize>> { + let mode = green.kind().mode().unwrap_or(parent_mode); + let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); + let original_count = green.children().len(); + + // Save the current indent if this is a markup node. + let indent = match green.kind() { + NodeKind::Markup(n) => *n, + _ => 0, + }; + + let mut first = None; + let mut at_start = true; + + // Find the first child in the range of children to reparse. + for (i, child) in green.children_mut().iter_mut().enumerate() { + let child_span = offset .. offset + child.len(); + + // We look for the start in the element but we only take a position + // at the right border if this is markup or the last element. + // + // This is because in Markup mode, we want to examine all nodes + // touching a replacement but in code we want to atomically replace. + if child_span.contains(&self.replace_range.start) + || (mode == TokenMode::Markup + && self.replace_range.start == child_span.end) + { + first = Some((i, offset)); + break; + } + + offset += child.len(); + at_start = child.kind().is_at_start(at_start); + } + + let (first_idx, first_start) = first?; + let mut last = None; + + // Find the last child in the range of children to reparse. + for (i, child) in green.children_mut().iter_mut().enumerate().skip(first_idx) { + let child_span = offset .. offset + child.len(); + + // Similarly to above, the end of the edit must be in the node but + // if it is at the edge and we are in markup node, we also want its + // neighbor! 
+ if child_span.contains(&self.replace_range.end) + || self.replace_range.end == child_span.end + && (mode != TokenMode::Markup || i + 1 == original_count) + { + outermost &= i + 1 == original_count; + last = Some((i, offset + child.len())); + break; + } else if mode != TokenMode::Markup + || !child.kind().succession_rule().safe_in_markup() + { + break; + } + + offset += child.len(); + } + + let (last_idx, last_end) = last?; + let superseded_range = first_idx .. last_idx + 1; + let superseded_span = first_start .. last_end; + let last_kind = green.children()[last_idx].kind().clone(); + + // First, we try if the child itself has another, more specific + // applicable child. + if superseded_range.len() == 1 { + let child = &mut green.children_mut()[superseded_range.start]; + let prev_len = child.len(); + + if last_kind.succession_rule() != SuccessionRule::Unsafe { + if let Some(range) = match child { + Green::Node(node) => self.reparse_step( + Rc::make_mut(node), + first_start, + child_mode, + outermost, + ), + Green::Token(_) => None, + } { + let new_len = child.len(); + green.update_parent(new_len, prev_len); + return Some(range); + } + } + } + + // We only replace multiple children in markup mode. + if superseded_range.len() > 1 && mode == TokenMode::Code { + return None; + } + + // We now have a child that we can replace and a function to do so. + let func = last_kind.reparsing_func(child_mode, indent)?; + let succession = last_kind.succession_rule(); + + let mut markup_min_column = 0; + + // If this is a markup node, we want to save its indent instead to pass + // the right indent argument. + if superseded_range.len() == 1 { + let child = &mut green.children_mut()[superseded_range.start]; + if let NodeKind::Markup(n) = child.kind() { + markup_min_column = *n; + } + } + + // The span of the to-be-reparsed children in the new source. + let newborn_span = superseded_span.start + .. 
+ superseded_span.end + self.replace_len - self.replace_range.len(); + + // For atomic primaries we need to pass in the whole remaining string to + // check whether the parser would eat more stuff illicitly. + let reparse_span = if succession == SuccessionRule::AtomicPrimary { + newborn_span.start .. self.src.len() + } else { + newborn_span.clone() + }; + + let mut prefix = ""; + for (i, c) in self.src[.. reparse_span.start].char_indices().rev() { + if is_newline(c) { + break; + } + prefix = &self.src[i .. reparse_span.start]; + } + + // Do the reparsing! + let (mut newborns, terminated) = func( + &prefix, + &self.src[reparse_span.clone()], + at_start, + markup_min_column, + )?; + + // Make sure that atomic primaries ate only what they were supposed to. + if succession == SuccessionRule::AtomicPrimary { + let len = newborn_span.len(); + if newborns.len() > 1 && newborns[0].len() == len { + newborns.truncate(1); + } else if newborns.iter().map(Green::len).sum::() != len { + return None; + } + } + + // Do not accept unclosed nodes if the old node wasn't at the right edge + // of the tree. + if !outermost && !terminated { + return None; + } + + // If all post- and preconditions match, we are good to go! + if validate( + green.children(), + superseded_range.clone(), + at_start, + &newborns, + mode, + succession, + newborn_span.clone(), + self.src, + ) { + green.replace_children(superseded_range, newborns); + Some(newborn_span) + } else { + None + } + } +} + +/// Validate that a node replacement is allowed by post- and preconditions. +fn validate( + superseded: &[Green], + superseded_range: Range, + mut at_start: bool, + newborns: &[Green], + mode: TokenMode, + post: SuccessionRule, + newborn_span: Range, + src: &str, +) -> bool { + // Atomic primaries must only generate one new child. 
+ if post == SuccessionRule::AtomicPrimary && newborns.len() != 1 { + return false; + } + + // Same kind in mode `inside` must generate only one child and that child + // must be of the same kind as previously. + if let SuccessionRule::SameKind(inside) = post { + let superseded_kind = superseded[superseded_range.start].kind(); + let superseded_mode = superseded_kind.mode().unwrap_or(mode); + if inside.map_or(true, |m| m == superseded_mode) + && (newborns.len() != 1 || superseded_kind != newborns[0].kind()) + { + return false; + } + } + + // Neighbor invariants are only relevant in markup mode. + if mode == TokenMode::Code { + return true; + } + + // Check if there are any `AtStart` predecessors which require a certain + // indentation. + let s = Scanner::new(src); + let mut prev_pos = newborn_span.start; + for child in (&superseded[.. superseded_range.start]).iter().rev() { + prev_pos -= child.len(); + if !child.kind().is_trivia() { + if child.kind().neighbour_rule() == NeighbourRule::AtStart { + let left_col = s.column(prev_pos); + + // Search for the first non-trivia newborn. + let mut new_pos = newborn_span.start; + let mut child_col = None; + for child in newborns { + if !child.kind().is_trivia() { + child_col = Some(s.column(new_pos)); + break; + } + + new_pos += child.len(); + } + + if let Some(child_col) = child_col { + if child_col > left_col { + return false; + } + } + } + + break; + } + } + + // Compute the at_start state behind the new children. + for child in newborns { + at_start = child.kind().is_at_start(at_start); + } + + // Ensure that a possible at-start or not-at-start precondition of + // a node after the replacement range is satisfied. + for child in &superseded[superseded_range.end ..] 
{ + let neighbour_rule = child.kind().neighbour_rule(); + if (neighbour_rule == NeighbourRule::AtStart && !at_start) + || (neighbour_rule == NeighbourRule::NotAtStart && at_start) + { + return false; + } + + if !child.kind().is_trivia() { + break; + } + + at_start = child.kind().is_at_start(at_start); + } + + // Verify that the last of the newborns is not `NotAtEnd`. + if newborns.last().map_or(false, |child| { + child.kind().neighbour_rule() == NeighbourRule::NotAtEnd + }) { + return false; + } + + // We have to check whether the last non-trivia newborn is `AtStart` and + // verify the indent of its right neighbors in order to make sure its + // indentation requirements are fulfilled. + let mut child_pos = newborn_span.end; + for child in newborns.iter().rev() { + child_pos -= child.len(); + + if child.kind().is_trivia() { + continue; + } + + if child.kind().neighbour_rule() == NeighbourRule::AtStart { + let child_col = s.column(child_pos); + + let mut right_pos = newborn_span.end; + for child in &superseded[superseded_range.end ..] { + if child.kind().is_trivia() { + right_pos += child.len(); + continue; + } + + if s.column(right_pos) > child_col { + return false; + } + break; + } + } + break; + } + + true +} + +impl NodeKind { + /// Return the correct reparsing function given the postconditions for the + /// type. 
+ fn reparsing_func( + &self, + parent_mode: TokenMode, + indent: usize, + ) -> Option Option<(Vec, bool)>> { + let mode = self.mode().unwrap_or(parent_mode); + match self.succession_rule() { + SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None, + SuccessionRule::AtomicPrimary if mode == TokenMode::Code => { + Some(parse_atomic) + } + SuccessionRule::AtomicPrimary => Some(parse_atomic_markup), + SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self { + NodeKind::Markup(_) => Some(parse_markup), + NodeKind::Template => Some(parse_template), + NodeKind::Block => Some(parse_block), + NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment), + _ => None, + }, + _ => match mode { + TokenMode::Markup if indent == 0 => Some(parse_markup_elements), + _ => return None, + }, + } + } + + /// Whether it is safe to do incremental parsing on this node. Never allow + /// non-termination errors if this is not already the last leaf node. + pub fn succession_rule(&self) -> SuccessionRule { + match self { + // Replacing parenthesis changes if the expression is balanced and + // is therefore not safe. + Self::LeftBracket + | Self::RightBracket + | Self::LeftBrace + | Self::RightBrace + | Self::LeftParen + | Self::RightParen => SuccessionRule::Unsafe, + + // Replacing an operator can change whether the parent is an + // operation which makes it unsafe. The star can appear in markup. + Self::Star + | Self::Comma + | Self::Semicolon + | Self::Colon + | Self::Plus + | Self::Minus + | Self::Slash + | Self::Eq + | Self::EqEq + | Self::ExclEq + | Self::Lt + | Self::LtEq + | Self::Gt + | Self::GtEq + | Self::PlusEq + | Self::HyphEq + | Self::StarEq + | Self::SlashEq + | Self::Not + | Self::And + | Self::Or + | Self::With + | Self::Dots + | Self::Arrow => SuccessionRule::Unsafe, + + // These keywords change what kind of expression the parent is and + // how far the expression would go. 
+ Self::Let + | Self::Set + | Self::If + | Self::Else + | Self::For + | Self::In + | Self::While + | Self::Break + | Self::Continue + | Self::Return + | Self::Import + | Self::Include + | Self::From => SuccessionRule::Unsafe, + + // Changing the heading level, enum numbering, or list bullet + // changes the next layer. + Self::EnumNumbering(_) => SuccessionRule::Unsafe, + + // This can be anything, so we don't make any promises. + Self::Error(_, _) | Self::Unknown(_) => SuccessionRule::Unsafe, + + // These are complex expressions which may screw with their + // environments. + Self::Call + | Self::Unary + | Self::Binary + | Self::CallArgs + | Self::Named + | Self::Spread => SuccessionRule::UnsafeLayer, + + // The closure is a bit magic with the let expression, and also it + // is not atomic. + Self::Closure | Self::ClosureParams => SuccessionRule::UnsafeLayer, + + // Missing these creates errors for the parents. + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + SuccessionRule::UnsafeLayer + } + + // Only markup is expected at the points where it does occur. The + // indentation must be preserved as well, also for the children. + Self::Markup(_) => SuccessionRule::SameKind(None), + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => SuccessionRule::SameKind(None), + + // These can appear as bodies and would trigger an error if they + // became something else. + Self::Template => SuccessionRule::SameKind(None), + Self::Block => SuccessionRule::SameKind(Some(TokenMode::Code)), + + // Whitespace in code mode has to remain whitespace or else the type + // of things would change. + Self::Space(_) => SuccessionRule::SameKind(Some(TokenMode::Code)), + + // These are expressions that can be replaced by other expressions. 
+ Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group + | Self::None + | Self::Auto => SuccessionRule::AtomicPrimary, + + // More complex, but still an expression. + Self::ForExpr + | Self::WhileExpr + | Self::IfExpr + | Self::LetExpr + | Self::SetExpr + | Self::ImportExpr + | Self::IncludeExpr => SuccessionRule::AtomicPrimary, + + // This element always has to remain in the same column so better + // reparse the whole parent. + Self::Raw(_) => SuccessionRule::Unsafe, + + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Linebreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Heading + | Self::Enum + | Self::List + | Self::Math(_) => SuccessionRule::Safe, + } + } + + /// The appropriate precondition for the type. + pub fn neighbour_rule(&self) -> NeighbourRule { + match self { + Self::Heading | Self::Enum | Self::List => NeighbourRule::AtStart, + Self::TextInLine(_) => NeighbourRule::NotAtStart, + Self::Error(_, _) => NeighbourRule::NotAtEnd, + _ => NeighbourRule::None, + } + } +} + +impl SuccessionRule { + /// Whether a node with this condition can be reparsed in markup mode. 
+ pub fn safe_in_markup(&self) -> bool { + match self { + Self::Safe | Self::UnsafeLayer => true, + Self::SameKind(mode) => mode.map_or(false, |m| m != TokenMode::Markup), + _ => false, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parse::parse; + use crate::source::SourceFile; + + #[test] + #[rustfmt::skip] + fn test_incremental_parse() { + #[track_caller] + fn test(prev: &str, range: Range, with: &str, goal: Range) { + let mut source = SourceFile::detached(prev); + let range = source.edit(range, with); + assert_eq!(range, goal); + assert_eq!(parse(source.src()), *source.root()); + } + + // Test simple replacements. + test("hello world", 6 .. 11, "walkers", 5 .. 13); + test("some content", 0..12, "", 0..0); + test("", 0..0, "do it", 0..5); + test("a d e", 1 .. 3, " b c d", 0 .. 8); + test("a #f() e", 1 .. 6, " b c d", 0 .. 8); + test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); + test("= A heading", 3 .. 3, "n evocative", 2 .. 22); + test("your thing", 5 .. 5, "a", 4 .. 11); + test("a your thing a", 6 .. 7, "a", 2 .. 12); + test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); + test("#call() abc", 7 .. 7, "[]", 0 .. 10); + test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 3 .. 34); + test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 0 .. 32); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39); + test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32); + test("hello {x}", 6 .. 9, "#f()", 5 .. 10); + test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 
12); + test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37); + test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15); + test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); + + // Test the whitespace invariants. + test("hello \\ world", 7 .. 8, "a ", 6 .. 14); + test("hello \\ world", 7 .. 8, " a", 6 .. 14); + test("x = y", 1 .. 1, " + y", 0 .. 6); + test("x = y", 1 .. 1, " + y\n", 0 .. 10); + test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); + test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19); + test("#let x = (1, 2 + ; Five\r\n\r", 19..22, "2.", 18..22); + test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); + test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); + + // Test type invariants. + test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22); + test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15); + test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16); + test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 17); + test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18); + test("a // b c #f()", 3 .. 4, "", 0 .. 12); + test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12); + test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); + test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26); + test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14); + test("a b c", 1 .. 1, "{[}", 0 .. 8); + + // Test unclosed things. + test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6); + test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12); + test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17); + test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); + test("a b c", 1 .. 1, " /* letters */", 0 .. 16); + test("a b c", 1 .. 1, " /* letters", 0 .. 16); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); + + // Test raw tokens. + test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); + test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 
18); + } +} diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 10aaad234..a97526453 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,10 +1,12 @@ //! Parsing and tokenization. +mod incremental; mod parser; mod resolve; mod scanner; mod tokens; +pub use incremental::*; pub use parser::*; pub use resolve::*; pub use scanner::*; @@ -14,10 +16,11 @@ use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; +use crate::util::EcoString; /// Parse a source file. pub fn parse(src: &str) -> Rc { - let mut p = Parser::new(src); + let mut p = Parser::new(src, TokenMode::Markup); markup(&mut p); match p.finish().into_iter().next() { Some(Green::Node(node)) => node, @@ -25,9 +28,108 @@ pub fn parse(src: &str) -> Rc { } } +/// Parse an atomic primary. Returns `Some` if all of the input was consumed. +pub fn parse_atomic( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); + primary(&mut p, true).ok()?; + p.consume_unterminated() +} + +/// Parse an atomic primary. Returns `Some` if all of the input was consumed. +pub fn parse_atomic_markup( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); + markup_expr(&mut p); + p.consume_unterminated() +} + +/// Parse some markup. Returns `Some` if all of the input was consumed. +pub fn parse_markup( + prefix: &str, + src: &str, + _: bool, + min_column: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); + if min_column == 0 { + markup(&mut p); + } else { + markup_indented(&mut p, min_column); + } + p.consume() +} + +/// Parse some markup without the topmost node. Returns `Some` if all of the +/// input was consumed. 
+pub fn parse_markup_elements( + prefix: &str, + src: &str, + mut at_start: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); + while !p.eof() { + markup_node(&mut p, &mut at_start); + } + p.consume() +} + +/// Parse a template literal. Returns `Some` if all of the input was consumed. +pub fn parse_template( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); + if !p.at(&NodeKind::LeftBracket) { + return None; + } + + template(&mut p); + p.consume() +} + +/// Parse a code block. Returns `Some` if all of the input was consumed. +pub fn parse_block( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); + if !p.at(&NodeKind::LeftBrace) { + return None; + } + + block(&mut p); + p.consume() +} + +/// Parse a comment. Returns `Some` if all of the input was consumed. +pub fn parse_comment( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); + comment(&mut p).ok()?; + p.consume() +} + /// Parse markup. fn markup(p: &mut Parser) { - markup_while(p, true, &mut |_| true) + markup_while(p, true, 0, &mut |_| true) } /// Parse markup that stays right of the given column. @@ -38,7 +140,7 @@ fn markup_indented(p: &mut Parser, column: usize) { _ => false, }); - markup_while(p, false, &mut |p| match p.peek() { + markup_while(p, false, column, &mut |p| match p.peek() { Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, _ => true, }) @@ -48,11 +150,11 @@ fn markup_indented(p: &mut Parser, column: usize) { /// /// If `at_start` is true, things like headings that may only appear at the /// beginning of a line or template are allowed. 
-fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) +fn markup_while(p: &mut Parser, mut at_start: bool, column: usize, f: &mut F) where F: FnMut(&mut Parser) -> bool, { - p.perform(NodeKind::Markup, |p| { + p.perform(NodeKind::Markup(column), |p| { while !p.eof() && f(p) { markup_node(p, &mut at_start); } @@ -98,14 +200,9 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { p.eat(); } - NodeKind::Eq if *at_start => heading(p), - NodeKind::Minus if *at_start => list_node(p), - NodeKind::EnumNumbering(_) if *at_start => enum_node(p), - - // Line-based markup that is not currently at the start of the line. - NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => { - p.convert(NodeKind::Text(p.peek_src().into())); - } + NodeKind::Eq => heading(p, *at_start), + NodeKind::Minus => list_node(p, *at_start), + NodeKind::EnumNumbering(_) => enum_node(p, *at_start), // Hashtag + keyword / identifier. NodeKind::Ident(_) @@ -115,17 +212,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::While | NodeKind::For | NodeKind::Import - | NodeKind::Include => { - let stmt = matches!(token, NodeKind::Let | NodeKind::Set | NodeKind::Import); - let group = if stmt { Group::Stmt } else { Group::Expr }; - - p.start_group(group); - let res = expr_prec(p, true, 0); - if stmt && res.is_ok() && !p.eof() { - p.expected_at("semicolon or line break"); - } - p.end_group(); - } + | NodeKind::Include => markup_expr(p), // Block and template. NodeKind::LeftBrace => block(p), @@ -139,31 +226,65 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { } /// Parse a heading. 
-fn heading(p: &mut Parser) { - p.perform(NodeKind::Heading, |p| { - p.eat_assert(&NodeKind::Eq); - while p.eat_if(&NodeKind::Eq) {} +fn heading(p: &mut Parser, at_start: bool) { + let marker = p.marker(); + let current_start = p.current_start(); + p.eat_assert(&NodeKind::Eq); + while p.eat_if(&NodeKind::Eq) {} + + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::Heading); + } else { + let text = p.get(current_start .. p.prev_end()).into(); + marker.convert(p, NodeKind::TextInLine(text)); + } } /// Parse a single list item. -fn list_node(p: &mut Parser) { - p.perform(NodeKind::List, |p| { - p.eat_assert(&NodeKind::Minus); +fn list_node(p: &mut Parser, at_start: bool) { + let marker = p.marker(); + let text: EcoString = p.peek_src().into(); + p.eat_assert(&NodeKind::Minus); + + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::List); + } else { + marker.convert(p, NodeKind::TextInLine(text)); + } } /// Parse a single enum item. -fn enum_node(p: &mut Parser) { - p.perform(NodeKind::Enum, |p| { - p.eat(); +fn enum_node(p: &mut Parser, at_start: bool) { + let marker = p.marker(); + let text: EcoString = p.peek_src().into(); + p.eat(); + + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::Enum); + } else { + marker.convert(p, NodeKind::TextInLine(text)); + } +} + +/// Parse an expression within markup mode. 
+fn markup_expr(p: &mut Parser) { + if let Some(token) = p.peek() { + let stmt = matches!(token, NodeKind::Let | NodeKind::Set | NodeKind::Import); + let group = if stmt { Group::Stmt } else { Group::Expr }; + + p.start_group(group); + let res = expr_prec(p, true, 0); + if stmt && res.is_ok() && !p.eof() { + p.expected_at("semicolon or line break"); + } + p.end_group(); + } } /// Parse an expression. @@ -183,13 +304,13 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { // Start the unary expression. match p.peek().and_then(UnOp::from_token) { - Some(op) => { + Some(op) if !atomic => { p.eat(); let prec = op.precedence(); expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); } - None => primary(p, atomic)?, + _ => primary(p, atomic)?, }; loop { @@ -254,7 +375,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { } // Structures. - Some(NodeKind::LeftParen) => parenthesized(p), + Some(NodeKind::LeftParen) => parenthesized(p, atomic), Some(NodeKind::LeftBracket) => { template(p); Ok(()) @@ -315,7 +436,7 @@ fn literal(p: &mut Parser) -> bool { /// - Dictionary literal /// - Parenthesized expression /// - Parameter list of closure expression -fn parenthesized(p: &mut Parser) -> ParseResult { +fn parenthesized(p: &mut Parser, atomic: bool) -> ParseResult { let marker = p.marker(); p.start_group(Group::Paren); @@ -330,7 +451,7 @@ fn parenthesized(p: &mut Parser) -> ParseResult { } // Arrow means this is a closure's parameter list. - if p.at(&NodeKind::Arrow) { + if !atomic && p.at(&NodeKind::Arrow) { params(p, marker); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); @@ -706,3 +827,14 @@ fn body(p: &mut Parser) -> ParseResult { } Ok(()) } + +/// Parse a comment. 
+fn comment(p: &mut Parser) -> ParseResult { + match p.peek() { + Some(NodeKind::LineComment | NodeKind::BlockComment) => { + p.eat(); + Ok(()) + } + _ => Err(ParseError), + } +} diff --git a/src/parse/parser.rs b/src/parse/parser.rs index af8a7c5ca..4e5b277d2 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,8 @@ +use core::slice::SliceIndex; use std::fmt::{self, Display, Formatter}; use std::mem; -use super::{TokenMode, Tokens}; +use super::{Scanner, TokenMode, Tokens}; use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -21,12 +22,17 @@ pub struct Parser<'s> { groups: Vec, /// The children of the currently built node. children: Vec, + /// Is `Some` if there is an unterminated group at the last position where + /// groups were terminated. + last_unterminated: Option, + /// Offsets the indentation on the first line of the source. + column_offset: usize, } impl<'s> Parser<'s> { /// Create a new parser for the source string. - pub fn new(src: &'s str) -> Self { - let mut tokens = Tokens::new(src, TokenMode::Markup); + pub fn new(src: &'s str, mode: TokenMode) -> Self { + let mut tokens = Tokens::new(src, mode); let current = tokens.next(); Self { tokens, @@ -36,14 +42,38 @@ impl<'s> Parser<'s> { current_start: 0, groups: vec![], children: vec![], + last_unterminated: None, + column_offset: 0, } } + /// Create a new parser for the source string that is prefixed by some text + /// that does not need to be parsed but taken into account for column + /// calculation. + pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { + let mut p = Self::new(src, mode); + p.column_offset = Scanner::new(prefix).column(prefix.len()); + p + } + /// End the parsing process and return the last child. pub fn finish(self) -> Vec { self.children } + /// End the parsing process and return multiple children and whether the + /// last token was terminated. 
+ pub fn consume(self) -> Option<(Vec, bool)> { + (self.eof() && self.terminated()) + .then(|| (self.children, self.tokens.terminated())) + } + + /// End the parsing process and return multiple children and whether the + /// last token was terminated, even if there remains stuff in the string. + pub fn consume_unterminated(self) -> Option<(Vec, bool)> { + self.terminated().then(|| (self.children, self.tokens.terminated())) + } + /// Create a new marker. pub fn marker(&mut self) -> Marker { Marker(self.children.len()) @@ -170,6 +200,14 @@ impl<'s> Parser<'s> { self.tokens.scanner().get(self.current_start() .. self.current_end()) } + /// Obtain a range of the source code. + pub fn get(&self, index: I) -> &'s str + where + I: SliceIndex, + { + self.tokens.scanner().get(index) + } + /// The byte index at which the last non-trivia token ended. pub fn prev_end(&self) -> usize { self.prev_end @@ -187,7 +225,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.tokens.scanner().column(index) + self.tokens.scanner().column_offset(index, self.column_offset) } /// Continue parsing in a group. @@ -225,6 +263,9 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); + if self.last_unterminated != Some(self.prev_end()) { + self.last_unterminated = None; + } let mut rescan = self.tokens.mode() != group_mode; @@ -243,6 +284,7 @@ impl<'s> Parser<'s> { rescan = false; } else if required { self.push_error(format_eco!("expected {}", end)); + self.last_unterminated = Some(self.prev_end()); } } @@ -260,6 +302,11 @@ impl<'s> Parser<'s> { } } + /// Checks if all groups were correctly terminated. + pub fn terminated(&self) -> bool { + self.groups.is_empty() && self.last_unterminated.is_none() + } + /// Low-level bump that consumes exactly one token without special trivia /// handling. 
fn bump(&mut self) { @@ -320,7 +367,8 @@ impl Parser<'_> { /// Push an error into the children list. pub fn push_error(&mut self, msg: impl Into) { let error = NodeKind::Error(ErrorPos::Full, msg.into()); - self.children.push(GreenData::new(error, 0).into()); + let idx = self.trivia_start(); + self.children.insert(idx.0, GreenData::new(error, 0).into()); } /// Eat the current token and add an error that it is unexpected. @@ -419,6 +467,7 @@ impl Marker { } /// A logical group of tokens, e.g. `[...]`. +#[derive(Debug)] struct GroupEntry { /// The kind of group this is. This decides which tokens will end the group. /// For example, a [`Group::Paren`] will be ended by diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index c735be407..6db891323 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -162,11 +162,26 @@ impl<'s> Scanner<'s> { /// The column index of a given index in the source string. #[inline] pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() + self.column_offset(index, 0) + } + + /// The column index of a given index in the source string when an offset is + /// applied to the first line of the string. + #[inline] + pub fn column_offset(&self, index: usize, offset: usize) -> usize { + let mut apply_offset = false; + let res = self.src[.. index] + .char_indices() .rev() - .take_while(|&c| !is_newline(c)) - .count() + .take_while(|&(_, c)| !is_newline(c)) + .inspect(|&(i, _)| { + if i == 0 { + apply_offset = true + } + }) + .count(); + + if apply_offset { res + offset } else { res } } } diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 27ec046df..69c4d2dee 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -13,6 +13,7 @@ use crate::util::EcoString; pub struct Tokens<'s> { s: Scanner<'s>, mode: TokenMode, + terminated: bool, } /// What kind of tokens to emit. @@ -28,7 +29,11 @@ impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. 
#[inline] pub fn new(src: &'s str, mode: TokenMode) -> Self { - Self { s: Scanner::new(src), mode } + Self { + s: Scanner::new(src), + mode, + terminated: true, + } } /// Get the current token mode. @@ -63,6 +68,12 @@ impl<'s> Tokens<'s> { pub fn scanner(&self) -> Scanner<'s> { self.s } + + /// Whether the last token was terminated. + #[inline] + pub fn terminated(&self) -> bool { + self.terminated + } } impl<'s> Iterator for Tokens<'s> { @@ -117,9 +128,7 @@ impl<'s> Tokens<'s> { '`' => self.raw(), '$' => self.math(), '-' => self.hyph(), - '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => { - NodeKind::Eq - } + '=' => NodeKind::Eq, c if c == '.' || c.is_ascii_digit() => self.numbering(start, c), // Plain text. @@ -248,6 +257,7 @@ impl<'s> Tokens<'s> { ) } } else { + self.terminated = false; NodeKind::Error( ErrorPos::End, "expected closing brace".into(), @@ -281,10 +291,8 @@ impl<'s> Tokens<'s> { } else { NodeKind::EnDash } - } else if self.s.check_or(true, char::is_whitespace) { - NodeKind::Minus } else { - NodeKind::Text('-'.into()) + NodeKind::Minus } } @@ -300,11 +308,7 @@ impl<'s> Tokens<'s> { None }; - if self.s.check_or(true, char::is_whitespace) { - NodeKind::EnumNumbering(number) - } else { - NodeKind::Text(self.s.eaten_from(start).into()) - } + NodeKind::EnumNumbering(number) } fn raw(&mut self) -> NodeKind { @@ -346,6 +350,7 @@ impl<'s> Tokens<'s> { let remaining = backticks - found; let noun = if remaining == 1 { "backtick" } else { "backticks" }; + self.terminated = false; NodeKind::Error( ErrorPos::End, if found == 0 { @@ -393,6 +398,7 @@ impl<'s> Tokens<'s> { display, })) } else { + self.terminated = false; NodeKind::Error( ErrorPos::End, if !display || (!escaped && dollar) { @@ -481,18 +487,23 @@ impl<'s> Tokens<'s> { if self.s.eat_if('"') { NodeKind::Str(string) } else { + self.terminated = false; NodeKind::Error(ErrorPos::End, "expected quote".into()) } } fn line_comment(&mut self) -> NodeKind { self.s.eat_until(is_newline); + if 
self.s.peek().is_none() { + self.terminated = false; + } NodeKind::LineComment } fn block_comment(&mut self) -> NodeKind { let mut state = '_'; let mut depth = 1; + self.terminated = false; // Find the first `*/` that does not correspond to a nested `/*`. while let Some(c) = self.s.eat() { @@ -500,6 +511,7 @@ impl<'s> Tokens<'s> { ('*', '/') => { depth -= 1; if depth == 0 { + self.terminated = true; break; } '_' @@ -713,6 +725,7 @@ mod tests { t!(Both["a1/"]: " \n" => Space(1)); t!(Both["a1/"]: " \n " => Space(1)); t!(Both["a1/"]: "\r\n" => Space(1)); + t!(Both["a1/"]: "\r\n\r" => Space(2)); t!(Both["a1/"]: " \n\t \n " => Space(2)); t!(Both["a1/"]: "\n\r" => Space(2)); t!(Both["a1/"]: " \r\r\n \x0D" => Space(3)); @@ -722,12 +735,12 @@ mod tests { fn test_tokenize_text() { // Test basic text. t!(Markup[" /"]: "hello" => Text("hello")); - t!(Markup[" /"]: "hello-world" => Text("hello"), Text("-"), Text("world")); + t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world")); // Test code symbols in text. t!(Markup[" /"]: "a():\"b" => Text("a():\"b")); t!(Markup[" /"]: ";:,|/+" => Text(";:,|"), Text("/+")); - t!(Markup[" /"]: "=-a" => Text("="), Text("-"), Text("a")); + t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a")); t!(Markup[" "]: "#123" => Text("#"), Text("123")); // Test text ends. @@ -784,7 +797,7 @@ mod tests { t!(Markup["a1/"]: "- " => Minus, Space(0)); t!(Markup[" "]: "." => EnumNumbering(None)); t!(Markup[" "]: "1." => EnumNumbering(Some(1))); - t!(Markup[" "]: "1.a" => Text("1."), Text("a")); + t!(Markup[" "]: "1.a" => EnumNumbering(Some(1)), Text("a")); t!(Markup[" /"]: "a1." 
=> Text("a1.")); } diff --git a/src/source.rs b/src/source.rs index 432688a0b..7afeaa8a3 100644 --- a/src/source.rs +++ b/src/source.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize}; use crate::diag::TypResult; use crate::loading::{FileHash, Loader}; -use crate::parse::{is_newline, parse, Scanner}; +use crate::parse::{is_newline, parse, Reparser, Scanner}; use crate::syntax::ast::Markup; use crate::syntax::{self, Category, GreenNode, RedNode}; use crate::util::PathExt; @@ -154,9 +154,14 @@ impl SourceFile { &self.root } + /// The root red node of the file's untyped red tree. + pub fn red(&self) -> RedNode { + RedNode::from_root(self.root.clone(), self.id) + } + /// The root node of the file's typed abstract syntax tree. pub fn ast(&self) -> TypResult { - let red = RedNode::from_root(self.root.clone(), self.id); + let red = self.red(); let errors = red.errors(); if errors.is_empty() { Ok(red.cast().unwrap()) @@ -265,10 +270,11 @@ impl SourceFile { /// Edit the source file by replacing the given range. /// - /// This panics if the `replace` range is out of bounds. - pub fn edit(&mut self, replace: Range, with: &str) { + /// Returns the range of the section in the new source that was ultimately + /// reparsed. The method panics if the `replace` range is out of bounds. + pub fn edit(&mut self, replace: Range, with: &str) -> Range { let start = replace.start; - self.src.replace_range(replace, with); + self.src.replace_range(replace.clone(), with); // Remove invalidated line starts. let line = self.byte_to_line(start).unwrap(); @@ -283,8 +289,8 @@ impl SourceFile { self.line_starts .extend(newlines(&self.src[start ..]).map(|idx| start + idx)); - // Reparse. - self.root = parse(&self.src); + // Incrementally reparse the replaced range. + Reparser::new(&self.src, replace, with.len()).reparse(&mut self.root) } /// Provide highlighting categories for the given range of the source file. 
diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index ae8ecdc99..bea4ef000 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -53,7 +53,7 @@ macro_rules! node { node! { /// The syntactical root capable of representing a full parsed document. - Markup + Markup: NodeKind::Markup(_) } impl Markup { @@ -65,7 +65,9 @@ impl Markup { NodeKind::Parbreak => Some(MarkupNode::Parbreak), NodeKind::Strong => Some(MarkupNode::Strong), NodeKind::Emph => Some(MarkupNode::Emph), - NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), + NodeKind::Text(s) | NodeKind::TextInLine(s) => { + Some(MarkupNode::Text(s.clone())) + } NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())), NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 85fbef12f..9f7365a81 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -154,10 +154,11 @@ impl Category { NodeKind::Str(_) => Some(Category::String), NodeKind::Error(_, _) => Some(Category::Invalid), NodeKind::Unknown(_) => Some(Category::Invalid), - NodeKind::Markup => None, + NodeKind::Markup(_) => None, NodeKind::Space(_) => None, NodeKind::Parbreak => None, NodeKind::Text(_) => None, + NodeKind::TextInLine(_) => None, NodeKind::List => None, NodeKind::Enum => None, NodeKind::Array => None, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index d9ad42a88..3a0f3a5e0 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -6,6 +6,7 @@ mod pretty; mod span; use std::fmt::{self, Debug, Display, Formatter}; +use std::ops::Range; use std::rc::Rc; pub use highlight::*; @@ -15,6 +16,7 @@ pub use span::*; use self::ast::{MathNode, RawNode, TypedNode}; use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; +use crate::parse::TokenMode; use crate::source::SourceId; use crate::util::EcoString; @@ -62,6 +64,14 @@ impl Green { } } + /// Whether the node is a leaf node in the 
green tree. + pub fn is_leaf(&self) -> bool { + match self { + Green::Node(n) => n.children().is_empty(), + Green::Token(_) => true, + } + } + /// Change the type of the node. pub fn convert(&mut self, kind: NodeKind) { match self { @@ -127,6 +137,52 @@ impl GreenNode { pub fn children(&self) -> &[Green] { &self.children } + + /// The node's metadata. + fn data(&self) -> &GreenData { + &self.data + } + + /// The node's type. + pub fn kind(&self) -> &NodeKind { + self.data().kind() + } + + /// The node's length. + pub fn len(&self) -> usize { + self.data().len() + } + + /// The node's children, mutably. + pub(crate) fn children_mut(&mut self) -> &mut [Green] { + &mut self.children + } + + /// Replaces a range of children with some replacement. + pub(crate) fn replace_children( + &mut self, + range: Range, + replacement: Vec, + ) { + let superseded = &self.children[range.clone()]; + let superseded_len: usize = superseded.iter().map(Green::len).sum(); + let replacement_len: usize = replacement.iter().map(Green::len).sum(); + + // If we're erroneous, but not due to the superseded range, then we will + // still be erroneous after the replacement. + let still_erroneous = self.erroneous && !superseded.iter().any(Green::erroneous); + + self.children.splice(range, replacement); + self.data.len = self.data.len + replacement_len - superseded_len; + self.erroneous = still_erroneous || self.children.iter().any(Green::erroneous); + } + + /// Update the length of this node given the old and new length of + /// replaced children. + pub(crate) fn update_parent(&mut self, new_len: usize, old_len: usize) { + self.data.len = self.data.len() + new_len - old_len; + self.erroneous = self.children.iter().any(Green::erroneous); + } } impl From for Green { @@ -266,7 +322,7 @@ impl Debug for RedNode { } } -/// A borrowed wrapper for a green node with span information. +/// A borrowed wrapper for a [`GreenNode`] with span information. /// /// Borrowed variant of [`RedNode`]. 
Can be [cast](Self::cast) to an AST node. #[derive(Copy, Clone, PartialEq)] @@ -301,6 +357,11 @@ impl<'a> RedRef<'a> { Span::new(self.id, self.offset, self.offset + self.green.len()) } + /// Whether the node is a leaf node. + pub fn is_leaf(self) -> bool { + self.green.is_leaf() + } + /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -325,6 +386,15 @@ impl<'a> RedRef<'a> { } } + /// Returns all leaf descendants of this node (may include itself). + pub fn leafs(self) -> Vec { + if self.is_leaf() { + vec![self] + } else { + self.children().flat_map(Self::leafs).collect() + } + } + /// Convert the node to a typed AST node. pub fn cast(self) -> Option where @@ -502,8 +572,8 @@ pub enum NodeKind { Include, /// The `from` keyword. From, - /// Template markup. - Markup, + /// Template markup of which all lines must start in some column. + Markup(usize), /// One or more whitespace characters. Space(usize), /// A forced line break: `\`. @@ -512,6 +582,8 @@ pub enum NodeKind { Parbreak, /// A consecutive non-markup string. Text(EcoString), + /// A text node that cannot appear at the beginning of a source line. + TextInLine(EcoString), /// A non-breaking space: `~`. NonBreakingSpace, /// An en-dash: `--`. @@ -648,11 +720,71 @@ impl NodeKind { matches!(self, Self::LeftParen | Self::RightParen) } + /// Whether this is whitespace. + pub fn is_whitespace(&self) -> bool { + matches!(self, Self::Space(_) | Self::Parbreak) + } + + /// Whether this is trivia. + pub fn is_trivia(&self) -> bool { + self.is_whitespace() || matches!(self, Self::LineComment | Self::BlockComment) + } + /// Whether this is some kind of error. pub fn is_error(&self) -> bool { matches!(self, NodeKind::Error(_, _) | NodeKind::Unknown(_)) } + /// Whether this node is `at_start` given the previous value of the property. 
+ pub fn is_at_start(&self, prev: bool) -> bool { + match self { + Self::Space(n) if *n > 0 => true, + Self::Parbreak => true, + Self::LineComment | Self::BlockComment => prev, + _ => false, + } + } + + /// Whether this token appears in Markup. + pub fn mode(&self) -> Option { + match self { + Self::Markup(_) + | Self::Linebreak + | Self::Parbreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Heading + | Self::Enum + | Self::EnumNumbering(_) + | Self::List + | Self::Raw(_) + | Self::Math(_) => Some(TokenMode::Markup), + Self::Template + | Self::Space(_) + | Self::Block + | Self::Ident(_) + | Self::LetExpr + | Self::IfExpr + | Self::WhileExpr + | Self::ForExpr + | Self::ImportExpr + | Self::Call + | Self::IncludeExpr + | Self::LineComment + | Self::BlockComment + | Self::Error(_, _) + | Self::Minus + | Self::Eq => None, + _ => Some(TokenMode::Code), + } + } + /// A human-readable name for the kind. 
pub fn as_str(&self) -> &'static str { match self { @@ -701,11 +833,11 @@ impl NodeKind { Self::Import => "keyword `import`", Self::Include => "keyword `include`", Self::From => "keyword `from`", - Self::Markup => "markup", + Self::Markup(_) => "markup", Self::Space(_) => "space", Self::Linebreak => "forced linebreak", Self::Parbreak => "paragraph break", - Self::Text(_) => "text", + Self::Text(_) | Self::TextInLine(_) => "text", Self::NonBreakingSpace => "non-breaking space", Self::EnDash => "en dash", Self::EmDash => "em dash", diff --git a/tests/typ/code/block.typ b/tests/typ/code/block.typ index 45ee92045..5939ba9c5 100644 --- a/tests/typ/code/block.typ +++ b/tests/typ/code/block.typ @@ -129,7 +129,7 @@ } --- -// Error: 2:1 expected closing brace +// Error: 2 expected closing brace { --- diff --git a/tests/typ/code/let.typ b/tests/typ/code/let.typ index 7fd6e0da7..a95d651aa 100644 --- a/tests/typ/code/let.typ +++ b/tests/typ/code/let.typ @@ -57,7 +57,7 @@ Three // Terminated by semicolon even though we are in a paren group. 
// Error: 18 expected expression -// Error: 19 expected closing paren +// Error: 18 expected closing paren #let v5 = (1, 2 + ; Five --- diff --git a/tests/typeset.rs b/tests/typeset.rs index 164ccc913..b1296886a 100644 --- a/tests/typeset.rs +++ b/tests/typeset.rs @@ -1,6 +1,7 @@ use std::env; use std::ffi::OsStr; use std::fs; +use std::ops::Range; use std::path::Path; use std::rc::Rc; @@ -186,6 +187,7 @@ fn test( let mut line = 0; let mut compare_ref = true; let mut compare_ever = false; + let mut rng = LinearShift::new(); let parts: Vec<_> = src.split("\n---").collect(); for (i, &part) in parts.iter().enumerate() { @@ -202,8 +204,16 @@ fn test( } } } else { - let (part_ok, compare_here, part_frames) = - test_part(ctx, src_path, part.into(), i, compare_ref, line, debug); + let (part_ok, compare_here, part_frames) = test_part( + ctx, + src_path, + part.into(), + i, + compare_ref, + line, + debug, + &mut rng, + ); ok &= part_ok; compare_ever |= compare_here; frames.extend(part_frames); @@ -252,14 +262,15 @@ fn test_part( compare_ref: bool, line: usize, debug: bool, + rng: &mut LinearShift, ) -> (bool, bool, Vec>) { let id = ctx.sources.provide(src_path, src); let source = ctx.sources.get(id); let (local_compare_ref, mut ref_errors) = parse_metadata(&source); let compare_ref = local_compare_ref.unwrap_or(compare_ref); + let mut ok = test_reparse(ctx.sources.get(id).src(), i, rng); - let mut ok = true; let (frames, mut errors) = match ctx.evaluate(id) { Ok(module) => { let tree = module.into_root(); @@ -366,6 +377,104 @@ fn test_incremental( ok } +/// Pseudorandomly edit the source file and test whether a reparse produces the +/// same result as a clean parse. +/// +/// The method will first inject 10 strings once every 400 source characters +/// and then select 5 leaf node boundries to inject an additional, randomly +/// chosen string from the injection list. 
+fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { + let supplements = [ + "[", + ")", + "#rect()", + "a word", + ", a: 1", + "10.0", + ":", + "if i == 0 {true}", + "for", + "* hello *", + "//", + "/*", + "\\u{12e4}", + "```typst", + " ", + "trees", + "\\", + "$ a $", + "2.", + "-", + "5", + ]; + + let mut ok = true; + + let apply = |replace: std::ops::Range, with| { + let mut incr_source = SourceFile::detached(src); + if incr_source.root().len() != src.len() { + println!( + " Subtest {} tree length {} does not match string length {} ❌", + i, + incr_source.root().len(), + src.len(), + ); + return false; + } + + incr_source.edit(replace.clone(), with); + let edited_src = incr_source.src(); + + let ref_source = SourceFile::detached(edited_src); + let incr_root = incr_source.root(); + let ref_root = ref_source.root(); + if incr_root != ref_root { + println!( + " Subtest {} reparse differs from clean parse when inserting '{}' at {}-{} ❌", + i, with, replace.start, replace.end, + ); + println!( + "\n Expected reference tree:\n{:#?}\n\n Found incremental tree:\n{:#?}", + ref_root, incr_root + ); + println!("Full source ({}):\n\"{:?}\"", edited_src.len(), edited_src); + false + } else { + true + } + }; + + let mut pick = |range: Range| { + let ratio = rng.next(); + (range.start as f64 + ratio * (range.end - range.start) as f64).floor() as usize + }; + + let insertions = (src.len() as f64 / 400.0).ceil() as usize; + + for _ in 0 .. insertions { + let supplement = supplements[pick(0 .. supplements.len())]; + let start = pick(0 .. src.len()); + let end = pick(start .. src.len()); + + if !src.is_char_boundary(start) || !src.is_char_boundary(end) { + continue; + } + + ok &= apply(start .. end, supplement); + } + + let red = SourceFile::detached(src).red(); + + let leafs = red.as_ref().leafs(); + + let leaf_start = leafs[pick(0 .. leafs.len())].span().start; + let supplement = supplements[pick(0 .. supplements.len())]; + + ok &= apply(leaf_start .. 
leaf_start, supplement); + + ok +} + fn parse_metadata(source: &SourceFile) -> (Option, Vec) { let mut compare_ref = None; let mut errors = vec![]; @@ -823,3 +932,24 @@ where FileDescriptor::redirect_stdio(&stdout, Stdout).unwrap(); result } + +/// This is an Linear-feedback shift register using XOR as its shifting +/// function. It can be used as PRNG. +struct LinearShift(u64); + +impl LinearShift { + /// Initialize the shift register with a pre-set seed. + pub fn new() -> Self { + Self(0xACE5) + } + + /// Return a pseudo-random number between `0.0` and `1.0`. + pub fn next(&mut self) -> f64 { + self.0 ^= self.0 >> 3; + self.0 ^= self.0 << 14; + self.0 ^= self.0 >> 28; + self.0 ^= self.0 << 36; + self.0 ^= self.0 >> 52; + self.0 as f64 / u64::MAX as f64 + } +}