From 5f114e18eb76a1937941b2ea64842b908c9ad89e Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 2 Jan 2022 00:46:19 +0100 Subject: [PATCH] Added a test framework for incremental parsing Fix several errors: - Indented markup is now reparsed right - All end group errors will now fail a reparse - Rightmost errors will always fail a reparse --- src/parse/incremental.rs | 69 ++++++++++++++---- src/parse/mod.rs | 55 +++++++++----- src/parse/parser.rs | 54 ++++++++++++-- src/parse/tokens.rs | 16 ++-- src/syntax/ast.rs | 2 +- src/syntax/highlight.rs | 2 +- src/syntax/mod.rs | 29 +++++++- tests/typ/code/block.typ | 2 +- tests/typ/code/let.typ | 2 +- tests/typeset.rs | 153 +++++++++++++++++++++++++++++++++++++-- 10 files changed, 322 insertions(+), 62 deletions(-) diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 0e2d196c4..1ee37a511 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -47,6 +47,10 @@ pub enum Precondition { /// safe left neighbors has to check this invariant. Otherwise, this node is /// safe. NotAtStart, + /// These nodes could end up somewhere else up the tree if the parse was + /// happening from scratch. The parse result has to be checked for such + /// nodes. They are safe to add if followed up by other nodes. + NotAtEnd, /// No additional requirements. None, } @@ -88,6 +92,12 @@ impl Reparser<'_> { let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); let child_count = green.children().len(); + // Save the current indent if this is a markup node. + let indent = match green.kind() { + NodeKind::Markup(n) => *n, + _ => 0, + }; + let mut first = None; let mut at_start = true; @@ -170,12 +180,29 @@ impl Reparser<'_> { } // We now have a child that we can replace and a function to do so. 
- let func = last_kind.reparsing_func(child_mode)?; + let func = last_kind.reparsing_func(child_mode, indent)?; let post = last_kind.post(); + let mut column = if mode == TokenMode::Markup { + // In this case, we want to pass the indentation to the function. + Scanner::new(self.src).column(children_span.start) + } else { + 0 + }; + + // If this is a markup node, we want to save its indent instead to pass + // the right indent argument. + if children_range.len() == 1 { + let child = &mut green.children_mut()[children_range.start]; + if let NodeKind::Markup(n) = child.kind() { + column = *n; + } + } + // The span of the to-be-reparsed children in the new source. let replace_span = children_span.start - .. children_span.end + self.replace_len - self.replace_range.len(); + .. + children_span.end + self.replace_len - self.replace_range.len(); // For atomic primaries we need to pass in the whole remaining string to // check whether the parser would eat more stuff illicitly. @@ -186,7 +213,7 @@ impl Reparser<'_> { }; // Do the reparsing! - let (mut newborns, terminated) = func(&self.src[reparse_span], at_start)?; + let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?; // Make sure that atomic primaries ate only what they were supposed to. if post == Postcondition::AtomicPrimary { @@ -311,6 +338,14 @@ fn validate( at_start = child.kind().is_at_start(at_start); } + // Verify that the last of the newborns is not `NotAtEnd`. + if newborns + .last() + .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd) + { + return false; + } + // We have to check whether the last non-trivia newborn is `AtStart` and // verify the indent of its right neighbors in order to make sure its // indentation requirements are fulfilled. 
@@ -351,21 +386,22 @@ impl NodeKind { fn reparsing_func( &self, parent_mode: TokenMode, - ) -> Option Option<(Vec, bool)>> { + indent: usize, + ) -> Option Option<(Vec, bool)>> { let mode = self.mode().unwrap_or(parent_mode); match self.post() { Postcondition::Unsafe | Postcondition::UnsafeLayer => None, Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), Postcondition::AtomicPrimary => Some(parse_atomic_markup), Postcondition::SameKind(x) if x == None || x == Some(mode) => match self { + NodeKind::Markup(_) => Some(parse_markup), NodeKind::Template => Some(parse_template), NodeKind::Block => Some(parse_block), NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment), _ => None, }, _ => match mode { - TokenMode::Markup if self == &Self::Markup => Some(parse_markup), - TokenMode::Markup => Some(parse_markup_elements), + TokenMode::Markup if indent == 0 => Some(parse_markup_elements), _ => return None, }, } @@ -452,8 +488,9 @@ impl NodeKind { Postcondition::UnsafeLayer } - // Only markup is expected at the points where it does occur. - Self::Markup => Postcondition::SameKind(None), + // Only markup is expected at the points where it does occur. The + // indentation must be preserved as well, also for the children. + Self::Markup(_) => Postcondition::SameKind(None), // These can appear everywhere and must not change to other stuff // because that could change the outer expression. @@ -493,6 +530,10 @@ impl NodeKind { | Self::ImportExpr | Self::IncludeExpr => Postcondition::AtomicPrimary, + // This element always has to remain in the same column so better + // reparse the whole parent. + Self::Raw(_) => Postcondition::Unsafe, + // These are all replaceable by other tokens. 
Self::Parbreak | Self::Linebreak @@ -507,7 +548,6 @@ impl NodeKind { | Self::Heading | Self::Enum | Self::List - | Self::Raw(_) | Self::Math(_) => Postcondition::Safe, } } @@ -517,6 +557,7 @@ impl NodeKind { match self { Self::Heading | Self::Enum | Self::List => Precondition::AtStart, Self::TextInLine(_) => Precondition::NotAtStart, + Self::Error(_, _) => Precondition::NotAtEnd, _ => Precondition::None, } } @@ -557,12 +598,12 @@ mod tests { test("a d e", 1 .. 3, " b c d", 0 .. 8); test("a #f() e", 1 .. 6, " b c d", 0 .. 8); test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); - test("= A heading", 3 .. 3, "n evocative", 2 .. 15); + test("= A heading", 3 .. 3, "n evocative", 2 .. 22); test("your thing", 5 .. 5, "a", 4 .. 11); test("a your thing a", 6 .. 7, "a", 2 .. 12); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); test("#call() abc", 7 .. 7, "[]", 0 .. 10); - test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35); + test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 3 .. 34); test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 0 .. 32); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); @@ -571,7 +612,7 @@ mod tests { test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32); test("hello {x}", 6 .. 9, "#f()", 5 .. 10); test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12); - test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); + test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37); test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15); test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); @@ -596,7 +637,7 @@ mod tests { test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 
26); test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14); - test("a b c", 1 .. 1, "{[}", 0 .. 5); + test("a b c", 1 .. 1, "{[}", 0 .. 8); // Test unclosed things. test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6); @@ -610,6 +651,6 @@ mod tests { // Test raw tokens. test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); - test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); + test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18); } } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 2c5afb6b3..f48267300 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -16,6 +16,7 @@ use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; +use crate::util::EcoString; /// Parse a source file. pub fn parse(src: &str) -> Rc { @@ -28,23 +29,27 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic(src: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Code); primary(&mut p, true).ok()?; p.eject_partial() } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic_markup(src: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); markup_expr(&mut p); p.eject_partial() } /// Parse some markup. Returns `Some` if all of the input was consumed. 
-pub fn parse_markup(src: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); - markup(&mut p); + if column == 0 { + markup(&mut p); + } else { + markup_indented(&mut p, column); + } p.eject() } @@ -53,8 +58,10 @@ pub fn parse_markup(src: &str, _: bool) -> Option<(Vec, bool)> { pub fn parse_markup_elements( src: &str, mut at_start: bool, + column: usize, ) -> Option<(Vec, bool)> { let mut p = Parser::new(src, TokenMode::Markup); + p.offset(column); while !p.eof() { markup_node(&mut p, &mut at_start); } @@ -62,7 +69,7 @@ pub fn parse_markup_elements( } /// Parse a template literal. Returns `Some` if all of the input was consumed. -pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); if !p.at(&NodeKind::LeftBracket) { return None; @@ -73,7 +80,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { } /// Parse a code block. Returns `Some` if all of the input was consumed. -pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); if !p.at(&NodeKind::LeftBrace) { return None; @@ -84,7 +91,7 @@ pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { } /// Parse a comment. Returns `Some` if all of the input was consumed. -pub fn parse_comment(source: &str, _: bool) -> Option<(Vec, bool)> { +pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); comment(&mut p).ok()?; p.eject() @@ -92,7 +99,7 @@ pub fn parse_comment(source: &str, _: bool) -> Option<(Vec, bool)> { /// Parse markup. 
fn markup(p: &mut Parser) { - markup_while(p, true, &mut |_| true) + markup_while(p, true, 0, &mut |_| true) } /// Parse markup that stays right of the given column. @@ -103,8 +110,8 @@ fn markup_indented(p: &mut Parser, column: usize) { _ => false, }); - markup_while(p, false, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, + markup_while(p, false, column, &mut |p| match p.peek() { + Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column, _ => true, }) } @@ -113,11 +120,11 @@ fn markup_indented(p: &mut Parser, column: usize) { /// /// If `at_start` is true, things like headings that may only appear at the /// beginning of a line or template are allowed. -fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) +fn markup_while(p: &mut Parser, mut at_start: bool, column: usize, f: &mut F) where F: FnMut(&mut Parser) -> bool, { - p.perform(NodeKind::Markup, |p| { + p.perform(NodeKind::Markup(column), |p| { while !p.eof() && f(p) { markup_node(p, &mut at_start); } @@ -205,20 +212,32 @@ fn heading(p: &mut Parser) { /// Parse a single list item. fn list_node(p: &mut Parser) { - p.perform(NodeKind::List, |p| { - p.eat_assert(&NodeKind::Minus); + let marker = p.marker(); + let src: EcoString = p.peek_src().into(); + p.eat_assert(&NodeKind::Minus); + + if p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::List); + } else { + marker.convert(p, NodeKind::TextInLine(src)); + } } /// Parse a single enum item. 
fn enum_node(p: &mut Parser) { - p.perform(NodeKind::Enum, |p| { - p.eat(); + let marker = p.marker(); + let src: EcoString = p.peek_src().into(); + p.eat(); + + if p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::Enum); + } else { + marker.convert(p, NodeKind::TextInLine(src)); + } } /// Parse an expression within markup mode. diff --git a/src/parse/parser.rs b/src/parse/parser.rs index ade9b5df5..b31f69d3b 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -21,8 +21,12 @@ pub struct Parser<'s> { groups: Vec, /// The children of the currently built node. children: Vec, - /// Whether the last group was terminated. - last_terminated: bool, + /// Is `Some` if there is an unterminated group at the last position where + /// groups were terminated. + last_unterminated: Option, + /// Offset the indentation. This can be used if the parser is processing a + /// subslice of the source and there was leading indent. + column_offset: usize, } impl<'s> Parser<'s> { @@ -38,7 +42,8 @@ impl<'s> Parser<'s> { current_start: 0, groups: vec![], children: vec![], - last_terminated: true, + last_unterminated: None, + column_offset: 0, } } @@ -102,6 +107,11 @@ impl<'s> Parser<'s> { .then(|| (self.children, self.tokens.was_terminated())) } + /// Set an indentation offset. + pub fn offset(&mut self, columns: usize) { + self.column_offset = columns; + } + /// Whether the end of the source string or group is reached. pub fn eof(&self) -> bool { self.eof @@ -206,6 +216,12 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { + self.tokens.scanner().column(index) + self.column_offset + } + + /// Determine the column index for the given byte index while ignoring the + /// offset. 
+ pub fn clean_column(&self, index: usize) -> usize { self.tokens.scanner().column(index) } @@ -244,7 +260,11 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - self.last_terminated = true; + if let Some(n) = self.last_unterminated { + if n != self.prev_end() { + self.last_unterminated = None; + } + } let mut rescan = self.tokens.mode() != group_mode; @@ -262,8 +282,14 @@ impl<'s> Parser<'s> { self.eat(); rescan = false; } else if required { + // FIXME The error has to be inserted before any space rolls + // around because the rescan will set the cursor back in front + // of the space and reconsume it. Suppressing the rescan is not + // an option since additional rescans (e.g. for statements) can + // be triggered directly afterwards, without processing any + // other token. self.push_error(format_eco!("expected {}", end)); - self.last_terminated = false; + self.last_unterminated = Some(self.prev_end()); } } @@ -283,13 +309,21 @@ impl<'s> Parser<'s> { /// Check if the group processing was successfully terminated. pub fn group_success(&self) -> bool { - self.last_terminated && self.groups.is_empty() + self.last_unterminated.is_none() && self.groups.is_empty() } /// Low-level bump that consumes exactly one token without special trivia /// handling. fn bump(&mut self) { let kind = self.current.take().unwrap(); + if match kind { + NodeKind::Space(n) if n > 0 => true, + NodeKind::Parbreak => true, + _ => false, + } { + self.column_offset = 0; + } + let len = self.tokens.index() - self.current_start; self.children.push(GreenData::new(kind, len).into()); self.current_start = self.tokens.index(); @@ -346,6 +380,13 @@ impl Parser<'_> { /// Push an error into the children list. pub fn push_error(&mut self, msg: impl Into) { let error = NodeKind::Error(ErrorPos::Full, msg.into()); + for i in (0 .. 
self.children.len()).rev() { + if Self::is_trivia_ext(self.children[i].kind(), false) { + self.children.remove(i); + } else { + break; + } + } self.children.push(GreenData::new(error, 0).into()); } @@ -445,6 +486,7 @@ impl Marker { } /// A logical group of tokens, e.g. `[...]`. +#[derive(Debug)] struct GroupEntry { /// The kind of group this is. This decides which tokens will end the group. /// For example, a [`Group::Paren`] will be ended by diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 836e8cf17..3a0ad1ade 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -293,10 +293,8 @@ impl<'s> Tokens<'s> { } else { NodeKind::EnDash } - } else if self.s.check_or(true, char::is_whitespace) { - NodeKind::Minus } else { - NodeKind::Text('-'.into()) + NodeKind::Minus } } @@ -312,11 +310,7 @@ impl<'s> Tokens<'s> { None }; - if self.s.check_or(true, char::is_whitespace) { - NodeKind::EnumNumbering(number) - } else { - NodeKind::Text(self.s.eaten_from(start).into()) - } + NodeKind::EnumNumbering(number) } fn raw(&mut self) -> NodeKind { @@ -742,12 +736,12 @@ mod tests { fn test_tokenize_text() { // Test basic text. t!(Markup[" /"]: "hello" => Text("hello")); - t!(Markup[" /"]: "hello-world" => Text("hello"), Text("-"), Text("world")); + t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world")); // Test code symbols in text. t!(Markup[" /"]: "a():\"b" => Text("a():\"b")); t!(Markup[" /"]: ";:,|/+" => Text(";:,|"), Text("/+")); - t!(Markup[" /"]: "=-a" => Text("="), Text("-"), Text("a")); + t!(Markup[" /"]: "=-a" => Text("="), Minus, Text("a")); t!(Markup[" "]: "#123" => Text("#"), Text("123")); // Test text ends. @@ -804,7 +798,7 @@ mod tests { t!(Markup["a1/"]: "- " => Minus, Space(0)); t!(Markup[" "]: "." => EnumNumbering(None)); t!(Markup[" "]: "1." => EnumNumbering(Some(1))); - t!(Markup[" "]: "1.a" => Text("1."), Text("a")); + t!(Markup[" "]: "1.a" => EnumNumbering(Some(1)), Text("a")); t!(Markup[" /"]: "a1." 
=> Text("a1.")); } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index ed74dfe51..bea4ef000 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -53,7 +53,7 @@ macro_rules! node { node! { /// The syntactical root capable of representing a full parsed document. - Markup + Markup: NodeKind::Markup(_) } impl Markup { diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 21af060ff..9f7365a81 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -154,7 +154,7 @@ impl Category { NodeKind::Str(_) => Some(Category::String), NodeKind::Error(_, _) => Some(Category::Invalid), NodeKind::Unknown(_) => Some(Category::Invalid), - NodeKind::Markup => None, + NodeKind::Markup(_) => None, NodeKind::Space(_) => None, NodeKind::Parbreak => None, NodeKind::Text(_) => None, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index b72e58431..388d0bb0c 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -64,6 +64,14 @@ impl Green { } } + /// Whether the node is a leaf node in the green tree. + pub fn is_leaf(&self) -> bool { + match self { + Green::Node(n) => n.children().is_empty(), + Green::Token(_) => true, + } + } + /// Change the type of the node. pub fn convert(&mut self, kind: NodeKind) { match self { @@ -361,6 +369,11 @@ impl<'a> RedRef<'a> { Span::new(self.id, self.offset, self.offset + self.green.len()) } + /// Whether the node is a leaf node. + pub fn is_leaf(self) -> bool { + self.green.is_leaf() + } + /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -385,6 +398,14 @@ impl<'a> RedRef<'a> { } } + /// Perform a depth-first search starting at this node. + pub fn all_children(&self) -> Vec { + let mut res = vec![self.clone()]; + res.extend(self.children().flat_map(|child| child.all_children().into_iter())); + + res + } + /// Convert the node to a typed AST node. 
pub fn cast(self) -> Option where @@ -562,8 +583,8 @@ pub enum NodeKind { Include, /// The `from` keyword. From, - /// Template markup. - Markup, + /// Template markup of which all lines must start in some column. + Markup(usize), /// One or more whitespace characters. Space(usize), /// A forced line break: `\`. @@ -738,7 +759,7 @@ impl NodeKind { /// Whether this token appears in Markup. pub fn mode(&self) -> Option { match self { - Self::Markup + Self::Markup(_) | Self::Linebreak | Self::Parbreak | Self::Text(_) @@ -823,7 +844,7 @@ impl NodeKind { Self::Import => "keyword `import`", Self::Include => "keyword `include`", Self::From => "keyword `from`", - Self::Markup => "markup", + Self::Markup(_) => "markup", Self::Space(_) => "space", Self::Linebreak => "forced linebreak", Self::Parbreak => "paragraph break", diff --git a/tests/typ/code/block.typ b/tests/typ/code/block.typ index 45ee92045..5939ba9c5 100644 --- a/tests/typ/code/block.typ +++ b/tests/typ/code/block.typ @@ -129,7 +129,7 @@ } --- -// Error: 2:1 expected closing brace +// Error: 2 expected closing brace { --- diff --git a/tests/typ/code/let.typ b/tests/typ/code/let.typ index 7fd6e0da7..a95d651aa 100644 --- a/tests/typ/code/let.typ +++ b/tests/typ/code/let.typ @@ -57,7 +57,7 @@ Three // Terminated by semicolon even though we are in a paren group. 
// Error: 18 expected expression -// Error: 19 expected closing paren +// Error: 18 expected closing paren #let v5 = (1, 2 + ; Five --- diff --git a/tests/typeset.rs b/tests/typeset.rs index 164ccc913..f23de5cd9 100644 --- a/tests/typeset.rs +++ b/tests/typeset.rs @@ -19,8 +19,8 @@ use typst::image::{Image, RasterImage, Svg}; use typst::library::{PageNode, TextNode}; use typst::loading::FsLoader; use typst::parse::Scanner; -use typst::source::SourceFile; -use typst::syntax::Span; +use typst::source::{SourceFile, SourceId}; +use typst::syntax::{RedNode, Span}; use typst::Context; #[cfg(feature = "layout-cache")] @@ -186,6 +186,7 @@ fn test( let mut line = 0; let mut compare_ref = true; let mut compare_ever = false; + let mut rng = LinearShift::new(); let parts: Vec<_> = src.split("\n---").collect(); for (i, &part) in parts.iter().enumerate() { @@ -202,8 +203,16 @@ fn test( } } } else { - let (part_ok, compare_here, part_frames) = - test_part(ctx, src_path, part.into(), i, compare_ref, line, debug); + let (part_ok, compare_here, part_frames) = test_part( + ctx, + src_path, + part.into(), + i, + compare_ref, + line, + debug, + &mut rng, + ); ok &= part_ok; compare_ever |= compare_here; frames.extend(part_frames); @@ -252,14 +261,16 @@ fn test_part( compare_ref: bool, line: usize, debug: bool, + rng: &mut LinearShift, ) -> (bool, bool, Vec>) { + let mut ok = test_reparse(&src, i, rng); + let id = ctx.sources.provide(src_path, src); let source = ctx.sources.get(id); let (local_compare_ref, mut ref_errors) = parse_metadata(&source); let compare_ref = local_compare_ref.unwrap_or(compare_ref); - let mut ok = true; let (frames, mut errors) = match ctx.evaluate(id) { Ok(module) => { let tree = module.into_root(); @@ -366,6 +377,108 @@ fn test_incremental( ok } +/// Pseudorandomly edit the source file and test whether a reparse produces the +/// same result as a clean parse. 
+/// +/// The method will first replace a random range with a string from the +/// injection list once per 400 source characters (rounded up) and then insert +/// one more randomly chosen string at a random leaf node boundary. +fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { + let supplements = [ + "[", + ")", + "#rect()", + "a word", + ", a: 1", + "10.0", + ":", + "if i == 0 {true}", + "for", + "* hello *", + "//", + "/*", + "\\u{12e4}", + "```typst", + " ", + "trees", + "\\", + "$ a $", + "2.", + "-", + "5", + ]; + + let mut ok = true; + + let apply = |replace: std::ops::Range, with| { + let mut incr_source = SourceFile::detached(src); + + incr_source.edit(replace.clone(), with); + let edited_src = incr_source.src(); + + let ref_source = SourceFile::detached(edited_src); + let incr_root = incr_source.root(); + let ref_root = ref_source.root(); + if incr_root != ref_root { + println!( + " Subtest {} reparse differs from clean parse when inserting '{}' at {}-{} ❌", + i, with, replace.start, replace.end, + ); + println!( + "\n Expected reference tree:\n{:#?}\n\n Found incremental tree:\n{:#?}", + ref_root, incr_root + ); + println!("Full source ({}):\n\"{}\"", edited_src.len(), edited_src); + false + } else { + true + } + }; + + let mut in_range = |range: std::ops::Range| { + let full = rng.next().unwrap() as f64 / u64::MAX as f64; + (range.start as f64 + full * (range.end as f64 - range.start as f64)).floor() + as usize + }; + + let insertions = (src.len() as f64 / 400.0).ceil() as usize; + + for _ in 0 .. insertions { + let supplement = supplements[in_range(0 .. supplements.len())]; + let start = in_range(0 .. src.len()); + let end = in_range(start .. src.len()); + + if !src.is_char_boundary(start) || !src.is_char_boundary(end) { + continue; + } + + if !apply(start .. 
end, supplement) { + println!("original tree: {:#?}", SourceFile::detached(src).root()); + + ok = false; + } + } + + let red = RedNode::from_root( + SourceFile::detached(src).root().clone(), + SourceId::from_raw(0), + ); + + let leafs: Vec<_> = red + .as_ref() + .all_children() + .into_iter() + .filter(|red| red.is_leaf()) + .collect(); + + let leaf_start = leafs[in_range(0 .. leafs.len())].span().start; + let supplement = supplements[in_range(0 .. supplements.len())]; + + ok &= apply(leaf_start .. leaf_start, supplement); + + ok +} + fn parse_metadata(source: &SourceFile) -> (Option, Vec) { let mut compare_ref = None; let mut errors = vec![]; @@ -823,3 +936,33 @@ where FileDescriptor::redirect_stdio(&stdout, Stdout).unwrap(); result } + +/// This is a linear-feedback shift register using XOR as its shifting +/// function. It can be used as a PRNG. +struct LinearShift(u64); + +impl LinearShift { + /// Initialize the shift register with a pre-set seed. + pub fn new() -> Self { + Self(0xACE5) + } +} + +impl Iterator for LinearShift { + type Item = u64; + + /// Apply the shift. + fn next(&mut self) -> Option { + self.0 ^= self.0 >> 3; + self.0 ^= self.0 << 14; + self.0 ^= self.0 >> 28; + self.0 ^= self.0 << 36; + self.0 ^= self.0 >> 52; + Some(self.0) + } + + /// The iterator is endless but will repeat eventually. + fn size_hint(&self) -> (usize, Option) { + (usize::MAX, None) + } +}