From 8f37189d6fb2e3c75e4e6a168e1ae17c568fdc4c Mon Sep 17 00:00:00 2001 From: Laurenz Date: Mon, 31 Jan 2022 13:26:40 +0100 Subject: [PATCH] Fix incremental parsing bugs --- src/layout/incremental.rs | 4 +-- src/parse/incremental.rs | 62 +++++++++++++++++++++++---------------- src/parse/mod.rs | 22 ++++++++++++-- src/parse/parser.rs | 49 ++++++++++++++++++++----------- src/parse/tokens.rs | 16 +--------- tests/typeset.rs | 4 +++ 6 files changed, 95 insertions(+), 62 deletions(-) diff --git a/src/layout/incremental.rs b/src/layout/incremental.rs index f737a3ef3..63915b530 100644 --- a/src/layout/incremental.rs +++ b/src/layout/incremental.rs @@ -408,7 +408,7 @@ mod tests { } #[test] - fn test_incremental_temperature() { + fn test_layout_incremental_temperature() { let mut cache = LayoutCache::new(EvictionPolicy::None, 20); let regions = zero_regions(); cache.policy = EvictionPolicy::None; @@ -447,7 +447,7 @@ mod tests { } #[test] - fn test_incremental_properties() { + fn test_layout_incremental_properties() { let mut cache = LayoutCache::new(EvictionPolicy::None, 20); cache.policy = EvictionPolicy::None; cache.insert(0, FramesEntry::new(empty_frames(), 1)); diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 1272c9b99..7ddad3cd1 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -116,7 +116,7 @@ impl Reparser<'_> { // This is because in Markup mode, we want to examine all nodes // touching a replacement but in code we want to atomically replace. if child_span.contains(&self.replace_range.start) - || (mode == TokenMode::Markup + || (child_mode == TokenMode::Markup && self.replace_range.start == child_span.end) { first = Some((i, offset)); @@ -139,12 +139,12 @@ impl Reparser<'_> { // neighbor! if child_span.contains(&self.replace_range.end) || self.replace_range.end == child_span.end - && (mode != TokenMode::Markup || i + 1 == original_count) + && (child_mode != TokenMode::Markup || i + 1 == original_count) { outermost &= i + 1 == original_count; last = Some((i, offset + child.len())); break; - } else if mode != TokenMode::Markup + } else if child_mode != TokenMode::Markup || !child.kind().succession_rule().safe_in_markup() { break; @@ -404,10 +404,10 @@ impl NodeKind { let mode = self.mode().unwrap_or(parent_mode); match self.succession_rule() { SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None, - SuccessionRule::AtomicPrimary if mode == TokenMode::Code => { - Some(parse_atomic) - } - SuccessionRule::AtomicPrimary => Some(parse_atomic_markup), + SuccessionRule::AtomicPrimary => match mode { + TokenMode::Code => Some(parse_atomic), + TokenMode::Markup => Some(parse_atomic_markup), + }, SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self { NodeKind::Markup(_) => Some(parse_markup), NodeKind::Template => Some(parse_template), @@ -601,28 +601,29 @@ impl SuccessionRule { } #[cfg(test)] +#[rustfmt::skip] mod tests { use super::*; use crate::parse::parse; + use crate::parse::tests::check; use crate::source::SourceFile; - #[test] - #[rustfmt::skip] - fn test_incremental_parse() { - #[track_caller] - fn test(prev: &str, range: Range, with: &str, goal: Range) { - let mut source = SourceFile::detached(prev); - let range = source.edit(range, with); - assert_eq!(range, goal); - assert_eq!(parse(source.src()), *source.root()); - } + #[track_caller] + fn test(prev: &str, range: Range, with: &str, goal: Range) { + let mut source = SourceFile::detached(prev); + let range = source.edit(range, with); + check(source.src(), source.root(), &parse(source.src())); + assert_eq!(range, goal); + } - // Test simple replacements. + #[test] + fn test_parse_incremental_simple_replacements() { test("hello world", 6 .. 11, "walkers", 5 .. 13); test("some content", 0..12, "", 0..0); test("", 0..0, "do it", 0..5); test("a d e", 1 .. 3, " b c d", 0 .. 8); test("a #f() e", 1 .. 6, " b c d", 0 .. 8); + test("{a}", 1 .. 2, "b", 1 .. 2); test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); test("= A heading", 3 .. 3, "n evocative", 2 .. 22); test("your thing", 5 .. 5, "a", 4 .. 11); @@ -641,8 +642,12 @@ mod tests { test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37); test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15); test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); + test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); + test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18); + } - // Test the whitespace invariants. + #[test] + fn test_parse_incremental_whitespace_invariants() { test("hello \\ world", 7 .. 8, "a ", 6 .. 14); test("hello \\ world", 7 .. 8, " a", 6 .. 14); test("x = y", 1 .. 1, " + y", 0 .. 6); @@ -652,8 +657,10 @@ mod tests { test("#let x = (1, 2 + ; Five\r\n\r", 19..22, "2.", 18..22); test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); + } - // Test type invariants. + #[test] + fn test_parse_incremental_type_invariants() { test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22); test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15); test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16); @@ -664,9 +671,11 @@ mod tests { test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26); test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14); - test("a b c", 1 .. 1, "{[}", 0 .. 8); + test("a b c", 1 .. 1, "{[}", 0 .. 5); + } - // Test unclosed things. + #[test] + fn test_parse_incremental_wrongly_or_unclosed_things() { test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6); test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12); test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17); @@ -675,9 +684,10 @@ mod tests { test("a b c", 1 .. 1, " /* letters", 0 .. 16); test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35); test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); - - // Test raw tokens. - test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); - test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18); + test("~~~~", 2 .. 2, "[]", 1 .. 5); + test("a[]b", 2 .. 2, "{", 1 .. 4); + test("[hello]", 2 .. 3, "]", 0 .. 7); + test("{a}", 1 .. 2, "b", 1 .. 2); + test("{ a; b; c }", 5 .. 6, "[}]", 0 .. 13); } } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 1131fec80..f666f4ad7 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -68,7 +68,7 @@ pub fn parse_atomic( ) -> Option<(Vec, bool)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); primary(&mut p, true).ok()?; - p.consume_unterminated() + p.consume_open_ended() } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. @@ -80,7 +80,7 @@ pub fn parse_atomic_markup( ) -> Option<(Vec, bool)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); markup_expr(&mut p); - p.consume_unterminated() + p.consume_open_ended() } /// Parse a template literal. Returns `Some` if all of the input was consumed. @@ -919,3 +919,21 @@ fn comment(p: &mut Parser) -> ParseResult { _ => Err(ParseError), } } + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + + #[track_caller] + pub fn check(src: &str, found: T, expected: T) + where + T: Debug + PartialEq, + { + if found != expected { + println!("source: {src:?}"); + println!("expected: {expected:#?}"); + println!("found: {found:#?}"); + panic!("test failed"); + } + } +} diff --git a/src/parse/parser.rs b/src/parse/parser.rs index db003e726..6771d007b 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -8,6 +8,8 @@ use crate::util::EcoString; /// A convenient token-based parser. pub struct Parser<'s> { + /// Offsets the indentation on the first line of the source. + column_offset: usize, /// An iterator over the source tokens. tokens: Tokens<'s>, /// Whether we are at the end of the file or of a group. @@ -22,11 +24,10 @@ pub struct Parser<'s> { groups: Vec, /// The children of the currently built node. children: Vec, - /// Is `Some` if there is an unterminated group at the last position where - /// groups were terminated. - last_unterminated: Option, - /// Offsets the indentation on the first line of the source. - column_offset: usize, + /// Whether the last group was not correctly terminated. + unterminated_group: bool, + /// Whether a group terminator was found, that did not close a group. + stray_terminator: bool, } impl<'s> Parser<'s> { @@ -35,6 +36,7 @@ impl<'s> Parser<'s> { let mut tokens = Tokens::new(src, mode); let current = tokens.next(); Self { + column_offset: 0, tokens, eof: current.is_none(), current, @@ -42,8 +44,8 @@ impl<'s> Parser<'s> { current_start: 0, groups: vec![], children: vec![], - last_unterminated: None, - column_offset: 0, + unterminated_group: false, + stray_terminator: false, } } @@ -70,7 +72,7 @@ impl<'s> Parser<'s> { /// End the parsing process and return multiple children and whether the /// last token was terminated, even if there remains stuff in the string. - pub fn consume_unterminated(self) -> Option<(Vec, bool)> { + pub fn consume_open_ended(self) -> Option<(Vec, bool)> { self.terminated().then(|| (self.children, self.tokens.terminated())) } @@ -120,6 +122,13 @@ impl<'s> Parser<'s> { /// Consume the current token and also trailing trivia. pub fn eat(&mut self) { + self.stray_terminator |= match self.current { + Some(NodeKind::RightParen) => !self.inside(Group::Paren), + Some(NodeKind::RightBracket) => !self.inside(Group::Bracket), + Some(NodeKind::RightBrace) => !self.inside(Group::Brace), + _ => false, + }; + self.prev_end = self.tokens.index(); self.bump(); @@ -259,13 +268,14 @@ impl<'s> Parser<'s> { /// This panics if no group was started. #[track_caller] pub fn end_group(&mut self) { + // If another group closes after a group with the missing terminator, + // its scope of influence ends here and no longer taints the rest of the + // reparse. + self.unterminated_group = false; + let group_mode = self.tokens.mode(); let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); - self.repeek(); - if self.last_unterminated != Some(self.prev_end()) { - self.last_unterminated = None; - } let mut rescan = self.tokens.mode() != group_mode; @@ -280,12 +290,16 @@ impl<'s> Parser<'s> { Group::Imports => None, } { if self.current.as_ref() == Some(&end) { - // Bump the delimeter and return. No need to rescan in this case. + // Bump the delimeter and return. No need to rescan in this + // case. Also, we know that the delimiter is not stray even + // though we already removed the group. + let s = self.stray_terminator; self.eat(); + self.stray_terminator = s; rescan = false; } else if required { self.push_error(format_eco!("expected {}", end)); - self.last_unterminated = Some(self.prev_end()); + self.unterminated_group = true; } } @@ -299,13 +313,14 @@ impl<'s> Parser<'s> { self.prev_end = self.tokens.index(); self.current_start = self.tokens.index(); self.current = self.tokens.next(); - self.repeek(); } + + self.repeek(); } /// Checks if all groups were correctly terminated. - pub fn terminated(&self) -> bool { - self.groups.is_empty() && self.last_unterminated.is_none() + fn terminated(&self) -> bool { + self.groups.is_empty() && !self.unterminated_group && !self.stray_terminator } /// Low-level bump that consumes exactly one token without special trivia diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index d741dea1a..27da3b80c 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -568,9 +568,8 @@ fn keyword(ident: &str) -> Option { #[cfg(test)] #[allow(non_snake_case)] mod tests { - use std::fmt::Debug; - use super::*; + use crate::parse::tests::check; use ErrorPos::*; use NodeKind::*; @@ -687,19 +686,6 @@ mod tests { }}; } - #[track_caller] - fn check(src: &str, found: T, expected: T) - where - T: Debug + PartialEq, - { - if found != expected { - println!("source: {src:?}"); - println!("expected: {expected:#?}"); - println!("found: {found:#?}"); - panic!("test failed"); - } - } - #[test] fn test_tokenize_brackets() { // Test in markup. diff --git a/tests/typeset.rs b/tests/typeset.rs index 647499750..e65375bb1 100644 --- a/tests/typeset.rs +++ b/tests/typeset.rs @@ -392,6 +392,10 @@ fn print_error(source: &SourceFile, line: usize, error: &Error) { fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { let supplements = [ "[", + "]", + "{", + "}", + "(", ")", "#rect()", "a word",