From 8f37189d6fb2e3c75e4e6a168e1ae17c568fdc4c Mon Sep 17 00:00:00 2001
From: Laurenz <laurmaedje@gmail.com>
Date: Mon, 31 Jan 2022 13:26:40 +0100
Subject: [PATCH] Fix incremental parsing bugs

---
 src/layout/incremental.rs |  4 +--
 src/parse/incremental.rs  | 62 +++++++++++++++++++++++----------------
 src/parse/mod.rs          | 22 ++++++++++++--
 src/parse/parser.rs       | 49 ++++++++++++++++++++-----------
 src/parse/tokens.rs       | 16 +---------
 tests/typeset.rs          |  4 +++
 6 files changed, 95 insertions(+), 62 deletions(-)
diff --git a/src/layout/incremental.rs b/src/layout/incremental.rs
index f737a3ef3..63915b530 100644
--- a/src/layout/incremental.rs
+++ b/src/layout/incremental.rs
@@ -408,7 +408,7 @@ mod tests {
     }
 
     #[test]
-    fn test_incremental_temperature() {
+    fn test_layout_incremental_temperature() {
         let mut cache = LayoutCache::new(EvictionPolicy::None, 20);
         let regions = zero_regions();
         cache.policy = EvictionPolicy::None;
@@ -447,7 +447,7 @@ mod tests {
     }
 
     #[test]
-    fn test_incremental_properties() {
+    fn test_layout_incremental_properties() {
         let mut cache = LayoutCache::new(EvictionPolicy::None, 20);
         cache.policy = EvictionPolicy::None;
         cache.insert(0, FramesEntry::new(empty_frames(), 1));
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 1272c9b99..7ddad3cd1 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -116,7 +116,7 @@ impl Reparser<'_> {
             // This is because in Markup mode, we want to examine all nodes
             // touching a replacement but in code we want to atomically replace.
             if child_span.contains(&self.replace_range.start)
-                || (mode == TokenMode::Markup
+                || (child_mode == TokenMode::Markup
                     && self.replace_range.start == child_span.end)
             {
                 first = Some((i, offset));
@@ -139,12 +139,12 @@ impl Reparser<'_> {
             // neighbor!
             if child_span.contains(&self.replace_range.end)
                 || self.replace_range.end == child_span.end
-                    && (mode != TokenMode::Markup || i + 1 == original_count)
+                    && (child_mode != TokenMode::Markup || i + 1 == original_count)
             {
                 outermost &= i + 1 == original_count;
                 last = Some((i, offset + child.len()));
                 break;
-            } else if mode != TokenMode::Markup
+            } else if child_mode != TokenMode::Markup
                 || !child.kind().succession_rule().safe_in_markup()
             {
                 break;
@@ -404,10 +404,10 @@ impl NodeKind {
         let mode = self.mode().unwrap_or(parent_mode);
         match self.succession_rule() {
             SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None,
-            SuccessionRule::AtomicPrimary if mode == TokenMode::Code => {
-                Some(parse_atomic)
-            }
-            SuccessionRule::AtomicPrimary => Some(parse_atomic_markup),
+            SuccessionRule::AtomicPrimary => match mode {
+                TokenMode::Code => Some(parse_atomic),
+                TokenMode::Markup => Some(parse_atomic_markup),
+            },
             SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self {
                 NodeKind::Markup(_) => Some(parse_markup),
                 NodeKind::Template => Some(parse_template),
@@ -601,28 +601,29 @@ impl SuccessionRule {
 }
 
 #[cfg(test)]
+#[rustfmt::skip]
 mod tests {
     use super::*;
     use crate::parse::parse;
+    use crate::parse::tests::check;
     use crate::source::SourceFile;
 
-    #[test]
-    #[rustfmt::skip]
-    fn test_incremental_parse() {
-        #[track_caller]
-        fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
-            let mut source = SourceFile::detached(prev);
-            let range = source.edit(range, with);
-            assert_eq!(range, goal);
-            assert_eq!(parse(source.src()), *source.root());
-        }
+    #[track_caller]
+    fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
+        let mut source = SourceFile::detached(prev);
+        let range = source.edit(range, with);
+        check(source.src(), source.root(), &parse(source.src()));
+        assert_eq!(range, goal);
+    }
 
-        // Test simple replacements.
+    #[test]
+    fn test_parse_incremental_simple_replacements() {
         test("hello world", 6 .. 11, "walkers", 5 .. 13);
         test("some content", 0..12, "", 0..0);
         test("", 0..0, "do it", 0..5);
         test("a d e", 1 .. 3, " b c d", 0 .. 8);
         test("a #f() e", 1 .. 6, " b c d", 0 .. 8);
+        test("{a}", 1 .. 2, "b", 1 .. 2);
         test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
         test("= A heading", 3 .. 3, "n evocative", 2 .. 22);
         test("your thing", 5 .. 5, "a", 4 .. 11);
@@ -641,8 +642,12 @@ mod tests {
         test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37);
         test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
         test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
+        test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
+        test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
+    }
 
-        // Test the whitespace invariants.
+    #[test]
+    fn test_parse_incremental_whitespace_invariants() {
         test("hello \\ world", 7 .. 8, "a ", 6 .. 14);
         test("hello \\ world", 7 .. 8, " a", 6 .. 14);
         test("x = y", 1 .. 1, " + y", 0 .. 6);
@@ -652,8 +657,10 @@ mod tests {
         test("#let x = (1, 2 + ; Five\r\n\r", 19..22, "2.", 18..22);
         test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
         test("hey  #myfriend", 4 .. 4, "\\", 3 .. 6);
+    }
 
-        // Test type invariants.
+    #[test]
+    fn test_parse_incremental_type_invariants() {
         test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22);
         test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15);
         test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
@@ -664,9 +671,11 @@ mod tests {
         test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
         test("a #while x {\n g(x) \n}  b", 11 .. 11, "//", 0 .. 26);
         test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
-        test("a b c", 1 .. 1, "{[}", 0 .. 8);
+        test("a b c", 1 .. 1, "{[}", 0 .. 5);
+    }
 
-        // Test unclosed things.
+    #[test]
+    fn test_parse_incremental_wrongly_or_unclosed_things() {
         test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
         test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12);
         test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17);
@@ -675,9 +684,10 @@ mod tests {
         test("a b c", 1 .. 1, " /* letters", 0 .. 16);
         test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35);
         test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
-
-        // Test raw tokens.
-        test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
-        test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
+        test("~~~~", 2 .. 2, "[]", 1 .. 5);
+        test("a[]b", 2 .. 2, "{", 1 .. 4);
+        test("[hello]", 2 .. 3, "]", 0 .. 7);
+        test("{a}", 1 .. 2, "b", 1 .. 2);
+        test("{ a; b; c }", 5 .. 6, "[}]", 0 .. 13);
     }
 }
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 1131fec80..f666f4ad7 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -68,7 +68,7 @@ pub fn parse_atomic(
 ) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
     primary(&mut p, true).ok()?;
-    p.consume_unterminated()
+    p.consume_open_ended()
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
@@ -80,7 +80,7 @@ pub fn parse_atomic_markup(
 ) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
     markup_expr(&mut p);
-    p.consume_unterminated()
+    p.consume_open_ended()
 }
 
 /// Parse a template literal. Returns `Some` if all of the input was consumed.
@@ -919,3 +919,21 @@ fn comment(p: &mut Parser) -> ParseResult {
         _ => Err(ParseError),
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::fmt::Debug;
+
+    #[track_caller]
+    pub fn check<T>(src: &str, found: T, expected: T)
+    where
+        T: Debug + PartialEq,
+    {
+        if found != expected {
+            println!("source:   {src:?}");
+            println!("expected: {expected:#?}");
+            println!("found:    {found:#?}");
+            panic!("test failed");
+        }
+    }
+}
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index db003e726..6771d007b 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -8,6 +8,8 @@ use crate::util::EcoString;
 
 /// A convenient token-based parser.
 pub struct Parser<'s> {
+    /// Offsets the indentation on the first line of the source.
+    column_offset: usize,
     /// An iterator over the source tokens.
     tokens: Tokens<'s>,
     /// Whether we are at the end of the file or of a group.
@@ -22,11 +24,10 @@ pub struct Parser<'s> {
     groups: Vec<GroupEntry>,
     /// The children of the currently built node.
     children: Vec<Green>,
-    /// Is `Some` if there is an unterminated group at the last position where
-    /// groups were terminated.
-    last_unterminated: Option<usize>,
-    /// Offsets the indentation on the first line of the source.
-    column_offset: usize,
+    /// Whether the last group was not correctly terminated.
+    unterminated_group: bool,
+    /// Whether a group terminator was found, that did not close a group.
+    stray_terminator: bool,
 }
 
 impl<'s> Parser<'s> {
@@ -35,6 +36,7 @@ impl<'s> Parser<'s> {
         let mut tokens = Tokens::new(src, mode);
         let current = tokens.next();
         Self {
+            column_offset: 0,
             tokens,
             eof: current.is_none(),
             current,
@@ -42,8 +44,8 @@ impl<'s> Parser<'s> {
             current_start: 0,
             groups: vec![],
             children: vec![],
-            last_unterminated: None,
-            column_offset: 0,
+            unterminated_group: false,
+            stray_terminator: false,
         }
     }
 
@@ -70,7 +72,7 @@ impl<'s> Parser<'s> {
 
     /// End the parsing process and return multiple children and whether the
     /// last token was terminated, even if there remains stuff in the string.
-    pub fn consume_unterminated(self) -> Option<(Vec<Green>, bool)> {
+    pub fn consume_open_ended(self) -> Option<(Vec<Green>, bool)> {
         self.terminated().then(|| (self.children, self.tokens.terminated()))
     }
 
@@ -120,6 +122,13 @@ impl<'s> Parser<'s> {
 
     /// Consume the current token and also trailing trivia.
     pub fn eat(&mut self) {
+        self.stray_terminator |= match self.current {
+            Some(NodeKind::RightParen) => !self.inside(Group::Paren),
+            Some(NodeKind::RightBracket) => !self.inside(Group::Bracket),
+            Some(NodeKind::RightBrace) => !self.inside(Group::Brace),
+            _ => false,
+        };
+
         self.prev_end = self.tokens.index();
         self.bump();
 
@@ -259,13 +268,14 @@ impl<'s> Parser<'s> {
     /// This panics if no group was started.
     #[track_caller]
     pub fn end_group(&mut self) {
+        // If another group closes after a group with the missing terminator,
+        // its scope of influence ends here and no longer taints the rest of the
+        // reparse.
+        self.unterminated_group = false;
+
         let group_mode = self.tokens.mode();
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
-        self.repeek();
-        if self.last_unterminated != Some(self.prev_end()) {
-            self.last_unterminated = None;
-        }
 
         let mut rescan = self.tokens.mode() != group_mode;
 
@@ -280,12 +290,16 @@ impl<'s> Parser<'s> {
             Group::Imports => None,
         } {
             if self.current.as_ref() == Some(&end) {
-                // Bump the delimeter and return. No need to rescan in this case.
+                // Bump the delimeter and return. No need to rescan in this
+                // case. Also, we know that the delimiter is not stray even
+                // though we already removed the group.
+                let s = self.stray_terminator;
                 self.eat();
+                self.stray_terminator = s;
                 rescan = false;
             } else if required {
                 self.push_error(format_eco!("expected {}", end));
-                self.last_unterminated = Some(self.prev_end());
+                self.unterminated_group = true;
             }
         }
 
@@ -299,13 +313,14 @@ impl<'s> Parser<'s> {
             self.prev_end = self.tokens.index();
             self.current_start = self.tokens.index();
             self.current = self.tokens.next();
-            self.repeek();
         }
+
+        self.repeek();
     }
 
     /// Checks if all groups were correctly terminated.
-    pub fn terminated(&self) -> bool {
-        self.groups.is_empty() && self.last_unterminated.is_none()
+    fn terminated(&self) -> bool {
+        self.groups.is_empty() && !self.unterminated_group && !self.stray_terminator
     }
 
     /// Low-level bump that consumes exactly one token without special trivia
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index d741dea1a..27da3b80c 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -568,9 +568,8 @@ fn keyword(ident: &str) -> Option<NodeKind> {
 #[cfg(test)]
 #[allow(non_snake_case)]
 mod tests {
-    use std::fmt::Debug;
-
     use super::*;
+    use crate::parse::tests::check;
 
     use ErrorPos::*;
     use NodeKind::*;
@@ -687,19 +686,6 @@ mod tests {
         }};
     }
 
-    #[track_caller]
-    fn check<T>(src: &str, found: T, expected: T)
-    where
-        T: Debug + PartialEq,
-    {
-        if found != expected {
-            println!("source:   {src:?}");
-            println!("expected: {expected:#?}");
-            println!("found:    {found:#?}");
-            panic!("test failed");
-        }
-    }
-
     #[test]
     fn test_tokenize_brackets() {
         // Test in markup.
diff --git a/tests/typeset.rs b/tests/typeset.rs
index 647499750..e65375bb1 100644
--- a/tests/typeset.rs
+++ b/tests/typeset.rs
@@ -392,6 +392,10 @@ fn print_error(source: &SourceFile, line: usize, error: &Error) {
 fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool {
     let supplements = [
         "[",
+        "]",
+        "{",
+        "}",
+        "(",
         ")",
         "#rect()",
         "a word",