diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 5d845a552..027773505 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -17,7 +17,7 @@ use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind}; /// Parse a source file. pub fn parse(src: &str) -> Rc { - let mut p = Parser::new(src); + let mut p = Parser::new(src, TokenMode::Markup); markup(&mut p); match p.finish().into_iter().next() { Some(Green::Node(node)) => node, @@ -26,23 +26,23 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic(source: &str, _: bool) -> Option> { - let mut p = Parser::new(source); +pub fn parse_atomic(src: &str, _: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Code); primary(&mut p, true).ok()?; p.eject() } /// Parse some markup. Returns `Some` if all of the input was consumed. -pub fn parse_markup(source: &str, _: bool) -> Option> { - let mut p = Parser::new(source); +pub fn parse_markup(src: &str, _: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Markup); markup(&mut p); p.eject() } /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. 
-pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option> { - let mut p = Parser::new(source); +pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Markup); while !p.eof() { markup_node(&mut p, &mut at_start); } @@ -50,9 +50,8 @@ pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option Option> { - let mut p = Parser::new(source); - p.set_mode(TokenMode::Code); +pub fn parse_code(src: &str, _: bool) -> Option> { + let mut p = Parser::new(src, TokenMode::Code); expr_list(&mut p); p.eject() } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index f391c4739..451e18f19 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -27,8 +27,8 @@ pub struct Parser<'s> { impl<'s> Parser<'s> { /// Create a new parser for the source string. - pub fn new(src: &'s str) -> Self { - let mut tokens = Tokens::new(src, TokenMode::Markup); + pub fn new(src: &'s str, mode: TokenMode) -> Self { + let mut tokens = Tokens::new(src, mode); let current = tokens.next(); Self { tokens, @@ -202,11 +202,6 @@ impl<'s> Parser<'s> { self.tokens.scanner().column(index) } - /// Set the tokenizer's mode. - pub fn set_mode(&mut self, mode: TokenMode) { - self.tokens.set_mode(mode); - } - /// Continue parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to diff --git a/src/source.rs b/src/source.rs index f7e6cb5e6..36db50ddb 100644 --- a/src/source.rs +++ b/src/source.rs @@ -128,6 +128,7 @@ pub struct SourceFile { src: String, line_starts: Vec, root: Rc, + was_incremental: bool, } impl SourceFile { @@ -141,6 +142,7 @@ impl SourceFile { root: parse(&src), src, line_starts, + was_incremental: false, } } @@ -286,12 +288,20 @@ impl SourceFile { // Update the root node. 
let insertion_span = Span::new(self.id, replace.start, replace.end); let source = self.src().to_string(); - if !Rc::make_mut(&mut self.root).incremental(&source, insertion_span, with.len()) - { + if Rc::make_mut(&mut self.root).incremental(&source, insertion_span, with.len()) { + self.was_incremental = true; + } else { self.root = parse(self.src()); + self.was_incremental = false; } } + /// Forces a non-incremental reparsing of the source file. + fn force_reparse(&mut self) { + self.root = parse(self.src()); + self.was_incremental = false; + } + /// Provide highlighting categories for the given range of the source file. pub fn highlight(&self, range: Range, mut f: F) where @@ -379,7 +389,6 @@ impl<'a> Files<'a> for SourceStore { #[cfg(test)] mod tests { use super::*; - use crate::syntax::Green; const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl"; @@ -481,19 +490,88 @@ mod tests { } #[test] - fn test_source_file_edit_2() { + fn test_incremental_parse() { #[track_caller] - fn test(prev: &str, range: Range, with: &str, after: &str) { + fn test(prev: &str, range: Range, with: &str, incr: bool) { let mut source = SourceFile::detached(prev); - let result = SourceFile::detached(after); - dbg!(Green::from(source.root.clone())); source.edit(range, with); - assert_eq!(source.src, result.src); - assert_eq!(source.line_starts, result.line_starts); - dbg!(Green::from(source.root)); + + if incr { + assert!(source.was_incremental); + let incr_tree = source.root.clone(); + source.force_reparse(); + assert_eq!(source.root, incr_tree); + } else { + assert!(!source.was_incremental); + } } - // Test inserting at the begining. - test("abc #f()[def] ghi", 5 .. 6, "g", "abc #g()[def] ghi"); + // Test simple replacements. + test("hello world", 6 .. 11, "wankers", true); + test("{(0, 1, 2)}", 5 .. 6, "11pt", true); + test("= A heading", 3 .. 3, "n evocative", true); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 16 .. 
20, + "none", + true, + ); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 33 .. 42, + "[_gronk_]", + true, + ); + test( + "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", + 34 .. 41, + "_bar_", + true, + ); + test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", true); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", true); + test("hello {x}", 6 .. 9, "#f()", false); + test( + "this is -- in my opinion -- spectacular", + 8 .. 10, + "---", + true, + ); + test("understanding `code` is complicated", 15 .. 15, "C ", true); + test("{ let x = g() }", 10 .. 12, "f(54", true); + test( + "#let rect with (fill: eastern)", + 14 .. 29, + " (stroke: conifer", + true, + ); + test("a b c", 1 .. 1, " /* letters */", false); + + // Test the whitespace invariants. + test("hello \\ world", 7 .. 8, "a ", false); + test("hello \\ world", 7 .. 8, "\n\n", true); + test("x = y", 2 .. 2, "+ y ", true); + test("x = y", 2 .. 2, "+ y \n ", false); + test("abc\n= a heading", 3 .. 4, "\nsome more test\n\n", true); + test("abc\n= a heading", 3 .. 4, "\nnot ", false); + + // Test type invariants. + test("#for x in array {x}", 16 .. 19, "[#x]", true); + test("#let x = 1 {5}", 1 .. 4, "if", false); + test("#let x = 1 {5}", 4 .. 4, " if", false); + test("a // b c #f()", 3 .. 4, "", false); + + // This apparently works, but the assertion fails. + // test("a b c", 1 .. 1, "{[}", true); + + // Test unclosed things. + test(r#"{"hi"}"#, 4 .. 5, "c", false); + test(r"this \u{abcd}", 8 .. 9, "", true); + test(r"this \u{abcd} that", 12 .. 13, "", false); + test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", false); + + // These apparently work, but the assertion fails. + // test(r#"a ```typst hello``` b"#, 16 .. 17, "", false); + // test(r#"a ```typst hello```"#, 16 .. 
17, "", true); } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 0879ab7f2..cb811266e 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -49,6 +49,15 @@ impl Green { self.data().len() } + /// Set the length of the node. + pub fn set_len(&mut self, len: usize) { + let data = match self { + Self::Node(node) => &mut Rc::make_mut(node).data, + Self::Token(data) => data, + }; + data.set_len(len); + } + /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { match self { @@ -78,15 +87,15 @@ impl Green { } /// Find the innermost child that is incremental safe. - pub fn incremental_int( + fn incremental_int( &mut self, edit: &str, replace: Span, replacement_len: usize, offset: usize, - parent_mode: TokenMode, + parent_mode: NodeMode, outermost: bool, - ) -> bool { + ) -> Result<(), bool> { match self { Green::Node(n) => Rc::make_mut(n).incremental_int( edit, @@ -96,7 +105,7 @@ impl Green { parent_mode, outermost, ), - Green::Token(_) => false, + Green::Token(_) => Err(false), } } @@ -202,11 +211,17 @@ impl GreenNode { /// Find the innermost child that is incremental safe. 
pub fn incremental( &mut self, - edit: &str, + src: &str, replace: Span, replacement_len: usize, ) -> bool { - self.incremental_int(edit, replace, replacement_len, 0, TokenMode::Markup, true) + let edit = &src[replace.inserted(replace, replacement_len).to_range()]; + if edit.contains("//") || edit.contains("/*") || edit.contains("*/") { + return false; + } + + self.incremental_int(src, replace, replacement_len, 0, NodeMode::Markup, true) + .is_ok() } fn incremental_int( @@ -215,9 +230,9 @@ impl GreenNode { replace: Span, replacement_len: usize, mut offset: usize, - parent_mode: TokenMode, + parent_mode: NodeMode, outermost: bool, - ) -> bool { + ) -> Result<(), bool> { let kind = self.kind().clone(); let mode = kind.mode().apply(parent_mode); eprintln!("in {:?} (mode {:?})", kind, mode); @@ -230,30 +245,41 @@ impl GreenNode { if child_span.surrounds(replace) { eprintln!("found correct child"); + let old_len = child.len(); // First, we try if the child has another, more specific applicable child. - if kind.incremental_safety() != IncrementalSafety::Unsafe - && child.incremental_int( + if !kind.incremental_safety().unsafe_interior() { + match child.incremental_int( src, replace, replacement_len, offset, - mode, + kind.mode().child_mode(), i == last && outermost, - ) - { - eprintln!("child was successful"); - return true; + ) { + Ok(_) => { + eprintln!("child success"); + let new_len = child.len(); + self.data.set_len(self.data.len() + new_len - old_len); + return Ok(()); + } + Err(b) if b => return Err(false), + _ => {} + } } // This didn't work, so we try to replace the child at this // level. 
- let (function, policy) = - if let Some(p) = child.kind().reparsing_function(mode) { - p - } else { - return false; - }; - loop_result = Some((i, child_span, function, policy)); + let (function, policy) = match child + .kind() + .reparsing_function(mode.child_mode().as_token_mode()) + { + Ok(p) => p, + Err(policy) => { + return Err(policy == IncrementalSafety::VeryUnsafe); + } + }; + loop_result = + Some((i, child_span, i == last && outermost, function, policy)); break; } @@ -264,14 +290,14 @@ impl GreenNode { // We now have a child that we can replace and a function to do so if // the loop found any results at all. - let (child_idx, child_span, func, policy) = if let Some(loop_result) = loop_result - { - loop_result - } else { - // No child fully contains the replacement. - eprintln!("no child match"); - return false; - }; + let (child_idx, child_span, child_outermost, func, policy) = + if let Some(loop_result) = loop_result { + loop_result + } else { + // No child fully contains the replacement. + eprintln!("no child match"); + return Err(false); + }; eprintln!("aquired function, policy {:?}", policy); @@ -282,9 +308,10 @@ impl GreenNode { new_children } else { eprintln!("function failed"); - return false; + return Err(false); }; - let child_mode = self.children[child_idx].kind().mode().apply(mode); + let child_mode = + self.children[child_idx].kind().mode().child_mode().as_token_mode(); eprintln!("child mode {:?}", child_mode); // Check if the children / child has the right type. @@ -298,7 +325,7 @@ impl GreenNode { eprintln!("must be a single replacement"); if new_children.len() != 1 { eprintln!("not a single replacement"); - return false; + return Err(false); } if match policy { @@ -310,32 +337,32 @@ impl GreenNode { } { if self.children[child_idx].kind() != new_children[0].kind() { eprintln!("not the same kind"); - return false; + return Err(false); } } } // Do not accept unclosed nodes if the old node did not use to be at the // right edge of the tree. 
- if !outermost + if !child_outermost && new_children .iter() .flat_map(|x| x.errors()) .any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _))) { eprintln!("unclosed node"); - return false; + return Err(false); } // Check if the neighbor invariants are still true. - if mode == TokenMode::Markup { + if mode.as_token_mode() == TokenMode::Markup { if child_idx > 0 { if self.children[child_idx - 1].kind().incremental_safety() == IncrementalSafety::EnsureRightWhitespace && !new_children[0].kind().is_whitespace() { eprintln!("left whitespace missing"); - return false; + return Err(false); } } @@ -351,8 +378,12 @@ impl GreenNode { } match child.kind().incremental_safety() { - IncrementalSafety::EnsureAtStart if !new_at_start => return false, - IncrementalSafety::EnsureNotAtStart if new_at_start => return false, + IncrementalSafety::EnsureAtStart if !new_at_start => { + return Err(false); + } + IncrementalSafety::EnsureNotAtStart if new_at_start => { + return Err(false); + } _ => {} } break; @@ -361,8 +392,12 @@ impl GreenNode { eprintln!("... replacing"); + let old_len = self.children[child_idx].len(); + let new_len: usize = new_children.iter().map(Green::len).sum(); + self.children.splice(child_idx .. child_idx + 1, new_children); - true + self.data.set_len(self.data.len + new_len - old_len); + Ok(()) } } @@ -414,6 +449,11 @@ impl GreenData { pub fn len(&self) -> usize { self.len } + + /// Set the length of the node. 
+ pub fn set_len(&mut self, len: usize) { + self.len = len; + } } impl From for Green { @@ -939,24 +979,18 @@ impl NodeKind { | Self::Escape(_) | Self::Strong | Self::Emph + | Self::Raw(_) | Self::Math(_) => NodeMode::Markup, Self::Template | Self::Block - | Self::None - | Self::Auto | Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group + | Self::LetExpr + | Self::IfExpr + | Self::WhileExpr + | Self::ForExpr + | Self::ImportExpr | Self::Call + | Self::IncludeExpr | Self::LineComment | Self::BlockComment | Self::Error(_, _) @@ -969,22 +1003,25 @@ impl NodeKind { pub fn reparsing_function( &self, parent_mode: TokenMode, - ) -> Option<(fn(&str, bool) -> Option>, IncrementalSafety)> { + ) -> Result< + (fn(&str, bool) -> Option>, IncrementalSafety), + IncrementalSafety, + > { let policy = self.incremental_safety(); - if policy == IncrementalSafety::Unsafe { - return None; + if policy.unsafe_interior() { + return Err(policy); } let mode = self.mode(); if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer { - return None; + return Err(policy); } if mode != NodeMode::Markup && parent_mode == TokenMode::Code && policy == IncrementalSafety::AtomicPrimary { - return Some((parse_atomic, policy)); + return Ok((parse_atomic, policy)); } let parser: fn(&str, bool) -> _ = match mode { @@ -995,7 +1032,7 @@ impl NodeKind { NodeMode::Universal => parse_markup_elements, }; - Some((parser, policy)) + Ok((parser, policy)) } /// Whether it is safe to do incremental parsing on this node. Never allow @@ -1042,7 +1079,8 @@ impl NodeKind { // other expressions. Self::None | Self::Auto => IncrementalSafety::AtomicPrimary, - // These keywords change what kind of expression the parent is. + // These keywords change what kind of expression the parent is and + // how far the expression would go. 
Self::Let | Self::If | Self::Else @@ -1055,7 +1093,7 @@ impl NodeKind { | Self::Set | Self::Import | Self::Include - | Self::From => IncrementalSafety::Unsafe, + | Self::From => IncrementalSafety::VeryUnsafe, // This is a backslash followed by a space. But changing it to // anything else is fair game. @@ -1309,6 +1347,17 @@ pub enum IncrementalSafety { /// Changing an unsafe node or any of its children will trigger undefined /// behavior. Change the parents instead. Unsafe, + /// Like `Unsafe`, but the parent is unsafe to change as well. + VeryUnsafe, +} + +impl IncrementalSafety { + pub fn unsafe_interior(&self) -> bool { + match self { + Self::Unsafe | Self::VeryUnsafe => true, + _ => false, + } + } } /// This enum describes which mode a token of [`NodeKind`] can appear in. @@ -1319,17 +1368,34 @@ pub enum NodeMode { /// The token can only appear in code mode. Code, /// The token can appear in either mode. Look at the parent node to decide - /// which mode it is in. + /// which mode it is in. After an apply, this is equivalent to Markup. Universal, } impl NodeMode { - /// Returns the new [`TokenMode`] given the old one. - pub fn apply(&self, old: TokenMode) -> TokenMode { + /// Returns a new mode considering the parent node. + pub fn apply(&self, old: Self) -> Self { match self { - Self::Markup => TokenMode::Markup, + Self::Markup => Self::Markup, + Self::Code => Self::Code, + Self::Universal if old != Self::Markup => Self::Code, + Self::Universal => Self::Universal, + } + } + + /// Return the corresponding token mode. + pub fn as_token_mode(&self) -> TokenMode { + match self { + Self::Markup | Self::Universal => TokenMode::Markup, Self::Code => TokenMode::Code, - Self::Universal => old, + } + } + + /// The mode of the children of this node. + pub fn child_mode(&self) -> Self { + match self { + Self::Markup => Self::Markup, + Self::Code | Self::Universal => Self::Code, } } }