Fix incremental parsing bugs

Laurenz 2022-01-31 13:26:40 +01:00
parent b1a82ae22d
commit 8f37189d6f
6 changed files with 95 additions and 62 deletions

View File

@@ -408,7 +408,7 @@ mod tests {
     }
 
     #[test]
-    fn test_incremental_temperature() {
+    fn test_layout_incremental_temperature() {
         let mut cache = LayoutCache::new(EvictionPolicy::None, 20);
         let regions = zero_regions();
         cache.policy = EvictionPolicy::None;
@@ -447,7 +447,7 @@ mod tests {
     }
 
     #[test]
-    fn test_incremental_properties() {
+    fn test_layout_incremental_properties() {
         let mut cache = LayoutCache::new(EvictionPolicy::None, 20);
         cache.policy = EvictionPolicy::None;
         cache.insert(0, FramesEntry::new(empty_frames(), 1));

View File

@@ -116,7 +116,7 @@ impl Reparser<'_> {
             // This is because in Markup mode, we want to examine all nodes
             // touching a replacement but in code we want to atomically replace.
             if child_span.contains(&self.replace_range.start)
-                || (mode == TokenMode::Markup
+                || (child_mode == TokenMode::Markup
                     && self.replace_range.start == child_span.end)
             {
                 first = Some((i, offset));
@@ -139,12 +139,12 @@ impl Reparser<'_> {
             // neighbor!
             if child_span.contains(&self.replace_range.end)
                 || self.replace_range.end == child_span.end
-                    && (mode != TokenMode::Markup || i + 1 == original_count)
+                    && (child_mode != TokenMode::Markup || i + 1 == original_count)
             {
                 outermost &= i + 1 == original_count;
                 last = Some((i, offset + child.len()));
                 break;
-            } else if mode != TokenMode::Markup
+            } else if child_mode != TokenMode::Markup
                 || !child.kind().succession_rule().safe_in_markup()
             {
                 break;
@@ -404,10 +404,10 @@ impl NodeKind {
         let mode = self.mode().unwrap_or(parent_mode);
         match self.succession_rule() {
             SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None,
-            SuccessionRule::AtomicPrimary if mode == TokenMode::Code => {
-                Some(parse_atomic)
-            }
-            SuccessionRule::AtomicPrimary => Some(parse_atomic_markup),
+            SuccessionRule::AtomicPrimary => match mode {
+                TokenMode::Code => Some(parse_atomic),
+                TokenMode::Markup => Some(parse_atomic_markup),
+            },
             SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self {
                 NodeKind::Markup(_) => Some(parse_markup),
                 NodeKind::Template => Some(parse_template),
@@ -601,28 +601,29 @@ impl SuccessionRule {
 }
 
 #[cfg(test)]
+#[rustfmt::skip]
 mod tests {
     use super::*;
     use crate::parse::parse;
+    use crate::parse::tests::check;
     use crate::source::SourceFile;
 
-    #[test]
-    #[rustfmt::skip]
-    fn test_incremental_parse() {
-        #[track_caller]
-        fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
-            let mut source = SourceFile::detached(prev);
-            let range = source.edit(range, with);
-            assert_eq!(range, goal);
-            assert_eq!(parse(source.src()), *source.root());
-        }
-
-        // Test simple replacements.
+    #[track_caller]
+    fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
+        let mut source = SourceFile::detached(prev);
+        let range = source.edit(range, with);
+        check(source.src(), source.root(), &parse(source.src()));
+        assert_eq!(range, goal);
+    }
+
+    #[test]
+    fn test_parse_incremental_simple_replacements() {
         test("hello world", 6 .. 11, "walkers", 5 .. 13);
         test("some content", 0..12, "", 0..0);
         test("", 0..0, "do it", 0..5);
         test("a d e", 1 .. 3, " b c d", 0 .. 8);
         test("a #f() e", 1 .. 6, " b c d", 0 .. 8);
+        test("{a}", 1 .. 2, "b", 1 .. 2);
         test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
         test("= A heading", 3 .. 3, "n evocative", 2 .. 22);
         test("your thing", 5 .. 5, "a", 4 .. 11);
@@ -641,8 +642,12 @@ mod tests {
         test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37);
         test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
         test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
+        test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
+        test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
+    }
 
-        // Test the whitespace invariants.
+    #[test]
+    fn test_parse_incremental_whitespace_invariants() {
         test("hello \\ world", 7 .. 8, "a ", 6 .. 14);
         test("hello \\ world", 7 .. 8, " a", 6 .. 14);
         test("x = y", 1 .. 1, " + y", 0 .. 6);
@@ -652,8 +657,10 @@ mod tests {
         test("#let x = (1, 2 + ; Five\r\n\r", 19..22, "2.", 18..22);
         test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
         test("hey #myfriend", 4 .. 4, "\\", 3 .. 6);
+    }
 
-        // Test type invariants.
+    #[test]
+    fn test_parse_incremental_type_invariants() {
         test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22);
         test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15);
         test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
@@ -664,9 +671,11 @@ mod tests {
         test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
         test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26);
         test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
-        test("a b c", 1 .. 1, "{[}", 0 .. 8);
+        test("a b c", 1 .. 1, "{[}", 0 .. 5);
+    }
 
-        // Test unclosed things.
+    #[test]
+    fn test_parse_incremental_wrongly_or_unclosed_things() {
         test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
         test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12);
         test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17);
@@ -675,9 +684,10 @@ mod tests {
         test("a b c", 1 .. 1, " /* letters", 0 .. 16);
         test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35);
         test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
-
-        // Test raw tokens.
-        test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
-        test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
+        test("~~~~", 2 .. 2, "[]", 1 .. 5);
+        test("a[]b", 2 .. 2, "{", 1 .. 4);
+        test("[hello]", 2 .. 3, "]", 0 .. 7);
+        test("{a}", 1 .. 2, "b", 1 .. 2);
+        test("{ a; b; c }", 5 .. 6, "[}]", 0 .. 13);
     }
 }
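
For orientation: each `test(prev, range, with, goal)` call above encodes one invariant. Splicing `with` into byte range `range` of `prev` must leave the incrementally patched syntax tree identical to a from-scratch parse, and the range returned by `SourceFile::edit` (compared against `goal`) records how far the reparse actually spread. The sketch below spells out one such case as if it sat in the same test module, so the crate-internal `SourceFile` and `parse` imports from the diff resolve; the test name and comments are illustrative, not part of the commit.

// Sketch only: one `test(...)` case from the diff above, written out in
// full using the crate-internal `crate::parse::parse` and
// `crate::source::SourceFile` that the test module itself imports.
#[test]
fn test_parse_incremental_single_case() {
    // "hello world" with bytes 6..11 ("world") replaced by "walkers".
    let mut source = SourceFile::detached("hello world");
    let reparsed = source.edit(6 .. 11, "walkers");

    // The reparser reports that bytes 5..13 of the edited source were
    // rebuilt: the range covers " walkers", i.e. it grew to include the
    // preceding space but not the start of the line.
    assert_eq!(reparsed, 5 .. 13);

    // Invariant: the patched tree equals a full reparse of the new source.
    assert_eq!(parse(source.src()), *source.root());
}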

View File

@@ -68,7 +68,7 @@ pub fn parse_atomic(
 ) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
     primary(&mut p, true).ok()?;
-    p.consume_unterminated()
+    p.consume_open_ended()
 }
 
 /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
@@ -80,7 +80,7 @@ pub fn parse_atomic_markup(
 ) -> Option<(Vec<Green>, bool)> {
     let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
     markup_expr(&mut p);
-    p.consume_unterminated()
+    p.consume_open_ended()
 }
 
 /// Parse a template literal. Returns `Some` if all of the input was consumed.
@@ -919,3 +919,21 @@ fn comment(p: &mut Parser) -> ParseResult {
         _ => Err(ParseError),
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::fmt::Debug;
+
+    #[track_caller]
+    pub fn check<T>(src: &str, found: T, expected: T)
+    where
+        T: Debug + PartialEq,
+    {
+        if found != expected {
+            println!("source: {src:?}");
+            println!("expected: {expected:#?}");
+            println!("found: {found:#?}");
+            panic!("test failed");
+        }
+    }
+}

View File

@@ -8,6 +8,8 @@ use crate::util::EcoString;
 
 /// A convenient token-based parser.
 pub struct Parser<'s> {
+    /// Offsets the indentation on the first line of the source.
+    column_offset: usize,
     /// An iterator over the source tokens.
     tokens: Tokens<'s>,
     /// Whether we are at the end of the file or of a group.
@@ -22,11 +24,10 @@ pub struct Parser<'s> {
     groups: Vec<GroupEntry>,
     /// The children of the currently built node.
     children: Vec<Green>,
-    /// Is `Some` if there is an unterminated group at the last position where
-    /// groups were terminated.
-    last_unterminated: Option<usize>,
-    /// Offsets the indentation on the first line of the source.
-    column_offset: usize,
+    /// Whether the last group was not correctly terminated.
+    unterminated_group: bool,
+    /// Whether a group terminator was found, that did not close a group.
+    stray_terminator: bool,
 }
 
 impl<'s> Parser<'s> {
@@ -35,6 +36,7 @@ impl<'s> Parser<'s> {
         let mut tokens = Tokens::new(src, mode);
         let current = tokens.next();
         Self {
+            column_offset: 0,
             tokens,
             eof: current.is_none(),
             current,
@@ -42,8 +44,8 @@ impl<'s> Parser<'s> {
             current_start: 0,
             groups: vec![],
             children: vec![],
-            last_unterminated: None,
-            column_offset: 0,
+            unterminated_group: false,
+            stray_terminator: false,
         }
     }
 
@@ -70,7 +72,7 @@ impl<'s> Parser<'s> {
 
     /// End the parsing process and return multiple children and whether the
    /// last token was terminated, even if there remains stuff in the string.
-    pub fn consume_unterminated(self) -> Option<(Vec<Green>, bool)> {
+    pub fn consume_open_ended(self) -> Option<(Vec<Green>, bool)> {
         self.terminated().then(|| (self.children, self.tokens.terminated()))
     }
 
@@ -120,6 +122,13 @@ impl<'s> Parser<'s> {
 
     /// Consume the current token and also trailing trivia.
     pub fn eat(&mut self) {
+        self.stray_terminator |= match self.current {
+            Some(NodeKind::RightParen) => !self.inside(Group::Paren),
+            Some(NodeKind::RightBracket) => !self.inside(Group::Bracket),
+            Some(NodeKind::RightBrace) => !self.inside(Group::Brace),
+            _ => false,
+        };
+
         self.prev_end = self.tokens.index();
         self.bump();
     }
@@ -259,13 +268,14 @@ impl<'s> Parser<'s> {
     /// This panics if no group was started.
     #[track_caller]
     pub fn end_group(&mut self) {
+        // If another group closes after a group with the missing terminator,
+        // its scope of influence ends here and no longer taints the rest of the
+        // reparse.
+        self.unterminated_group = false;
+
         let group_mode = self.tokens.mode();
         let group = self.groups.pop().expect("no started group");
         self.tokens.set_mode(group.prev_mode);
-        self.repeek();
-        if self.last_unterminated != Some(self.prev_end()) {
-            self.last_unterminated = None;
-        }
 
         let mut rescan = self.tokens.mode() != group_mode;
 
@@ -280,12 +290,16 @@ impl<'s> Parser<'s> {
             Group::Imports => None,
         } {
             if self.current.as_ref() == Some(&end) {
-                // Bump the delimeter and return. No need to rescan in this case.
+                // Bump the delimeter and return. No need to rescan in this
+                // case. Also, we know that the delimiter is not stray even
+                // though we already removed the group.
+                let s = self.stray_terminator;
                 self.eat();
+                self.stray_terminator = s;
                 rescan = false;
             } else if required {
                 self.push_error(format_eco!("expected {}", end));
-                self.last_unterminated = Some(self.prev_end());
+                self.unterminated_group = true;
             }
         }
 
@@ -299,13 +313,14 @@ impl<'s> Parser<'s> {
             self.prev_end = self.tokens.index();
             self.current_start = self.tokens.index();
             self.current = self.tokens.next();
+            self.repeek();
         }
-        self.repeek();
     }
 
     /// Checks if all groups were correctly terminated.
-    pub fn terminated(&self) -> bool {
-        self.groups.is_empty() && self.last_unterminated.is_none()
+    fn terminated(&self) -> bool {
+        self.groups.is_empty() && !self.unterminated_group && !self.stray_terminator
     }
 
     /// Low-level bump that consumes exactly one token without special trivia
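
The two new flags replace the old `last_unterminated` position tracking: `unterminated_group` is set in `end_group` when a required closing delimiter never arrived, and `stray_terminator` is set in `eat` when a closing delimiter is consumed while no matching group is open. Either flag makes `terminated()` return false, so `consume_open_ended` hands back nothing and the incremental reparse is rejected. Below is a simplified standalone model of that bookkeeping, not typst's actual `Parser`; the type and method names are invented for illustration.

// Standalone sketch of the flag bookkeeping, not the real `Parser`: it only
// tracks delimiters, whereas the actual parser folds this logic into `eat`
// and `end_group`.
#[derive(PartialEq)]
enum Group {
    Paren,
    Bracket,
    Brace,
}

struct GroupTracker {
    open: Vec<Group>,
    unterminated_group: bool,
    stray_terminator: bool,
}

impl GroupTracker {
    fn new() -> Self {
        Self { open: vec![], unterminated_group: false, stray_terminator: false }
    }

    fn start(&mut self, kind: Group) {
        self.open.push(kind);
    }

    // A closing delimiter that does not match the innermost open group is
    // stray: it likely closes a group outside the reparsed fragment.
    fn close(&mut self, kind: Group) {
        if self.open.last() == Some(&kind) {
            self.open.pop();
        } else {
            self.stray_terminator = true;
        }
    }

    // End of the fragment: any group still open was never terminated.
    fn finish(&mut self) {
        self.unterminated_group |= !self.open.is_empty();
        self.open.clear();
    }

    // Mirrors the spirit of `Parser::terminated`: only reuse the fragment if
    // every group was closed and nothing stray appeared.
    fn terminated(&self) -> bool {
        !self.unterminated_group && !self.stray_terminator
    }
}

fn main() {
    let mut groups = GroupTracker::new();
    groups.start(Group::Brace);
    groups.close(Group::Bracket); // stray `]` inside `{ ... }`
    groups.close(Group::Brace);
    groups.finish();
    assert!(!groups.terminated());
}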

View File

@@ -568,9 +568,8 @@ fn keyword(ident: &str) -> Option<NodeKind> {
 #[cfg(test)]
 #[allow(non_snake_case)]
 mod tests {
-    use std::fmt::Debug;
-
     use super::*;
+    use crate::parse::tests::check;
     use ErrorPos::*;
     use NodeKind::*;
 
@@ -687,19 +686,6 @@ mod tests {
         }};
     }
 
-    #[track_caller]
-    fn check<T>(src: &str, found: T, expected: T)
-    where
-        T: Debug + PartialEq,
-    {
-        if found != expected {
-            println!("source: {src:?}");
-            println!("expected: {expected:#?}");
-            println!("found: {found:#?}");
-            panic!("test failed");
-        }
-    }
-
     #[test]
     fn test_tokenize_brackets() {
         // Test in markup.

View File

@@ -392,6 +392,10 @@ fn print_error(source: &SourceFile, line: usize, error: &Error) {
 fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool {
     let supplements = [
         "[",
+        "]",
+        "{",
+        "}",
+        "(",
         ")",
         "#rect()",
         "a word",