Fix incremental parsing bugs

This commit is contained in:
Laurenz 2022-01-31 13:26:40 +01:00
parent b1a82ae22d
commit 8f37189d6f
6 changed files with 95 additions and 62 deletions

View File

@ -408,7 +408,7 @@ mod tests {
}
#[test]
fn test_incremental_temperature() {
fn test_layout_incremental_temperature() {
let mut cache = LayoutCache::new(EvictionPolicy::None, 20);
let regions = zero_regions();
cache.policy = EvictionPolicy::None;
@ -447,7 +447,7 @@ mod tests {
}
#[test]
fn test_incremental_properties() {
fn test_layout_incremental_properties() {
let mut cache = LayoutCache::new(EvictionPolicy::None, 20);
cache.policy = EvictionPolicy::None;
cache.insert(0, FramesEntry::new(empty_frames(), 1));

View File

@ -116,7 +116,7 @@ impl Reparser<'_> {
// This is because in Markup mode, we want to examine all nodes
// touching a replacement but in code we want to atomically replace.
if child_span.contains(&self.replace_range.start)
|| (mode == TokenMode::Markup
|| (child_mode == TokenMode::Markup
&& self.replace_range.start == child_span.end)
{
first = Some((i, offset));
@ -139,12 +139,12 @@ impl Reparser<'_> {
// neighbor!
if child_span.contains(&self.replace_range.end)
|| self.replace_range.end == child_span.end
&& (mode != TokenMode::Markup || i + 1 == original_count)
&& (child_mode != TokenMode::Markup || i + 1 == original_count)
{
outermost &= i + 1 == original_count;
last = Some((i, offset + child.len()));
break;
} else if mode != TokenMode::Markup
} else if child_mode != TokenMode::Markup
|| !child.kind().succession_rule().safe_in_markup()
{
break;
@ -404,10 +404,10 @@ impl NodeKind {
let mode = self.mode().unwrap_or(parent_mode);
match self.succession_rule() {
SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None,
SuccessionRule::AtomicPrimary if mode == TokenMode::Code => {
Some(parse_atomic)
}
SuccessionRule::AtomicPrimary => Some(parse_atomic_markup),
SuccessionRule::AtomicPrimary => match mode {
TokenMode::Code => Some(parse_atomic),
TokenMode::Markup => Some(parse_atomic_markup),
},
SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self {
NodeKind::Markup(_) => Some(parse_markup),
NodeKind::Template => Some(parse_template),
@ -601,28 +601,29 @@ impl SuccessionRule {
}
#[cfg(test)]
#[rustfmt::skip]
mod tests {
use super::*;
use crate::parse::parse;
use crate::parse::tests::check;
use crate::source::SourceFile;
#[test]
#[rustfmt::skip]
fn test_incremental_parse() {
/// Edits a detached source file and verifies the outcome of the edit.
///
/// Replaces `range` in `prev` with `with`, asserts that the range returned
/// by `edit` equals `goal`, and then asserts that parsing the edited text
/// from scratch yields the same tree as the file's root after the edit.
#[track_caller]
fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
    let mut file = SourceFile::detached(prev);
    let reparsed = file.edit(range, with);
    // The reported range is checked first, then the tree itself.
    assert_eq!(reparsed, goal);
    assert_eq!(parse(file.src()), *file.root());
}
/// Edits a detached source file and verifies the outcome of the edit.
///
/// Replaces `range` in `prev` with `with`, uses `check` to compare the
/// file's root after the edit against a from-scratch parse of the edited
/// text, and finally asserts that the range returned by `edit` equals
/// `goal`.
#[track_caller]
fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
    let mut file = SourceFile::detached(prev);
    let reparsed = file.edit(range, with);
    // Compare trees first so a mismatch prints the offending source text
    // before the range assertion gets a chance to fail.
    let fresh = parse(file.src());
    check(file.src(), file.root(), &fresh);
    assert_eq!(reparsed, goal);
}
// Test simple replacements.
#[test]
fn test_parse_incremental_simple_replacements() {
test("hello world", 6 .. 11, "walkers", 5 .. 13);
test("some content", 0..12, "", 0..0);
test("", 0..0, "do it", 0..5);
test("a d e", 1 .. 3, " b c d", 0 .. 8);
test("a #f() e", 1 .. 6, " b c d", 0 .. 8);
test("{a}", 1 .. 2, "b", 1 .. 2);
test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
test("= A heading", 3 .. 3, "n evocative", 2 .. 22);
test("your thing", 5 .. 5, "a", 4 .. 11);
@ -641,8 +642,12 @@ mod tests {
test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37);
test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
}
// Test the whitespace invariants.
#[test]
fn test_parse_incremental_whitespace_invariants() {
test("hello \\ world", 7 .. 8, "a ", 6 .. 14);
test("hello \\ world", 7 .. 8, " a", 6 .. 14);
test("x = y", 1 .. 1, " + y", 0 .. 6);
@ -652,8 +657,10 @@ mod tests {
test("#let x = (1, 2 + ; Five\r\n\r", 19..22, "2.", 18..22);
test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
test("hey #myfriend", 4 .. 4, "\\", 3 .. 6);
}
// Test type invariants.
#[test]
fn test_parse_incremental_type_invariants() {
test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22);
test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15);
test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
@ -664,9 +671,11 @@ mod tests {
test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26);
test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
test("a b c", 1 .. 1, "{[}", 0 .. 8);
test("a b c", 1 .. 1, "{[}", 0 .. 5);
}
// Test unclosed things.
#[test]
fn test_parse_incremental_wrongly_or_unclosed_things() {
test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12);
test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17);
@ -675,9 +684,10 @@ mod tests {
test("a b c", 1 .. 1, " /* letters", 0 .. 16);
test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35);
test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
// Test raw tokens.
test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
test("~~~~", 2 .. 2, "[]", 1 .. 5);
test("a[]b", 2 .. 2, "{", 1 .. 4);
test("[hello]", 2 .. 3, "]", 0 .. 7);
test("{a}", 1 .. 2, "b", 1 .. 2);
test("{ a; b; c }", 5 .. 6, "[}]", 0 .. 13);
}
}

View File

@ -68,7 +68,7 @@ pub fn parse_atomic(
) -> Option<(Vec<Green>, bool)> {
let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
primary(&mut p, true).ok()?;
p.consume_unterminated()
p.consume_open_ended()
}
/// Parse an atomic primary. Returns `Some` if all of the input was consumed.
@ -80,7 +80,7 @@ pub fn parse_atomic_markup(
) -> Option<(Vec<Green>, bool)> {
let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
markup_expr(&mut p);
p.consume_unterminated()
p.consume_open_ended()
}
/// Parse a template literal. Returns `Some` if all of the input was consumed.
@ -919,3 +919,21 @@ fn comment(p: &mut Parser) -> ParseResult {
_ => Err(ParseError),
}
}
#[cfg(test)]
mod tests {
use std::fmt::Debug;
/// Test helper that asserts `found` equals `expected`.
///
/// Returns silently when the values match. On a mismatch it prints the
/// source text (`src`) plus the pretty-printed `expected` and `found`
/// values to stdout and then panics with the message "test failed".
/// Because of `#[track_caller]`, the panic location is the call site.
#[track_caller]
pub fn check<T>(src: &str, found: T, expected: T)
where
    T: Debug + PartialEq,
{
    let mismatch = found != expected;
    if !mismatch {
        return;
    }

    println!("source: {src:?}");
    println!("expected: {expected:#?}");
    println!("found: {found:#?}");
    panic!("test failed");
}
}

View File

@ -8,6 +8,8 @@ use crate::util::EcoString;
/// A convenient token-based parser.
pub struct Parser<'s> {
/// Offsets the indentation on the first line of the source.
column_offset: usize,
/// An iterator over the source tokens.
tokens: Tokens<'s>,
/// Whether we are at the end of the file or of a group.
@ -22,11 +24,10 @@ pub struct Parser<'s> {
groups: Vec<GroupEntry>,
/// The children of the currently built node.
children: Vec<Green>,
/// Is `Some` if there is an unterminated group at the last position where
/// groups were terminated.
last_unterminated: Option<usize>,
/// Offsets the indentation on the first line of the source.
column_offset: usize,
/// Whether the last group was not correctly terminated.
unterminated_group: bool,
/// Whether a group terminator was found that did not close a group.
stray_terminator: bool,
}
impl<'s> Parser<'s> {
@ -35,6 +36,7 @@ impl<'s> Parser<'s> {
let mut tokens = Tokens::new(src, mode);
let current = tokens.next();
Self {
column_offset: 0,
tokens,
eof: current.is_none(),
current,
@ -42,8 +44,8 @@ impl<'s> Parser<'s> {
current_start: 0,
groups: vec![],
children: vec![],
last_unterminated: None,
column_offset: 0,
unterminated_group: false,
stray_terminator: false,
}
}
@ -70,7 +72,7 @@ impl<'s> Parser<'s> {
/// End the parsing process and return multiple children and whether the
/// last token was terminated, even if there remains stuff in the string.
pub fn consume_unterminated(self) -> Option<(Vec<Green>, bool)> {
pub fn consume_open_ended(self) -> Option<(Vec<Green>, bool)> {
self.terminated().then(|| (self.children, self.tokens.terminated()))
}
@ -120,6 +122,13 @@ impl<'s> Parser<'s> {
/// Consume the current token and also trailing trivia.
pub fn eat(&mut self) {
self.stray_terminator |= match self.current {
Some(NodeKind::RightParen) => !self.inside(Group::Paren),
Some(NodeKind::RightBracket) => !self.inside(Group::Bracket),
Some(NodeKind::RightBrace) => !self.inside(Group::Brace),
_ => false,
};
self.prev_end = self.tokens.index();
self.bump();
@ -259,13 +268,14 @@ impl<'s> Parser<'s> {
/// This panics if no group was started.
#[track_caller]
pub fn end_group(&mut self) {
// If another group closes after a group with the missing terminator,
// its scope of influence ends here and no longer taints the rest of the
// reparse.
self.unterminated_group = false;
let group_mode = self.tokens.mode();
let group = self.groups.pop().expect("no started group");
self.tokens.set_mode(group.prev_mode);
self.repeek();
if self.last_unterminated != Some(self.prev_end()) {
self.last_unterminated = None;
}
let mut rescan = self.tokens.mode() != group_mode;
@ -280,12 +290,16 @@ impl<'s> Parser<'s> {
Group::Imports => None,
} {
if self.current.as_ref() == Some(&end) {
// Bump the delimiter and return. No need to rescan in this case.
// Bump the delimiter and return. No need to rescan in this
// case. Also, we know that the delimiter is not stray even
// though we already removed the group.
let s = self.stray_terminator;
self.eat();
self.stray_terminator = s;
rescan = false;
} else if required {
self.push_error(format_eco!("expected {}", end));
self.last_unterminated = Some(self.prev_end());
self.unterminated_group = true;
}
}
@ -299,13 +313,14 @@ impl<'s> Parser<'s> {
self.prev_end = self.tokens.index();
self.current_start = self.tokens.index();
self.current = self.tokens.next();
self.repeek();
}
self.repeek();
}
/// Checks if all groups were correctly terminated and no stray group
/// terminator was consumed.
pub fn terminated(&self) -> bool {
self.groups.is_empty() && self.last_unterminated.is_none()
fn terminated(&self) -> bool {
self.groups.is_empty() && !self.unterminated_group && !self.stray_terminator
}
/// Low-level bump that consumes exactly one token without special trivia

View File

@ -568,9 +568,8 @@ fn keyword(ident: &str) -> Option<NodeKind> {
#[cfg(test)]
#[allow(non_snake_case)]
mod tests {
use std::fmt::Debug;
use super::*;
use crate::parse::tests::check;
use ErrorPos::*;
use NodeKind::*;
@ -687,19 +686,6 @@ mod tests {
}};
}
/// Test helper that asserts `found` equals `expected`.
///
/// When the two values differ, the source text (`src`) and the
/// pretty-printed `expected` and `found` values are printed to stdout
/// before panicking with "test failed". `#[track_caller]` makes the panic
/// point at the calling test rather than at this helper.
#[track_caller]
fn check<T>(src: &str, found: T, expected: T)
where
    T: Debug + PartialEq,
{
    let differs = found != expected;
    match differs {
        false => {}
        true => {
            println!("source: {src:?}");
            println!("expected: {expected:#?}");
            println!("found: {found:#?}");
            panic!("test failed");
        }
    }
}
#[test]
fn test_tokenize_brackets() {
// Test in markup.

View File

@ -392,6 +392,10 @@ fn print_error(source: &SourceFile, line: usize, error: &Error) {
fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool {
let supplements = [
"[",
"]",
"{",
"}",
"(",
")",
"#rect()",
"a word",