mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Fix incremental parsing bugs
This commit is contained in:
parent
b1a82ae22d
commit
8f37189d6f
@ -408,7 +408,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_incremental_temperature() {
|
||||
fn test_layout_incremental_temperature() {
|
||||
let mut cache = LayoutCache::new(EvictionPolicy::None, 20);
|
||||
let regions = zero_regions();
|
||||
cache.policy = EvictionPolicy::None;
|
||||
@ -447,7 +447,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_incremental_properties() {
|
||||
fn test_layout_incremental_properties() {
|
||||
let mut cache = LayoutCache::new(EvictionPolicy::None, 20);
|
||||
cache.policy = EvictionPolicy::None;
|
||||
cache.insert(0, FramesEntry::new(empty_frames(), 1));
|
||||
|
@ -116,7 +116,7 @@ impl Reparser<'_> {
|
||||
// This is because in Markup mode, we want to examine all nodes
|
||||
// touching a replacement but in code we want to atomically replace.
|
||||
if child_span.contains(&self.replace_range.start)
|
||||
|| (mode == TokenMode::Markup
|
||||
|| (child_mode == TokenMode::Markup
|
||||
&& self.replace_range.start == child_span.end)
|
||||
{
|
||||
first = Some((i, offset));
|
||||
@ -139,12 +139,12 @@ impl Reparser<'_> {
|
||||
// neighbor!
|
||||
if child_span.contains(&self.replace_range.end)
|
||||
|| self.replace_range.end == child_span.end
|
||||
&& (mode != TokenMode::Markup || i + 1 == original_count)
|
||||
&& (child_mode != TokenMode::Markup || i + 1 == original_count)
|
||||
{
|
||||
outermost &= i + 1 == original_count;
|
||||
last = Some((i, offset + child.len()));
|
||||
break;
|
||||
} else if mode != TokenMode::Markup
|
||||
} else if child_mode != TokenMode::Markup
|
||||
|| !child.kind().succession_rule().safe_in_markup()
|
||||
{
|
||||
break;
|
||||
@ -404,10 +404,10 @@ impl NodeKind {
|
||||
let mode = self.mode().unwrap_or(parent_mode);
|
||||
match self.succession_rule() {
|
||||
SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None,
|
||||
SuccessionRule::AtomicPrimary if mode == TokenMode::Code => {
|
||||
Some(parse_atomic)
|
||||
}
|
||||
SuccessionRule::AtomicPrimary => Some(parse_atomic_markup),
|
||||
SuccessionRule::AtomicPrimary => match mode {
|
||||
TokenMode::Code => Some(parse_atomic),
|
||||
TokenMode::Markup => Some(parse_atomic_markup),
|
||||
},
|
||||
SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self {
|
||||
NodeKind::Markup(_) => Some(parse_markup),
|
||||
NodeKind::Template => Some(parse_template),
|
||||
@ -601,28 +601,29 @@ impl SuccessionRule {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[rustfmt::skip]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::parse::parse;
|
||||
use crate::parse::tests::check;
|
||||
use crate::source::SourceFile;
|
||||
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn test_incremental_parse() {
|
||||
#[track_caller]
|
||||
fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
|
||||
let mut source = SourceFile::detached(prev);
|
||||
let range = source.edit(range, with);
|
||||
assert_eq!(range, goal);
|
||||
assert_eq!(parse(source.src()), *source.root());
|
||||
}
|
||||
#[track_caller]
|
||||
fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
|
||||
let mut source = SourceFile::detached(prev);
|
||||
let range = source.edit(range, with);
|
||||
check(source.src(), source.root(), &parse(source.src()));
|
||||
assert_eq!(range, goal);
|
||||
}
|
||||
|
||||
// Test simple replacements.
|
||||
#[test]
|
||||
fn test_parse_incremental_simple_replacements() {
|
||||
test("hello world", 6 .. 11, "walkers", 5 .. 13);
|
||||
test("some content", 0..12, "", 0..0);
|
||||
test("", 0..0, "do it", 0..5);
|
||||
test("a d e", 1 .. 3, " b c d", 0 .. 8);
|
||||
test("a #f() e", 1 .. 6, " b c d", 0 .. 8);
|
||||
test("{a}", 1 .. 2, "b", 1 .. 2);
|
||||
test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9);
|
||||
test("= A heading", 3 .. 3, "n evocative", 2 .. 22);
|
||||
test("your thing", 5 .. 5, "a", 4 .. 11);
|
||||
@ -641,8 +642,12 @@ mod tests {
|
||||
test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37);
|
||||
test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
|
||||
test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
|
||||
test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
|
||||
test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
|
||||
}
|
||||
|
||||
// Test the whitespace invariants.
|
||||
#[test]
|
||||
fn test_parse_incremental_whitespace_invariants() {
|
||||
test("hello \\ world", 7 .. 8, "a ", 6 .. 14);
|
||||
test("hello \\ world", 7 .. 8, " a", 6 .. 14);
|
||||
test("x = y", 1 .. 1, " + y", 0 .. 6);
|
||||
@ -652,8 +657,10 @@ mod tests {
|
||||
test("#let x = (1, 2 + ; Five\r\n\r", 19..22, "2.", 18..22);
|
||||
test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
|
||||
test("hey #myfriend", 4 .. 4, "\\", 3 .. 6);
|
||||
}
|
||||
|
||||
// Test type invariants.
|
||||
#[test]
|
||||
fn test_parse_incremental_type_invariants() {
|
||||
test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22);
|
||||
test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15);
|
||||
test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
|
||||
@ -664,9 +671,11 @@ mod tests {
|
||||
test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13);
|
||||
test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26);
|
||||
test("{(1, 2)}", 1 .. 1, "while ", 0 .. 14);
|
||||
test("a b c", 1 .. 1, "{[}", 0 .. 8);
|
||||
test("a b c", 1 .. 1, "{[}", 0 .. 5);
|
||||
}
|
||||
|
||||
// Test unclosed things.
|
||||
#[test]
|
||||
fn test_parse_incremental_wrongly_or_unclosed_things() {
|
||||
test(r#"{"hi"}"#, 4 .. 5, "c", 0 .. 6);
|
||||
test(r"this \u{abcd}", 8 .. 9, "", 5 .. 12);
|
||||
test(r"this \u{abcd} that", 12 .. 13, "", 0 .. 17);
|
||||
@ -675,9 +684,10 @@ mod tests {
|
||||
test("a b c", 1 .. 1, " /* letters", 0 .. 16);
|
||||
test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35);
|
||||
test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
|
||||
|
||||
// Test raw tokens.
|
||||
test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
|
||||
test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18);
|
||||
test("~~~~", 2 .. 2, "[]", 1 .. 5);
|
||||
test("a[]b", 2 .. 2, "{", 1 .. 4);
|
||||
test("[hello]", 2 .. 3, "]", 0 .. 7);
|
||||
test("{a}", 1 .. 2, "b", 1 .. 2);
|
||||
test("{ a; b; c }", 5 .. 6, "[}]", 0 .. 13);
|
||||
}
|
||||
}
|
||||
|
@ -68,7 +68,7 @@ pub fn parse_atomic(
|
||||
) -> Option<(Vec<Green>, bool)> {
|
||||
let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
|
||||
primary(&mut p, true).ok()?;
|
||||
p.consume_unterminated()
|
||||
p.consume_open_ended()
|
||||
}
|
||||
|
||||
/// Parse an atomic primary. Returns `Some` if all of the input was consumed.
|
||||
@ -80,7 +80,7 @@ pub fn parse_atomic_markup(
|
||||
) -> Option<(Vec<Green>, bool)> {
|
||||
let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
|
||||
markup_expr(&mut p);
|
||||
p.consume_unterminated()
|
||||
p.consume_open_ended()
|
||||
}
|
||||
|
||||
/// Parse a template literal. Returns `Some` if all of the input was consumed.
|
||||
@ -919,3 +919,21 @@ fn comment(p: &mut Parser) -> ParseResult {
|
||||
_ => Err(ParseError),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::fmt::Debug;
|
||||
|
||||
#[track_caller]
|
||||
pub fn check<T>(src: &str, found: T, expected: T)
|
||||
where
|
||||
T: Debug + PartialEq,
|
||||
{
|
||||
if found != expected {
|
||||
println!("source: {src:?}");
|
||||
println!("expected: {expected:#?}");
|
||||
println!("found: {found:#?}");
|
||||
panic!("test failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,8 @@ use crate::util::EcoString;
|
||||
|
||||
/// A convenient token-based parser.
|
||||
pub struct Parser<'s> {
|
||||
/// Offsets the indentation on the first line of the source.
|
||||
column_offset: usize,
|
||||
/// An iterator over the source tokens.
|
||||
tokens: Tokens<'s>,
|
||||
/// Whether we are at the end of the file or of a group.
|
||||
@ -22,11 +24,10 @@ pub struct Parser<'s> {
|
||||
groups: Vec<GroupEntry>,
|
||||
/// The children of the currently built node.
|
||||
children: Vec<Green>,
|
||||
/// Is `Some` if there is an unterminated group at the last position where
|
||||
/// groups were terminated.
|
||||
last_unterminated: Option<usize>,
|
||||
/// Offsets the indentation on the first line of the source.
|
||||
column_offset: usize,
|
||||
/// Whether the last group was not correctly terminated.
|
||||
unterminated_group: bool,
|
||||
/// Whether a group terminator was found, that did not close a group.
|
||||
stray_terminator: bool,
|
||||
}
|
||||
|
||||
impl<'s> Parser<'s> {
|
||||
@ -35,6 +36,7 @@ impl<'s> Parser<'s> {
|
||||
let mut tokens = Tokens::new(src, mode);
|
||||
let current = tokens.next();
|
||||
Self {
|
||||
column_offset: 0,
|
||||
tokens,
|
||||
eof: current.is_none(),
|
||||
current,
|
||||
@ -42,8 +44,8 @@ impl<'s> Parser<'s> {
|
||||
current_start: 0,
|
||||
groups: vec![],
|
||||
children: vec![],
|
||||
last_unterminated: None,
|
||||
column_offset: 0,
|
||||
unterminated_group: false,
|
||||
stray_terminator: false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -70,7 +72,7 @@ impl<'s> Parser<'s> {
|
||||
|
||||
/// End the parsing process and return multiple children and whether the
|
||||
/// last token was terminated, even if there remains stuff in the string.
|
||||
pub fn consume_unterminated(self) -> Option<(Vec<Green>, bool)> {
|
||||
pub fn consume_open_ended(self) -> Option<(Vec<Green>, bool)> {
|
||||
self.terminated().then(|| (self.children, self.tokens.terminated()))
|
||||
}
|
||||
|
||||
@ -120,6 +122,13 @@ impl<'s> Parser<'s> {
|
||||
|
||||
/// Consume the current token and also trailing trivia.
|
||||
pub fn eat(&mut self) {
|
||||
self.stray_terminator |= match self.current {
|
||||
Some(NodeKind::RightParen) => !self.inside(Group::Paren),
|
||||
Some(NodeKind::RightBracket) => !self.inside(Group::Bracket),
|
||||
Some(NodeKind::RightBrace) => !self.inside(Group::Brace),
|
||||
_ => false,
|
||||
};
|
||||
|
||||
self.prev_end = self.tokens.index();
|
||||
self.bump();
|
||||
|
||||
@ -259,13 +268,14 @@ impl<'s> Parser<'s> {
|
||||
/// This panics if no group was started.
|
||||
#[track_caller]
|
||||
pub fn end_group(&mut self) {
|
||||
// If another group closes after a group with the missing terminator,
|
||||
// its scope of influence ends here and no longer taints the rest of the
|
||||
// reparse.
|
||||
self.unterminated_group = false;
|
||||
|
||||
let group_mode = self.tokens.mode();
|
||||
let group = self.groups.pop().expect("no started group");
|
||||
self.tokens.set_mode(group.prev_mode);
|
||||
self.repeek();
|
||||
if self.last_unterminated != Some(self.prev_end()) {
|
||||
self.last_unterminated = None;
|
||||
}
|
||||
|
||||
let mut rescan = self.tokens.mode() != group_mode;
|
||||
|
||||
@ -280,12 +290,16 @@ impl<'s> Parser<'s> {
|
||||
Group::Imports => None,
|
||||
} {
|
||||
if self.current.as_ref() == Some(&end) {
|
||||
// Bump the delimeter and return. No need to rescan in this case.
|
||||
// Bump the delimeter and return. No need to rescan in this
|
||||
// case. Also, we know that the delimiter is not stray even
|
||||
// though we already removed the group.
|
||||
let s = self.stray_terminator;
|
||||
self.eat();
|
||||
self.stray_terminator = s;
|
||||
rescan = false;
|
||||
} else if required {
|
||||
self.push_error(format_eco!("expected {}", end));
|
||||
self.last_unterminated = Some(self.prev_end());
|
||||
self.unterminated_group = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -299,13 +313,14 @@ impl<'s> Parser<'s> {
|
||||
self.prev_end = self.tokens.index();
|
||||
self.current_start = self.tokens.index();
|
||||
self.current = self.tokens.next();
|
||||
self.repeek();
|
||||
}
|
||||
|
||||
self.repeek();
|
||||
}
|
||||
|
||||
/// Checks if all groups were correctly terminated.
|
||||
pub fn terminated(&self) -> bool {
|
||||
self.groups.is_empty() && self.last_unterminated.is_none()
|
||||
fn terminated(&self) -> bool {
|
||||
self.groups.is_empty() && !self.unterminated_group && !self.stray_terminator
|
||||
}
|
||||
|
||||
/// Low-level bump that consumes exactly one token without special trivia
|
||||
|
@ -568,9 +568,8 @@ fn keyword(ident: &str) -> Option<NodeKind> {
|
||||
#[cfg(test)]
|
||||
#[allow(non_snake_case)]
|
||||
mod tests {
|
||||
use std::fmt::Debug;
|
||||
|
||||
use super::*;
|
||||
use crate::parse::tests::check;
|
||||
|
||||
use ErrorPos::*;
|
||||
use NodeKind::*;
|
||||
@ -687,19 +686,6 @@ mod tests {
|
||||
}};
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn check<T>(src: &str, found: T, expected: T)
|
||||
where
|
||||
T: Debug + PartialEq,
|
||||
{
|
||||
if found != expected {
|
||||
println!("source: {src:?}");
|
||||
println!("expected: {expected:#?}");
|
||||
println!("found: {found:#?}");
|
||||
panic!("test failed");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_brackets() {
|
||||
// Test in markup.
|
||||
|
@ -392,6 +392,10 @@ fn print_error(source: &SourceFile, line: usize, error: &Error) {
|
||||
fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool {
|
||||
let supplements = [
|
||||
"[",
|
||||
"]",
|
||||
"{",
|
||||
"}",
|
||||
"(",
|
||||
")",
|
||||
"#rect()",
|
||||
"a word",
|
||||
|
Loading…
x
Reference in New Issue
Block a user