Clarity and bugfix

Fixes a bug where validation would wrongly reject an atomic primary reparse due to trailing whitespace.

Co-Authored-By: Martin <mhaug@live.de>
This commit is contained in:
Laurenz 2021-11-28 22:32:20 +01:00 committed by Martin Haug
parent e05eb5fda5
commit 12f7335ac3

View File

@ -85,13 +85,14 @@ impl Reparser<'_> {
parent_mode: TokenMode, parent_mode: TokenMode,
mut outermost: bool, mut outermost: bool,
) -> Option<Range<usize>> { ) -> Option<Range<usize>> {
let kind = green.kind().clone(); let mode = green.kind().mode().unwrap_or(parent_mode);
let mode = kind.mode().unwrap_or(parent_mode); let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
let child_count = green.children().len();
let mut child_at_start = true; let mut first = None;
let last = green.children().len().saturating_sub(1); let mut at_start = true;
let mut start = None;
// Find the the first child in the range of children to reparse.
for (i, child) in green.children_mut().iter_mut().enumerate() { for (i, child) in green.children_mut().iter_mut().enumerate() {
let child_span = offset .. offset + child.len(); let child_span = offset .. offset + child.len();
@ -104,18 +105,19 @@ impl Reparser<'_> {
|| (mode == TokenMode::Markup || (mode == TokenMode::Markup
&& self.replace_range.start == child_span.end) && self.replace_range.start == child_span.end)
{ {
start = Some((i, offset)); first = Some((i, offset));
break; break;
} }
offset += child.len(); offset += child.len();
child_at_start = child.kind().is_at_start(child_at_start); at_start = child.kind().is_at_start(at_start);
} }
let (start_idx, start_offset) = start?; let (first_idx, first_start) = first?;
let mut end = None; let mut last = None;
for (i, child) in green.children_mut().iter_mut().enumerate().skip(start_idx) { // Find the the last child in the range of children to reparse.
for (i, child) in green.children_mut().iter_mut().enumerate().skip(first_idx) {
let child_span = offset .. offset + child.len(); let child_span = offset .. offset + child.len();
// Similarly to above, the end of the edit must be in the node but // Similarly to above, the end of the edit must be in the node but
@ -123,35 +125,35 @@ impl Reparser<'_> {
// neighbor! // neighbor!
if child_span.contains(&self.replace_range.end) if child_span.contains(&self.replace_range.end)
|| self.replace_range.end == child_span.end || self.replace_range.end == child_span.end
&& (mode != TokenMode::Markup || i == last) && (mode != TokenMode::Markup || i + 1 == child_count)
{ {
outermost &= i == last; outermost &= i + 1 == child_count;
end = Some(i); last = Some((i, offset + child.len()));
break; break;
} else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { } else if mode != TokenMode::Markup || !child.kind().post().safe_in_markup() {
break; break;
} }
offset += child.len(); offset += child.len();
} }
let end = end?; let (last_idx, last_end) = last?;
let child_idx_range = start_idx .. end + 1; let children_range = first_idx .. last_idx + 1;
let child_span = start_offset .. offset + green.children()[end].len(); let children_span = first_start .. last_end;
let child_kind = green.children()[end].kind().clone(); let last_kind = green.children()[last_idx].kind().clone();
if child_idx_range.len() == 1 { // First, we try if the child itself has another, more specific
let idx = child_idx_range.start; // applicable child.
let child = &mut green.children_mut()[idx]; if children_range.len() == 1 {
let child = &mut green.children_mut()[children_range.start];
let prev_len = child.len(); let prev_len = child.len();
// First, we try if the child has another, more specific applicable child. if last_kind.post() != Postcondition::Unsafe {
if !child_kind.post().unsafe_interior() {
if let Some(range) = match child { if let Some(range) = match child {
Green::Node(n) => self.reparse_step( Green::Node(node) => self.reparse_step(
Rc::make_mut(n), Rc::make_mut(node),
start_offset, first_start,
kind.mode().unwrap_or(TokenMode::Code), child_mode,
outermost, outermost,
), ),
Green::Token(_) => None, Green::Token(_) => None,
@ -163,159 +165,147 @@ impl Reparser<'_> {
} }
} }
debug_assert_ne!(child_idx_range.len(), 0); // We only replace multiple children in markup mode.
if children_range.len() > 1 && mode == TokenMode::Code {
if mode == TokenMode::Code && child_idx_range.len() > 1 {
return None; return None;
} }
// We now have a child that we can replace and a function to do so. // We now have a child that we can replace and a function to do so.
let func = let func = last_kind.reparsing_func(child_mode)?;
child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code))?; let post = last_kind.post();
let policy = child_kind.post();
let len_change = self.replace_len as isize - self.replace_range.len() as isize; // The span of the to-be-reparsed children in the new source.
let mut src_span = child_span; let replace_span = children_span.start
src_span.end = (src_span.end as isize + len_change) as usize; .. children_span.end + self.replace_len - self.replace_range.len();
let recompile_range = if policy == Postcondition::AtomicPrimary { // For atomic primaries we need to pass in the whole remaining string to
src_span.start .. self.src.len() // check whether the parser would eat more stuff illicitly.
let reparse_span = if post == Postcondition::AtomicPrimary {
replace_span.start .. self.src.len()
} else { } else {
src_span.clone() replace_span.clone()
}; };
let (mut new_children, terminated) = // Do the reparsing!
func(&self.src[recompile_range], child_at_start)?; let (mut newborns, terminated) = func(&self.src[reparse_span], at_start)?;
// Do not accept unclosed nodes if the old node did not use to be at the // Make sure that atomic primaries ate only what they were supposed to.
// right edge of the tree. if post == Postcondition::AtomicPrimary {
let len = replace_span.len();
if newborns.len() > 1 && newborns[0].len() == len {
newborns.truncate(1);
} else if newborns.iter().map(Green::len).sum::<usize>() != len {
return None;
}
}
// Do not accept unclosed nodes if the old node wasn't at the right edge
// of the tree.
if !outermost && !terminated { if !outermost && !terminated {
return None; return None;
} }
let insertion = match check_invariants( // If all post- and preconditions match, we are good to go!
&new_children, if validate(
green.children(), green.children(),
child_idx_range.clone(), children_range.clone(),
child_at_start, at_start,
&newborns,
mode, mode,
src_span.clone(), post,
policy,
) { ) {
InvariantResult::Ok => Some(new_children), green.replace_child_range(children_range, newborns);
InvariantResult::UseFirst => Some(vec![std::mem::take(&mut new_children[0])]), Some(replace_span)
InvariantResult::Error => None, } else {
}?; None
}
green.replace_child_range(child_idx_range, insertion);
Some(src_span)
} }
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] /// Validate that a node replacement is allowed by post- and preconditions.
enum InvariantResult { fn validate(
Ok, prev_children: &[Green],
UseFirst, children_range: Range<usize>,
Error, mut at_start: bool,
} newborns: &[Green],
fn check_invariants(
use_children: &[Green],
old_children: &[Green],
child_idx_range: Range<usize>,
child_at_start: bool,
mode: TokenMode, mode: TokenMode,
src_span: Range<usize>, post: Postcondition,
policy: Postcondition, ) -> bool {
) -> InvariantResult { // Atomic primaries must only generate one new child.
let (new_children, ok) = if policy == Postcondition::AtomicPrimary { if post == Postcondition::AtomicPrimary && newborns.len() != 1 {
if use_children.iter().map(Green::len).sum::<usize>() == src_span.len() { return false;
(use_children, InvariantResult::Ok)
} else if use_children.len() == 1 && use_children[0].len() == src_span.len() {
(&use_children[0 .. 1], InvariantResult::UseFirst)
} else {
return InvariantResult::Error;
}
} else {
(use_children, InvariantResult::Ok)
};
let child_mode = old_children[child_idx_range.start].kind().mode().unwrap_or(mode);
// Check if the children / child has the right type.
let same_kind = match policy {
Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode),
_ => false,
};
if same_kind || policy == Postcondition::AtomicPrimary {
if new_children.len() != 1 {
return InvariantResult::Error;
} }
if same_kind { // Same kind in mode `inside` must generate only one child and that child
if old_children[child_idx_range.start].kind() != new_children[0].kind() { // must be of the same kind as previously.
return InvariantResult::Error; if let Postcondition::SameKind(inside) = post {
} let prev_kind = prev_children[children_range.start].kind();
let prev_mode = prev_kind.mode().unwrap_or(mode);
if inside.map_or(true, |m| m == prev_mode)
&& (newborns.len() != 1 || prev_kind != newborns[0].kind())
{
return false;
} }
} }
// Check if the neighbor invariants are still true. // Neighbor invariants are only relevant in markup mode.
if mode == TokenMode::Markup { if mode == TokenMode::Code {
if child_idx_range.start > 0 { return true;
if old_children[child_idx_range.start - 1].kind().pre() }
// Ensure that a possible right-whitespace precondition of a node before the
// replacement range is satisfied.
if children_range.start > 0
&& prev_children[children_range.start - 1].kind().pre()
== Precondition::RightWhitespace == Precondition::RightWhitespace
&& !new_children[0].kind().is_whitespace() && !newborns[0].kind().is_whitespace()
{ {
return InvariantResult::Error; return false;
}
} }
if new_children.last().map(|x| x.kind().pre()) // Ensure that a possible right-whitespace precondition of a new node at the
== Some(Precondition::RightWhitespace) // end of the replacement range is satisfied.
&& old_children.len() > child_idx_range.end if newborns.last().map(|x| x.kind().pre()) == Some(Precondition::RightWhitespace)
&& children_range.end < prev_children.len()
&& !prev_children[children_range.end].kind().is_whitespace()
{ {
if !old_children[child_idx_range.end].kind().is_whitespace() { return false;
return InvariantResult::Error;
}
} }
let mut post_at_start = child_at_start; // Compute the at_start state behind the new children.
for child in new_children { for child in newborns {
post_at_start = child.kind().is_at_start(post_at_start); at_start = child.kind().is_at_start(at_start);
}
for child in &old_children[child_idx_range.end ..] {
if child.kind().is_trivia() {
post_at_start = child.kind().is_at_start(post_at_start);
continue;
} }
// Ensure that a possible at-start or not-at-start precondition of
// a node after the replacement range is satisfied.
for child in &prev_children[children_range.end ..] {
if !child.kind().is_trivia() {
let pre = child.kind().pre(); let pre = child.kind().pre();
if pre == Precondition::AtStart && !post_at_start if (pre == Precondition::AtStart && !at_start)
|| pre == Precondition::NotAtStart && post_at_start || (pre == Precondition::NotAtStart && at_start)
{ {
return InvariantResult::Error; return false;
} }
break; break;
} }
at_start = child.kind().is_at_start(at_start);
} }
ok true
} }
impl NodeKind { impl NodeKind {
/// Return the correct reparsing function given the postconditions for the /// Return the correct reparsing function given the postconditions for the
/// type. /// type.
fn reparsing_function( fn reparsing_func(
&self, &self,
parent_mode: TokenMode, parent_mode: TokenMode,
) -> Option<fn(&str, bool) -> Option<(Vec<Green>, bool)>> { ) -> Option<fn(&str, bool) -> Option<(Vec<Green>, bool)>> {
let policy = self.post();
let mode = self.mode().unwrap_or(parent_mode); let mode = self.mode().unwrap_or(parent_mode);
match self.post() {
match policy {
Postcondition::Unsafe | Postcondition::UnsafeLayer => None, Postcondition::Unsafe | Postcondition::UnsafeLayer => None,
Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic),
Postcondition::AtomicPrimary => Some(parse_atomic_markup), Postcondition::AtomicPrimary => Some(parse_atomic_markup),
@ -393,6 +383,7 @@ impl NodeKind {
// changes the next layer. // changes the next layer.
Self::EnumNumbering(_) => Postcondition::Unsafe, Self::EnumNumbering(_) => Postcondition::Unsafe,
// This can be anything, so we don't make any promises.
Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe,
// These are complex expressions which may screw with their // These are complex expressions which may screw with their
@ -485,17 +476,11 @@ impl NodeKind {
} }
impl Postcondition { impl Postcondition {
pub fn unsafe_interior(&self) -> bool { /// Whether a node with this condition can be reparsed in markup mode.
match self { pub fn safe_in_markup(&self) -> bool {
Self::Unsafe => true,
_ => false,
}
}
pub fn markup_safe(&self) -> bool {
match self { match self {
Self::Safe | Self::UnsafeLayer => true, Self::Safe | Self::UnsafeLayer => true,
Self::SameKind(tm) => tm.map_or(false, |tm| tm != TokenMode::Markup), Self::SameKind(mode) => mode.map_or(false, |m| m != TokenMode::Markup),
_ => false, _ => false,
} }
} }
@ -503,22 +488,19 @@ impl Postcondition {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*;
use crate::parse::parse; use crate::parse::parse;
use crate::source::SourceFile; use crate::source::SourceFile;
use super::*;
#[test] #[test]
#[rustfmt::skip] #[rustfmt::skip]
fn test_incremental_parse() { fn test_incremental_parse() {
#[track_caller] #[track_caller]
fn test(prev: &str, range: Range<usize>, with: &str, incr: Range<usize>) { fn test(prev: &str, range: Range<usize>, with: &str, goal: Range<usize>) {
let mut source = SourceFile::detached(prev); let mut source = SourceFile::detached(prev);
let range = source.edit(range, with); let range = source.edit(range, with);
assert_eq!(range, incr); assert_eq!(range, goal);
assert_eq!(parse(source.src()), *source.root());
let incr_tree = source.root().clone();
assert_eq!(parse(source.src()), incr_tree);
} }
// Test simple replacements. // Test simple replacements.
@ -542,7 +524,7 @@ mod tests {
test("hello {x}", 6 .. 9, "#f()", 5 .. 10); test("hello {x}", 6 .. 9, "#f()", 5 .. 10);
test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12); test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12);
test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22);
test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15);
test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34);
// Test the whitespace invariants. // Test the whitespace invariants.
@ -578,6 +560,7 @@ mod tests {
test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35); test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35);
test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38);
// Test raw tokens.
test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20);
test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18);
} }