From aac3afcba8ee9b3692f784c78626aa0596aaf612 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 21 Feb 2022 13:48:21 +0100 Subject: [PATCH 1/4] Remove `Parbreak` as a `NodeKind` --- src/parse/incremental.rs | 5 ++--- src/parse/mod.rs | 6 +----- src/parse/parser.rs | 7 ------- src/syntax/ast.rs | 2 +- src/syntax/highlight.rs | 1 - src/syntax/mod.rs | 7 +------ 6 files changed, 5 insertions(+), 23 deletions(-) diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 7418dd584..bb5288dc1 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -138,7 +138,7 @@ impl Reparser<'_> { // Similarly to above, the end of the edit must be in the // reconsidered range. However, in markup mode, we need to extend - // the reconsidered range by up to two nodes so that spaceing etc. + // the reconsidered range by up to two nodes so that spacing etc. // results in the same tree. // // Therefore, there are two cases: @@ -400,7 +400,7 @@ fn validate( let mut right_pos = newborn_span.end; for child in &superseded[superseded_range.end ..] { - if child.kind().is_trivia() || child.kind() == &NodeKind::Parbreak { + if child.kind().is_trivia() { right_pos += child.len(); continue; } @@ -451,7 +451,6 @@ impl NodeKind { match self { // These are all replaceable by other tokens. Self::Linebreak - | Self::Parbreak | Self::Text(_) | Self::TextInLine(_) | Self::NonBreakingSpace diff --git a/src/parse/mod.rs b/src/parse/mod.rs index c14c45cf2..bd217c1c1 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -172,11 +172,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Whitespace. 
NodeKind::Space(newlines) => { *at_start |= *newlines > 0; - if *newlines < 2 { - p.eat(); - } else { - p.convert(NodeKind::Parbreak); - } + p.eat(); return; } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index e495dbd00..545f6fd4e 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -176,13 +176,6 @@ impl<'s> Parser<'s> { } } - /// Eat the current token, but change its type. - pub fn convert(&mut self, kind: NodeKind) { - let marker = self.marker(); - self.eat(); - marker.convert(self, kind); - } - /// Whether the current token is of the given type. pub fn at(&self, kind: &NodeKind) -> bool { self.peek() == Some(kind) diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 9d22121b9..7992f9de9 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -60,9 +60,9 @@ impl Markup { /// The markup nodes. pub fn nodes(&self) -> impl Iterator + '_ { self.0.children().filter_map(|node| match node.kind() { + NodeKind::Space(n) if *n > 1 => Some(MarkupNode::Parbreak), NodeKind::Space(_) => Some(MarkupNode::Space), NodeKind::Linebreak => Some(MarkupNode::Linebreak), - NodeKind::Parbreak => Some(MarkupNode::Parbreak), NodeKind::Text(s) | NodeKind::TextInLine(s) => { Some(MarkupNode::Text(s.clone())) } diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index af6fb0dfb..82f1ea0e8 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -198,7 +198,6 @@ impl Category { NodeKind::Underscore => None, NodeKind::Markup(_) => None, NodeKind::Space(_) => None, - NodeKind::Parbreak => None, NodeKind::Text(_) => None, NodeKind::TextInLine(_) => None, NodeKind::List => None, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index c702199e6..2211fcb6f 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -589,8 +589,6 @@ pub enum NodeKind { Space(usize), /// A forced line break: `\`. Linebreak, - /// A paragraph break: Two or more newlines. - Parbreak, /// A consecutive non-markup string. 
Text(EcoString), /// A text node that cannot appear at the beginning of a source line. @@ -760,7 +758,6 @@ impl NodeKind { pub fn is_at_start(&self, prev: bool) -> bool { match self { Self::Space(n) if *n > 0 => true, - Self::Parbreak => true, Self::LineComment | Self::BlockComment => prev, _ => false, } @@ -771,7 +768,6 @@ impl NodeKind { match self { Self::Markup(_) | Self::Linebreak - | Self::Parbreak | Self::Text(_) | Self::TextInLine(_) | Self::NonBreakingSpace @@ -862,9 +858,9 @@ impl NodeKind { Self::Include => "keyword `include`", Self::From => "keyword `from`", Self::Markup(_) => "markup", + Self::Space(n) if *n > 1 => "paragraph break", Self::Space(_) => "space", Self::Linebreak => "forced linebreak", - Self::Parbreak => "paragraph break", Self::Text(_) | Self::TextInLine(_) => "text", Self::NonBreakingSpace => "non-breaking space", Self::EnDash => "en dash", @@ -988,7 +984,6 @@ impl Hash for NodeKind { Self::Markup(c) => c.hash(state), Self::Space(n) => n.hash(state), Self::Linebreak => {} - Self::Parbreak => {} Self::Text(s) => s.hash(state), Self::TextInLine(s) => s.hash(state), Self::NonBreakingSpace => {} From 20ac96f27a2e06b985abc1c95049c32c2b88ef5d Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 21 Feb 2022 22:49:50 +0100 Subject: [PATCH 2/4] New incremental parsing paradigm Also move column offset into scanner. 
This fixes #62 --- src/parse/incremental.rs | 770 +++++++++++---------------------------- src/parse/mod.rs | 148 ++++---- src/parse/parser.rs | 17 +- src/parse/scanner.rs | 25 +- src/parse/tokens.rs | 6 +- 5 files changed, 323 insertions(+), 643 deletions(-) diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index bb5288dc1..7850fe4f3 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -4,57 +4,11 @@ use std::sync::Arc; use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ - is_newline, parse, parse_atomic, parse_atomic_markup, parse_block, parse_comment, - parse_markup, parse_markup_elements, parse_template, Scanner, TokenMode, + is_newline, parse, parse_block, parse_markup_elements, parse_template, TokenMode, }; -/// The conditions that a node has to fulfill in order to be replaced. -/// -/// This can dictate if a node can be replaced at all and if yes, what can take -/// its place. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum SuccessionRule { - /// Changing this node can never have an influence on the other nodes. - Safe, - /// This node has to be replaced with a single token of the same kind. - SameKind(Option), - /// In code mode, this node can only be changed into a single atomic - /// expression, otherwise it is safe. - AtomicPrimary, - /// Changing an unsafe layer node in code mode changes what the parents or - /// the surrounding nodes would be and is therefore disallowed. Change the - /// parents or children instead. If it appears in Markup, however, it is - /// safe to change. - UnsafeLayer, - /// Changing an unsafe node or any of its children is not allowed. Change - /// the parents instead. - Unsafe, -} - -/// The conditions under which a node can be inserted or remain in a tree. -/// -/// These conditions all search the neighbors of the node and see if its -/// existence is plausible with them present. This can be used to encode some -/// context-free language components for incremental parsing. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum NeighbourRule { - /// These nodes depend on being at the start of a line. Reparsing of safe - /// left neighbors has to check this invariant. Additionally, when - /// exchanging the right sibling or inserting such a node the indentation of - /// the first right non-trivia, non-whitespace sibling must not be greater - /// than the current indentation. - AtStart, - /// These nodes depend on not being at the start of a line. Reparsing of - /// safe left neighbors has to check this invariant. Otherwise, this node is - /// safe. - NotAtStart, - /// These nodes could end up somewhere else up the tree if the parse was - /// happening from scratch. The parse result has to be checked for such - /// nodes. They are safe to add if followed up by other nodes. - NotAtEnd, - /// No additional requirements. - None, -} +type ReparseFunc = + fn(&str, &str, usize, isize, &[Green], bool) -> Option<(Vec, bool, usize)>; /// Allows partial refreshs of the [`Green`] node tree. /// @@ -79,180 +33,185 @@ impl<'a> Reparser<'a> { impl Reparser<'_> { /// Find the innermost child that is incremental safe. pub fn reparse(&self, green: &mut Arc) -> Range { - self.reparse_step(Arc::make_mut(green), 0, TokenMode::Markup, true) - .unwrap_or_else(|| { - *green = parse(self.src); - 0 .. self.src.len() - }) + self.reparse_step(Arc::make_mut(green), 0, true).unwrap_or_else(|| { + *green = parse(self.src); + 0 .. self.src.len() + }) } fn reparse_step( &self, green: &mut GreenNode, mut offset: usize, - parent_mode: TokenMode, - mut outermost: bool, + outermost: bool, ) -> Option> { - let mode = green.kind().mode().unwrap_or(parent_mode); let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); let original_count = green.children().len(); - // Save the current indent if this is a markup node. 
- let indent = match green.kind() { - NodeKind::Markup(n) => *n, - _ => 0, - }; - - let mut first = None; + let mut search = SearchState::default(); + let mut ahead_nontrivia = None; + // Whether the first node that should be replaced is at start. let mut at_start = true; + let mut end_outermost = false; // Find the the first child in the range of children to reparse. - for (i, child) in green.children_mut().iter_mut().enumerate() { + for (i, child) in green.children().iter().enumerate() { + let pos = GreenPos { idx: i, offset }; let child_span = offset .. offset + child.len(); - // We look for the start in the element but we only take a position - // at the right border if this is markup or the last element. - // - // This is because in Markup mode, we want to examine all nodes next - // to a replacement but in code we want to atomically replace. At - // least one character on either side of the replacement must be - // reparsed with it to keep the Space / Text node coalescing intact. - if (child_mode == TokenMode::Markup - && child_span.end + 1 >= self.replace_range.start) - || child_span.contains(&self.replace_range.start) - { - first = Some((i, offset)); - break; + match search { + SearchState::NoneFound => { + // The edit is contained within the span of the current element. + if child_span.contains(&self.replace_range.start) + && child_span.end >= self.replace_range.end + { + // In Markup mode, we want to consider a non-whitespace + // neighbor if the edit is on the node boundary. 
+ search = if child_span.end == self.replace_range.end + && child_mode == TokenMode::Markup + { + SearchState::RequireNonWS(pos) + } else { + // println!("found containing block {:?}", green.kind()); + SearchState::Contained(pos) + }; + } else if child_span.contains(&self.replace_range.start) { + search = SearchState::Inside(pos); + } else { + if !child.kind().is_space() + && child.kind() != &NodeKind::Semicolon + { + ahead_nontrivia = Some((pos, at_start)); + } + at_start = child.kind().is_at_start(at_start); + } + } + SearchState::Inside(start) => { + if child_span.end == self.replace_range.end { + search = SearchState::RequireNonWS(start); + } else if child_span.end > self.replace_range.end { + search = SearchState::SpanFound(start, pos); + } + } + SearchState::RequireNonWS(start) => { + if !child.kind().is_trivia() { + search = SearchState::SpanFound(start, pos); + } + } + _ => unreachable!(), } offset += child.len(); - at_start = child.kind().is_at_start(at_start); - } - - let (first_idx, first_start) = first?; - let mut last = None; - - // Find the the last child in the range of children to reparse. - for (i, child) in green.children_mut().iter_mut().enumerate().skip(first_idx) { - let child_span = offset .. offset + child.len(); - - // Similarly to above, the end of the edit must be in the - // reconsidered range. However, in markup mode, we need to extend - // the reconsidered range by up to two nodes so that spacing etc. - // results in the same tree. - // - // Therefore, there are two cases: - // 1. We are at the end of the string or in code mode and the - // current node perfectly matches the end of the replacement - // 2. The end is contained within this node, and, in Markup mode, - // is not the first thing in it. 
- let ignore_overhang = - i + 1 == original_count || child_mode != TokenMode::Markup; - - if (self.replace_range.end == child_span.end && ignore_overhang) - || (child_span.end > self.replace_range.end - && (self.replace_range.end != child_span.start || ignore_overhang)) - { - outermost &= i + 1 == original_count; - last = Some((i, offset + child.len())); - break; - } else if child_mode != TokenMode::Markup - || !child.kind().succession_rule().safe_in_markup() - { + end_outermost = outermost && i + 1 == original_count; + if search.end().is_some() { break; } - - offset += child.len(); } - let (last_idx, last_end) = last?; - let superseded_range = first_idx .. last_idx + 1; - let superseded_span = first_start .. last_end; - let last_kind = green.children()[last_idx].kind().clone(); - - // First, we try if the child itself has another, more specific - // applicable child. - if superseded_range.len() == 1 { - let child = &mut green.children_mut()[superseded_range.start]; + if let SearchState::Contained(pos) = search { + let child = &mut green.children_mut()[pos.idx]; let prev_len = child.len(); - if last_kind.succession_rule() != SuccessionRule::Unsafe - && !matches!(last_kind, NodeKind::Strong | NodeKind::Emph) - { - if let Some(range) = match child { - Green::Node(node) => self.reparse_step( - Arc::make_mut(node), - first_start, - child_mode, - outermost, - ), - Green::Token(_) => None, - } { - let new_len = child.len(); - green.update_parent(new_len, prev_len); - return Some(range); + if let Some(range) = match child { + Green::Node(node) => { + self.reparse_step(Arc::make_mut(node), pos.offset, end_outermost) + } + Green::Token(_) => None, + } { + let new_len = child.len(); + green.update_parent(new_len, prev_len); + return Some(range); + } + + let superseded_span = pos.offset .. 
pos.offset + prev_len; + let func: Option = match child.kind() { + NodeKind::Template => Some(parse_template), + NodeKind::Block => Some(parse_block), + _ => None, + }; + + if let Some(func) = func { + if let Some(result) = self.replace( + green, + func, + pos.idx .. pos.idx + 1, + superseded_span, + at_start, + end_outermost, + outermost, + ) { + return Some(result); } } } - // We only replace multiple children in markup mode. - if superseded_range.len() > 1 && mode == TokenMode::Code { + if !matches!(green.kind(), NodeKind::Markup(_)) { return None; } - // We now have a child that we can replace and a function to do so. - let func = last_kind.reparsing_func(child_mode, indent)?; - let succession = last_kind.succession_rule(); + let (mut start, end) = search.end()?; + if let Some((ahead, ahead_at_start)) = ahead_nontrivia { + let ahead_kind = green.children()[ahead.idx].kind(); - let mut markup_min_column = 0; - - // If this is a markup node, we want to save its indent instead to pass - // the right indent argument. - if superseded_range.len() == 1 { - let child = &mut green.children_mut()[superseded_range.start]; - if let NodeKind::Markup(n) = child.kind() { - markup_min_column = *n; + if start.offset == self.replace_range.start + || ahead_kind.only_at_start() + || ahead_kind == &NodeKind::LineComment + { + start = ahead; + at_start = ahead_at_start; } } - // The span of the to-be-reparsed children in the new source. + let superseded_span = + start.offset .. end.offset + green.children()[end.idx].len(); + self.replace( + green, + parse_markup_elements, + start.idx .. 
end.idx + 1, + superseded_span, + at_start, + end_outermost, + outermost, + ) + } + + fn replace( + &self, + green: &mut GreenNode, + func: ReparseFunc, + superseded_idx: Range, + superseded_span: Range, + at_start: bool, + outermost: bool, + parent_outermost: bool, + ) -> Option> { + let differential: isize = + self.replace_len as isize - self.replace_range.len() as isize; let newborn_span = superseded_span.start .. - superseded_span.end + self.replace_len - self.replace_range.len(); - - // For atomic primaries we need to pass in the whole remaining string to - // check whether the parser would eat more stuff illicitly. - let reparse_span = if succession == SuccessionRule::AtomicPrimary { - newborn_span.start .. self.src.len() - } else { - newborn_span.clone() - }; + (superseded_span.end as isize + differential) as usize; + let superseded_start = superseded_idx.start; let mut prefix = ""; - for (i, c) in self.src[.. reparse_span.start].char_indices().rev() { + for (i, c) in self.src[.. newborn_span.start].char_indices().rev() { if is_newline(c) { break; } - prefix = &self.src[i .. reparse_span.start]; + prefix = &self.src[i .. newborn_span.start]; } - // Do the reparsing! - let (mut newborns, terminated) = func( + // // println!("reparsing..."); + + let (newborns, terminated, amount) = func( &prefix, - &self.src[reparse_span.clone()], + &self.src[newborn_span.start ..], + newborn_span.len(), + differential, + &green.children()[superseded_start ..], at_start, - markup_min_column, )?; - // Make sure that atomic primaries ate only what they were supposed to. - if succession == SuccessionRule::AtomicPrimary { - let len = newborn_span.len(); - if newborns.len() > 1 && newborns[0].len() == len { - newborns.truncate(1); - } else if newborns.iter().map(Green::len).sum::() != len { - return None; - } - } + // // println!("Reparse success"); // Do not accept unclosed nodes if the old node wasn't at the right edge // of the tree. 
@@ -260,362 +219,64 @@ impl Reparser<'_> { return None; } - // If all post- and preconditions match, we are good to go! - if validate( - green.children(), - superseded_range.clone(), - at_start, - &newborns, - mode, - succession, - newborn_span.clone(), - self.src, - ) { - green.replace_children(superseded_range, newborns); - Some(newborn_span) - } else { - None + if !parent_outermost && green.children()[superseded_start ..].len() == amount { + return None; } + + green.replace_children(superseded_start .. superseded_start + amount, newborns); + Some(newborn_span) } } -/// Validate that a node replacement is allowed by post- and preconditions. -fn validate( - superseded: &[Green], - superseded_range: Range, - mut at_start: bool, - newborns: &[Green], - mode: TokenMode, - post: SuccessionRule, - newborn_span: Range, - src: &str, -) -> bool { - // Atomic primaries must only generate one new child. - if post == SuccessionRule::AtomicPrimary && newborns.len() != 1 { - return false; - } - - // Same kind in mode `inside` must generate only one child and that child - // must be of the same kind as previously. - if let SuccessionRule::SameKind(inside) = post { - let superseded_kind = superseded[superseded_range.start].kind(); - let superseded_mode = superseded_kind.mode().unwrap_or(mode); - if inside.map_or(true, |m| m == superseded_mode) - && (newborns.len() != 1 || superseded_kind != newborns[0].kind()) - { - return false; - } - } - - // Neighbor invariants are only relevant in markup mode. - if mode == TokenMode::Code { - return true; - } - - // Check if there are any `AtStart` predecessors which require a certain - // indentation. - let s = Scanner::new(src); - let mut prev_pos = newborn_span.start; - for child in (&superseded[.. 
superseded_range.start]).iter().rev() { - prev_pos -= child.len(); - if !child.kind().is_trivia() { - if child.kind().neighbour_rule() == NeighbourRule::AtStart { - let left_col = s.column(prev_pos); - - // Search for the first non-trivia newborn. - let mut new_pos = newborn_span.start; - let mut child_col = None; - for child in newborns { - if !child.kind().is_trivia() { - child_col = Some(s.column(new_pos)); - break; - } - - new_pos += child.len(); - } - - if let Some(child_col) = child_col { - if child_col > left_col { - return false; - } - } - } - - break; - } - } - - // Compute the at_start state behind the new children. - for child in newborns { - at_start = child.kind().is_at_start(at_start); - } - - // Ensure that a possible at-start or not-at-start precondition of - // a node after the replacement range is satisfied. - for child in &superseded[superseded_range.end ..] { - let neighbour_rule = child.kind().neighbour_rule(); - if (neighbour_rule == NeighbourRule::AtStart && !at_start) - || (neighbour_rule == NeighbourRule::NotAtStart && at_start) - { - return false; - } - - if !child.kind().is_trivia() { - break; - } - - at_start = child.kind().is_at_start(at_start); - } - - // Verify that the last of the newborns is not `NotAtEnd`. - if newborns.last().map_or(false, |child| { - child.kind().neighbour_rule() == NeighbourRule::NotAtEnd - }) { - return false; - } - - // We have to check whether the last non-trivia newborn is `AtStart` and - // verify the indent of its right neighbors in order to make sure its - // indentation requirements are fulfilled. 
- let mut child_pos = newborn_span.end; - for child in newborns.iter().rev() { - child_pos -= child.len(); - - if child.kind().is_trivia() { - continue; - } - - if child.kind().neighbour_rule() == NeighbourRule::AtStart { - let child_col = s.column(child_pos) - + match child.kind() { - NodeKind::Heading => child - .children() - .iter() - .filter(|n| n.kind() == &NodeKind::Eq) - .count(), - NodeKind::List => 1, - NodeKind::Enum => child.children().first().unwrap().len(), - _ => 0, - }; - - let mut right_pos = newborn_span.end; - for child in &superseded[superseded_range.end ..] { - if child.kind().is_trivia() { - right_pos += child.len(); - continue; - } - - if s.column(right_pos) > child_col { - return false; - } - break; - } - } - break; - } - - true +#[derive(Clone, Copy, Debug, PartialEq)] +struct GreenPos { + idx: usize, + offset: usize, } -impl NodeKind { - /// Return the correct reparsing function given the postconditions for the - /// type. - fn reparsing_func( - &self, - parent_mode: TokenMode, - indent: usize, - ) -> Option Option<(Vec, bool)>> { - let mode = self.mode().unwrap_or(parent_mode); - match self.succession_rule() { - SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None, - SuccessionRule::AtomicPrimary => match mode { - TokenMode::Code => Some(parse_atomic), - TokenMode::Markup => Some(parse_atomic_markup), - }, - SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self { - NodeKind::Markup(_) => Some(parse_markup), - NodeKind::Template => Some(parse_template), - NodeKind::Block => Some(parse_block), - NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment), - _ => None, - }, - _ => match mode { - TokenMode::Markup if indent == 0 => Some(parse_markup_elements), - _ => return None, - }, - } +/// Encodes the state machine of the search for the node which is pending for +/// replacement. +#[derive(Clone, Copy, Debug, PartialEq)] +enum SearchState { + /// Neither an end nor a start have been found as of now. 
+ /// The last non-whitespace child is continually saved. + NoneFound, + /// The search has concluded by finding a node that fully contains the + /// modifications. + Contained(GreenPos), + /// The search has found the start of the modified nodes. + Inside(GreenPos), + /// The search has found the end of the modified nodes but the change + /// touched its boundaries so another non-trivia node is needed. + RequireNonWS(GreenPos), + /// The search has concluded by finding a start and an end index for nodes + /// with a pending reparse. + SpanFound(GreenPos, GreenPos), +} + +impl Default for SearchState { + fn default() -> Self { + Self::NoneFound } +} - /// Whether it is safe to do incremental parsing on this node. - pub fn succession_rule(&self) -> SuccessionRule { +impl SearchState { + fn end(&self) -> Option<(GreenPos, GreenPos)> { match self { - // These are all replaceable by other tokens. - Self::Linebreak - | Self::Text(_) - | Self::TextInLine(_) - | Self::NonBreakingSpace - | Self::EnDash - | Self::EmDash - | Self::Escape(_) - | Self::Strong - | Self::Emph - | Self::Heading - | Self::Enum - | Self::List - | Self::Math(_) => SuccessionRule::Safe, - - // Only markup is expected at the points where it does occur. The - // indentation must be preserved as well, also for the children. - Self::Markup(_) => SuccessionRule::SameKind(None), - - // These can appear everywhere and must not change to other stuff - // because that could change the outer expression. - Self::LineComment | Self::BlockComment => SuccessionRule::SameKind(None), - - // These can appear as bodies and would trigger an error if they - // became something else. - Self::Template => SuccessionRule::SameKind(None), - Self::Block => SuccessionRule::SameKind(Some(TokenMode::Code)), - - // Whitespace in code mode has to remain whitespace or else the type - // of things would change. 
- Self::Space(_) => SuccessionRule::SameKind(Some(TokenMode::Code)), - - // These are expressions that can be replaced by other expressions. - Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group - | Self::None - | Self::Auto => SuccessionRule::AtomicPrimary, - - // More complex, but still an expression. - Self::ForExpr - | Self::WhileExpr - | Self::IfExpr - | Self::LetExpr - | Self::SetExpr - | Self::ShowExpr - | Self::WrapExpr - | Self::ImportExpr - | Self::IncludeExpr - | Self::BreakExpr - | Self::ContinueExpr - | Self::ReturnExpr => SuccessionRule::AtomicPrimary, - - // These are complex expressions which may screw with their - // environments. - Self::Call - | Self::Unary - | Self::Binary - | Self::CallArgs - | Self::Named - | Self::Spread => SuccessionRule::UnsafeLayer, - - // The closure is a bit magic with the let expression, and also it - // is not atomic. - Self::Closure | Self::ClosureParams => SuccessionRule::UnsafeLayer, - - // Missing these creates errors for the parents. - Self::WithExpr | Self::ForPattern | Self::ImportItems => { - SuccessionRule::UnsafeLayer - } - - // Replacing parenthesis changes if the expression is balanced and - // is therefore not safe. - Self::LeftBracket - | Self::RightBracket - | Self::LeftBrace - | Self::RightBrace - | Self::LeftParen - | Self::RightParen => SuccessionRule::Unsafe, - - // These work similar to parentheses. - Self::Star | Self::Underscore => SuccessionRule::Unsafe, - - // Replacing an operator can change whether the parent is an - // operation which makes it unsafe. 
- Self::Comma - | Self::Semicolon - | Self::Colon - | Self::Plus - | Self::Minus - | Self::Slash - | Self::Eq - | Self::EqEq - | Self::ExclEq - | Self::Lt - | Self::LtEq - | Self::Gt - | Self::GtEq - | Self::PlusEq - | Self::HyphEq - | Self::StarEq - | Self::SlashEq - | Self::Not - | Self::And - | Self::Or - | Self::With - | Self::Dots - | Self::Arrow => SuccessionRule::Unsafe, - - // These keywords change what kind of expression the parent is and - // how far the expression would go. - Self::Let - | Self::Set - | Self::Show - | Self::Wrap - | Self::If - | Self::Else - | Self::For - | Self::In - | Self::As - | Self::While - | Self::Break - | Self::Continue - | Self::Return - | Self::Import - | Self::Include - | Self::From => SuccessionRule::Unsafe, - - // This element always has to remain in the same column so better - // reparse the whole parent. - Self::Raw(_) => SuccessionRule::Unsafe, - - // Changing the heading level, enum numbering, or list bullet - // changes the next layer. - Self::EnumNumbering(_) => SuccessionRule::Unsafe, - - // This can be anything, so we don't make any promises. - Self::Error(_, _) | Self::Unknown(_) => SuccessionRule::Unsafe, - } - } - - /// Whether it is safe to insert this node next to some nodes or vice versa. - pub fn neighbour_rule(&self) -> NeighbourRule { - match self { - Self::Heading | Self::Enum | Self::List => NeighbourRule::AtStart, - Self::TextInLine(_) => NeighbourRule::NotAtStart, - Self::Error(_, _) => NeighbourRule::NotAtEnd, - _ => NeighbourRule::None, + Self::NoneFound => None, + Self::Contained(s) => Some((*s, *s)), + Self::Inside(_) => None, + Self::RequireNonWS(_) => None, + Self::SpanFound(s, e) => Some((*s, *e)), } } } -impl SuccessionRule { - /// Whether a node with this condition can be reparsed in markup mode. - pub fn safe_in_markup(&self) -> bool { +impl NodeKind { + /// Whether this node has to appear at the start of a line. 
+ pub fn only_at_start(&self) -> bool { match self { - Self::Safe | Self::UnsafeLayer => true, - Self::SameKind(mode) => mode.map_or(false, |m| m != TokenMode::Markup), + Self::Heading | Self::Enum | Self::List => true, _ => false, } } @@ -639,59 +300,62 @@ mod tests { #[test] fn test_parse_incremental_simple_replacements() { - test("hello world", 7 .. 12, "walkers", 5 .. 14); + test("hello world", 7 .. 12, "walkers", 0 .. 14); test("some content", 0..12, "", 0..0); test("", 0..0, "do it", 0..5); test("a d e", 1 .. 3, " b c d", 0 .. 9); test("a #f() e", 1 .. 6, " b c d", 0 .. 9); - test("a\nb\nc\nd\ne\n", 5..5, "c", 3..8); - test("a\n\nb\n\nc\n\nd\n\ne\n", 7..7, "c", 4..11); + test("a\nb\nc\nd\ne\n", 5 .. 5, "c", 4 .. 7); + test("a\n\nb\n\nc\n\nd\n\ne\n", 7 .. 7, "c", 6 .. 10); test("a\nb\nc *hel a b lo* d\nd\ne", 13..13, "c ", 6..20); - test("{a}", 1 .. 2, "b", 1 .. 2); - test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 9); - test("\n= A heading", 3 .. 3, "n evocative", 1 .. 23); - test("for~your~thing", 9 .. 9, "a", 4 .. 15); + test("~~ {a} ~~", 4 .. 5, "b", 3 .. 6); + test("{(0, 1, 2)}", 5 .. 6, "11pt", 0..14); + test("\n= A heading", 3 .. 3, "n evocative", 3 .. 23); + test("for~your~thing", 9 .. 9, "a", 8 .. 15); test("a your thing a", 6 .. 7, "a", 0 .. 14); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); - test("#call() abc", 7 .. 7, "[]", 0 .. 13); - test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 4 .. 34); - test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 0 .. 32); - test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); - test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); - test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39); - test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 
9); - test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32); - test("hello~~{x}", 7 .. 10, "#f()", 5 .. 11); + test("#call() abc", 7 .. 7, "[]", 0 .. 10); + // Investigate + test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35); + test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 3..19); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 0..99); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33..42); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40); + test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 0 .. 33); + // Investigate + test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11); test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 5 .. 25); - test("understanding `code` is complicated", 15 .. 15, "C ", 0 .. 37); - test("{ let x = g() }", 10 .. 12, "f(54", 2 .. 15); + test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); + test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); - test(r#"a ```typst hello```"#, 16 .. 17, "", 0 .. 18); + test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); test("#for", 4 .. 4, "//", 0 .. 6); } #[test] fn test_parse_incremental_whitespace_invariants() { - test("hello \\ world", 7 .. 8, "a ", 5 .. 14); - test("hello \\ world", 7 .. 8, " a", 5 .. 14); - test("x = y", 1 .. 1, " + y", 0 .. 7); - test("x = y", 1 .. 1, " + y\n", 0 .. 10); - test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 22); - test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 
20); - test("#let x = (1, 2 + ;~ Five\r\n\r", 20..23, "2.", 18..23); + test("hello \\ world", 7 .. 8, "a ", 6 .. 14); + test("hello \\ world", 7 .. 8, " a", 6 .. 14); + test("x = y", 1 .. 1, " + y", 0 .. 6); + test("x = y", 1 .. 1, " + y\n", 0 .. 7); + test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); + test("abc\n= a heading\njoke", 3 .. 4, "\nnot ", 0 .. 19); + test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 18 .. 23); test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); - test("hey #myfriend", 4 .. 4, "\\", 0 .. 6); + test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); + test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0..11) } #[test] fn test_parse_incremental_type_invariants() { - test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22); - test("a #let x = 1 {5}", 3 .. 6, "if", 0 .. 15); - test("a {let x = 1 {5}} b", 3 .. 6, "if", 1 .. 16); - test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 17); + test("a #for x in array {x}", 18 .. 21, "[#x]", 0 .. 22); + test("a #let x = 1 {5}", 3 .. 6, "if", 2 .. 11); + test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16); + test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 13); test("{let x = 1 {5}}", 4 .. 4, " if", 0 .. 18); - test("a // b c #f()", 3 .. 4, "", 0 .. 12); + test("a // b c #f()", 3 .. 4, "", 2 .. 12); test("{\nf()\n//g(a)\n}", 6 .. 8, "", 0 .. 12); test("a{\nf()\n//g(a)\n}b", 7 .. 9, "", 1 .. 13); test("a #while x {\n g(x) \n} b", 11 .. 11, "//", 0 .. 26); @@ -707,12 +371,12 @@ mod tests { test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); test("a b c", 1 .. 1, " /* letters */", 0 .. 19); test("a b c", 1 .. 1, " /* letters", 0 .. 16); - test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 0 .. 41); test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); - test("~~~~", 2 .. 2, "[]", 0 .. 6); - test("a[]b", 2 .. 2, "{", 0 .. 5); + test("~~~~", 2 .. 2, "[]", 1 .. 5); + test("a[]b", 2 .. 2, "{", 1 .. 
4); test("[hello]", 2 .. 3, "]", 0 .. 7); - test("{a}", 1 .. 2, "b", 1 .. 2); + test("{a}", 1 .. 2, "b", 0 .. 3); test("{ a; b; c }", 5 .. 6, "[}]", 0 .. 13); test("#a()\n~", 3..4, "{}", 0..7); test("[]\n~", 1..2, "#if i==0 {true}", 0..18); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index bd217c1c1..7c0a09327 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -28,103 +28,124 @@ pub fn parse(src: &str) -> Arc { } } -/// Parse some markup. Returns `Some` if all of the input was consumed. -pub fn parse_markup( - prefix: &str, - src: &str, - _: bool, - min_column: usize, -) -> Option<(Vec, bool)> { - let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); - if min_column == 0 { - markup(&mut p, true); - } else { - markup_indented(&mut p, min_column); - } - p.consume() -} - /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. pub fn parse_markup_elements( prefix: &str, src: &str, + end_pos: usize, + differential: isize, + reference: &[Green], mut at_start: bool, - _: usize, -) -> Option<(Vec, bool)> { +) -> Option<(Vec, bool, usize)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); + + let mut node: Option<&Green> = None; + let mut iter = reference.iter(); + let mut offset = 0; + let mut replaced = 0; + let mut stopped = false; + while !p.eof() { markup_node(&mut p, &mut at_start); + + if p.prev_end() >= end_pos { + let recent = p.children.last().unwrap(); + let recent_start = p.prev_end() - recent.len(); + + while offset <= recent_start { + if let Some(node) = node { + // The nodes are equal, at the same position and have the + // same content. The parsing trees have converged again, so + // the reparse may stop here. 
+ if (offset as isize + differential) as usize == recent_start + && node == recent + { + replaced -= 1; + stopped = true; + break; + } + } + + let result = iter.next(); + if let Some(node) = node { + offset += node.len(); + } + node = result; + if node.is_none() { + break; + } else { + replaced += 1; + } + } + + if stopped { + break; + } + } } - p.consume() -} -/// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic( - prefix: &str, - src: &str, - _: bool, - _: usize, -) -> Option<(Vec, bool)> { - let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); - primary(&mut p, true).ok()?; - p.consume_open_ended() -} + if p.eof() && !stopped { + replaced = reference.len(); + } -/// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic_markup( - prefix: &str, - src: &str, - _: bool, - _: usize, -) -> Option<(Vec, bool)> { - let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); - markup_expr(&mut p); - p.consume_open_ended() + let (mut res, terminated) = p.consume_open_ended()?; + if stopped { + res.pop().unwrap(); + } + + Some((res, terminated, replaced)) } /// Parse a template literal. Returns `Some` if all of the input was consumed. pub fn parse_template( prefix: &str, src: &str, + end_pos: usize, + _: isize, + reference: &[Green], _: bool, - _: usize, -) -> Option<(Vec, bool)> { +) -> Option<(Vec, bool, usize)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBracket) { return None; } template(&mut p); - p.consume() + + let (mut green, terminated) = p.consume_open_ended()?; + let first = green.remove(0); + if first.len() != end_pos { + return None; + } + + Some((vec![first], terminated, 1)) } /// Parse a code block. Returns `Some` if all of the input was consumed. 
pub fn parse_block( prefix: &str, src: &str, + end_pos: usize, + _: isize, + reference: &[Green], _: bool, - _: usize, -) -> Option<(Vec, bool)> { +) -> Option<(Vec, bool, usize)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBrace) { return None; } block(&mut p); - p.consume() -} -/// Parse a comment. Returns `Some` if all of the input was consumed. -pub fn parse_comment( - prefix: &str, - src: &str, - _: bool, - _: usize, -) -> Option<(Vec, bool)> { - let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); - comment(&mut p).ok()?; - p.consume() + let (mut green, terminated) = p.consume_open_ended()?; + let first = green.remove(0); + + if first.len() != end_pos { + return None; + } + + Some((vec![first], terminated, 1)) } /// Parse markup. @@ -916,17 +937,6 @@ fn body(p: &mut Parser) -> ParseResult { Ok(()) } -/// Parse a comment. -fn comment(p: &mut Parser) -> ParseResult { - match p.peek() { - Some(NodeKind::LineComment | NodeKind::BlockComment) => { - p.eat(); - Ok(()) - } - _ => Err(ParseError), - } -} - #[cfg(test)] mod tests { use std::fmt::Debug; diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 545f6fd4e..d9cc0e31b 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -7,8 +7,6 @@ use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; /// A convenient token-based parser. pub struct Parser<'s> { - /// Offsets the indentation on the first line of the source. - column_offset: usize, /// An iterator over the source tokens. tokens: Tokens<'s>, /// Whether we are at the end of the file or of a group. @@ -22,7 +20,7 @@ pub struct Parser<'s> { /// The stack of open groups. groups: Vec, /// The children of the currently built node. - children: Vec, + pub children: Vec, /// Whether the last group was not correctly terminated. unterminated_group: bool, /// Whether a group terminator was found, that did not close a group. 
@@ -32,10 +30,13 @@ pub struct Parser<'s> { impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str, mode: TokenMode) -> Self { - let mut tokens = Tokens::new(src, mode); + Self::with_offset(src, mode, 0) + } + + fn with_offset(src: &'s str, mode: TokenMode, offset: usize) -> Self { + let mut tokens = Tokens::new(src, mode, offset); let current = tokens.next(); Self { - column_offset: 0, tokens, eof: current.is_none(), current, @@ -52,9 +53,7 @@ impl<'s> Parser<'s> { /// that does not need to be parsed but taken into account for column /// calculation. pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { - let mut p = Self::new(src, mode); - p.column_offset = Scanner::new(prefix).column(prefix.len()); - p + Self::with_offset(src, mode, Scanner::new(prefix).column(prefix.len())) } /// End the parsing process and return the last child. @@ -226,7 +225,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.tokens.scanner().column_offset(index, self.column_offset) + self.tokens.scanner().column(index) } /// Continue parsing in a group. diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 685503c3d..15060c7b8 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -10,13 +10,21 @@ pub struct Scanner<'s> { /// The index at which the peekable character starts. Must be in bounds and /// at a codepoint boundary to guarantee safety. index: usize, + /// Offsets the indentation on the first line of the source. + column_offset: usize, } impl<'s> Scanner<'s> { /// Create a new char scanner. #[inline] pub fn new(src: &'s str) -> Self { - Self { src, index: 0 } + Self { src, index: 0, column_offset: 0 } + } + + /// Create a new char scanner with an offset for the first line indent. 
+ #[inline] + pub fn with_indent_offset(src: &'s str, column_offset: usize) -> Self { + Self { src, index: 0, column_offset } } /// Whether the end of the string is reached. @@ -173,13 +181,6 @@ impl<'s> Scanner<'s> { /// The column index of a given index in the source string. #[inline] pub fn column(&self, index: usize) -> usize { - self.column_offset(index, 0) - } - - /// The column index of a given index in the source string when an offset is - /// applied to the first line of the string. - #[inline] - pub fn column_offset(&self, index: usize, offset: usize) -> usize { let mut apply_offset = false; let res = self.src[.. index] .char_indices() @@ -192,7 +193,13 @@ impl<'s> Scanner<'s> { }) .count(); - if apply_offset { res + offset } else { res } + // The loop is never executed if the slice is empty, but we are of + // course still at the start of the first line. + if self.src[.. index].len() == 0 { + apply_offset = true; + } + + if apply_offset { res + self.column_offset } else { res } } } diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index e88b49f90..4a13694a5 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -28,9 +28,9 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(src: &'s str, mode: TokenMode) -> Self { + pub fn new(src: &'s str, mode: TokenMode, offset: usize) -> Self { Self { - s: Scanner::new(src), + s: Scanner::with_indent_offset(src, offset), mode, terminated: true, } @@ -689,7 +689,7 @@ mod tests { }}; (@$mode:ident: $src:expr => $($token:expr),*) => {{ let src = $src; - let found = Tokens::new(&src, $mode).collect::>(); + let found = Tokens::new(&src, $mode, 0).collect::>(); let expected = vec![$($token.clone()),*]; check(&src, found, expected); }}; From 4c8634c600ad0bba03ccdf884b32f234ecbff30c Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Wed, 23 Feb 2022 13:57:15 +0100 Subject: [PATCH 3/4] Early stop for falling indents. 
Fix code edits and at_start handling. Also fix dedenting for multi-byte chars in raw blocks. --- benches/oneshot.rs | 2 +- src/parse/incremental.rs | 65 +++++++++++++++++++++++----------------- src/parse/mod.rs | 24 +++++++++++---- src/parse/parser.rs | 7 ----- src/parse/resolve.rs | 7 ++++- src/syntax/mod.rs | 2 +- 6 files changed, 63 insertions(+), 44 deletions(-) diff --git a/benches/oneshot.rs b/benches/oneshot.rs index 5dbf993fe..1d70e4664 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -55,7 +55,7 @@ fn bench_scan(iai: &mut Iai) { } fn bench_tokenize(iai: &mut Iai) { - iai.run(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup)).count()); + iai.run(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup), 0).count()); } fn bench_parse(iai: &mut Iai) { diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 7850fe4f3..d3edff6e2 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -7,8 +7,15 @@ use super::{ is_newline, parse, parse_block, parse_markup_elements, parse_template, TokenMode, }; -type ReparseFunc = - fn(&str, &str, usize, isize, &[Green], bool) -> Option<(Vec, bool, usize)>; +type ReparseFunc = fn( + &str, + &str, + usize, + isize, + &[Green], + bool, + usize, +) -> Option<(Vec, bool, usize)>; /// Allows partial refreshs of the [`Green`] node tree. /// @@ -48,11 +55,17 @@ impl Reparser<'_> { let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); let original_count = green.children().len(); + // Save the current indent if this is a markup node. + let indent = match green.kind() { + NodeKind::Markup(n) => *n, + _ => 0, + }; + let mut search = SearchState::default(); let mut ahead_nontrivia = None; // Whether the first node that should be replaced is at start. let mut at_start = true; - let mut end_outermost = false; + let mut child_outermost = false; // Find the the first child in the range of children to reparse. 
for (i, child) in green.children().iter().enumerate() { @@ -72,14 +85,16 @@ impl Reparser<'_> { { SearchState::RequireNonWS(pos) } else { - // println!("found containing block {:?}", green.kind()); SearchState::Contained(pos) }; } else if child_span.contains(&self.replace_range.start) { search = SearchState::Inside(pos); } else { - if !child.kind().is_space() - && child.kind() != &NodeKind::Semicolon + if (self.replace_range.len() != 0 + || self.replace_range.end != child_span.end + || ahead_nontrivia.is_none()) + && (!child.kind().is_space() + && child.kind() != &NodeKind::Semicolon) { ahead_nontrivia = Some((pos, at_start)); } @@ -102,7 +117,7 @@ impl Reparser<'_> { } offset += child.len(); - end_outermost = outermost && i + 1 == original_count; + child_outermost = outermost && i + 1 == original_count; if search.end().is_some() { break; } @@ -114,7 +129,7 @@ impl Reparser<'_> { if let Some(range) = match child { Green::Node(node) => { - self.reparse_step(Arc::make_mut(node), pos.offset, end_outermost) + self.reparse_step(Arc::make_mut(node), pos.offset, child_outermost) } Green::Token(_) => None, } { @@ -137,7 +152,7 @@ impl Reparser<'_> { pos.idx .. pos.idx + 1, superseded_span, at_start, - end_outermost, + indent, outermost, ) { return Some(result); @@ -155,7 +170,7 @@ impl Reparser<'_> { if start.offset == self.replace_range.start || ahead_kind.only_at_start() - || ahead_kind == &NodeKind::LineComment + || ahead_kind.mode() != Some(TokenMode::Markup) { start = ahead; at_start = ahead_at_start; @@ -170,7 +185,7 @@ impl Reparser<'_> { start.idx .. end.idx + 1, superseded_span, at_start, - end_outermost, + indent, outermost, ) } @@ -182,8 +197,8 @@ impl Reparser<'_> { superseded_idx: Range, superseded_span: Range, at_start: bool, + indent: usize, outermost: bool, - parent_outermost: bool, ) -> Option> { let differential: isize = self.replace_len as isize - self.replace_range.len() as isize; @@ -200,8 +215,6 @@ impl Reparser<'_> { prefix = &self.src[i .. 
newborn_span.start]; } - // // println!("reparsing..."); - let (newborns, terminated, amount) = func( &prefix, &self.src[newborn_span.start ..], @@ -209,20 +222,15 @@ impl Reparser<'_> { differential, &green.children()[superseded_start ..], at_start, + indent, )?; - // // println!("Reparse success"); - // Do not accept unclosed nodes if the old node wasn't at the right edge // of the tree. if !outermost && !terminated { return None; } - if !parent_outermost && green.children()[superseded_start ..].len() == amount { - return None; - } - green.replace_children(superseded_start .. superseded_start + amount, newborns); Some(newborn_span) } @@ -305,17 +313,16 @@ mod tests { test("", 0..0, "do it", 0..5); test("a d e", 1 .. 3, " b c d", 0 .. 9); test("a #f() e", 1 .. 6, " b c d", 0 .. 9); - test("a\nb\nc\nd\ne\n", 5 .. 5, "c", 4 .. 7); - test("a\n\nb\n\nc\n\nd\n\ne\n", 7 .. 7, "c", 6 .. 10); + test("a\nb\nc\nd\ne\n", 5 .. 5, "c", 2 .. 7); + test("a\n\nb\n\nc\n\nd\n\ne\n", 7 .. 7, "c", 3 .. 10); test("a\nb\nc *hel a b lo* d\nd\ne", 13..13, "c ", 6..20); test("~~ {a} ~~", 4 .. 5, "b", 3 .. 6); test("{(0, 1, 2)}", 5 .. 6, "11pt", 0..14); test("\n= A heading", 3 .. 3, "n evocative", 3 .. 23); - test("for~your~thing", 9 .. 9, "a", 8 .. 15); + test("for~your~thing", 9 .. 9, "a", 4 .. 15); test("a your thing a", 6 .. 7, "a", 0 .. 14); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); test("#call() abc", 7 .. 7, "[]", 0 .. 10); - // Investigate test("hi[\n- item\n- item 2\n - item 3]", 11 .. 11, " ", 2 .. 35); test("hi\n- item\nno item\n - item 3", 10 .. 10, "- ", 3..19); test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 0..99); @@ -323,15 +330,16 @@ mod tests { test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40); test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33); test("{let i=1; for x in range(5) {i}}", 13 .. 
14, " ", 0 .. 33); - // Investigate test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11); test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 5 .. 25); test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); - test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); + test(r#"a ```typst hello``` b"#, 16 .. 17, "", 2 .. 18); test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); test("#for", 4 .. 4, "//", 0 .. 6); + test("a\n#let \nb", 7 .. 7, "i", 2 .. 9); + test("a\n#for i \nb", 9 .. 9, "in", 2 .. 12); } #[test] @@ -345,7 +353,8 @@ mod tests { test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 18 .. 23); test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); - test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0..11) + test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0..11); + test("= foo\n bar\n baz", 6..8, "", 0..15); } #[test] @@ -373,7 +382,7 @@ mod tests { test("a b c", 1 .. 1, " /* letters", 0 .. 16); test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 0 .. 41); test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); - test("~~~~", 2 .. 2, "[]", 1 .. 5); + test("~~~~", 2 .. 2, "[]", 0 .. 5); test("a[]b", 2 .. 2, "{", 1 .. 4); test("[hello]", 2 .. 3, "]", 0 .. 7); test("{a}", 1 .. 2, "b", 0 .. 
3); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 7c0a09327..c08c5d6f1 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -37,6 +37,7 @@ pub fn parse_markup_elements( differential: isize, reference: &[Green], mut at_start: bool, + column: usize, ) -> Option<(Vec, bool, usize)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); @@ -47,6 +48,12 @@ pub fn parse_markup_elements( let mut stopped = false; while !p.eof() { + if let Some(NodeKind::Space(1 ..)) = p.peek() { + if p.column(p.current_end()) < column { + return None; + } + } + markup_node(&mut p, &mut at_start); if p.prev_end() >= end_pos { @@ -85,11 +92,15 @@ pub fn parse_markup_elements( } } + if p.prev_end() < end_pos { + return None; + } + if p.eof() && !stopped { replaced = reference.len(); } - let (mut res, terminated) = p.consume_open_ended()?; + let (mut res, terminated) = p.consume()?; if stopped { res.pop().unwrap(); } @@ -103,8 +114,9 @@ pub fn parse_template( src: &str, end_pos: usize, _: isize, - reference: &[Green], + _: &[Green], _: bool, + _: usize, ) -> Option<(Vec, bool, usize)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBracket) { @@ -113,7 +125,7 @@ pub fn parse_template( template(&mut p); - let (mut green, terminated) = p.consume_open_ended()?; + let (mut green, terminated) = p.consume()?; let first = green.remove(0); if first.len() != end_pos { return None; @@ -128,8 +140,9 @@ pub fn parse_block( src: &str, end_pos: usize, _: isize, - reference: &[Green], + _: &[Green], _: bool, + _: usize, ) -> Option<(Vec, bool, usize)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBrace) { @@ -138,9 +151,8 @@ pub fn parse_block( block(&mut p); - let (mut green, terminated) = p.consume_open_ended()?; + let (mut green, terminated) = p.consume()?; let first = green.remove(0); - if first.len() != end_pos { return None; } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 
d9cc0e31b..8588e5862 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -64,13 +64,6 @@ impl<'s> Parser<'s> { /// End the parsing process and return multiple children and whether the /// last token was terminated. pub fn consume(self) -> Option<(Vec, bool)> { - (self.eof() && self.terminated()) - .then(|| (self.children, self.tokens.terminated())) - } - - /// End the parsing process and return multiple children and whether the - /// last token was terminated, even if there remains stuff in the string. - pub fn consume_open_ended(self) -> Option<(Vec, bool)> { self.terminated().then(|| (self.children, self.tokens.terminated())) } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index e15ae339d..0d4cf071f 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -89,7 +89,12 @@ fn trim_and_split_raw(column: usize, mut raw: &str) -> (String, bool) { // Dedent based on column, but not for the first line. for line in lines.iter_mut().skip(1) { - let offset = line.chars().take(column).take_while(|c| c.is_whitespace()).count(); + let offset = line + .chars() + .take(column) + .take_while(|c| c.is_whitespace()) + .map(char::len_utf8) + .sum(); *line = &line[offset ..]; } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 2211fcb6f..fc98bb34c 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -758,7 +758,7 @@ impl NodeKind { pub fn is_at_start(&self, prev: bool) -> bool { match self { Self::Space(n) if *n > 0 => true, - Self::LineComment | Self::BlockComment => prev, + Self::Space(_) | Self::LineComment | Self::BlockComment => prev, _ => false, } } From 9fda623b02b2a0a9e9cdf806d9945d0759c8bf01 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Wed, 23 Feb 2022 20:06:48 +0100 Subject: [PATCH 4/4] Code Review: That's just like your struct, man. 
--- benches/oneshot.rs | 2 +- src/parse/incremental.rs | 151 ++++++++++++++++++++++----------------- src/parse/mod.rs | 70 +++++++----------- src/parse/parser.rs | 20 +++--- src/parse/scanner.rs | 34 +-------- src/parse/tokens.rs | 51 +++++++++++-- src/syntax/ast.rs | 2 +- src/syntax/mod.rs | 4 +- 8 files changed, 173 insertions(+), 161 deletions(-) diff --git a/benches/oneshot.rs b/benches/oneshot.rs index 1d70e4664..5dbf993fe 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -55,7 +55,7 @@ fn bench_scan(iai: &mut Iai) { } fn bench_tokenize(iai: &mut Iai) { - iai.run(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup), 0).count()); + iai.run(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup)).count()); } fn bench_parse(iai: &mut Iai) { diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index d3edff6e2..4736845f5 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -4,19 +4,10 @@ use std::sync::Arc; use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ - is_newline, parse, parse_block, parse_markup_elements, parse_template, TokenMode, + is_newline, parse, reparse_block, reparse_markup_elements, reparse_template, + TokenMode, }; -type ReparseFunc = fn( - &str, - &str, - usize, - isize, - &[Green], - bool, - usize, -) -> Option<(Vec, bool, usize)>; - /// Allows partial refreshs of the [`Green`] node tree. /// /// This struct holds a description of a change. Its methods can be used to try @@ -55,16 +46,12 @@ impl Reparser<'_> { let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); let original_count = green.children().len(); - // Save the current indent if this is a markup node. - let indent = match green.kind() { - NodeKind::Markup(n) => *n, - _ => 0, - }; - let mut search = SearchState::default(); let mut ahead_nontrivia = None; + // Whether the first node that should be replaced is at start. let mut at_start = true; + // Whether the last searched child is the outermost child. 
let mut child_outermost = false; // Find the the first child in the range of children to reparse. @@ -83,18 +70,17 @@ impl Reparser<'_> { search = if child_span.end == self.replace_range.end && child_mode == TokenMode::Markup { - SearchState::RequireNonWS(pos) + SearchState::RequireNonTrivia(pos) } else { SearchState::Contained(pos) }; } else if child_span.contains(&self.replace_range.start) { search = SearchState::Inside(pos); } else { - if (self.replace_range.len() != 0 - || self.replace_range.end != child_span.end - || ahead_nontrivia.is_none()) - && (!child.kind().is_space() - && child.kind() != &NodeKind::Semicolon) + if (!child.kind().is_space() + && child.kind() != &NodeKind::Semicolon) + && (ahead_nontrivia.is_none() + || self.replace_range.start > child_span.end) { ahead_nontrivia = Some((pos, at_start)); } @@ -103,12 +89,12 @@ impl Reparser<'_> { } SearchState::Inside(start) => { if child_span.end == self.replace_range.end { - search = SearchState::RequireNonWS(start); + search = SearchState::RequireNonTrivia(start); } else if child_span.end > self.replace_range.end { search = SearchState::SpanFound(start, pos); } } - SearchState::RequireNonWS(start) => { + SearchState::RequireNonTrivia(start) => { if !child.kind().is_trivia() { search = SearchState::SpanFound(start, pos); } @@ -118,11 +104,21 @@ impl Reparser<'_> { offset += child.len(); child_outermost = outermost && i + 1 == original_count; - if search.end().is_some() { + + if search.done().is_some() { break; } } + // If we were looking for a non-whitespace element and hit the end of + // the file here, we instead use EOF as the end of the span. 
+ if let SearchState::RequireNonTrivia(start) = search { + search = SearchState::SpanFound(start, GreenPos { + idx: green.children().len() - 1, + offset: offset - green.children().last().unwrap().len(), + }) + } + if let SearchState::Contained(pos) = search { let child = &mut green.children_mut()[pos.idx]; let prev_len = child.len(); @@ -139,20 +135,20 @@ impl Reparser<'_> { } let superseded_span = pos.offset .. pos.offset + prev_len; - let func: Option = match child.kind() { - NodeKind::Template => Some(parse_template), - NodeKind::Block => Some(parse_block), + let func: Option = match child.kind() { + NodeKind::Template => Some(ReparseMode::Template), + NodeKind::Block => Some(ReparseMode::Block), _ => None, }; + // Return if the element was reparsable on its own, otherwise try to + // treat it as a markup element. if let Some(func) = func { if let Some(result) = self.replace( green, func, pos.idx .. pos.idx + 1, superseded_span, - at_start, - indent, outermost, ) { return Some(result); @@ -160,11 +156,13 @@ impl Reparser<'_> { } } - if !matches!(green.kind(), NodeKind::Markup(_)) { - return None; - } + // Save the current indent if this is a markup node and stop otherwise. + let indent = match green.kind() { + NodeKind::Markup(n) => *n, + _ => return None, + }; - let (mut start, end) = search.end()?; + let (mut start, end) = search.done()?; if let Some((ahead, ahead_at_start)) = ahead_nontrivia { let ahead_kind = green.children()[ahead.idx].kind(); @@ -179,13 +177,12 @@ impl Reparser<'_> { let superseded_span = start.offset .. end.offset + green.children()[end.idx].len(); + self.replace( green, - parse_markup_elements, + ReparseMode::MarkupElements(at_start, indent), start.idx .. 
end.idx + 1, superseded_span, - at_start, - indent, outermost, ) } @@ -193,19 +190,17 @@ impl Reparser<'_> { fn replace( &self, green: &mut GreenNode, - func: ReparseFunc, + mode: ReparseMode, superseded_idx: Range, superseded_span: Range, - at_start: bool, - indent: usize, outermost: bool, ) -> Option> { + let superseded_start = superseded_idx.start; + let differential: isize = self.replace_len as isize - self.replace_range.len() as isize; - let newborn_span = superseded_span.start - .. - (superseded_span.end as isize + differential) as usize; - let superseded_start = superseded_idx.start; + let newborn_end = (superseded_span.end as isize + differential) as usize; + let newborn_span = superseded_span.start .. newborn_end; let mut prefix = ""; for (i, c) in self.src[.. newborn_span.start].char_indices().rev() { @@ -215,15 +210,27 @@ impl Reparser<'_> { prefix = &self.src[i .. newborn_span.start]; } - let (newborns, terminated, amount) = func( - &prefix, - &self.src[newborn_span.start ..], - newborn_span.len(), - differential, - &green.children()[superseded_start ..], - at_start, - indent, - )?; + let (newborns, terminated, amount) = match mode { + ReparseMode::Block => reparse_block( + &prefix, + &self.src[newborn_span.start ..], + newborn_span.len(), + ), + ReparseMode::Template => reparse_template( + &prefix, + &self.src[newborn_span.start ..], + newborn_span.len(), + ), + ReparseMode::MarkupElements(at_start, indent) => reparse_markup_elements( + &prefix, + &self.src[newborn_span.start ..], + newborn_span.len(), + differential, + &green.children()[superseded_start ..], + at_start, + indent, + ), + }?; // Do not accept unclosed nodes if the old node wasn't at the right edge // of the tree. @@ -236,6 +243,8 @@ impl Reparser<'_> { } } +/// The position of a green node in terms of its string offset and index within +/// the parent node. 
#[derive(Clone, Copy, Debug, PartialEq)] struct GreenPos { idx: usize, @@ -256,7 +265,7 @@ enum SearchState { Inside(GreenPos), /// The search has found the end of the modified nodes but the change /// touched its boundries so another non-trivia node is needed. - RequireNonWS(GreenPos), + RequireNonTrivia(GreenPos), /// The search has concluded by finding a start and an end index for nodes /// with a pending reparse. SpanFound(GreenPos, GreenPos), @@ -269,17 +278,29 @@ impl Default for SearchState { } impl SearchState { - fn end(&self) -> Option<(GreenPos, GreenPos)> { + fn done(self) -> Option<(GreenPos, GreenPos)> { match self { Self::NoneFound => None, - Self::Contained(s) => Some((*s, *s)), + Self::Contained(s) => Some((s, s)), Self::Inside(_) => None, - Self::RequireNonWS(_) => None, - Self::SpanFound(s, e) => Some((*s, *e)), + Self::RequireNonTrivia(_) => None, + Self::SpanFound(s, e) => Some((s, e)), } } } +/// Which reparse function to choose for a span of elements. +#[derive(Clone, Copy, Debug, PartialEq)] +enum ReparseMode { + /// Reparse a code block with its braces. + Block, + /// Reparse a template, including its square brackets. + Template, + /// Reparse elements of the markup. The variant carries whether the node is + /// `at_start` and the minimum indent of the containing markup node. + MarkupElements(bool, usize), +} + impl NodeKind { /// Whether this node has to appear at the start of a line. pub fn only_at_start(&self) -> bool { @@ -330,7 +351,7 @@ mod tests { test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40); test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33); test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 0 .. 33); - test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11); + test("hello~~{x}", 7 .. 10, "#f()", 5 .. 11); test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 5 .. 25); test("understanding `code` is complicated", 15 .. 
15, "C ", 14 .. 22); test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); @@ -344,8 +365,8 @@ mod tests { #[test] fn test_parse_incremental_whitespace_invariants() { - test("hello \\ world", 7 .. 8, "a ", 6 .. 14); - test("hello \\ world", 7 .. 8, " a", 6 .. 14); + test("hello \\ world", 7 .. 8, "a ", 0 .. 14); + test("hello \\ world", 7 .. 8, " a", 0 .. 14); test("x = y", 1 .. 1, " + y", 0 .. 6); test("x = y", 1 .. 1, " + y\n", 0 .. 7); test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); @@ -353,13 +374,13 @@ mod tests { test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 18 .. 23); test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); - test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0..11); - test("= foo\n bar\n baz", 6..8, "", 0..15); + test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0 .. 11); + test("= foo\n bar\n baz", 6 .. 8, "", 0 .. 15); } #[test] fn test_parse_incremental_type_invariants() { - test("a #for x in array {x}", 18 .. 21, "[#x]", 0 .. 22); + test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22); test("a #let x = 1 {5}", 3 .. 6, "if", 2 .. 11); test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16); test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 13); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index c08c5d6f1..11ce872f8 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -30,7 +30,7 @@ pub fn parse(src: &str) -> Arc { /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. 
-pub fn parse_markup_elements( +pub fn reparse_markup_elements( prefix: &str, src: &str, end_pos: usize, @@ -43,11 +43,11 @@ pub fn parse_markup_elements( let mut node: Option<&Green> = None; let mut iter = reference.iter(); - let mut offset = 0; + let mut offset = differential; let mut replaced = 0; let mut stopped = false; - while !p.eof() { + 'outer: while !p.eof() { if let Some(NodeKind::Space(1 ..)) = p.peek() { if p.column(p.current_end()) < column { return None; @@ -56,44 +56,36 @@ pub fn parse_markup_elements( markup_node(&mut p, &mut at_start); - if p.prev_end() >= end_pos { - let recent = p.children.last().unwrap(); - let recent_start = p.prev_end() - recent.len(); + if p.prev_end() < end_pos { + continue; + } - while offset <= recent_start { - if let Some(node) = node { - // The nodes are equal, at the same position and have the - // same content. The parsing trees have converged again, so - // the reparse may stop here. - if (offset as isize + differential) as usize == recent_start - && node == recent - { - replaced -= 1; - stopped = true; - break; - } - } + let recent = p.children.last().unwrap(); + let recent_start = p.prev_end() - recent.len(); - let result = iter.next(); - if let Some(node) = node { - offset += node.len(); - } - node = result; - if node.is_none() { - break; - } else { - replaced += 1; + while offset <= recent_start as isize { + if let Some(node) = node { + // The nodes are equal, at the same position and have the + // same content. The parsing trees have converged again, so + // the reparse may stop here. + if offset == recent_start as isize && node == recent { + replaced -= 1; + stopped = true; + break 'outer; } } - if stopped { + if let Some(node) = node { + offset += node.len() as isize; + } + + node = iter.next(); + if node.is_none() { break; } - } - } - if p.prev_end() < end_pos { - return None; + replaced += 1; + } } if p.eof() && !stopped { @@ -109,14 +101,10 @@ pub fn parse_markup_elements( } /// Parse a template literal. 
Returns `Some` if all of the input was consumed. -pub fn parse_template( +pub fn reparse_template( prefix: &str, src: &str, end_pos: usize, - _: isize, - _: &[Green], - _: bool, - _: usize, ) -> Option<(Vec, bool, usize)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBracket) { @@ -135,14 +123,10 @@ pub fn parse_template( } /// Parse a code block. Returns `Some` if all of the input was consumed. -pub fn parse_block( +pub fn reparse_block( prefix: &str, src: &str, end_pos: usize, - _: isize, - _: &[Green], - _: bool, - _: usize, ) -> Option<(Vec, bool, usize)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBrace) { diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 8588e5862..123871a58 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -2,7 +2,7 @@ use core::slice::SliceIndex; use std::fmt::{self, Display, Formatter}; use std::mem; -use super::{Scanner, TokenMode, Tokens}; +use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; /// A convenient token-based parser. @@ -30,11 +30,14 @@ pub struct Parser<'s> { impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str, mode: TokenMode) -> Self { - Self::with_offset(src, mode, 0) + Self::with_prefix("", src, mode) } - fn with_offset(src: &'s str, mode: TokenMode, offset: usize) -> Self { - let mut tokens = Tokens::new(src, mode, offset); + /// Create a new parser for the source string that is prefixed by some text + /// that does not need to be parsed but taken into account for column + /// calculation. 
+ pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { + let mut tokens = Tokens::with_prefix(prefix, src, mode); let current = tokens.next(); Self { tokens, @@ -49,13 +52,6 @@ impl<'s> Parser<'s> { } } - /// Create a new parser for the source string that is prefixed by some text - /// that does not need to be parsed but taken into account for column - /// calculation. - pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { - Self::with_offset(src, mode, Scanner::new(prefix).column(prefix.len())) - } - /// End the parsing process and return the last child. pub fn finish(self) -> Vec { self.children @@ -218,7 +214,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.tokens.scanner().column(index) + self.tokens.column(index) } /// Continue parsing in a group. diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 15060c7b8..e4cf56e97 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -10,21 +10,13 @@ pub struct Scanner<'s> { /// The index at which the peekable character starts. Must be in bounds and /// at a codepoint boundary to guarantee safety. index: usize, - /// Offsets the indentation on the first line of the source. - column_offset: usize, } impl<'s> Scanner<'s> { /// Create a new char scanner. #[inline] pub fn new(src: &'s str) -> Self { - Self { src, index: 0, column_offset: 0 } - } - - /// Create a new char scanner with an offset for the first line indent. - #[inline] - pub fn with_indent_offset(src: &'s str, column_offset: usize) -> Self { - Self { src, index: 0, column_offset } + Self { src, index: 0 } } /// Whether the end of the string is reached. @@ -177,30 +169,6 @@ impl<'s> Scanner<'s> { // optimized away in some cases. self.src.get(start .. self.index).unwrap_or_default() } - - /// The column index of a given index in the source string. 
- #[inline] - pub fn column(&self, index: usize) -> usize { - let mut apply_offset = false; - let res = self.src[.. index] - .char_indices() - .rev() - .take_while(|&(_, c)| !is_newline(c)) - .inspect(|&(i, _)| { - if i == 0 { - apply_offset = true - } - }) - .count(); - - // The loop is never executed if the slice is empty, but we are of - // course still at the start of the first line. - if self.src[.. index].len() == 0 { - apply_offset = true; - } - - if apply_offset { res + self.column_offset } else { res } - } } /// Whether this character denotes a newline. diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 4a13694a5..91bbf9e84 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -11,9 +11,14 @@ use crate::util::EcoString; /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { + /// The underlying scanner. s: Scanner<'s>, + /// The mode the scanner is in. This determines what tokens it recognizes. mode: TokenMode, + /// Whether the last token has been terminated. terminated: bool, + /// Offsets the indentation on the first line of the source. + column_offset: usize, } /// What kind of tokens to emit. @@ -28,11 +33,19 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(src: &'s str, mode: TokenMode, offset: usize) -> Self { + pub fn new(src: &'s str, mode: TokenMode) -> Self { + Self::with_prefix("", src, mode) + } + + /// Create a new token iterator with the given mode and a prefix to offset + /// column calculations. + #[inline] + pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { Self { - s: Scanner::with_indent_offset(src, offset), + s: Scanner::new(src), mode, terminated: true, + column_offset: column(prefix, prefix.len(), 0), } } @@ -74,6 +87,12 @@ impl<'s> Tokens<'s> { pub fn terminated(&self) -> bool { self.terminated } + + /// The column index of a given index in the source string. 
+ #[inline] + pub fn column(&self, index: usize) -> usize { + column(self.s.src(), index, self.column_offset) + } } impl<'s> Iterator for Tokens<'s> { @@ -321,7 +340,7 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.s.column(self.s.index() - 1); + let column = self.column(self.s.index() - 1); let mut backticks = 1; while self.s.eat_if('`') { @@ -574,6 +593,30 @@ fn keyword(ident: &str) -> Option { }) } +/// The column index of a given index in the source string, given a column offset for the first line. +#[inline] +fn column(string: &str, index: usize, offset: usize) -> usize { + let mut apply_offset = false; + let res = string[.. index] + .char_indices() + .rev() + .take_while(|&(_, c)| !is_newline(c)) + .inspect(|&(i, _)| { + if i == 0 { + apply_offset = true + } + }) + .count(); + + // The loop is never executed if the slice is empty, but we are of + // course still at the start of the first line. + if index == 0 { + apply_offset = true; + } + + if apply_offset { res + offset } else { res } +} + #[cfg(test)] #[allow(non_snake_case)] mod tests { @@ -689,7 +732,7 @@ mod tests { }}; (@$mode:ident: $src:expr => $($token:expr),*) => {{ let src = $src; - let found = Tokens::new(&src, $mode, 0).collect::>(); + let found = Tokens::new(&src, $mode).collect::>(); let expected = vec![$($token.clone()),*]; check(&src, found, expected); }}; diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 7992f9de9..10e5ec70d 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -60,7 +60,7 @@ impl Markup { /// The markup nodes. pub fn nodes(&self) -> impl Iterator + '_ { self.0.children().filter_map(|node| match node.kind() { - NodeKind::Space(n) if *n > 1 => Some(MarkupNode::Parbreak), + NodeKind::Space(2 ..) 
=> Some(MarkupNode::Parbreak), NodeKind::Space(_) => Some(MarkupNode::Space), NodeKind::Linebreak => Some(MarkupNode::Linebreak), NodeKind::Text(s) | NodeKind::TextInLine(s) => { diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index fc98bb34c..85f2013c4 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -757,7 +757,7 @@ impl NodeKind { /// Whether this node is `at_start` given the previous value of the property. pub fn is_at_start(&self, prev: bool) -> bool { match self { - Self::Space(n) if *n > 0 => true, + Self::Space(1 ..) => true, Self::Space(_) | Self::LineComment | Self::BlockComment => prev, _ => false, } @@ -858,7 +858,7 @@ impl NodeKind { Self::Include => "keyword `include`", Self::From => "keyword `from`", Self::Markup(_) => "markup", - Self::Space(n) if *n > 1 => "paragraph break", + Self::Space(2 ..) => "paragraph break", Self::Space(_) => "space", Self::Linebreak => "forced linebreak", Self::Text(_) | Self::TextInLine(_) => "text",