diff --git a/Cargo.toml b/Cargo.toml index 8251a7fa6..0bf68d74f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,21 +20,14 @@ opt-level = 2 [dependencies] fxhash = "0.2" -image = { version = "0.23", default-features = false, features = [ - "png", - "jpeg", -] } +image = { version = "0.23", default-features = false, features = ["png", "jpeg"] } itertools = "0.10" miniz_oxide = "0.4" once_cell = "1" pdf-writer = "0.4" rustybuzz = "0.4" serde = { version = "1", features = ["derive", "rc"] } -svg2pdf = { version = "0.1", default-features = false, features = [ - "text", - "png", - "jpeg", -] } +svg2pdf = { version = "0.1", default-features = false, features = ["text", "png", "jpeg"] } ttf-parser = "0.12" typst-macros = { path = "./macros" } unicode-bidi = "0.3.5" diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 5cb016d2c..4c82f158b 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -4,8 +4,8 @@ use std::rc::Rc; use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ - parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, - parse_markup_elements, parse_template, Scanner, TokenMode, + is_newline, parse, parse_atomic, parse_atomic_markup, parse_block, parse_comment, + parse_markup, parse_markup_elements, parse_template, Scanner, TokenMode, }; /// The conditions that a node has to fulfill in order to be replaced. @@ -13,21 +13,21 @@ use super::{ /// This can dictate if a node can be replaced at all and if yes, what can take /// its place. #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Postcondition { +pub enum SuccessionRule { /// Changing this node can never have an influence on the other nodes. Safe, /// This node has to be replaced with a single token of the same kind. SameKind(Option), - /// Changing this node into a single atomic expression is allowed if it - /// appears in code mode, otherwise it is safe. + /// In code mode, this node can only be changed into a single atomic + /// expression, otherwise it is safe. AtomicPrimary, - /// Changing an unsafe layer node changes what the parents or the - /// surrounding nodes would be and is therefore disallowed. Change the + /// Changing an unsafe layer node in code mode changes what the parents or + /// the surrounding nodes would be and is therefore disallowed. Change the /// parents or children instead. If it appears in Markup, however, it is /// safe to change. UnsafeLayer, - /// Changing an unsafe node or any of its children will trigger undefined - /// behavior. Change the parents instead. + /// Changing an unsafe node or any of its children is not allowed. Change + /// the parents instead. Unsafe, } @@ -37,11 +37,12 @@ pub enum Postcondition { /// existence is plausible with them present. This can be used to encode some /// context-free language components for incremental parsing. #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Precondition { +pub enum NeighbourRule { /// These nodes depend on being at the start of a line. Reparsing of safe - /// left neighbors has to check this invariant. Otherwise, this node is - /// safe. Additionally, the indentation of the first right non-trivia, - /// non-whitespace sibling must not be greater than the current indentation. + /// left neighbors has to check this invariant. Additionally, when + /// exchanging the right sibling or inserting such a node the indentation of + /// the first right non-trivia, non-whitespace sibling must not be greater + /// than the current indentation. AtStart, /// These nodes depend on not being at the start of a line. Reparsing of /// safe left neighbors has to check this invariant. Otherwise, this node is @@ -77,8 +78,12 @@ impl<'a> Reparser<'a> { impl Reparser<'_> { /// Find the innermost child that is incremental safe. - pub fn reparse(&self, green: &mut GreenNode) -> Option> { - self.reparse_step(green, 0, TokenMode::Markup, true) + pub fn reparse(&self, green: &mut Rc) -> Range { + self.reparse_step(Rc::make_mut(green), 0, TokenMode::Markup, true) + .unwrap_or_else(|| { + *green = parse(self.src); + 0 .. self.src.len() + }) } fn reparse_step( @@ -90,7 +95,7 @@ impl Reparser<'_> { ) -> Option> { let mode = green.kind().mode().unwrap_or(parent_mode); let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); - let child_count = green.children().len(); + let original_count = green.children().len(); // Save the current indent if this is a markup node. let indent = match green.kind() { @@ -134,12 +139,14 @@ impl Reparser<'_> { // neighbor! if child_span.contains(&self.replace_range.end) || self.replace_range.end == child_span.end - && (mode != TokenMode::Markup || i + 1 == child_count) + && (mode != TokenMode::Markup || i + 1 == original_count) { - outermost &= i + 1 == child_count; + outermost &= i + 1 == original_count; last = Some((i, offset + child.len())); break; - } else if mode != TokenMode::Markup || !child.kind().post().safe_in_markup() { + } else if mode != TokenMode::Markup + || !child.kind().succession_rule().safe_in_markup() + { break; } @@ -147,17 +154,17 @@ impl Reparser<'_> { } let (last_idx, last_end) = last?; - let children_range = first_idx .. last_idx + 1; - let children_span = first_start .. last_end; + let superseded_range = first_idx .. last_idx + 1; + let superseded_span = first_start .. last_end; let last_kind = green.children()[last_idx].kind().clone(); // First, we try if the child itself has another, more specific // applicable child. - if children_range.len() == 1 { - let child = &mut green.children_mut()[children_range.start]; + if superseded_range.len() == 1 { + let child = &mut green.children_mut()[superseded_range.start]; let prev_len = child.len(); - if last_kind.post() != Postcondition::Unsafe { + if last_kind.succession_rule() != SuccessionRule::Unsafe { if let Some(range) = match child { Green::Node(node) => self.reparse_step( Rc::make_mut(node), @@ -168,56 +175,64 @@ impl Reparser<'_> { Green::Token(_) => None, } { let new_len = child.len(); - green.update_child_len(new_len, prev_len); + green.update_parent(new_len, prev_len); return Some(range); } } } // We only replace multiple children in markup mode. - if children_range.len() > 1 && mode == TokenMode::Code { + if superseded_range.len() > 1 && mode == TokenMode::Code { return None; } // We now have a child that we can replace and a function to do so. let func = last_kind.reparsing_func(child_mode, indent)?; - let post = last_kind.post(); + let succession = last_kind.succession_rule(); - let mut column = if mode == TokenMode::Markup { - // In this case, we want to pass the indentation to the function. - Scanner::new(self.src).column(children_span.start) - } else { - 0 - }; + let mut markup_min_column = 0; // If this is a markup node, we want to save its indent instead to pass // the right indent argument. - if children_range.len() == 1 { - let child = &mut green.children_mut()[children_range.start]; + if superseded_range.len() == 1 { + let child = &mut green.children_mut()[superseded_range.start]; if let NodeKind::Markup(n) = child.kind() { - column = *n; + markup_min_column = *n; } } // The span of the to-be-reparsed children in the new source. - let replace_span = children_span.start + let newborn_span = superseded_span.start .. - children_span.end + self.replace_len - self.replace_range.len(); + superseded_span.end + self.replace_len - self.replace_range.len(); // For atomic primaries we need to pass in the whole remaining string to // check whether the parser would eat more stuff illicitly. - let reparse_span = if post == Postcondition::AtomicPrimary { - replace_span.start .. self.src.len() + let reparse_span = if succession == SuccessionRule::AtomicPrimary { + newborn_span.start .. self.src.len() } else { - replace_span.clone() + newborn_span.clone() }; + let mut prefix = ""; + for (i, c) in self.src[.. reparse_span.start].char_indices().rev() { + if is_newline(c) { + break; + } + prefix = &self.src[i .. reparse_span.start]; + } + // Do the reparsing! - let (mut newborns, terminated) = func(&self.src[reparse_span], at_start, column)?; + let (mut newborns, terminated) = func( + &prefix, + &self.src[reparse_span.clone()], + at_start, + markup_min_column, + )?; // Make sure that atomic primaries ate only what they were supposed to. - if post == Postcondition::AtomicPrimary { - let len = replace_span.len(); + if succession == SuccessionRule::AtomicPrimary { + let len = newborn_span.len(); if newborns.len() > 1 && newborns[0].len() == len { newborns.truncate(1); } else if newborns.iter().map(Green::len).sum::() != len { @@ -234,16 +249,16 @@ impl Reparser<'_> { // If all post- and preconditions match, we are good to go! if validate( green.children(), - children_range.clone(), + superseded_range.clone(), at_start, &newborns, mode, - post, - replace_span.clone(), + succession, + newborn_span.clone(), self.src, ) { - green.replace_child_range(children_range, newborns); - Some(replace_span) + green.replace_children(superseded_range, newborns); + Some(newborn_span) } else { None } @@ -252,27 +267,27 @@ impl Reparser<'_> { /// Validate that a node replacement is allowed by post- and preconditions. fn validate( - prev_children: &[Green], - children_range: Range, + superseded: &[Green], + superseded_range: Range, mut at_start: bool, newborns: &[Green], mode: TokenMode, - post: Postcondition, - replace_span: Range, + post: SuccessionRule, + newborn_span: Range, src: &str, ) -> bool { // Atomic primaries must only generate one new child. - if post == Postcondition::AtomicPrimary && newborns.len() != 1 { + if post == SuccessionRule::AtomicPrimary && newborns.len() != 1 { return false; } // Same kind in mode `inside` must generate only one child and that child // must be of the same kind as previously. - if let Postcondition::SameKind(inside) = post { - let prev_kind = prev_children[children_range.start].kind(); - let prev_mode = prev_kind.mode().unwrap_or(mode); - if inside.map_or(true, |m| m == prev_mode) - && (newborns.len() != 1 || prev_kind != newborns[0].kind()) + if let SuccessionRule::SameKind(inside) = post { + let superseded_kind = superseded[superseded_range.start].kind(); + let superseded_mode = superseded_kind.mode().unwrap_or(mode); + if inside.map_or(true, |m| m == superseded_mode) + && (newborns.len() != 1 || superseded_kind != newborns[0].kind()) { return false; } @@ -286,15 +301,15 @@ fn validate( // Check if there are any `AtStart` predecessors which require a certain // indentation. let s = Scanner::new(src); - let mut prev_pos = replace_span.start; - for child in (&prev_children[.. children_range.start]).iter().rev() { + let mut prev_pos = newborn_span.start; + for child in (&superseded[.. superseded_range.start]).iter().rev() { prev_pos -= child.len(); if !child.kind().is_trivia() { - if child.kind().pre() == Precondition::AtStart { + if child.kind().neighbour_rule() == NeighbourRule::AtStart { let left_col = s.column(prev_pos); // Search for the first non-trivia newborn. - let mut new_pos = replace_span.start; + let mut new_pos = newborn_span.start; let mut child_col = None; for child in newborns { if !child.kind().is_trivia() { @@ -323,15 +338,15 @@ fn validate( // Ensure that a possible at-start or not-at-start precondition of // a node after the replacement range is satisfied. - for child in &prev_children[children_range.end ..] { - if !child.kind().is_trivia() { - let pre = child.kind().pre(); - if (pre == Precondition::AtStart && !at_start) - || (pre == Precondition::NotAtStart && at_start) - { - return false; - } + for child in &superseded[superseded_range.end ..] { + let neighbour_rule = child.kind().neighbour_rule(); + if (neighbour_rule == NeighbourRule::AtStart && !at_start) + || (neighbour_rule == NeighbourRule::NotAtStart && at_start) + { + return false; + } + if !child.kind().is_trivia() { break; } @@ -339,42 +354,40 @@ fn validate( } // Verify that the last of the newborns is not `NotAtEnd`. - if newborns - .last() - .map_or(false, |child| child.kind().pre() == Precondition::NotAtEnd) - { + if newborns.last().map_or(false, |child| { + child.kind().neighbour_rule() == NeighbourRule::NotAtEnd + }) { return false; } // We have to check whether the last non-trivia newborn is `AtStart` and // verify the indent of its right neighbors in order to make sure its // indentation requirements are fulfilled. - let mut child_pos = replace_span.end; - let mut child_col = None; + let mut child_pos = newborn_span.end; for child in newborns.iter().rev() { child_pos -= child.len(); - if !child.kind().is_trivia() { - if child.kind().pre() == Precondition::AtStart { - child_col = Some(s.column(child_pos)); - } - break; + if child.kind().is_trivia() { + continue; } - } - if let Some(child_col) = child_col { - let mut right_pos = replace_span.end; - for child in &prev_children[children_range.end ..] { - if !child.kind().is_trivia() { + if child.kind().neighbour_rule() == NeighbourRule::AtStart { + let child_col = s.column(child_pos); + + let mut right_pos = newborn_span.end; + for child in &superseded[superseded_range.end ..] { + if child.kind().is_trivia() { + right_pos += child.len(); + continue; + } + if s.column(right_pos) > child_col { return false; } - break; } - - right_pos += child.len(); } + break; } true @@ -387,13 +400,15 @@ impl NodeKind { &self, parent_mode: TokenMode, indent: usize, - ) -> Option Option<(Vec, bool)>> { + ) -> Option Option<(Vec, bool)>> { let mode = self.mode().unwrap_or(parent_mode); - match self.post() { - Postcondition::Unsafe | Postcondition::UnsafeLayer => None, - Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), - Postcondition::AtomicPrimary => Some(parse_atomic_markup), - Postcondition::SameKind(x) if x == None || x == Some(mode) => match self { + match self.succession_rule() { + SuccessionRule::Unsafe | SuccessionRule::UnsafeLayer => None, + SuccessionRule::AtomicPrimary if mode == TokenMode::Code => { + Some(parse_atomic) + } + SuccessionRule::AtomicPrimary => Some(parse_atomic_markup), + SuccessionRule::SameKind(x) if x == None || x == Some(mode) => match self { NodeKind::Markup(_) => Some(parse_markup), NodeKind::Template => Some(parse_template), NodeKind::Block => Some(parse_block), @@ -409,7 +424,7 @@ impl NodeKind { /// Whether it is safe to do incremental parsing on this node. Never allow /// non-termination errors if this is not already the last leaf node. - pub fn post(&self) -> Postcondition { + pub fn succession_rule(&self) -> SuccessionRule { match self { // Replacing parenthesis changes if the expression is balanced and // is therefore not safe. @@ -418,7 +433,7 @@ impl NodeKind { | Self::LeftBrace | Self::RightBrace | Self::LeftParen - | Self::RightParen => Postcondition::Unsafe, + | Self::RightParen => SuccessionRule::Unsafe, // Replacing an operator can change whether the parent is an // operation which makes it unsafe. The star can appear in markup. @@ -445,7 +460,7 @@ impl NodeKind { | Self::Or | Self::With | Self::Dots - | Self::Arrow => Postcondition::Unsafe, + | Self::Arrow => SuccessionRule::Unsafe, // These keywords change what kind of expression the parent is and // how far the expression would go. @@ -461,14 +476,14 @@ impl NodeKind { | Self::Return | Self::Import | Self::Include - | Self::From => Postcondition::Unsafe, + | Self::From => SuccessionRule::Unsafe, // Changing the heading level, enum numbering, or list bullet // changes the next layer. - Self::EnumNumbering(_) => Postcondition::Unsafe, + Self::EnumNumbering(_) => SuccessionRule::Unsafe, // This can be anything, so we don't make any promises. - Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + Self::Error(_, _) | Self::Unknown(_) => SuccessionRule::Unsafe, // These are complex expressions which may screw with their // environments. @@ -477,33 +492,33 @@ impl NodeKind { | Self::Binary | Self::CallArgs | Self::Named - | Self::Spread => Postcondition::UnsafeLayer, + | Self::Spread => SuccessionRule::UnsafeLayer, // The closure is a bit magic with the let expression, and also it // is not atomic. - Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + Self::Closure | Self::ClosureParams => SuccessionRule::UnsafeLayer, // Missing these creates errors for the parents. Self::WithExpr | Self::ForPattern | Self::ImportItems => { - Postcondition::UnsafeLayer + SuccessionRule::UnsafeLayer } // Only markup is expected at the points where it does occur. The // indentation must be preserved as well, also for the children. - Self::Markup(_) => Postcondition::SameKind(None), + Self::Markup(_) => SuccessionRule::SameKind(None), // These can appear everywhere and must not change to other stuff // because that could change the outer expression. - Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + Self::LineComment | Self::BlockComment => SuccessionRule::SameKind(None), // These can appear as bodies and would trigger an error if they // became something else. - Self::Template => Postcondition::SameKind(None), - Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + Self::Template => SuccessionRule::SameKind(None), + Self::Block => SuccessionRule::SameKind(Some(TokenMode::Code)), // Whitespace in code mode has to remain whitespace or else the type // of things would change. - Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + Self::Space(_) => SuccessionRule::SameKind(Some(TokenMode::Code)), // These are expressions that can be replaced by other expressions. Self::Ident(_) @@ -519,7 +534,7 @@ impl NodeKind { | Self::Dict | Self::Group | Self::None - | Self::Auto => Postcondition::AtomicPrimary, + | Self::Auto => SuccessionRule::AtomicPrimary, // More complex, but still an expression. Self::ForExpr @@ -528,11 +543,11 @@ impl NodeKind { | Self::LetExpr | Self::SetExpr | Self::ImportExpr - | Self::IncludeExpr => Postcondition::AtomicPrimary, + | Self::IncludeExpr => SuccessionRule::AtomicPrimary, // This element always has to remain in the same column so better // reparse the whole parent. - Self::Raw(_) => Postcondition::Unsafe, + Self::Raw(_) => SuccessionRule::Unsafe, // These are all replaceable by other tokens. Self::Parbreak @@ -548,22 +563,22 @@ impl NodeKind { | Self::Heading | Self::Enum | Self::List - | Self::Math(_) => Postcondition::Safe, + | Self::Math(_) => SuccessionRule::Safe, } } /// The appropriate precondition for the type. - pub fn pre(&self) -> Precondition { + pub fn neighbour_rule(&self) -> NeighbourRule { match self { - Self::Heading | Self::Enum | Self::List => Precondition::AtStart, - Self::TextInLine(_) => Precondition::NotAtStart, - Self::Error(_, _) => Precondition::NotAtEnd, - _ => Precondition::None, + Self::Heading | Self::Enum | Self::List => NeighbourRule::AtStart, + Self::TextInLine(_) => NeighbourRule::NotAtStart, + Self::Error(_, _) => NeighbourRule::NotAtEnd, + _ => NeighbourRule::None, } } } -impl Postcondition { +impl SuccessionRule { /// Whether a node with this condition can be reparsed in markup mode. pub fn safe_in_markup(&self) -> bool { match self { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index f48267300..a97526453 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -29,72 +29,102 @@ pub fn parse(src: &str) -> Rc { } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Code); +pub fn parse_atomic( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); primary(&mut p, true).ok()?; - p.eject_partial() + p.consume_unterminated() } /// Parse an atomic primary. Returns `Some` if all of the input was consumed. -pub fn parse_atomic_markup(src: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Markup); +pub fn parse_atomic_markup( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); markup_expr(&mut p); - p.eject_partial() + p.consume_unterminated() } /// Parse some markup. Returns `Some` if all of the input was consumed. -pub fn parse_markup(src: &str, _: bool, column: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Markup); - if column == 0 { +pub fn parse_markup( + prefix: &str, + src: &str, + _: bool, + min_column: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); + if min_column == 0 { markup(&mut p); } else { - markup_indented(&mut p, column); + markup_indented(&mut p, min_column); } - p.eject() + p.consume() } /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. pub fn parse_markup_elements( + prefix: &str, src: &str, mut at_start: bool, - column: usize, + _: usize, ) -> Option<(Vec, bool)> { - let mut p = Parser::new(src, TokenMode::Markup); - p.offset(column); + let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup); while !p.eof() { markup_node(&mut p, &mut at_start); } - p.eject() + p.consume() } /// Parse a template literal. Returns `Some` if all of the input was consumed. -pub fn parse_template(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(source, TokenMode::Code); +pub fn parse_template( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBracket) { return None; } template(&mut p); - p.eject() + p.consume() } /// Parse a code block. Returns `Some` if all of the input was consumed. -pub fn parse_block(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(source, TokenMode::Code); +pub fn parse_block( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBrace) { return None; } block(&mut p); - p.eject() + p.consume() } /// Parse a comment. Returns `Some` if all of the input was consumed. -pub fn parse_comment(source: &str, _: bool, _: usize) -> Option<(Vec, bool)> { - let mut p = Parser::new(source, TokenMode::Code); +pub fn parse_comment( + prefix: &str, + src: &str, + _: bool, + _: usize, +) -> Option<(Vec, bool)> { + let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); comment(&mut p).ok()?; - p.eject() + p.consume() } /// Parse markup. @@ -111,7 +141,7 @@ fn markup_indented(p: &mut Parser, column: usize) { }); markup_while(p, false, column, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if *n >= 1 => p.clean_column(p.current_end()) >= column, + Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, _ => true, }) } @@ -170,14 +200,9 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { p.eat(); } - NodeKind::Eq if *at_start => heading(p), - NodeKind::Minus if *at_start => list_node(p), - NodeKind::EnumNumbering(_) if *at_start => enum_node(p), - - // Line-based markup that is not currently at the start of the line. - NodeKind::Eq | NodeKind::Minus | NodeKind::EnumNumbering(_) => { - p.convert(NodeKind::TextInLine(p.peek_src().into())) - } + NodeKind::Eq => heading(p, *at_start), + NodeKind::Minus => list_node(p, *at_start), + NodeKind::EnumNumbering(_) => enum_node(p, *at_start), // Hashtag + keyword / identifier. NodeKind::Ident(_) @@ -201,42 +226,49 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { } /// Parse a heading. -fn heading(p: &mut Parser) { - p.perform(NodeKind::Heading, |p| { - p.eat_assert(&NodeKind::Eq); - while p.eat_if(&NodeKind::Eq) {} +fn heading(p: &mut Parser, at_start: bool) { + let marker = p.marker(); + let current_start = p.current_start(); + p.eat_assert(&NodeKind::Eq); + while p.eat_if(&NodeKind::Eq) {} + + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); - }); + marker.end(p, NodeKind::Heading); + } else { + let text = p.get(current_start .. p.prev_end()).into(); + marker.convert(p, NodeKind::TextInLine(text)); + } } /// Parse a single list item. -fn list_node(p: &mut Parser) { +fn list_node(p: &mut Parser, at_start: bool) { let marker = p.marker(); - let src: EcoString = p.peek_src().into(); + let text: EcoString = p.peek_src().into(); p.eat_assert(&NodeKind::Minus); - if p.peek().map_or(true, |kind| kind.is_whitespace()) { + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); marker.end(p, NodeKind::List); } else { - marker.convert(p, NodeKind::TextInLine(src)); + marker.convert(p, NodeKind::TextInLine(text)); } } /// Parse a single enum item. -fn enum_node(p: &mut Parser) { +fn enum_node(p: &mut Parser, at_start: bool) { let marker = p.marker(); - let src: EcoString = p.peek_src().into(); + let text: EcoString = p.peek_src().into(); p.eat(); - if p.peek().map_or(true, |kind| kind.is_whitespace()) { + if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) { let column = p.column(p.prev_end()); markup_indented(p, column); marker.end(p, NodeKind::Enum); } else { - marker.convert(p, NodeKind::TextInLine(src)); + marker.convert(p, NodeKind::TextInLine(text)); } } @@ -582,23 +614,18 @@ fn template(p: &mut Parser) { fn block(p: &mut Parser) { p.perform(NodeKind::Block, |p| { p.start_group(Group::Brace); - expr_list(p); - p.end_group(); - }); -} + while !p.eof() { + p.start_group(Group::Stmt); + if expr(p).is_ok() && !p.eof() { + p.expected_at("semicolon or line break"); + } + p.end_group(); -/// Parse a number of code expressions. -fn expr_list(p: &mut Parser) { - while !p.eof() { - p.start_group(Group::Stmt); - if expr(p).is_ok() && !p.eof() { - p.expected_at("semicolon or line break"); + // Forcefully skip over newlines since the group's contents can't. + p.eat_while(|t| matches!(t, NodeKind::Space(_))); } p.end_group(); - - // Forcefully skip over newlines since the group's contents can't. - p.eat_while(|t| matches!(t, NodeKind::Space(_))); - } + }); } /// Parse a function call. diff --git a/src/parse/parser.rs b/src/parse/parser.rs index f36155d5d..4e5b277d2 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,8 @@ +use core::slice::SliceIndex; use std::fmt::{self, Display, Formatter}; use std::mem; -use super::{TokenMode, Tokens}; +use super::{Scanner, TokenMode, Tokens}; use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -24,8 +25,7 @@ pub struct Parser<'s> { /// Is `Some` if there is an unterminated group at the last position where /// groups were terminated. last_unterminated: Option, - /// Offset the indentation. This can be used if the parser is processing a - /// subslice of the source and there was leading indent. + /// Offsets the indentation on the first line of the source. column_offset: usize, } @@ -47,18 +47,31 @@ impl<'s> Parser<'s> { } } + /// Create a new parser for the source string that is prefixed by some text + /// that does not need to be parsed but taken into account for column + /// calculation. + pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { + let mut p = Self::new(src, mode); + p.column_offset = Scanner::new(prefix).column(prefix.len()); + p + } + /// End the parsing process and return the last child. pub fn finish(self) -> Vec { self.children } - /// End the parsing process and return multiple children. - pub fn eject(self) -> Option<(Vec, bool)> { - if self.eof() && self.group_success() { - Some((self.children, self.tokens.was_terminated())) - } else { - None - } + /// End the parsing process and return multiple children and whether the + /// last token was terminated. + pub fn consume(self) -> Option<(Vec, bool)> { + (self.eof() && self.terminated()) + .then(|| (self.children, self.tokens.terminated())) + } + + /// End the parsing process and return multiple children and whether the + /// last token was terminated, even if there remains stuff in the string. + pub fn consume_unterminated(self) -> Option<(Vec, bool)> { + self.terminated().then(|| (self.children, self.tokens.terminated())) } /// Create a new marker. @@ -100,18 +113,6 @@ impl<'s> Parser<'s> { output } - /// End the parsing process and return multiple children, even if there - /// remains stuff in the string. - pub fn eject_partial(self) -> Option<(Vec, bool)> { - self.group_success() - .then(|| (self.children, self.tokens.was_terminated())) - } - - /// Set an indentation offset. - pub fn offset(&mut self, columns: usize) { - self.column_offset = columns; - } - /// Whether the end of the source string or group is reached. pub fn eof(&self) -> bool { self.eof @@ -199,6 +200,14 @@ impl<'s> Parser<'s> { self.tokens.scanner().get(self.current_start() .. self.current_end()) } + /// Obtain a range of the source code. + pub fn get(&self, index: I) -> &'s str + where + I: SliceIndex, + { + self.tokens.scanner().get(index) + } + /// The byte index at which the last non-trivia token ended. pub fn prev_end(&self) -> usize { self.prev_end @@ -216,13 +225,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.tokens.scanner().column(index) + self.column_offset - } - - /// Determine the column index for the given byte index while ignoring the - /// offset. - pub fn clean_column(&self, index: usize) -> usize { - self.tokens.scanner().column(index) + self.tokens.scanner().column_offset(index, self.column_offset) } /// Continue parsing in a group. @@ -260,10 +263,8 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - if let Some(n) = self.last_unterminated { - if n != self.prev_end() { - self.last_unterminated = None; - } + if self.last_unterminated != Some(self.prev_end()) { + self.last_unterminated = None; } let mut rescan = self.tokens.mode() != group_mode; @@ -301,23 +302,15 @@ impl<'s> Parser<'s> { } } - /// Check if the group processing was successfully terminated. - pub fn group_success(&self) -> bool { - self.last_unterminated.is_none() && self.groups.is_empty() + /// Checks if all groups were correctly terminated. + pub fn terminated(&self) -> bool { + self.groups.is_empty() && self.last_unterminated.is_none() } /// Low-level bump that consumes exactly one token without special trivia /// handling. fn bump(&mut self) { let kind = self.current.take().unwrap(); - if match kind { - NodeKind::Space(n) if n > 0 => true, - NodeKind::Parbreak => true, - _ => false, - } { - self.column_offset = 0; - } - let len = self.tokens.index() - self.current_start; self.children.push(GreenData::new(kind, len).into()); self.current_start = self.tokens.index(); diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index c735be407..6db891323 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -162,11 +162,26 @@ impl<'s> Scanner<'s> { /// The column index of a given index in the source string. #[inline] pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() + self.column_offset(index, 0) + } + + /// The column index of a given index in the source string when an offset is + /// applied to the first line of the string. + #[inline] + pub fn column_offset(&self, index: usize, offset: usize) -> usize { + let mut apply_offset = false; + let res = self.src[.. index] + .char_indices() .rev() - .take_while(|&c| !is_newline(c)) - .count() + .take_while(|&(_, c)| !is_newline(c)) + .inspect(|&(i, _)| { + if i == 0 { + apply_offset = true + } + }) + .count(); + + if apply_offset { res + offset } else { res } } } diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 7dfca2bf4..69c4d2dee 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -13,7 +13,7 @@ use crate::util::EcoString; pub struct Tokens<'s> { s: Scanner<'s>, mode: TokenMode, - was_terminated: bool, + terminated: bool, } /// What kind of tokens to emit. @@ -32,7 +32,7 @@ impl<'s> Tokens<'s> { Self { s: Scanner::new(src), mode, - was_terminated: true, + terminated: true, } } @@ -71,8 +71,8 @@ impl<'s> Tokens<'s> { /// Whether the last token was terminated. #[inline] - pub fn was_terminated(&self) -> bool { - self.was_terminated + pub fn terminated(&self) -> bool { + self.terminated } } @@ -128,9 +128,7 @@ impl<'s> Tokens<'s> { '`' => self.raw(), '$' => self.math(), '-' => self.hyph(), - '=' if self.s.check_or(true, |c| c == '=' || c.is_whitespace()) => { - NodeKind::Eq - } + '=' => NodeKind::Eq, c if c == '.' || c.is_ascii_digit() => self.numbering(start, c), // Plain text. @@ -259,7 +257,7 @@ impl<'s> Tokens<'s> { ) } } else { - self.was_terminated = false; + self.terminated = false; NodeKind::Error( ErrorPos::End, "expected closing brace".into(), @@ -352,7 +350,7 @@ impl<'s> Tokens<'s> { let remaining = backticks - found; let noun = if remaining == 1 { "backtick" } else { "backticks" }; - self.was_terminated = false; + self.terminated = false; NodeKind::Error( ErrorPos::End, if found == 0 { @@ -400,7 +398,7 @@ impl<'s> Tokens<'s> { display, })) } else { - self.was_terminated = false; + self.terminated = false; NodeKind::Error( ErrorPos::End, if !display || (!escaped && dollar) { @@ -489,7 +487,7 @@ impl<'s> Tokens<'s> { if self.s.eat_if('"') { NodeKind::Str(string) } else { - self.was_terminated = false; + self.terminated = false; NodeKind::Error(ErrorPos::End, "expected quote".into()) } } @@ -497,7 +495,7 @@ impl<'s> Tokens<'s> { fn line_comment(&mut self) -> NodeKind { self.s.eat_until(is_newline); if self.s.peek().is_none() { - self.was_terminated = false; + self.terminated = false; } NodeKind::LineComment } @@ -505,7 +503,7 @@ impl<'s> Tokens<'s> { fn block_comment(&mut self) -> NodeKind { let mut state = '_'; let mut depth = 1; - self.was_terminated = false; + self.terminated = false; // Find the first `*/` that does not correspond to a nested `/*`. while let Some(c) = self.s.eat() { @@ -513,7 +511,7 @@ impl<'s> Tokens<'s> { ('*', '/') => { depth -= 1; if depth == 0 { - self.was_terminated = true; + self.terminated = true; break; } '_' @@ -742,7 +740,7 @@ mod tests { // Test code symbols in text. t!(Markup[" /"]: "a():\"b" => Text("a():\"b")); t!(Markup[" /"]: ";:,|/+" => Text(";:,|"), Text("/+")); - t!(Markup[" /"]: "=-a" => Text("="), Minus, Text("a")); + t!(Markup[" /"]: "=-a" => Eq, Minus, Text("a")); t!(Markup[" "]: "#123" => Text("#"), Text("123")); // Test text ends. diff --git a/src/source.rs b/src/source.rs index 6cca9f751..7afeaa8a3 100644 --- a/src/source.rs +++ b/src/source.rs @@ -154,9 +154,14 @@ impl SourceFile { &self.root } + /// The root red node of the file's untyped red tree. + pub fn red(&self) -> RedNode { + RedNode::from_root(self.root.clone(), self.id) + } + /// The root node of the file's typed abstract syntax tree. pub fn ast(&self) -> TypResult { - let red = RedNode::from_root(self.root.clone(), self.id); + let red = self.red(); let errors = red.errors(); if errors.is_empty() { Ok(red.cast().unwrap()) @@ -284,14 +289,8 @@ impl SourceFile { self.line_starts .extend(newlines(&self.src[start ..]).map(|idx| start + idx)); - // Update the root node. - let reparser = Reparser::new(&self.src, replace, with.len()); - if let Some(range) = reparser.reparse(Rc::make_mut(&mut self.root)) { - range - } else { - self.root = parse(&self.src); - 0 .. self.src.len() - } + // Incrementally reparse the replaced range. + Reparser::new(&self.src, replace, with.len()).reparse(&mut self.root) } /// Provide highlighting categories for the given range of the source file. diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 388d0bb0c..3a0f3a5e0 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -108,7 +108,7 @@ pub struct GreenNode { /// This node's children, losslessly make up this node. children: Vec, /// Whether this node or any of its children are erroneous. - pub erroneous: bool, + erroneous: bool, } impl GreenNode { @@ -139,7 +139,7 @@ impl GreenNode { } /// The node's metadata. - pub fn data(&self) -> &GreenData { + fn data(&self) -> &GreenData { &self.data } @@ -159,41 +159,29 @@ impl GreenNode { } /// Replaces a range of children with some replacement. - /// - /// This method updates the `erroneous` and `data.len` fields. - pub(crate) fn replace_child_range( + pub(crate) fn replace_children( &mut self, - child_idx_range: Range, + range: Range, replacement: Vec, ) { - let old_len: usize = - self.children[child_idx_range.clone()].iter().map(Green::len).sum(); - let new_len: usize = replacement.iter().map(Green::len).sum(); + let superseded = &self.children[range.clone()]; + let superseded_len: usize = superseded.iter().map(Green::len).sum(); + let replacement_len: usize = replacement.iter().map(Green::len).sum(); - if self.erroneous { - if self.children[child_idx_range.clone()].iter().any(Green::erroneous) { - // the old range was erroneous but we do not know if anywhere - // else was so we have to iterate over the whole thing. - self.erroneous = self.children[.. child_idx_range.start] - .iter() - .any(Green::erroneous) - || self.children[child_idx_range.end ..].iter().any(Green::erroneous); - } - // in this case nothing changes so we do not have to bother. - } + // If we're erroneous, but not due to the superseded range, then we will + // still be erroneous after the replacement. + let still_erroneous = self.erroneous && !superseded.iter().any(Green::erroneous); - // the or assignment operator is not lazy. - self.erroneous = self.erroneous || replacement.iter().any(Green::erroneous); - - self.children.splice(child_idx_range, replacement); - self.data.len = self.data.len + new_len - old_len; + self.children.splice(range, replacement); + self.data.len = self.data.len + replacement_len - superseded_len; + self.erroneous = still_erroneous || self.children.iter().any(Green::erroneous); } - /// Update the length of this node given the old and new length of a - /// replaced child. - pub(crate) fn update_child_len(&mut self, new_len: usize, old_len: usize) { + /// Update the length of this node given the old and new length of + /// replaced children. + pub(crate) fn update_parent(&mut self, new_len: usize, old_len: usize) { self.data.len = self.data.len() + new_len - old_len; - self.erroneous = self.children.iter().any(|x| x.erroneous()); + self.erroneous = self.children.iter().any(Green::erroneous); } } @@ -255,7 +243,7 @@ impl From for Green { impl Debug for GreenData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", &self.kind, self.len) + write!(f, "{:?}: {}", self.kind, self.len) } } @@ -398,12 +386,13 @@ impl<'a> RedRef<'a> { } } - /// Perform a depth-first search starting at this node. - pub fn all_children(&self) -> Vec { - let mut res = vec![self.clone()]; - res.extend(self.children().flat_map(|child| child.all_children().into_iter())); - - res + /// Returns all leaf descendants of this node (may include itself). + pub fn leafs(self) -> Vec { + if self.is_leaf() { + vec![self] + } else { + self.children().flat_map(Self::leafs).collect() + } } /// Convert the node to a typed AST node. diff --git a/tests/typ/code/let.typ b/tests/typ/code/let.typ index d4765ea5d..a95d651aa 100644 --- a/tests/typ/code/let.typ +++ b/tests/typ/code/let.typ @@ -59,7 +59,6 @@ Three // Error: 18 expected expression // Error: 18 expected closing paren #let v5 = (1, 2 + ; Five - ^^^^^ + \r\n --- // Error: 13 expected body diff --git a/tests/typeset.rs b/tests/typeset.rs index aa3bcf9d5..b1296886a 100644 --- a/tests/typeset.rs +++ b/tests/typeset.rs @@ -1,6 +1,7 @@ use std::env; use std::ffi::OsStr; use std::fs; +use std::ops::Range; use std::path::Path; use std::rc::Rc; @@ -19,8 +20,8 @@ use typst::image::{Image, RasterImage, Svg}; use typst::library::{PageNode, TextNode}; use typst::loading::FsLoader; use typst::parse::Scanner; -use typst::source::{SourceFile, SourceId}; -use typst::syntax::{RedNode, Span}; +use typst::source::SourceFile; +use typst::syntax::Span; use typst::Context; #[cfg(feature = "layout-cache")] @@ -263,13 +264,12 @@ fn test_part( debug: bool, rng: &mut LinearShift, ) -> (bool, bool, Vec>) { - let mut ok = test_reparse(&src, i, rng); - let id = ctx.sources.provide(src_path, src); let source = ctx.sources.get(id); let (local_compare_ref, mut ref_errors) = parse_metadata(&source); let compare_ref = local_compare_ref.unwrap_or(compare_ref); + let mut ok = test_reparse(ctx.sources.get(id).src(), i, rng); let (frames, mut errors) = match ctx.evaluate(id) { Ok(module) => { @@ -444,43 +444,31 @@ fn test_reparse(src: &str, i: usize, rng: &mut LinearShift) -> bool { } }; - let mut in_range = |range: std::ops::Range| { - let full = rng.next().unwrap() as f64 / u64::MAX as f64; - (range.start as f64 + full * (range.end as f64 - range.start as f64)).floor() - as usize + let mut pick = |range: Range| { + let ratio = rng.next(); + (range.start as f64 + ratio * (range.end - range.start) as f64).floor() as usize }; let insertions = (src.len() as f64 / 400.0).ceil() as usize; for _ in 0 .. insertions { - let supplement = supplements[in_range(0 .. supplements.len())]; - let start = in_range(0 .. src.len()); - let end = in_range(start .. src.len()); + let supplement = supplements[pick(0 .. supplements.len())]; + let start = pick(0 .. src.len()); + let end = pick(start .. src.len()); if !src.is_char_boundary(start) || !src.is_char_boundary(end) { continue; } - if !apply(start .. end, supplement) { - println!("original tree: {:#?}", SourceFile::detached(src).root()); - ok = false; - } + ok &= apply(start .. end, supplement); } - let red = RedNode::from_root( - SourceFile::detached(src).root().clone(), - SourceId::from_raw(0), - ); + let red = SourceFile::detached(src).red(); - let leafs: Vec<_> = red - .as_ref() - .all_children() - .into_iter() - .filter(|red| red.is_leaf()) - .collect(); + let leafs = red.as_ref().leafs(); - let leaf_start = leafs[in_range(0 .. leafs.len())].span().start; - let supplement = supplements[in_range(0 .. supplements.len())]; + let leaf_start = leafs[pick(0 .. leafs.len())].span().start; + let supplement = supplements[pick(0 .. supplements.len())]; ok &= apply(leaf_start .. leaf_start, supplement); @@ -954,23 +942,14 @@ impl LinearShift { pub fn new() -> Self { Self(0xACE5) } -} -impl Iterator for LinearShift { - type Item = u64; - - /// Apply the shift. - fn next(&mut self) -> Option { + /// Return a pseudo-random number between `0.0` and `1.0`. + pub fn next(&mut self) -> f64 { self.0 ^= self.0 >> 3; self.0 ^= self.0 << 14; self.0 ^= self.0 >> 28; self.0 ^= self.0 << 36; self.0 ^= self.0 >> 52; - Some(self.0) - } - - /// The iterator is endless but will repeat eventually. - fn size_hint(&self) -> (usize, Option) { - (usize::MAX, None) + self.0 as f64 / u64::MAX as f64 } }