diff --git a/benches/oneshot.rs b/benches/oneshot.rs index 1d70e4664..5dbf993fe 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -55,7 +55,7 @@ fn bench_scan(iai: &mut Iai) { } fn bench_tokenize(iai: &mut Iai) { - iai.run(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup), 0).count()); + iai.run(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup)).count()); } fn bench_parse(iai: &mut Iai) { diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index d3edff6e2..4736845f5 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -4,19 +4,10 @@ use std::sync::Arc; use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ - is_newline, parse, parse_block, parse_markup_elements, parse_template, TokenMode, + is_newline, parse, reparse_block, reparse_markup_elements, reparse_template, + TokenMode, }; -type ReparseFunc = fn( - &str, - &str, - usize, - isize, - &[Green], - bool, - usize, -) -> Option<(Vec, bool, usize)>; - /// Allows partial refreshs of the [`Green`] node tree. /// /// This struct holds a description of a change. Its methods can be used to try @@ -55,16 +46,12 @@ impl Reparser<'_> { let child_mode = green.kind().mode().unwrap_or(TokenMode::Code); let original_count = green.children().len(); - // Save the current indent if this is a markup node. - let indent = match green.kind() { - NodeKind::Markup(n) => *n, - _ => 0, - }; - let mut search = SearchState::default(); let mut ahead_nontrivia = None; + // Whether the first node that should be replaced is at start. let mut at_start = true; + // Whether the last searched child is the outermost child. let mut child_outermost = false; // Find the the first child in the range of children to reparse. 
@@ -83,18 +70,17 @@ impl Reparser<'_> { search = if child_span.end == self.replace_range.end && child_mode == TokenMode::Markup { - SearchState::RequireNonWS(pos) + SearchState::RequireNonTrivia(pos) } else { SearchState::Contained(pos) }; } else if child_span.contains(&self.replace_range.start) { search = SearchState::Inside(pos); } else { - if (self.replace_range.len() != 0 - || self.replace_range.end != child_span.end - || ahead_nontrivia.is_none()) - && (!child.kind().is_space() - && child.kind() != &NodeKind::Semicolon) + if (!child.kind().is_space() + && child.kind() != &NodeKind::Semicolon) + && (ahead_nontrivia.is_none() + || self.replace_range.start > child_span.end) { ahead_nontrivia = Some((pos, at_start)); } @@ -103,12 +89,12 @@ impl Reparser<'_> { } SearchState::Inside(start) => { if child_span.end == self.replace_range.end { - search = SearchState::RequireNonWS(start); + search = SearchState::RequireNonTrivia(start); } else if child_span.end > self.replace_range.end { search = SearchState::SpanFound(start, pos); } } - SearchState::RequireNonWS(start) => { + SearchState::RequireNonTrivia(start) => { if !child.kind().is_trivia() { search = SearchState::SpanFound(start, pos); } @@ -118,11 +104,21 @@ impl Reparser<'_> { offset += child.len(); child_outermost = outermost && i + 1 == original_count; - if search.end().is_some() { + + if search.done().is_some() { break; } } + // If we were looking for a non-trivia element and hit the end of + // the file here, we instead use EOF as the end of the span. + if let SearchState::RequireNonTrivia(start) = search { + search = SearchState::SpanFound(start, GreenPos { + idx: green.children().len() - 1, + offset: offset - green.children().last().unwrap().len(), + }) + } + if let SearchState::Contained(pos) = search { let child = &mut green.children_mut()[pos.idx]; let prev_len = child.len(); @@ -139,20 +135,20 @@ impl Reparser<'_> { } let superseded_span = pos.offset .. 
pos.offset + prev_len; - let func: Option = match child.kind() { - NodeKind::Template => Some(parse_template), - NodeKind::Block => Some(parse_block), + let func: Option = match child.kind() { + NodeKind::Template => Some(ReparseMode::Template), + NodeKind::Block => Some(ReparseMode::Block), _ => None, }; + // Return if the element was reparsable on its own, otherwise try to + // treat it as a markup element. if let Some(func) = func { if let Some(result) = self.replace( green, func, pos.idx .. pos.idx + 1, superseded_span, - at_start, - indent, outermost, ) { return Some(result); @@ -160,11 +156,13 @@ impl Reparser<'_> { } } - if !matches!(green.kind(), NodeKind::Markup(_)) { - return None; - } + // Save the current indent if this is a markup node and stop otherwise. + let indent = match green.kind() { + NodeKind::Markup(n) => *n, + _ => return None, + }; - let (mut start, end) = search.end()?; + let (mut start, end) = search.done()?; if let Some((ahead, ahead_at_start)) = ahead_nontrivia { let ahead_kind = green.children()[ahead.idx].kind(); @@ -179,13 +177,12 @@ impl Reparser<'_> { let superseded_span = start.offset .. end.offset + green.children()[end.idx].len(); + self.replace( green, - parse_markup_elements, + ReparseMode::MarkupElements(at_start, indent), start.idx .. end.idx + 1, superseded_span, - at_start, - indent, outermost, ) } @@ -193,19 +190,17 @@ impl Reparser<'_> { fn replace( &self, green: &mut GreenNode, - func: ReparseFunc, + mode: ReparseMode, superseded_idx: Range, superseded_span: Range, - at_start: bool, - indent: usize, outermost: bool, ) -> Option> { + let superseded_start = superseded_idx.start; + let differential: isize = self.replace_len as isize - self.replace_range.len() as isize; - let newborn_span = superseded_span.start - .. 
- (superseded_span.end as isize + differential) as usize; - let superseded_start = superseded_idx.start; + let newborn_end = (superseded_span.end as isize + differential) as usize; + let newborn_span = superseded_span.start .. newborn_end; let mut prefix = ""; for (i, c) in self.src[.. newborn_span.start].char_indices().rev() { @@ -215,15 +210,27 @@ impl Reparser<'_> { prefix = &self.src[i .. newborn_span.start]; } - let (newborns, terminated, amount) = func( - &prefix, - &self.src[newborn_span.start ..], - newborn_span.len(), - differential, - &green.children()[superseded_start ..], - at_start, - indent, - )?; + let (newborns, terminated, amount) = match mode { + ReparseMode::Block => reparse_block( + &prefix, + &self.src[newborn_span.start ..], + newborn_span.len(), + ), + ReparseMode::Template => reparse_template( + &prefix, + &self.src[newborn_span.start ..], + newborn_span.len(), + ), + ReparseMode::MarkupElements(at_start, indent) => reparse_markup_elements( + &prefix, + &self.src[newborn_span.start ..], + newborn_span.len(), + differential, + &green.children()[superseded_start ..], + at_start, + indent, + ), + }?; // Do not accept unclosed nodes if the old node wasn't at the right edge // of the tree. @@ -236,6 +243,8 @@ impl Reparser<'_> { } } +/// The position of a green node in terms of its string offset and index within +/// the parent node. #[derive(Clone, Copy, Debug, PartialEq)] struct GreenPos { idx: usize, @@ -256,7 +265,7 @@ enum SearchState { Inside(GreenPos), /// The search has found the end of the modified nodes but the change /// touched its boundries so another non-trivia node is needed. - RequireNonWS(GreenPos), + RequireNonTrivia(GreenPos), /// The search has concluded by finding a start and an end index for nodes /// with a pending reparse. 
SpanFound(GreenPos, GreenPos), @@ -269,17 +278,29 @@ impl Default for SearchState { } impl SearchState { - fn end(&self) -> Option<(GreenPos, GreenPos)> { + fn done(self) -> Option<(GreenPos, GreenPos)> { match self { Self::NoneFound => None, - Self::Contained(s) => Some((*s, *s)), + Self::Contained(s) => Some((s, s)), Self::Inside(_) => None, - Self::RequireNonWS(_) => None, - Self::SpanFound(s, e) => Some((*s, *e)), + Self::RequireNonTrivia(_) => None, + Self::SpanFound(s, e) => Some((s, e)), } } } +/// Which reparse function to choose for a span of elements. +#[derive(Clone, Copy, Debug, PartialEq)] +enum ReparseMode { + /// Reparse a code block with its braces. + Block, + /// Reparse a template, including its square brackets. + Template, + /// Reparse elements of the markup. The variant carries whether the node is + /// `at_start` and the minimum indent of the containing markup node. + MarkupElements(bool, usize), +} + impl NodeKind { /// Whether this node has to appear at the start of a line. pub fn only_at_start(&self) -> bool { @@ -330,7 +351,7 @@ mod tests { test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40); test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33); test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 0 .. 33); - test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11); + test("hello~~{x}", 7 .. 10, "#f()", 5 .. 11); test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 5 .. 25); test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); @@ -344,8 +365,8 @@ mod tests { #[test] fn test_parse_incremental_whitespace_invariants() { - test("hello \\ world", 7 .. 8, "a ", 6 .. 14); - test("hello \\ world", 7 .. 8, " a", 6 .. 14); + test("hello \\ world", 7 .. 8, "a ", 0 .. 14); + test("hello \\ world", 7 .. 8, " a", 0 .. 14); test("x = y", 1 .. 1, " + y", 0 .. 6); test("x = y", 1 .. 
1, " + y\n", 0 .. 7); test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21); @@ -353,13 +374,13 @@ mod tests { test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 18 .. 23); test("hey #myfriend", 4 .. 4, "\\", 0 .. 14); test("hey #myfriend", 4 .. 4, "\\", 3 .. 6); - test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0..11); - test("= foo\n bar\n baz", 6..8, "", 0..15); + test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0 .. 11); + test("= foo\n bar\n baz", 6 .. 8, "", 0 .. 15); } #[test] fn test_parse_incremental_type_invariants() { - test("a #for x in array {x}", 18 .. 21, "[#x]", 0 .. 22); + test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22); test("a #let x = 1 {5}", 3 .. 6, "if", 2 .. 11); test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16); test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 13); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index c08c5d6f1..11ce872f8 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -30,7 +30,7 @@ pub fn parse(src: &str) -> Arc { /// Parse some markup without the topmost node. Returns `Some` if all of the /// input was consumed. -pub fn parse_markup_elements( +pub fn reparse_markup_elements( prefix: &str, src: &str, end_pos: usize, @@ -43,11 +43,11 @@ pub fn parse_markup_elements( let mut node: Option<&Green> = None; let mut iter = reference.iter(); - let mut offset = 0; + let mut offset = differential; let mut replaced = 0; let mut stopped = false; - while !p.eof() { + 'outer: while !p.eof() { if let Some(NodeKind::Space(1 ..)) = p.peek() { if p.column(p.current_end()) < column { return None; @@ -56,44 +56,36 @@ pub fn parse_markup_elements( markup_node(&mut p, &mut at_start); - if p.prev_end() >= end_pos { - let recent = p.children.last().unwrap(); - let recent_start = p.prev_end() - recent.len(); + if p.prev_end() < end_pos { + continue; + } - while offset <= recent_start { - if let Some(node) = node { - // The nodes are equal, at the same position and have the - // same content. 
The parsing trees have converged again, so - // the reparse may stop here. - if (offset as isize + differential) as usize == recent_start - && node == recent - { - replaced -= 1; - stopped = true; - break; - } - } + let recent = p.children.last().unwrap(); + let recent_start = p.prev_end() - recent.len(); - let result = iter.next(); - if let Some(node) = node { - offset += node.len(); - } - node = result; - if node.is_none() { - break; - } else { - replaced += 1; + while offset <= recent_start as isize { + if let Some(node) = node { + // The nodes are equal, at the same position and have the + // same content. The parsing trees have converged again, so + // the reparse may stop here. + if offset == recent_start as isize && node == recent { + replaced -= 1; + stopped = true; + break 'outer; } } - if stopped { + if let Some(node) = node { + offset += node.len() as isize; + } + + node = iter.next(); + if node.is_none() { break; } - } - } - if p.prev_end() < end_pos { - return None; + replaced += 1; + } } if p.eof() && !stopped { @@ -109,14 +101,10 @@ pub fn parse_markup_elements( } /// Parse a template literal. Returns `Some` if all of the input was consumed. -pub fn parse_template( +pub fn reparse_template( prefix: &str, src: &str, end_pos: usize, - _: isize, - _: &[Green], - _: bool, - _: usize, ) -> Option<(Vec, bool, usize)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBracket) { @@ -135,14 +123,10 @@ pub fn parse_template( } /// Parse a code block. Returns `Some` if all of the input was consumed. 
-pub fn parse_block( +pub fn reparse_block( prefix: &str, src: &str, end_pos: usize, - _: isize, - _: &[Green], - _: bool, - _: usize, ) -> Option<(Vec, bool, usize)> { let mut p = Parser::with_prefix(prefix, src, TokenMode::Code); if !p.at(&NodeKind::LeftBrace) { diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 8588e5862..123871a58 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -2,7 +2,7 @@ use core::slice::SliceIndex; use std::fmt::{self, Display, Formatter}; use std::mem; -use super::{Scanner, TokenMode, Tokens}; +use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind}; /// A convenient token-based parser. @@ -30,11 +30,14 @@ pub struct Parser<'s> { impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str, mode: TokenMode) -> Self { - Self::with_offset(src, mode, 0) + Self::with_prefix("", src, mode) } - fn with_offset(src: &'s str, mode: TokenMode, offset: usize) -> Self { - let mut tokens = Tokens::new(src, mode, offset); + /// Create a new parser for the source string that is prefixed by some text + /// that does not need to be parsed but taken into account for column + /// calculation. + pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { + let mut tokens = Tokens::with_prefix(prefix, src, mode); let current = tokens.next(); Self { tokens, @@ -49,13 +52,6 @@ impl<'s> Parser<'s> { } } - /// Create a new parser for the source string that is prefixed by some text - /// that does not need to be parsed but taken into account for column - /// calculation. - pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { - Self::with_offset(src, mode, Scanner::new(prefix).column(prefix.len())) - } - /// End the parsing process and return the last child. pub fn finish(self) -> Vec { self.children @@ -218,7 +214,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. 
pub fn column(&self, index: usize) -> usize { - self.tokens.scanner().column(index) + self.tokens.column(index) } /// Continue parsing in a group. diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 15060c7b8..e4cf56e97 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -10,21 +10,13 @@ pub struct Scanner<'s> { /// The index at which the peekable character starts. Must be in bounds and /// at a codepoint boundary to guarantee safety. index: usize, - /// Offsets the indentation on the first line of the source. - column_offset: usize, } impl<'s> Scanner<'s> { /// Create a new char scanner. #[inline] pub fn new(src: &'s str) -> Self { - Self { src, index: 0, column_offset: 0 } - } - - /// Create a new char scanner with an offset for the first line indent. - #[inline] - pub fn with_indent_offset(src: &'s str, column_offset: usize) -> Self { - Self { src, index: 0, column_offset } + Self { src, index: 0 } } /// Whether the end of the string is reached. @@ -177,30 +169,6 @@ impl<'s> Scanner<'s> { // optimized away in some cases. self.src.get(start .. self.index).unwrap_or_default() } - - /// The column index of a given index in the source string. - #[inline] - pub fn column(&self, index: usize) -> usize { - let mut apply_offset = false; - let res = self.src[.. index] - .char_indices() - .rev() - .take_while(|&(_, c)| !is_newline(c)) - .inspect(|&(i, _)| { - if i == 0 { - apply_offset = true - } - }) - .count(); - - // The loop is never executed if the slice is empty, but we are of - // course still at the start of the first line. - if self.src[.. index].len() == 0 { - apply_offset = true; - } - - if apply_offset { res + self.column_offset } else { res } - } } /// Whether this character denotes a newline. diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 4a13694a5..91bbf9e84 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -11,9 +11,14 @@ use crate::util::EcoString; /// An iterator over the tokens of a string of source code. 
pub struct Tokens<'s> { + /// The underlying scanner. s: Scanner<'s>, + /// The mode the scanner is in. This determines what tokens it recognizes. mode: TokenMode, + /// Whether the last token has been terminated. terminated: bool, + /// Offsets the indentation on the first line of the source. + column_offset: usize, } /// What kind of tokens to emit. @@ -28,11 +33,19 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. #[inline] - pub fn new(src: &'s str, mode: TokenMode, offset: usize) -> Self { + pub fn new(src: &'s str, mode: TokenMode) -> Self { + Self::with_prefix("", src, mode) + } + + /// Create a new token iterator with the given mode and a prefix to offset + /// column calculations. + #[inline] + pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self { Self { - s: Scanner::with_indent_offset(src, offset), + s: Scanner::new(src), mode, terminated: true, + column_offset: column(prefix, prefix.len(), 0), } } @@ -74,6 +87,12 @@ impl<'s> Tokens<'s> { pub fn terminated(&self) -> bool { self.terminated } + + /// The column index of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + column(self.s.src(), index, self.column_offset) + } } impl<'s> Iterator for Tokens<'s> { @@ -321,7 +340,7 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.s.column(self.s.index() - 1); + let column = self.column(self.s.index() - 1); let mut backticks = 1; while self.s.eat_if('`') { @@ -574,6 +593,30 @@ fn keyword(ident: &str) -> Option { }) } +/// The column index of a given index in the source string, given a column offset for the first line. +#[inline] +fn column(string: &str, index: usize, offset: usize) -> usize { + let mut apply_offset = false; + let res = string[.. 
index] + .char_indices() + .rev() + .take_while(|&(_, c)| !is_newline(c)) + .inspect(|&(i, _)| { + if i == 0 { + apply_offset = true + } + }) + .count(); + + // The loop is never executed if the slice is empty, but we are of + // course still at the start of the first line. + if index == 0 { + apply_offset = true; + } + + if apply_offset { res + offset } else { res } +} + #[cfg(test)] #[allow(non_snake_case)] mod tests { @@ -689,7 +732,7 @@ mod tests { }}; (@$mode:ident: $src:expr => $($token:expr),*) => {{ let src = $src; - let found = Tokens::new(&src, $mode, 0).collect::>(); + let found = Tokens::new(&src, $mode).collect::>(); let expected = vec![$($token.clone()),*]; check(&src, found, expected); }}; diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 7992f9de9..10e5ec70d 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -60,7 +60,7 @@ impl Markup { /// The markup nodes. pub fn nodes(&self) -> impl Iterator + '_ { self.0.children().filter_map(|node| match node.kind() { - NodeKind::Space(n) if *n > 1 => Some(MarkupNode::Parbreak), + NodeKind::Space(2 ..) => Some(MarkupNode::Parbreak), NodeKind::Space(_) => Some(MarkupNode::Space), NodeKind::Linebreak => Some(MarkupNode::Linebreak), NodeKind::Text(s) | NodeKind::TextInLine(s) => { diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index fc98bb34c..85f2013c4 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -757,7 +757,7 @@ impl NodeKind { /// Whether this node is `at_start` given the previous value of the property. pub fn is_at_start(&self, prev: bool) -> bool { match self { - Self::Space(n) if *n > 0 => true, + Self::Space(1 ..) => true, Self::Space(_) | Self::LineComment | Self::BlockComment => prev, _ => false, } @@ -858,7 +858,7 @@ impl NodeKind { Self::Include => "keyword `include`", Self::From => "keyword `from`", Self::Markup(_) => "markup", - Self::Space(n) if *n > 1 => "paragraph break", + Self::Space(2 ..) 
=> "paragraph break", Self::Space(_) => "space", Self::Linebreak => "forced linebreak", Self::Text(_) | Self::TextInLine(_) => "text",