Code Review: That's just like your struct, man.

2025-07-12 07:02:53 +08:00 · 2022-02-23 20:06:48 +01:00 · 2022-02-23 20:06:48 +01:00 · 9fda623b02
commit 9fda623b02
parent 4c8634c600
8 changed files with 173 additions and 161 deletions
--- a/benches/oneshot.rs
+++ b/benches/oneshot.rs
@ -55,7 +55,7 @@ fn bench_scan(iai: &mut Iai) {
 }
 fn bench_tokenize(iai: &mut Iai) {
-    iai.run(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup), 0).count());
+    iai.run(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup)).count());
 }
 fn bench_parse(iai: &mut Iai) {
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@ -4,19 +4,10 @@ use std::sync::Arc;
 use crate::syntax::{Green, GreenNode, NodeKind};
 use super::{
-    is_newline, parse, parse_block, parse_markup_elements, parse_template, TokenMode,
+    is_newline, parse, reparse_block, reparse_markup_elements, reparse_template,
    TokenMode,
 };
 type ReparseFunc = fn(
    &str,
    &str,
    usize,
    isize,
    &[Green],
    bool,
    usize,
 ) -> Option<(Vec<Green>, bool, usize)>;
 /// Allows partial refreshes of the [`Green`] node tree.
 ///
 /// This struct holds a description of a change. Its methods can be used to try
@ -55,16 +46,12 @@ impl Reparser<'_> {
        let child_mode = green.kind().mode().unwrap_or(TokenMode::Code);
        let original_count = green.children().len();
        // Save the current indent if this is a markup node.
        let indent = match green.kind() {
            NodeKind::Markup(n) => *n,
            _ => 0,
        };
        let mut search = SearchState::default();
        let mut ahead_nontrivia = None;
        // Whether the first node that should be replaced is at start.
        let mut at_start = true;
        // Whether the last searched child is the outermost child.
        let mut child_outermost = false;
        // Find the first child in the range of children to reparse.
@ -83,18 +70,17 @@ impl Reparser<'_> {
                        search = if child_span.end == self.replace_range.end
                            && child_mode == TokenMode::Markup
                        {
-                            SearchState::RequireNonWS(pos)
+                            SearchState::RequireNonTrivia(pos)
                        } else {
                            SearchState::Contained(pos)
                        };
                    } else if child_span.contains(&self.replace_range.start) {
                        search = SearchState::Inside(pos);
                    } else {
-                        if (self.replace_range.len() != 0
+                        if (!child.kind().is_space()
                            || self.replace_range.end != child_span.end
                            || ahead_nontrivia.is_none())
                            && (!child.kind().is_space()
                            && child.kind() != &NodeKind::Semicolon)
                            && (ahead_nontrivia.is_none()
                                || self.replace_range.start > child_span.end)
                        {
                            ahead_nontrivia = Some((pos, at_start));
                        }
@ -103,12 +89,12 @@ impl Reparser<'_> {
                }
                SearchState::Inside(start) => {
                    if child_span.end == self.replace_range.end {
-                        search = SearchState::RequireNonWS(start);
+                        search = SearchState::RequireNonTrivia(start);
                    } else if child_span.end > self.replace_range.end {
                        search = SearchState::SpanFound(start, pos);
                    }
                }
-                SearchState::RequireNonWS(start) => {
+                SearchState::RequireNonTrivia(start) => {
                    if !child.kind().is_trivia() {
                        search = SearchState::SpanFound(start, pos);
                    }
@ -118,11 +104,21 @@ impl Reparser<'_> {
            offset += child.len();
            child_outermost = outermost && i + 1 == original_count;
-            if search.end().is_some() {
+
            if search.done().is_some() {
                break;
            }
        }
        // If we were looking for a non-trivia element and hit the end of
        // the file here, we instead use EOF as the end of the span.
        if let SearchState::RequireNonTrivia(start) = search {
            search = SearchState::SpanFound(start, GreenPos {
                idx: green.children().len() - 1,
                offset: offset - green.children().last().unwrap().len(),
            })
        }
        if let SearchState::Contained(pos) = search {
            let child = &mut green.children_mut()[pos.idx];
            let prev_len = child.len();
@ -139,20 +135,20 @@ impl Reparser<'_> {
            }
            let superseded_span = pos.offset .. pos.offset + prev_len;
-            let func: Option<ReparseFunc> = match child.kind() {
+            let func: Option<ReparseMode> = match child.kind() {
-                NodeKind::Template => Some(parse_template),
+                NodeKind::Template => Some(ReparseMode::Template),
-                NodeKind::Block => Some(parse_block),
+                NodeKind::Block => Some(ReparseMode::Block),
                _ => None,
            };
            // Return if the element was reparsable on its own, otherwise try to
            // treat it as a markup element.
            if let Some(func) = func {
                if let Some(result) = self.replace(
                    green,
                    func,
                    pos.idx .. pos.idx + 1,
                    superseded_span,
                    at_start,
                    indent,
                    outermost,
                ) {
                    return Some(result);
@ -160,11 +156,13 @@ impl Reparser<'_> {
            }
        }
-        if !matches!(green.kind(), NodeKind::Markup(_)) {
+        // Save the current indent if this is a markup node and stop otherwise.
-            return None;
+        let indent = match green.kind() {
-        }
+            NodeKind::Markup(n) => *n,
            _ => return None,
        };
-        let (mut start, end) = search.end()?;
+        let (mut start, end) = search.done()?;
        if let Some((ahead, ahead_at_start)) = ahead_nontrivia {
            let ahead_kind = green.children()[ahead.idx].kind();
@ -179,13 +177,12 @@ impl Reparser<'_> {
        let superseded_span =
            start.offset .. end.offset + green.children()[end.idx].len();
        self.replace(
            green,
-            parse_markup_elements,
+            ReparseMode::MarkupElements(at_start, indent),
            start.idx .. end.idx + 1,
            superseded_span,
            at_start,
            indent,
            outermost,
        )
    }
@ -193,19 +190,17 @@ impl Reparser<'_> {
    fn replace(
        &self,
        green: &mut GreenNode,
-        func: ReparseFunc,
+        mode: ReparseMode,
        superseded_idx: Range<usize>,
        superseded_span: Range<usize>,
        at_start: bool,
        indent: usize,
        outermost: bool,
    ) -> Option<Range<usize>> {
        let superseded_start = superseded_idx.start;
        let differential: isize =
            self.replace_len as isize - self.replace_range.len() as isize;
-        let newborn_span = superseded_span.start
+        let newborn_end = (superseded_span.end as isize + differential) as usize;
-            ..
+        let newborn_span = superseded_span.start .. newborn_end;
            (superseded_span.end as isize + differential) as usize;
        let superseded_start = superseded_idx.start;
        let mut prefix = "";
        for (i, c) in self.src[.. newborn_span.start].char_indices().rev() {
@ -215,7 +210,18 @@ impl Reparser<'_> {
            prefix = &self.src[i .. newborn_span.start];
        }
-        let (newborns, terminated, amount) = func(
+        let (newborns, terminated, amount) = match mode {
            ReparseMode::Block => reparse_block(
                &prefix,
                &self.src[newborn_span.start ..],
                newborn_span.len(),
            ),
            ReparseMode::Template => reparse_template(
                &prefix,
                &self.src[newborn_span.start ..],
                newborn_span.len(),
            ),
            ReparseMode::MarkupElements(at_start, indent) => reparse_markup_elements(
                &prefix,
                &self.src[newborn_span.start ..],
                newborn_span.len(),
@ -223,7 +229,8 @@ impl Reparser<'_> {
                &green.children()[superseded_start ..],
                at_start,
                indent,
-        )?;
+            ),
        }?;
        // Do not accept unclosed nodes if the old node wasn't at the right edge
        // of the tree.
@ -236,6 +243,8 @@ impl Reparser<'_> {
    }
 }
 /// The position of a green node in terms of its string offset and index within
 /// the parent node.
 #[derive(Clone, Copy, Debug, PartialEq)]
 struct GreenPos {
    idx: usize,
@ -256,7 +265,7 @@ enum SearchState {
    Inside(GreenPos),
    /// The search has found the end of the modified nodes but the change
    /// touched its boundaries so another non-trivia node is needed.
-    RequireNonWS(GreenPos),
+    RequireNonTrivia(GreenPos),
    /// The search has concluded by finding a start and an end index for nodes
    /// with a pending reparse.
    SpanFound(GreenPos, GreenPos),
@ -269,17 +278,29 @@ impl Default for SearchState {
 }
 impl SearchState {
-    fn end(&self) -> Option<(GreenPos, GreenPos)> {
+    fn done(self) -> Option<(GreenPos, GreenPos)> {
        match self {
            Self::NoneFound => None,
-            Self::Contained(s) => Some((*s, *s)),
+            Self::Contained(s) => Some((s, s)),
            Self::Inside(_) => None,
-            Self::RequireNonWS(_) => None,
+            Self::RequireNonTrivia(_) => None,
-            Self::SpanFound(s, e) => Some((*s, *e)),
+            Self::SpanFound(s, e) => Some((s, e)),
        }
    }
 }
 /// Which reparse function to choose for a span of elements.
 ///
 /// The mode is matched on when a span is replaced and selects between
 /// `reparse_block`, `reparse_template` and `reparse_markup_elements`.
 #[derive(Clone, Copy, Debug, PartialEq)]
 enum ReparseMode {
    /// Reparse a code block with its braces. Chosen for `NodeKind::Block`
    /// children.
    Block,
    /// Reparse a template, including its square brackets. Chosen for
    /// `NodeKind::Template` children.
    Template,
    /// Reparse elements of the markup. The variant carries whether the node is
    /// `at_start` (first field) and the minimum indent of the containing
    /// markup node (second field).
    MarkupElements(bool, usize),
 }
 impl NodeKind {
    /// Whether this node has to appear at the start of a line.
    pub fn only_at_start(&self) -> bool {
@ -330,7 +351,7 @@ mod tests {
        test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 33 .. 40);
        test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 0 .. 33);
        test("{let i=1; for x in range(5) {i}}", 13 .. 14, "  ", 0 .. 33);
-        test("hello~~{x}", 7 .. 10, "#f()", 0 .. 11);
+        test("hello~~{x}", 7 .. 10, "#f()", 5 .. 11);
        test("this~is -- in my opinion -- spectacular", 8 .. 10, "---", 5 .. 25);
        test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22);
        test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17);
@ -344,8 +365,8 @@ mod tests {
    #[test]
    fn test_parse_incremental_whitespace_invariants() {
-        test("hello \\ world", 7 .. 8, "a ", 6 .. 14);
+        test("hello \\ world", 7 .. 8, "a ", 0 .. 14);
-        test("hello \\ world", 7 .. 8, " a", 6 .. 14);
+        test("hello \\ world", 7 .. 8, " a", 0 .. 14);
        test("x = y", 1 .. 1, " + y", 0 .. 6);
        test("x = y", 1 .. 1, " + y\n", 0 .. 7);
        test("abc\n= a heading\njoke", 3 .. 4, "\nmore\n\n", 0 .. 21);
@ -353,13 +374,13 @@ mod tests {
        test("#let x = (1, 2 + ;~ Five\r\n\r", 20 .. 23, "2.", 18 .. 23);
        test("hey #myfriend", 4 .. 4, "\\", 0 .. 14);
        test("hey  #myfriend", 4 .. 4, "\\", 3 .. 6);
-        test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0..11);
+        test("= foo\nbar\n - a\n - b", 6 .. 9, "", 0 .. 11);
-        test("= foo\n  bar\n  baz", 6..8, "", 0..15);
+        test("= foo\n  bar\n  baz", 6 .. 8, "", 0 .. 15);
    }
    #[test]
    fn test_parse_incremental_type_invariants() {
-        test("a #for x in array {x}", 18 .. 21, "[#x]", 0 .. 22);
+        test("a #for x in array {x}", 18 .. 21, "[#x]", 2 .. 22);
        test("a #let x = 1 {5}", 3 .. 6, "if", 2 .. 11);
        test("a {let x = 1 {5}} b", 3 .. 6, "if", 2 .. 16);
        test("#let x = 1 {5}", 4 .. 4, " if", 0 .. 13);
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@ -30,7 +30,7 @@ pub fn parse(src: &str) -> Arc<GreenNode> {
 /// Parse some markup without the topmost node. Returns `Some` if all of the
 /// input was consumed.
-pub fn parse_markup_elements(
+pub fn reparse_markup_elements(
    prefix: &str,
    src: &str,
    end_pos: usize,
@ -43,11 +43,11 @@ pub fn parse_markup_elements(
    let mut node: Option<&Green> = None;
    let mut iter = reference.iter();
-    let mut offset = 0;
+    let mut offset = differential;
    let mut replaced = 0;
    let mut stopped = false;
-    while !p.eof() {
+    'outer: while !p.eof() {
        if let Some(NodeKind::Space(1 ..)) = p.peek() {
            if p.column(p.current_end()) < column {
                return None;
@ -56,46 +56,38 @@ pub fn parse_markup_elements(
        markup_node(&mut p, &mut at_start);
-        if p.prev_end() >= end_pos {
+        if p.prev_end() < end_pos {
            continue;
        }
        let recent = p.children.last().unwrap();
        let recent_start = p.prev_end() - recent.len();
-            while offset <= recent_start {
+        while offset <= recent_start as isize {
            if let Some(node) = node {
                // The nodes are equal, at the same position and have the
                // same content. The parsing trees have converged again, so
                // the reparse may stop here.
-                    if (offset as isize + differential) as usize == recent_start
+                if offset == recent_start as isize && node == recent {
                        && node == recent
                    {
                    replaced -= 1;
                    stopped = true;
-                        break;
+                    break 'outer;
                }
            }
                let result = iter.next();
            if let Some(node) = node {
-                    offset += node.len();
+                offset += node.len() as isize;
            }
-                node = result;
+
            node = iter.next();
            if node.is_none() {
                break;
-                } else {
+            }
            replaced += 1;
        }
    }
            if stopped {
                break;
            }
        }
    }
    if p.prev_end() < end_pos {
        return None;
    }
    if p.eof() && !stopped {
        replaced = reference.len();
    }
@ -109,14 +101,10 @@ pub fn parse_markup_elements(
 }
 /// Parse a template literal. Returns `Some` if all of the input was consumed.
-pub fn parse_template(
+pub fn reparse_template(
    prefix: &str,
    src: &str,
    end_pos: usize,
    _: isize,
    _: &[Green],
    _: bool,
    _: usize,
 ) -> Option<(Vec<Green>, bool, usize)> {
    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
    if !p.at(&NodeKind::LeftBracket) {
@ -135,14 +123,10 @@ pub fn parse_template(
 }
 /// Parse a code block. Returns `Some` if all of the input was consumed.
-pub fn parse_block(
+pub fn reparse_block(
    prefix: &str,
    src: &str,
    end_pos: usize,
    _: isize,
    _: &[Green],
    _: bool,
    _: usize,
 ) -> Option<(Vec<Green>, bool, usize)> {
    let mut p = Parser::with_prefix(prefix, src, TokenMode::Code);
    if !p.at(&NodeKind::LeftBrace) {
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@ -2,7 +2,7 @@ use core::slice::SliceIndex;
 use std::fmt::{self, Display, Formatter};
 use std::mem;
-use super::{Scanner, TokenMode, Tokens};
+use super::{TokenMode, Tokens};
 use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
 /// A convenient token-based parser.
@ -30,11 +30,14 @@ pub struct Parser<'s> {
 impl<'s> Parser<'s> {
    /// Create a new parser for the source string.
    pub fn new(src: &'s str, mode: TokenMode) -> Self {
-        Self::with_offset(src, mode, 0)
+        Self::with_prefix("", src, mode)
    }
-    fn with_offset(src: &'s str, mode: TokenMode, offset: usize) -> Self {
+    /// Create a new parser for the source string that is prefixed by some text
-        let mut tokens = Tokens::new(src, mode, offset);
+    /// that does not need to be parsed but taken into account for column
    /// calculation.
    pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
        let mut tokens = Tokens::with_prefix(prefix, src, mode);
        let current = tokens.next();
        Self {
            tokens,
@ -49,13 +52,6 @@ impl<'s> Parser<'s> {
        }
    }
    /// Create a new parser for the source string that is prefixed by some text
    /// that does not need to be parsed but taken into account for column
    /// calculation.
    pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
        Self::with_offset(src, mode, Scanner::new(prefix).column(prefix.len()))
    }
    /// End the parsing process and return the parsed children.
    pub fn finish(self) -> Vec<Green> {
        self.children
@ -218,7 +214,7 @@ impl<'s> Parser<'s> {
    /// Determine the column index for the given byte index.
    pub fn column(&self, index: usize) -> usize {
-        self.tokens.scanner().column(index)
+        self.tokens.column(index)
    }
    /// Continue parsing in a group.
--- a/src/parse/scanner.rs
+++ b/src/parse/scanner.rs
@ -10,21 +10,13 @@ pub struct Scanner<'s> {
    /// The index at which the peekable character starts. Must be in bounds and
    /// at a codepoint boundary to guarantee safety.
    index: usize,
    /// Offsets the indentation on the first line of the source.
    column_offset: usize,
 }
 impl<'s> Scanner<'s> {
    /// Create a new char scanner.
    #[inline]
    pub fn new(src: &'s str) -> Self {
-        Self { src, index: 0, column_offset: 0 }
+        Self { src, index: 0 }
    }
    /// Create a new char scanner with an offset for the first line indent.
    #[inline]
    pub fn with_indent_offset(src: &'s str, column_offset: usize) -> Self {
        Self { src, index: 0, column_offset }
    }
    /// Whether the end of the string is reached.
@ -177,30 +169,6 @@ impl<'s> Scanner<'s> {
        // optimized away in some cases.
        self.src.get(start .. self.index).unwrap_or_default()
    }
    /// The column index of a given index in the source string.
    #[inline]
    pub fn column(&self, index: usize) -> usize {
        let mut apply_offset = false;
        let res = self.src[.. index]
            .char_indices()
            .rev()
            .take_while(|&(_, c)| !is_newline(c))
            .inspect(|&(i, _)| {
                if i == 0 {
                    apply_offset = true
                }
            })
            .count();
        // The loop is never executed if the slice is empty, but we are of
        // course still at the start of the first line.
        if self.src[.. index].len() == 0 {
            apply_offset = true;
        }
        if apply_offset { res + self.column_offset } else { res }
    }
 }
 /// Whether this character denotes a newline.
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@ -11,9 +11,14 @@ use crate::util::EcoString;
 /// An iterator over the tokens of a string of source code.
 ///
 /// Besides the scanner, it keeps a column offset for the first line so that
 /// column calculations can account for a prefix that precedes the tokenized
 /// source.
 pub struct Tokens<'s> {
    /// The underlying scanner.
    s: Scanner<'s>,
    /// The mode the scanner is in. This determines what tokens it recognizes.
    mode: TokenMode,
    /// Whether the last token has been terminated. Starts out as `true`.
    terminated: bool,
    /// Offsets the indentation on the first line of the source. Derived from
    /// the column at the end of the prefix given to `with_prefix`.
    column_offset: usize,
 }
 /// What kind of tokens to emit.
@ -28,11 +33,19 @@ pub enum TokenMode {
 impl<'s> Tokens<'s> {
    /// Create a new token iterator with the given mode.
    #[inline]
-    pub fn new(src: &'s str, mode: TokenMode, offset: usize) -> Self {
+    pub fn new(src: &'s str, mode: TokenMode) -> Self {
        Self::with_prefix("", src, mode)
    }
    /// Create a new token iterator with the given mode and a prefix to offset
    /// column calculations.
    #[inline]
    pub fn with_prefix(prefix: &str, src: &'s str, mode: TokenMode) -> Self {
        Self {
-            s: Scanner::with_indent_offset(src, offset),
+            s: Scanner::new(src),
            mode,
            terminated: true,
            column_offset: column(prefix, prefix.len(), 0),
        }
    }
@ -74,6 +87,12 @@ impl<'s> Tokens<'s> {
    pub fn terminated(&self) -> bool {
        self.terminated
    }
    /// Compute the column of the byte `index` within the tokenized source,
    /// shifting positions on the first line by the stored column offset.
    #[inline]
    pub fn column(&self, index: usize) -> usize {
        let source = self.s.src();
        let first_line_shift = self.column_offset;
        column(source, index, first_line_shift)
    }
 }
 impl<'s> Iterator for Tokens<'s> {
@ -321,7 +340,7 @@ impl<'s> Tokens<'s> {
    }
    fn raw(&mut self) -> NodeKind {
-        let column = self.s.column(self.s.index() - 1);
+        let column = self.column(self.s.index() - 1);
        let mut backticks = 1;
        while self.s.eat_if('`') {
@ -574,6 +593,30 @@ fn keyword(ident: &str) -> Option<NodeKind> {
    })
 }
 /// Count the characters between the start of the line containing `index` and
 /// `index` itself. If that line is the first line of `string`, `offset` is
 /// added to the result.
 #[inline]
 fn column(string: &str, index: usize, offset: usize) -> usize {
    let mut width = 0;
    // Walk backwards from `index` until a newline ends the current line.
    for c in string[.. index].chars().rev() {
        if is_newline(c) {
            // A newline precedes the position, so it is not on the first
            // line and the offset does not apply.
            return width;
        }
        width += 1;
    }
    // No newline was seen (this includes the empty slice), so the position
    // is on the first line.
    width + offset
 }
 #[cfg(test)]
 #[allow(non_snake_case)]
 mod tests {
@ -689,7 +732,7 @@ mod tests {
        }};
        (@$mode:ident: $src:expr => $($token:expr),*) => {{
            let src = $src;
-            let found = Tokens::new(&src, $mode, 0).collect::<Vec<_>>();
+            let found = Tokens::new(&src, $mode).collect::<Vec<_>>();
            let expected = vec![$($token.clone()),*];
            check(&src, found, expected);
        }};
--- a/src/syntax/ast.rs
+++ b/src/syntax/ast.rs
@ -60,7 +60,7 @@ impl Markup {
    /// The markup nodes.
    pub fn nodes(&self) -> impl Iterator<Item = MarkupNode> + '_ {
        self.0.children().filter_map(|node| match node.kind() {
-            NodeKind::Space(n) if *n > 1 => Some(MarkupNode::Parbreak),
+            NodeKind::Space(2 ..) => Some(MarkupNode::Parbreak),
            NodeKind::Space(_) => Some(MarkupNode::Space),
            NodeKind::Linebreak => Some(MarkupNode::Linebreak),
            NodeKind::Text(s) | NodeKind::TextInLine(s) => {
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@ -757,7 +757,7 @@ impl NodeKind {
    /// Whether this node is `at_start` given the previous value of the property.
    pub fn is_at_start(&self, prev: bool) -> bool {
        match self {
-            Self::Space(n) if *n > 0 => true,
+            Self::Space(1 ..) => true,
            Self::Space(_) | Self::LineComment | Self::BlockComment => prev,
            _ => false,
        }
@ -858,7 +858,7 @@ impl NodeKind {
            Self::Include => "keyword `include`",
            Self::From => "keyword `from`",
            Self::Markup(_) => "markup",
-            Self::Space(n) if *n > 1 => "paragraph break",
+            Self::Space(2 ..) => "paragraph break",
            Self::Space(_) => "space",
            Self::Linebreak => "forced linebreak",
            Self::Text(_) | Self::TextInLine(_) => "text",