Refactor Parser (#5310)

Laurenz 2024-11-04 10:17:49 +01:00 committed by GitHub
commit cb1aad3a0c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 1120 additions and 705 deletions

Cargo.lock generated
View File

@@ -3018,6 +3018,7 @@ dependencies = [
  "typst-pdf",
  "typst-render",
  "typst-svg",
+ "typst-syntax",
  "unscanny",
  "walkdir",
 ]

View File

@@ -0,0 +1,40 @@
# typst-syntax

Welcome to the Typst Syntax crate! This crate manages the syntactical structure
of Typst through a few core responsibilities: assigning source file IDs,
parsing Typst syntax, creating an Abstract Syntax Tree (AST), initializing
source "spans" (for linking AST elements to their outputs in a document), and
syntax highlighting.

Below are quick descriptions of the files you might be editing if you find
yourself here :)

- `lexer.rs`: The lexical foundation of the parser, which converts a string of
  characters into tokens.
- `parser.rs`: The main parser definition, preparing a Concrete Syntax Tree
  made of nested vectors of `SyntaxNode`s.
- `reparser.rs`: The algorithm for reparsing the minimal required amount of
  source text for efficient incremental compilation.
- `ast.rs`: The conversion layer between the Concrete Syntax Tree of the
  parser and the Abstract Syntax Tree used for code evaluation.
- `node.rs` & `span.rs`: The underlying data structure for the Concrete Syntax
  Tree and the definitions of source spans used for efficiently pointing to a
  syntax node in things like diagnostics.
- `kind.rs` & `set.rs`: An enum of all syntactical tokens and nodes, and a
  bit-set data structure for sets of `SyntaxKind`s.
- `highlight.rs`: Extraction of syntax highlighting information from the
  Concrete Syntax Tree (and output as HTML).
- `path.rs`, `file.rs`, `package.rs`: The system for interning project and
  package paths as unique file IDs and resolving them in a virtual filesystem
  (not actually for _opening_ files).

The structure of the parser is largely adapted from Rust Analyzer. Their
[documentation][ra] is a good reference for a number of the design decisions
around the parser and AST.

The reparsing algorithm is explained in Section 4 of [Martin's thesis][thesis]
(though it has changed a bit since then).

[ra]: https://github.com/rust-lang/rust-analyzer/blob/master/docs/dev/syntax.md
[thesis]: https://www.researchgate.net/publication/364622490_Fast_Typesetting_with_Incremental_Compilation
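To make the crate overview above concrete, here is a minimal sketch (editorial, not part of this commit) of the flow from source text through the Concrete Syntax Tree to the AST; it assumes only the crate's public `parse` entry point and the `ast` module:

```rust
use typst_syntax::ast::{self, AstNode};
use typst_syntax::{parse, SyntaxKind};

fn main() {
    // `parse` runs the lexer and parser, yielding a lossless Concrete
    // Syntax Tree: its text is exactly the input.
    let root = parse("= Heading\nSome *strong* text.");
    assert_eq!(root.kind(), SyntaxKind::Markup);
    assert_eq!(root.clone().into_text().as_str(), "= Heading\nSome *strong* text.");

    // `ast.rs` is the typed view over the same untyped tree.
    let markup: ast::Markup = root.cast().expect("root is markup");
    for expr in markup.exprs() {
        println!("{:?}", expr.to_untyped().kind());
    }
}
```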

View File

@@ -4,20 +4,18 @@ use unicode_script::{Script, UnicodeScript};
 use unicode_segmentation::UnicodeSegmentation;
 use unscanny::Scanner;

-use crate::{SyntaxError, SyntaxKind};
+use crate::{SyntaxError, SyntaxKind, SyntaxNode};

-/// Splits up a string of source code into tokens.
+/// An iterator over a source code string which returns tokens.
 #[derive(Clone)]
 pub(super) struct Lexer<'s> {
-    /// The underlying scanner.
+    /// The scanner: contains the underlying string and location as a "cursor".
     s: Scanner<'s>,
     /// The mode the lexer is in. This determines which kinds of tokens it
     /// produces.
     mode: LexMode,
     /// Whether the last token contained a newline.
     newline: bool,
-    /// The state held by raw line lexing.
-    raw: Vec<(SyntaxKind, usize)>,
     /// An error for the last token.
     error: Option<SyntaxError>,
 }
@@ -31,8 +29,6 @@ pub(super) enum LexMode {
     Math,
     /// Keywords, literals and operators.
     Code,
-    /// The contents of a raw block.
-    Raw,
 }

 impl<'s> Lexer<'s> {
@@ -44,7 +40,6 @@ impl<'s> Lexer<'s> {
             mode,
             newline: false,
             error: None,
-            raw: Vec::new(),
         }
     }
@@ -74,9 +69,11 @@ impl<'s> Lexer<'s> {
         self.newline
     }

-    /// Take out the last error, if any.
-    pub fn take_error(&mut self) -> Option<SyntaxError> {
-        self.error.take()
+    /// The number of characters until the most recent newline from an index.
+    pub fn column(&self, index: usize) -> usize {
+        let mut s = self.s; // Make a new temporary scanner (cheap).
+        s.jump(index);
+        s.before().chars().rev().take_while(|&c| !is_newline(c)).count()
     }
 }
@@ -97,21 +94,14 @@ impl Lexer<'_> {
 /// Shared methods with all [`LexMode`].
 impl Lexer<'_> {
-    /// Proceed to the next token and return its [`SyntaxKind`]. Note the
-    /// token could be a [trivia](SyntaxKind::is_trivia).
-    pub fn next(&mut self) -> SyntaxKind {
-        if self.mode == LexMode::Raw {
-            let Some((kind, end)) = self.raw.pop() else {
-                return SyntaxKind::End;
-            };
-            self.s.jump(end);
-            return kind;
-        }
-
+    /// Return the next token in our text. Returns both the [`SyntaxNode`]
+    /// and the raw [`SyntaxKind`] to make it more ergonomic to check the kind.
+    pub fn next(&mut self) -> (SyntaxKind, SyntaxNode) {
+        debug_assert!(self.error.is_none());
+        let start = self.s.cursor();
         self.newline = false;
-        self.error = None;
-        let start = self.s.cursor();
-        match self.s.eat() {
+        let kind = match self.s.eat() {
             Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
             Some('/') if self.s.eat_if('/') => self.line_comment(),
             Some('/') if self.s.eat_if('*') => self.block_comment(),
@@ -123,22 +113,32 @@ impl Lexer<'_> {
                 );
                 kind
             }
+            Some('`') if self.mode != LexMode::Math => return self.raw(),
             Some(c) => match self.mode {
                 LexMode::Markup => self.markup(start, c),
-                LexMode::Math => self.math(start, c),
+                LexMode::Math => match self.math(start, c) {
+                    (kind, None) => kind,
+                    (kind, Some(node)) => return (kind, node),
+                },
                 LexMode::Code => self.code(start, c),
-                LexMode::Raw => unreachable!(),
             },
             None => SyntaxKind::End,
-        }
+        };
+
+        let text = self.s.from(start);
+        let node = match self.error.take() {
+            Some(error) => SyntaxNode::error(error, text),
+            None => SyntaxNode::leaf(kind, text),
+        };
+        (kind, node)
     }

     /// Eat whitespace characters greedily.
     fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
         let more = self.s.eat_while(|c| is_space(c, self.mode));
         let newlines = match c {
+            // Optimize eating a single space.
             ' ' if more.is_empty() => 0,
             _ => count_newlines(self.s.from(start)),
         };
@@ -187,7 +187,6 @@ impl Lexer<'_> {
     fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
         match c {
             '\\' => self.backslash(),
-            '`' => self.raw(),
             'h' if self.s.eat_if("ttp://") => self.link(),
             'h' if self.s.eat_if("ttps://") => self.link(),
             '<' if self.s.at(is_id_continue) => self.label(),
@@ -252,9 +251,10 @@ impl Lexer<'_> {
         }
     }

-    fn raw(&mut self) -> SyntaxKind {
+    /// Lex an entire raw segment at once. This is a convenience to avoid going
+    /// to and from the parser for each raw section.
+    fn raw(&mut self) -> (SyntaxKind, SyntaxNode) {
         let start = self.s.cursor() - 1;
-        self.raw.clear();

         // Determine number of opening backticks.
         let mut backticks = 1;
@@ -264,9 +264,11 @@ impl Lexer<'_> {
         // Special case for ``.
         if backticks == 2 {
-            self.push_raw(SyntaxKind::RawDelim);
-            self.s.jump(start + 1);
-            return SyntaxKind::RawDelim;
+            let nodes = vec![
+                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
+                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
+            ];
+            return (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes));
         }

         // Find end of raw text.
@@ -275,43 +277,55 @@ impl Lexer<'_> {
             match self.s.eat() {
                 Some('`') => found += 1,
                 Some(_) => found = 0,
-                None => break,
+                None => {
+                    let msg = SyntaxError::new("unclosed raw text");
+                    let error = SyntaxNode::error(msg, self.s.from(start));
+                    return (SyntaxKind::Error, error);
+                }
             }
         }
-
-        if found != backticks {
-            return self.error("unclosed raw text");
-        }
-
         let end = self.s.cursor();
-        if backticks >= 3 {
-            self.blocky_raw(start, end, backticks);
-        } else {
-            self.inline_raw(start, end, backticks);
-        }

-        // Closing delimiter.
-        self.push_raw(SyntaxKind::RawDelim);
+        let mut nodes = Vec::with_capacity(3); // Will have at least 3.

-        // The saved tokens will be removed in reverse.
-        self.raw.reverse();
+        // A closure for pushing a node onto our raw vector. Assumes the caller
+        // will move the scanner to the next location at each step.
+        let mut prev_start = start;
+        let mut push_raw = |kind, s: &Scanner| {
+            nodes.push(SyntaxNode::leaf(kind, s.from(prev_start)));
+            prev_start = s.cursor();
+        };

         // Opening delimiter.
         self.s.jump(start + backticks);
-        SyntaxKind::RawDelim
+        push_raw(SyntaxKind::RawDelim, &self.s);
+
+        if backticks >= 3 {
+            self.blocky_raw(end - backticks, &mut push_raw);
+        } else {
+            self.inline_raw(end - backticks, &mut push_raw);
+        }
+
+        // Closing delimiter.
+        self.s.jump(end);
+        push_raw(SyntaxKind::RawDelim, &self.s);
+
+        (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes))
     }

-    fn blocky_raw(&mut self, start: usize, end: usize, backticks: usize) {
+    fn blocky_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
+    where
+        F: FnMut(SyntaxKind, &Scanner),
+    {
         // Language tag.
-        self.s.jump(start + backticks);
         if self.s.eat_if(is_id_start) {
             self.s.eat_while(is_id_continue);
-            self.push_raw(SyntaxKind::RawLang);
+            push_raw(SyntaxKind::RawLang, &self.s);
         }

         // Determine inner content between backticks.
         self.s.eat_if(' ');
-        let inner = self.s.to(end - backticks);
+        let inner = self.s.to(inner_end);

         // Determine dedent level.
         let mut lines = split_newlines(inner);
@@ -357,41 +371,32 @@ impl Lexer<'_> {
             let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
             self.s.eat_newline();
             self.s.advance(offset);
-            self.push_raw(SyntaxKind::RawTrimmed);
+            push_raw(SyntaxKind::RawTrimmed, &self.s);
             self.s.advance(line.len() - offset);
-            self.push_raw(SyntaxKind::Text);
+            push_raw(SyntaxKind::Text, &self.s);
         }

         // Add final trimmed.
-        if self.s.cursor() < end - backticks {
-            self.s.jump(end - backticks);
-            self.push_raw(SyntaxKind::RawTrimmed);
+        if self.s.cursor() < inner_end {
+            self.s.jump(inner_end);
+            push_raw(SyntaxKind::RawTrimmed, &self.s);
         }
-        self.s.jump(end);
     }

-    fn inline_raw(&mut self, start: usize, end: usize, backticks: usize) {
-        self.s.jump(start + backticks);
-
-        while self.s.cursor() < end - backticks {
+    fn inline_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
+    where
+        F: FnMut(SyntaxKind, &Scanner),
+    {
+        while self.s.cursor() < inner_end {
             if self.s.at(is_newline) {
-                self.push_raw(SyntaxKind::Text);
+                push_raw(SyntaxKind::Text, &self.s);
                 self.s.eat_newline();
-                self.push_raw(SyntaxKind::RawTrimmed);
+                push_raw(SyntaxKind::RawTrimmed, &self.s);
                 continue;
             }
             self.s.eat();
         }
-        self.push_raw(SyntaxKind::Text);
-        self.s.jump(end);
-    }
-
-    /// Push the current cursor that marks the end of a raw segment of
-    /// the given `kind`.
-    fn push_raw(&mut self, kind: SyntaxKind) {
-        let end = self.s.cursor();
-        self.raw.push((kind, end));
+        push_raw(SyntaxKind::Text, &self.s);
     }

     fn link(&mut self) -> SyntaxKind {
@@ -512,8 +517,8 @@ impl Lexer<'_> {
 /// Math.
 impl Lexer<'_> {
-    fn math(&mut self, start: usize, c: char) -> SyntaxKind {
-        match c {
+    fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
+        let kind = match c {
             '\\' => self.backslash(),
             '"' => self.string(),
@@ -566,11 +571,41 @@ impl Lexer<'_> {
             // Identifiers.
             c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
                 self.s.eat_while(is_math_id_continue);
-                SyntaxKind::MathIdent
+                let (kind, node) = self.math_ident_or_field(start);
+                return (kind, Some(node));
             }

             // Other math atoms.
             _ => self.math_text(start, c),
-        }
+        };
+        (kind, None)
+    }
+
+    /// Parse a single `MathIdent` or an entire `FieldAccess`.
+    fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
+        let mut kind = SyntaxKind::MathIdent;
+        let mut node = SyntaxNode::leaf(kind, self.s.from(start));
+        while let Some(ident) = self.maybe_dot_ident() {
+            kind = SyntaxKind::FieldAccess;
+            let field_children = vec![
+                node,
+                SyntaxNode::leaf(SyntaxKind::Dot, '.'),
+                SyntaxNode::leaf(SyntaxKind::Ident, ident),
+            ];
+            node = SyntaxNode::inner(kind, field_children);
+        }
+        (kind, node)
+    }
+
+    /// If at a dot and a math identifier, eat and return the identifier.
+    fn maybe_dot_ident(&mut self) -> Option<&str> {
+        if self.s.scout(1).is_some_and(is_math_id_start) && self.s.eat_if('.') {
+            let ident_start = self.s.cursor();
+            self.s.eat();
+            self.s.eat_while(is_math_id_continue);
+            Some(self.s.from(ident_start))
+        } else {
+            None
+        }
     }
 }
@@ -599,7 +634,6 @@ impl Lexer<'_> {
 impl Lexer<'_> {
     fn code(&mut self, start: usize, c: char) -> SyntaxKind {
         match c {
-            '`' => self.raw(),
             '<' if self.s.at(is_id_continue) => self.label(),
             '0'..='9' => self.number(start, c),
             '.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
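Since the parser.rs diff is suppressed below, a standalone sketch (editorial, not from this commit) may help illustrate what the lexer changes amount to: raw text now reaches the parser as a single finished `Raw` subtree with `RawDelim`/`RawLang`/`RawTrimmed`/`Text` children, built entirely inside `Lexer::raw`, instead of being replayed token-by-token through the removed `LexMode::Raw`. Assuming the crate's public `parse` entry point:

```rust
use typst_syntax::parse;

fn main() {
    // The lexer hands the parser one pre-built subtree for the raw segment,
    // so the parser never has to re-enter a raw lexing mode.
    let root = parse("`let x = 1`");
    let raw = root.children().next().expect("one child");
    println!("{raw:#?}"); // Raw: RawDelim ("`"), Text, RawDelim ("`")
}
```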

File diff suppressed because it is too large

View File

@@ -157,19 +157,13 @@ fn try_reparse(
     let new_range = shifted..shifted + new_len;
     let at_end = end == children.len();

-    // Stop parsing early if this kind is encountered.
-    let stop_kind = match parent_kind {
-        Some(_) => SyntaxKind::RightBracket,
-        None => SyntaxKind::End,
-    };
-
     // Reparse!
     let reparsed = reparse_markup(
         text,
         new_range.clone(),
         &mut at_start,
         &mut nesting,
-        |kind| kind == stop_kind,
+        parent_kind.is_none(),
     );

     if let Some(newborns) = reparsed {

View File

@@ -58,6 +58,7 @@ pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return);
 pub const MATH_EXPR: SyntaxSet = syntax_set!(
     Hash,
     MathIdent,
+    FieldAccess,
     Text,
     MathShorthand,
     Linebreak,
@@ -104,7 +105,7 @@ pub const ATOMIC_CODE_PRIMARY: SyntaxSet = syntax_set!(
     Numeric,
     Str,
     Label,
-    RawDelim,
+    Raw,
 );

 /// Syntax kinds that are unary operators.
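For context on the `SyntaxSet` edits above (which track the lexer's switch from emitting `RawDelim` tokens to a single `Raw` node): `set.rs` implements the bit-set over `SyntaxKind`s mentioned in the new README. Below is an editorial sketch of that idea with simplified names and hypothetical discriminants, not the crate's actual definitions:

```rust
/// Each kind maps to one bit of a `u128`, so sets of kinds can be built and
/// queried in O(1), even in `const` contexts.
#[derive(Clone, Copy)]
struct KindSet(u128);

impl KindSet {
    const EMPTY: Self = Self(0);

    const fn add(self, kind: u8) -> Self {
        Self(self.0 | (1u128 << kind))
    }

    const fn contains(self, kind: u8) -> bool {
        self.0 & (1u128 << kind) != 0
    }
}

// Hypothetical discriminants standing in for `SyntaxKind::Raw` and
// `SyntaxKind::RawDelim`.
const RAW: u8 = 40;
const RAW_DELIM: u8 = 41;

// Mirrors the hunk above: `Raw` replaces `RawDelim` in the set.
const ATOMIC_CODE_PRIMARY: KindSet = KindSet::EMPTY.add(RAW);

fn main() {
    assert!(ATOMIC_CODE_PRIMARY.contains(RAW));
    assert!(!ATOMIC_CODE_PRIMARY.contains(RAW_DELIM));
}
```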

View File

@@ -11,14 +11,32 @@ name = "tests"
 path = "src/tests.rs"
 harness = false

+[features]
+# Allow just compiling the parser when only testing typst-syntax. To do so,
+# pass '--no-default-features' to 'cargo test'.
+default = [
+    # "typst-syntax" intentionally not present
+    "typst",
+    "typst-assets",
+    "typst-dev-assets",
+    "typst-library",
+    "typst-pdf",
+    "typst-render",
+    "typst-svg",
+]
+
 [dependencies]
-typst = { workspace = true }
-typst-assets = { workspace = true, features = ["fonts"] }
-typst-dev-assets = { workspace = true }
-typst-library = { workspace = true }
-typst-pdf = { workspace = true }
-typst-render = { workspace = true }
-typst-svg = { workspace = true }
+typst-syntax = { workspace = true }
+# Mark other Typst crates as optional so we can use '--no-default-features'
+# to decrease compile times for parser testing.
+typst = { workspace = true, optional = true }
+typst-assets = { workspace = true, features = ["fonts"], optional = true }
+typst-dev-assets = { workspace = true, optional = true }
+typst-library = { workspace = true, optional = true }
+typst-pdf = { workspace = true, optional = true }
+typst-render = { workspace = true, optional = true }
+typst-svg = { workspace = true, optional = true }
 clap = { workspace = true }
 comemo = { workspace = true }
 ecow = { workspace = true }

Binary file not shown (new reference image, 118 B).

View File

@@ -43,7 +43,9 @@ pub struct CliArguments {
     /// Runs SVG export.
     #[arg(long)]
     pub svg: bool,
-    /// Displays the syntax tree.
+    /// Displays the syntax tree before running tests.
+    ///
+    /// Note: This is ignored if using '--parser-compare'.
     #[arg(long)]
     pub syntax: bool,
     /// Displays only one line per test, hiding details about failures.
@@ -55,6 +57,29 @@ pub struct CliArguments {
     /// How many threads to spawn when running the tests.
     #[arg(short = 'j', long)]
     pub num_threads: Option<usize>,
+    /// Changes testing behavior for debugging the parser: With no argument,
+    /// outputs the concrete syntax trees of tests as files in
+    /// 'tests/store/syntax/'. With a directory as argument, will treat it as a
+    /// reference of correct syntax tree files and will print which output
+    /// syntax trees differ (viewing the diffs is on you).
+    ///
+    /// This overrides the normal testing system. It parses, but does not run,
+    /// the test suite.
+    ///
+    /// If `cargo test` is run with `--no-default-features`, then compiling will
+    /// not include Typst's core crates, only typst-syntax, greatly speeding up
+    /// debugging when changing the parser.
+    ///
+    /// You can generate a correct reference directory by running on a known
+    /// good commit and copying the generated outputs to a new directory.
+    /// `_things` may be a good location as it is in the top-level gitignore.
+    ///
+    /// You can view diffs in VS Code with: `code --diff <ref_dir>/<test>.syntax
+    /// tests/store/syntax/<test>.syntax`
+    #[arg(long)]
+    pub parser_compare: Option<Option<PathBuf>>,
+    // ^ I'm not using a subcommand here because then test patterns don't parse
+    // how you would expect and I'm too lazy to try to fix it.
 }

 impl CliArguments {
impl CliArguments { impl CliArguments {

View File

@@ -6,8 +6,8 @@ use std::str::FromStr;
 use std::sync::LazyLock;

 use ecow::{eco_format, EcoString};
-use typst::syntax::package::PackageVersion;
-use typst::syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
+use typst_syntax::package::PackageVersion;
+use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
 use unscanny::Scanner;

 /// Collects all tests from all files.

View File

@@ -2,7 +2,16 @@ use std::io::{self, IsTerminal, StderrLock, Write};
 use std::time::{Duration, Instant};

 use crate::collect::Test;
-use crate::run::TestResult;
+
+/// The result of running a single test.
+pub struct TestResult {
+    /// The error log for this test. If empty, the test passed.
+    pub errors: String,
+    /// The info log for this test.
+    pub infos: String,
+    /// Whether the image was mismatched.
+    pub mismatched_image: bool,
+}

 /// Receives status updates by individual test runs.
 pub struct Logger<'a> {
@@ -58,7 +67,7 @@ impl<'a> Logger<'a> {
             }
         };

-        if result.is_ok() {
+        if result.errors.is_empty() {
             self.passed += 1;
         } else {
             self.failed += 1;

View File

@@ -12,6 +12,7 @@ use typst::WorldExt;
 use typst_pdf::PdfOptions;

 use crate::collect::{FileSize, NoteKind, Test};
+use crate::logger::TestResult;
 use crate::world::TestWorld;

 /// Runs a single test.
@@ -21,23 +22,6 @@ pub fn run(test: &Test) -> TestResult {
     Runner::new(test).run()
 }

-/// The result of running a single test.
-pub struct TestResult {
-    /// The error log for this test. If empty, the test passed.
-    pub errors: String,
-    /// The info log for this test.
-    pub infos: String,
-    /// Whether the image was mismatched.
-    pub mismatched_image: bool,
-}
-
-impl TestResult {
-    /// Whether the test passed.
-    pub fn is_ok(&self) -> bool {
-        self.errors.is_empty()
-    }
-}
-
 /// Write a line to a log sink, defaulting to the test's error log.
 macro_rules! log {
     (into: $sink:expr, $($tts:tt)*) => {

View File

@@ -1,13 +1,19 @@
 //! Typst's test runner.

+#![cfg_attr(not(feature = "default"), allow(dead_code, unused_imports))]
+
 mod args;
 mod collect;
-mod custom;
 mod logger;
+#[cfg(feature = "default")]
+mod custom;
+#[cfg(feature = "default")]
 mod run;
+#[cfg(feature = "default")]
 mod world;

-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::sync::LazyLock;
 use std::time::Duration;
@@ -16,7 +22,8 @@ use parking_lot::Mutex;
 use rayon::iter::{ParallelBridge, ParallelIterator};

 use crate::args::{CliArguments, Command};
-use crate::logger::Logger;
+use crate::collect::Test;
+use crate::logger::{Logger, TestResult};

 /// The parsed command line arguments.
 static ARGS: LazyLock<CliArguments> = LazyLock::new(CliArguments::parse);
@@ -27,6 +34,9 @@ const SUITE_PATH: &str = "tests/suite";
 /// The directory where the full test results are stored.
 const STORE_PATH: &str = "tests/store";

+/// The directory where syntax trees are stored.
+const SYNTAX_PATH: &str = "tests/store/syntax";
+
 /// The directory where the reference images are stored.
 const REF_PATH: &str = "tests/ref";
@@ -89,6 +99,21 @@ fn test() {
         return;
     }

+    let parser_dirs = ARGS.parser_compare.clone().map(create_syntax_store);
+    #[cfg(not(feature = "default"))]
+    let parser_dirs = parser_dirs.or_else(|| Some(create_syntax_store(None)));
+
+    let runner = |test: &Test| {
+        if let Some((live_path, ref_path)) = &parser_dirs {
+            run_parser_test(test, live_path, ref_path)
+        } else {
+            #[cfg(feature = "default")]
+            return run::run(test);
+            #[cfg(not(feature = "default"))]
+            unreachable!();
+        }
+    };
+
     // Run the tests.
     let logger = Mutex::new(Logger::new(selected, skipped));
     std::thread::scope(|scope| {
@@ -112,7 +137,7 @@ fn test() {
         // to `typst::utils::Deferred` yielding.
         tests.iter().par_bridge().for_each(|test| {
             logger.lock().start(test);
-            let result = std::panic::catch_unwind(|| run::run(test));
+            let result = std::panic::catch_unwind(|| runner(test));
             logger.lock().end(test, result);
         });
@@ -142,3 +167,46 @@ fn undangle() {
         }
     }
 }
+
+fn create_syntax_store(ref_path: Option<PathBuf>) -> (&'static Path, Option<PathBuf>) {
+    if ref_path.as_ref().is_some_and(|p| !p.exists()) {
+        eprintln!("syntax reference path doesn't exist");
+        std::process::exit(1);
+    }
+    let live_path = Path::new(SYNTAX_PATH);
+    std::fs::remove_dir_all(live_path).ok();
+    std::fs::create_dir_all(live_path).unwrap();
+    (live_path, ref_path)
+}
+
+fn run_parser_test(
+    test: &Test,
+    live_path: &Path,
+    ref_path: &Option<PathBuf>,
+) -> TestResult {
+    let mut result = TestResult {
+        errors: String::new(),
+        infos: String::new(),
+        mismatched_image: false,
+    };
+
+    let syntax_file = live_path.join(format!("{}.syntax", test.name));
+    let tree = format!("{:#?}\n", test.source.root());
+    std::fs::write(syntax_file, &tree).unwrap();
+
+    let Some(ref_path) = ref_path else { return result };
+    let ref_file = ref_path.join(format!("{}.syntax", test.name));
+    match std::fs::read_to_string(&ref_file) {
+        Ok(ref_tree) => {
+            if tree != ref_tree {
+                result.errors = "differs".to_string();
+            }
+        }
+        Err(_) => {
+            result.errors = format!("missing reference: {}", ref_file.display());
+        }
+    }
+    result
+}

View File

@@ -0,0 +1,29 @@
// Test math symbol edge cases.
--- math-symbol-basic ---
#let sym = symbol("s", ("basic", "s"))
#test($sym.basic$, $#"s"$)
--- math-symbol-underscore ---
#let sym = symbol("s", ("test_underscore", "s"))
// Error: 6-10 unknown symbol modifier
$sym.test_underscore$
--- math-symbol-dash ---
#let sym = symbol("s", ("test-dash", "s"))
// Error: 6-10 unknown symbol modifier
$sym.test-dash$
--- math-symbol-double ---
#let sym = symbol("s", ("test.basic", "s"))
#test($sym.test.basic$, $#"s"$)
--- math-symbol-double-underscore ---
#let sym = symbol("s", ("one.test_underscore", "s"))
// Error: 10-14 unknown symbol modifier
$sym.one.test_underscore$
--- math-symbol-double-dash ---
#let sym = symbol("s", ("one.test-dash", "s"))
// Error: 10-14 unknown symbol modifier
$sym.one.test-dash$

View File

@@ -38,7 +38,7 @@ multiline.
 --- heading-trailing-whitespace ---
 // Whether headings contain trailing whitespace with or without comments/labels.
 // Labels are special cased to immediately end headings in the parser, but also
-// have unique whitespace behavior.
+// #strike[have unique whitespace behavior] Now their behavior is consistent!

 #let join(..xs) = xs.pos().join()
 #let head(h) = heading(depth: 1, h)
@@ -49,19 +49,20 @@ multiline.
 #test(head[h], [= h<a>])
 #test(head[h], [= h/**/<b>])

-// Label behaves differently than normal trailing space and comment.
-#test(head(join[h][ ]), [= h ])
-#test(head(join[h][ ]), [= h /**/])
+// #strike[Label behaves differently than normal trailing space and comment.]
+// Now they behave the same!
+#test(join(head[h])[ ], [= h ])
+#test(join(head[h])[ ], [= h /**/])
 #test(join(head[h])[ ], [= h <c>])

 // Combinations.
-#test(head(join[h][ ][ ]), [= h /**/ ])
+#test(join(head[h])[ ][ ], [= h /**/ ])
 #test(join(head[h])[ ][ ], [= h <d> ])
-#test(head(join[h][ ]), [= h /**/<e>])
+#test(join(head[h])[ ], [= h /**/<e>])
 #test(join(head[h])[ ], [= h/**/ <f>])

-// The first space attaches, but not the second
-#test(join(head(join[h][ ]))[ ], [= h /**/ <g>])
+// #strike[The first space attaches, but not the second] Now neither attaches!
+#test(join(head(join[h]))[ ][ ], [= h /**/ <g>])

 --- heading-leading-whitespace ---
 // Test that leading whitespace and comments don't matter.

View File

@@ -34,6 +34,51 @@ _Shopping list_
 - C
 - D

+--- list-indent-trivia-nesting ---
+// Test indent nesting behavior with odd trivia (comments and spaces).
+#let indented = [
+- a
+ /**/- b
+ /**/ - c
+ /*spanning
+ multiple
+ lines */ - d
+ - e
+ /**/ - f
+ /**/ - g
+]
+
+// Current behavior is that list columns are based on the first non-whitespace
+// element in their line, so the block comments here determine the column the
+// list starts at.
+#let item = list.item
+#let manual = {
+  [ ]
+  item({
+    [a]
+    [ ]
+    item[b]
+    [ ]; [ ]
+    item({
+      [c]
+      [ ]; [ ]
+      item[d]
+    })
+    [ ]
+    item({
+      [e]
+      [ ]; [ ]
+      item[f]
+      [ ]; [ ]
+      item[g]
+    })
+  })
+  [ ]
+}
+
+#test(indented, manual)
+
 --- list-tabs ---
 // This works because tabs are used consistently.
 - A with 1 tab

View File

@@ -135,6 +135,9 @@
 // Error: 2-3 unexpected closing brace
 #}

+--- single-right-bracket ---
+]
+
 --- content-block-in-markup-scope ---
 // Content blocks also create a scope.
 #[#let x = 1]