mirror of https://github.com/typst/typst
synced 2025-05-13 20:46:23 +08:00
Refactor Parser (#5310)
This commit is contained in: commit cb1aad3a0c
1 Cargo.lock (generated)
@@ -3018,6 +3018,7 @@ dependencies = [
 "typst-pdf",
 "typst-render",
 "typst-svg",
+ "typst-syntax",
 "unscanny",
 "walkdir",
]
40 crates/typst-syntax/README.md (new file)
@@ -0,0 +1,40 @@
# typst-syntax

Welcome to the Typst Syntax crate! This crate manages the syntactical structure
of Typst by holding some core abstractions: assigning source file IDs, parsing
Typst syntax, creating an Abstract Syntax Tree (AST), initializing source
"spans" (for linking AST elements to their outputs in a document), and syntax
highlighting.

Below are quick descriptions of the files you might be editing if you find
yourself here :)

- `lexer.rs`: The lexical foundation of the parser, which converts a string of
  characters into tokens.
- `parser.rs`: The main parser definition, preparing a Concrete Syntax Tree
  made of nested vectors of `SyntaxNode`s.
- `reparser.rs`: The algorithm for reparsing the minimal required amount of
  source text for efficient incremental compilation.
- `ast.rs`: The conversion layer between the Concrete Syntax Tree of the parser
  and the Abstract Syntax Tree used for code evaluation.
- `node.rs` & `span.rs`: The underlying data structure for the Concrete Syntax
  Tree and the definitions of source spans used for efficiently pointing to a
  syntax node in things like diagnostics.
- `kind.rs` & `set.rs`: An enum of all syntactical tokens and nodes, and a
  bit-set data structure for sets of `SyntaxKind`s.
- `highlight.rs`: Extraction of syntax highlighting information from the
  Concrete Syntax Tree (with output as HTML).
- `path.rs`, `file.rs`, `package.rs`: The system for interning project and
  package paths as unique file IDs and resolving them in a virtual filesystem
  (not actually for _opening_ files).

The structure of the parser is largely adapted from Rust Analyzer. Their
[documentation][ra] is a good reference for a number of the design decisions
around the parser and AST.

The reparsing algorithm is explained in Section 4 of [Martin's thesis][thesis]
(though it has changed a bit since then).

[ra]: https://github.com/rust-lang/rust-analyzer/blob/master/docs/dev/syntax.md
[thesis]: https://www.researchgate.net/publication/364622490_Fast_Typesetting_with_Incremental_Compilation
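For orientation, here is an editor's sketch (not part of this commit) of the crate's primary entry point: parsing a string into a lossless Concrete Syntax Tree. The `typst_syntax::parse` function and `SyntaxNode::into_text` are assumed from the crate's public API; check the crate docs to confirm.

    // Parse Typst markup and inspect the resulting syntax tree.
    fn main() {
        let src = "= Heading\nSome *bold* text.";
        let root = typst_syntax::parse(src);
        // The CST is lossless: reassembling its text reproduces the input.
        assert_eq!(root.clone().into_text(), src);
        println!("{root:#?}");
    }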
@@ -4,20 +4,18 @@ use unicode_script::{Script, UnicodeScript};
use unicode_segmentation::UnicodeSegmentation;
use unscanny::Scanner;

-use crate::{SyntaxError, SyntaxKind};
+use crate::{SyntaxError, SyntaxKind, SyntaxNode};

-/// Splits up a string of source code into tokens.
+/// An iterator over a source code string which returns tokens.
#[derive(Clone)]
pub(super) struct Lexer<'s> {
-    /// The underlying scanner.
+    /// The scanner: contains the underlying string and location as a "cursor".
    s: Scanner<'s>,
    /// The mode the lexer is in. This determines which kinds of tokens it
    /// produces.
    mode: LexMode,
    /// Whether the last token contained a newline.
    newline: bool,
-    /// The state held by raw line lexing.
-    raw: Vec<(SyntaxKind, usize)>,
    /// An error for the last token.
    error: Option<SyntaxError>,
}
@@ -31,8 +29,6 @@ pub(super) enum LexMode
    Math,
    /// Keywords, literals and operators.
    Code,
-    /// The contents of a raw block.
-    Raw,
}

impl<'s> Lexer<'s> {
@@ -44,7 +40,6 @@ impl<'s> Lexer<'s> {
            mode,
            newline: false,
            error: None,
-            raw: Vec::new(),
        }
    }

@@ -74,9 +69,11 @@ impl<'s> Lexer<'s> {
        self.newline
    }

-    /// Take out the last error, if any.
-    pub fn take_error(&mut self) -> Option<SyntaxError> {
-        self.error.take()
+    /// The number of characters until the most recent newline from an index.
+    pub fn column(&self, index: usize) -> usize {
+        let mut s = self.s; // Make a new temporary scanner (cheap).
+        s.jump(index);
+        s.before().chars().rev().take_while(|&c| !is_newline(c)).count()
    }
}

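Editor's aside, a worked example of the new `column` method: for the source "ab\ncd", `column(4)` jumps a temporary scanner to byte 4, scans backwards over "ab\nc", and counts one character before hitting the newline, so the reported column is 1.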
@@ -97,21 +94,14 @@ impl Lexer<'_> {

/// Shared methods with all [`LexMode`].
impl Lexer<'_> {
-    /// Proceed to the next token and return its [`SyntaxKind`]. Note the
-    /// token could be a [trivia](SyntaxKind::is_trivia).
-    pub fn next(&mut self) -> SyntaxKind {
-        if self.mode == LexMode::Raw {
-            let Some((kind, end)) = self.raw.pop() else {
-                return SyntaxKind::End;
-            };
-            self.s.jump(end);
-            return kind;
-        }
+    /// Return the next token in our text. Returns both the [`SyntaxNode`]
+    /// and the raw [`SyntaxKind`] to make it more ergonomic to check the kind.
+    pub fn next(&mut self) -> (SyntaxKind, SyntaxNode) {
+        debug_assert!(self.error.is_none());
+        let start = self.s.cursor();

        self.newline = false;
-        self.error = None;
-        let start = self.s.cursor();
-        match self.s.eat() {
+        let kind = match self.s.eat() {
            Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
            Some('/') if self.s.eat_if('/') => self.line_comment(),
            Some('/') if self.s.eat_if('*') => self.block_comment(),
@@ -123,22 +113,32 @@ impl Lexer<'_> {
                );
                kind
            }

+            Some('`') if self.mode != LexMode::Math => return self.raw(),
            Some(c) => match self.mode {
                LexMode::Markup => self.markup(start, c),
-                LexMode::Math => self.math(start, c),
+                LexMode::Math => match self.math(start, c) {
+                    (kind, None) => kind,
+                    (kind, Some(node)) => return (kind, node),
+                },
                LexMode::Code => self.code(start, c),
-                LexMode::Raw => unreachable!(),
            },

            None => SyntaxKind::End,
-        }
+        };

+        let text = self.s.from(start);
+        let node = match self.error.take() {
+            Some(error) => SyntaxNode::error(error, text),
+            None => SyntaxNode::leaf(kind, text),
+        };
+        (kind, node)
    }

    /// Eat whitespace characters greedily.
    fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
        let more = self.s.eat_while(|c| is_space(c, self.mode));
        let newlines = match c {
            // Optimize eating a single space.
            ' ' if more.is_empty() => 0,
            _ => count_newlines(self.s.from(start)),
        };
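To illustrate the refactored interface (an editor's sketch using names from this diff; `Lexer` is crate-internal, so this is illustrative rather than independently runnable): callers now drain the lexer until `SyntaxKind::End` and collect ready-made nodes, with any lexing error already baked into the returned `SyntaxNode`.

    // Illustrative only: drain the refactored lexer into finished nodes.
    fn drain(mut lexer: Lexer) -> Vec<SyntaxNode> {
        let mut nodes = Vec::new();
        loop {
            let (kind, node) = lexer.next();
            if kind == SyntaxKind::End {
                break;
            }
            // Errors were already converted into error nodes by `next`.
            nodes.push(node);
        }
        nodes
    }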
@@ -187,7 +187,6 @@ impl Lexer<'_> {
    fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
        match c {
            '\\' => self.backslash(),
-            '`' => self.raw(),
            'h' if self.s.eat_if("ttp://") => self.link(),
            'h' if self.s.eat_if("ttps://") => self.link(),
            '<' if self.s.at(is_id_continue) => self.label(),
@@ -252,9 +251,10 @@ impl Lexer<'_> {
        }
    }

-    fn raw(&mut self) -> SyntaxKind {
+    /// Lex an entire raw segment at once. This is a convenience to avoid going
+    /// to and from the parser for each raw section.
+    fn raw(&mut self) -> (SyntaxKind, SyntaxNode) {
        let start = self.s.cursor() - 1;
-        self.raw.clear();

        // Determine number of opening backticks.
        let mut backticks = 1;
@@ -264,9 +264,11 @@ impl Lexer<'_> {

        // Special case for ``.
        if backticks == 2 {
-            self.push_raw(SyntaxKind::RawDelim);
-            self.s.jump(start + 1);
-            return SyntaxKind::RawDelim;
+            let nodes = vec![
+                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
+                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
+            ];
+            return (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes));
        }

        // Find end of raw text.
@@ -275,43 +277,55 @@ impl Lexer<'_> {
            match self.s.eat() {
                Some('`') => found += 1,
                Some(_) => found = 0,
-                None => break,
+                None => {
+                    let msg = SyntaxError::new("unclosed raw text");
+                    let error = SyntaxNode::error(msg, self.s.from(start));
+                    return (SyntaxKind::Error, error);
+                }
            }
        }

-        if found != backticks {
-            return self.error("unclosed raw text");
-        }

        let end = self.s.cursor();
-        if backticks >= 3 {
-            self.blocky_raw(start, end, backticks);
-        } else {
-            self.inline_raw(start, end, backticks);
-        }

-        // Closing delimiter.
-        self.push_raw(SyntaxKind::RawDelim);
+        let mut nodes = Vec::with_capacity(3); // Will have at least 3.

-        // The saved tokens will be removed in reverse.
-        self.raw.reverse();
+        // A closure for pushing a node onto our raw vector. Assumes the caller
+        // will move the scanner to the next location at each step.
+        let mut prev_start = start;
+        let mut push_raw = |kind, s: &Scanner| {
+            nodes.push(SyntaxNode::leaf(kind, s.from(prev_start)));
+            prev_start = s.cursor();
+        };

        // Opening delimiter.
        self.s.jump(start + backticks);
-        SyntaxKind::RawDelim
+        push_raw(SyntaxKind::RawDelim, &self.s);
+
+        if backticks >= 3 {
+            self.blocky_raw(end - backticks, &mut push_raw);
+        } else {
+            self.inline_raw(end - backticks, &mut push_raw);
+        }
+
+        // Closing delimiter.
+        self.s.jump(end);
+        push_raw(SyntaxKind::RawDelim, &self.s);
+
+        (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes))
    }

-    fn blocky_raw(&mut self, start: usize, end: usize, backticks: usize) {
+    fn blocky_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
+    where
+        F: FnMut(SyntaxKind, &Scanner),
+    {
        // Language tag.
-        self.s.jump(start + backticks);
        if self.s.eat_if(is_id_start) {
            self.s.eat_while(is_id_continue);
-            self.push_raw(SyntaxKind::RawLang);
+            push_raw(SyntaxKind::RawLang, &self.s);
        }

        // Determine inner content between backticks.
        self.s.eat_if(' ');
-        let inner = self.s.to(end - backticks);
+        let inner = self.s.to(inner_end);

        // Determine dedent level.
        let mut lines = split_newlines(inner);
@@ -357,41 +371,32 @@ impl Lexer<'_> {
            let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
            self.s.eat_newline();
            self.s.advance(offset);
-            self.push_raw(SyntaxKind::RawTrimmed);
+            push_raw(SyntaxKind::RawTrimmed, &self.s);
            self.s.advance(line.len() - offset);
-            self.push_raw(SyntaxKind::Text);
+            push_raw(SyntaxKind::Text, &self.s);
        }

        // Add final trimmed.
-        if self.s.cursor() < end - backticks {
-            self.s.jump(end - backticks);
-            self.push_raw(SyntaxKind::RawTrimmed);
+        if self.s.cursor() < inner_end {
+            self.s.jump(inner_end);
+            push_raw(SyntaxKind::RawTrimmed, &self.s);
        }
-        self.s.jump(end);
    }

-    fn inline_raw(&mut self, start: usize, end: usize, backticks: usize) {
-        self.s.jump(start + backticks);
-
-        while self.s.cursor() < end - backticks {
+    fn inline_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
+    where
+        F: FnMut(SyntaxKind, &Scanner),
+    {
+        while self.s.cursor() < inner_end {
            if self.s.at(is_newline) {
-                self.push_raw(SyntaxKind::Text);
+                push_raw(SyntaxKind::Text, &self.s);
                self.s.eat_newline();
-                self.push_raw(SyntaxKind::RawTrimmed);
+                push_raw(SyntaxKind::RawTrimmed, &self.s);
                continue;
            }
            self.s.eat();
        }
-        self.push_raw(SyntaxKind::Text);
-
-        self.s.jump(end);
-    }
-
-    /// Push the current cursor that marks the end of a raw segment of
-    /// the given `kind`.
-    fn push_raw(&mut self, kind: SyntaxKind) {
-        let end = self.s.cursor();
-        self.raw.push((kind, end));
+        push_raw(SyntaxKind::Text, &self.s);
    }

    fn link(&mut self) -> SyntaxKind {
@@ -512,8 +517,8 @@ impl Lexer<'_> {

/// Math.
impl Lexer<'_> {
-    fn math(&mut self, start: usize, c: char) -> SyntaxKind {
-        match c {
+    fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
+        let kind = match c {
            '\\' => self.backslash(),
            '"' => self.string(),

@@ -566,11 +571,41 @@ impl Lexer<'_> {
            // Identifiers.
            c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
                self.s.eat_while(is_math_id_continue);
-                SyntaxKind::MathIdent
+                let (kind, node) = self.math_ident_or_field(start);
+                return (kind, Some(node));
            }

            // Other math atoms.
            _ => self.math_text(start, c),
-        }
+        };
+        (kind, None)
    }

+    /// Parse a single `MathIdent` or an entire `FieldAccess`.
+    fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
+        let mut kind = SyntaxKind::MathIdent;
+        let mut node = SyntaxNode::leaf(kind, self.s.from(start));
+        while let Some(ident) = self.maybe_dot_ident() {
+            kind = SyntaxKind::FieldAccess;
+            let field_children = vec![
+                node,
+                SyntaxNode::leaf(SyntaxKind::Dot, '.'),
+                SyntaxNode::leaf(SyntaxKind::Ident, ident),
+            ];
+            node = SyntaxNode::inner(kind, field_children);
+        }
+        (kind, node)
+    }

+    /// If at a dot and a math identifier, eat and return the identifier.
+    fn maybe_dot_ident(&mut self) -> Option<&str> {
+        if self.s.scout(1).is_some_and(is_math_id_start) && self.s.eat_if('.') {
+            let ident_start = self.s.cursor();
+            self.s.eat();
+            self.s.eat_while(is_math_id_continue);
+            Some(self.s.from(ident_start))
+        } else {
+            None
+        }
+    }

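An editor's illustration (assumed, not part of the diff): for the math input `a.b.c`, `math_ident_or_field` loops twice, wrapping the previous node each time, and produces a left-nested tree roughly like:

    FieldAccess
    ├── FieldAccess
    │   ├── MathIdent "a"
    │   ├── Dot "."
    │   └── Ident "b"
    ├── Dot "."
    └── Ident "c"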
@@ -599,7 +634,6 @@ impl Lexer<'_> {
impl Lexer<'_> {
    fn code(&mut self, start: usize, c: char) -> SyntaxKind {
        match c {
-            '`' => self.raw(),
            '<' if self.s.at(is_id_continue) => self.label(),
            '0'..='9' => self.number(start, c),
            '.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
File diff suppressed because it is too large
@@ -157,19 +157,13 @@ fn try_reparse(
        let new_range = shifted..shifted + new_len;
        let at_end = end == children.len();

-        // Stop parsing early if this kind is encountered.
-        let stop_kind = match parent_kind {
-            Some(_) => SyntaxKind::RightBracket,
-            None => SyntaxKind::End,
-        };
-
        // Reparse!
        let reparsed = reparse_markup(
            text,
            new_range.clone(),
            &mut at_start,
            &mut nesting,
-            |kind| kind == stop_kind,
            parent_kind.is_none(),
        );

        if let Some(newborns) = reparsed {
@@ -58,6 +58,7 @@ pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return)
pub const MATH_EXPR: SyntaxSet = syntax_set!(
    Hash,
    MathIdent,
+    FieldAccess,
    Text,
    MathShorthand,
    Linebreak,
@@ -104,7 +105,7 @@ pub const ATOMIC_CODE_PRIMARY: SyntaxSet = syntax_set!(
    Numeric,
    Str,
    Label,
-    RawDelim,
+    Raw,
);

/// Syntax kinds that are unary operators.
@@ -11,14 +11,32 @@ name = "tests"
path = "src/tests.rs"
harness = false

+[features]
+# Allow just compiling the parser when only testing typst-syntax. To do so,
+# pass '--no-default-features' to 'cargo test'.
+default = [
+    # "typst-syntax" intentionally not present
+    "typst",
+    "typst-assets",
+    "typst-dev-assets",
+    "typst-library",
+    "typst-pdf",
+    "typst-render",
+    "typst-svg",
+]
+
[dependencies]
-typst = { workspace = true }
-typst-assets = { workspace = true, features = ["fonts"] }
-typst-dev-assets = { workspace = true }
-typst-library = { workspace = true }
-typst-pdf = { workspace = true }
-typst-render = { workspace = true }
-typst-svg = { workspace = true }
typst-syntax = { workspace = true }
+# Mark other Typst crates as optional so we can use '--no-default-features'
+# to decrease compile times for parser testing.
+typst = { workspace = true, optional = true }
+typst-assets = { workspace = true, features = ["fonts"], optional = true }
+typst-dev-assets = { workspace = true, optional = true }
+typst-library = { workspace = true, optional = true }
+typst-pdf = { workspace = true, optional = true }
+typst-render = { workspace = true, optional = true }
+typst-svg = { workspace = true, optional = true }
clap = { workspace = true }
comemo = { workspace = true }
ecow = { workspace = true }
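Editor's note: with the core crates marked optional as above, a parser-only build compiles just typst-syntax plus the small harness dependencies; per the comments in this file, that is requested by passing '--no-default-features' to 'cargo test' (the exact invocation depends on your workspace setup).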
BIN tests/ref/single-right-bracket.png (new binary file, 118 B; not shown)
@@ -43,7 +43,9 @@ pub struct CliArguments {
    /// Runs SVG export.
    #[arg(long)]
    pub svg: bool,
-    /// Displays the syntax tree.
+    /// Displays the syntax tree before running tests.
+    ///
+    /// Note: This is ignored if using '--parser-compare'.
    #[arg(long)]
    pub syntax: bool,
    /// Displays only one line per test, hiding details about failures.
@@ -55,6 +57,29 @@ pub struct CliArguments {
    /// How many threads to spawn when running the tests.
    #[arg(short = 'j', long)]
    pub num_threads: Option<usize>,
+    /// Changes testing behavior for debugging the parser: With no argument,
+    /// outputs the concrete syntax trees of tests as files in
+    /// 'tests/store/syntax/'. With a directory as argument, will treat it as a
+    /// reference of correct syntax tree files and will print which output
+    /// syntax trees differ (viewing the diffs is on you).
+    ///
+    /// This overrides the normal testing system. It parses the test suite,
+    /// but does not run it.
+    ///
+    /// If `cargo test` is run with `--no-default-features`, then compiling will
+    /// not include Typst's core crates, only typst-syntax, greatly speeding up
+    /// debugging when changing the parser.
+    ///
+    /// You can generate a correct reference directory by running on a known
+    /// good commit and copying the generated outputs to a new directory.
+    /// `_things` may be a good location, as it is in the top-level gitignore.
+    ///
+    /// You can view diffs in VS Code with: `code --diff <ref_dir>/<test>.syntax
+    /// tests/store/syntax/<test>.syntax`
+    #[arg(long)]
+    pub parser_compare: Option<Option<PathBuf>>,
+    // ^ I'm not using a subcommand here because then test patterns don't parse
+    // how you would expect, and I'm too lazy to try to fix it.
}

impl CliArguments {
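An editor's sketch of the debugging workflow these flags enable (the exact invocations are inferred from the docs above, so treat them as assumptions): on a known-good commit, run `cargo test --no-default-features -- --parser-compare` and copy `tests/store/syntax/` to, say, `_things/good-syntax/`; then, on your working branch, run `cargo test --no-default-features -- --parser-compare=_things/good-syntax` and diff any trees it reports as differing.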
@@ -6,8 +6,8 @@ use std::str::FromStr;
use std::sync::LazyLock;

use ecow::{eco_format, EcoString};
-use typst::syntax::package::PackageVersion;
-use typst::syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
+use typst_syntax::package::PackageVersion;
+use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
use unscanny::Scanner;

/// Collects all tests from all files.
@@ -2,7 +2,16 @@ use std::io::{self, IsTerminal, StderrLock, Write};
use std::time::{Duration, Instant};

use crate::collect::Test;
-use crate::run::TestResult;

+/// The result of running a single test.
+pub struct TestResult {
+    /// The error log for this test. If empty, the test passed.
+    pub errors: String,
+    /// The info log for this test.
+    pub infos: String,
+    /// Whether the image was mismatched.
+    pub mismatched_image: bool,
+}

/// Receives status updates by individual test runs.
pub struct Logger<'a> {
@@ -58,7 +67,7 @@ impl<'a> Logger<'a> {
            }
        };

-        if result.is_ok() {
+        if result.errors.is_empty() {
            self.passed += 1;
        } else {
            self.failed += 1;
@@ -12,6 +12,7 @@ use typst::WorldExt;
use typst_pdf::PdfOptions;

use crate::collect::{FileSize, NoteKind, Test};
+use crate::logger::TestResult;
use crate::world::TestWorld;

/// Runs a single test.
@@ -21,23 +22,6 @@ pub fn run(test: &Test) -> TestResult {
    Runner::new(test).run()
}

-/// The result of running a single test.
-pub struct TestResult {
-    /// The error log for this test. If empty, the test passed.
-    pub errors: String,
-    /// The info log for this test.
-    pub infos: String,
-    /// Whether the image was mismatched.
-    pub mismatched_image: bool,
-}
-
-impl TestResult {
-    /// Whether the test passed.
-    pub fn is_ok(&self) -> bool {
-        self.errors.is_empty()
-    }
-}
-
/// Write a line to a log sink, defaulting to the test's error log.
macro_rules! log {
    (into: $sink:expr, $($tts:tt)*) => {
@@ -1,13 +1,19 @@
//! Typst's test runner.

+#![cfg_attr(not(feature = "default"), allow(dead_code, unused_imports))]
+
mod args;
mod collect;
-mod custom;
mod logger;

+#[cfg(feature = "default")]
+mod custom;
+#[cfg(feature = "default")]
mod run;
+#[cfg(feature = "default")]
mod world;

-use std::path::Path;
+use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use std::time::Duration;

@@ -16,7 +22,8 @@ use parking_lot::Mutex;
use rayon::iter::{ParallelBridge, ParallelIterator};

use crate::args::{CliArguments, Command};
-use crate::logger::Logger;
+use crate::collect::Test;
+use crate::logger::{Logger, TestResult};

/// The parsed command line arguments.
static ARGS: LazyLock<CliArguments> = LazyLock::new(CliArguments::parse);
@@ -27,6 +34,9 @@ const SUITE_PATH: &str = "tests/suite";
/// The directory where the full test results are stored.
const STORE_PATH: &str = "tests/store";

+/// The directory where syntax trees are stored.
+const SYNTAX_PATH: &str = "tests/store/syntax";
+
/// The directory where the reference images are stored.
const REF_PATH: &str = "tests/ref";

@@ -89,6 +99,21 @@ fn test() {
        return;
    }

+    let parser_dirs = ARGS.parser_compare.clone().map(create_syntax_store);
+    #[cfg(not(feature = "default"))]
+    let parser_dirs = parser_dirs.or_else(|| Some(create_syntax_store(None)));
+
+    let runner = |test: &Test| {
+        if let Some((live_path, ref_path)) = &parser_dirs {
+            run_parser_test(test, live_path, ref_path)
+        } else {
+            #[cfg(feature = "default")]
+            return run::run(test);
+            #[cfg(not(feature = "default"))]
+            unreachable!();
+        }
+    };
+
    // Run the tests.
    let logger = Mutex::new(Logger::new(selected, skipped));
    std::thread::scope(|scope| {
@@ -112,7 +137,7 @@ fn test() {
        // to `typst::utils::Deferred` yielding.
        tests.iter().par_bridge().for_each(|test| {
            logger.lock().start(test);
-            let result = std::panic::catch_unwind(|| run::run(test));
+            let result = std::panic::catch_unwind(|| runner(test));
            logger.lock().end(test, result);
        });

@@ -142,3 +167,46 @@ fn undangle() {
        }
    }
}

+fn create_syntax_store(ref_path: Option<PathBuf>) -> (&'static Path, Option<PathBuf>) {
+    if ref_path.as_ref().is_some_and(|p| !p.exists()) {
+        eprintln!("syntax reference path doesn't exist");
+        std::process::exit(1);
+    }
+
+    let live_path = Path::new(SYNTAX_PATH);
+    std::fs::remove_dir_all(live_path).ok();
+    std::fs::create_dir_all(live_path).unwrap();
+    (live_path, ref_path)
+}
+
+fn run_parser_test(
+    test: &Test,
+    live_path: &Path,
+    ref_path: &Option<PathBuf>,
+) -> TestResult {
+    let mut result = TestResult {
+        errors: String::new(),
+        infos: String::new(),
+        mismatched_image: false,
+    };
+
+    let syntax_file = live_path.join(format!("{}.syntax", test.name));
+    let tree = format!("{:#?}\n", test.source.root());
+    std::fs::write(syntax_file, &tree).unwrap();
+
+    let Some(ref_path) = ref_path else { return result };
+    let ref_file = ref_path.join(format!("{}.syntax", test.name));
+    match std::fs::read_to_string(&ref_file) {
+        Ok(ref_tree) => {
+            if tree != ref_tree {
+                result.errors = "differs".to_string();
+            }
+        }
+        Err(_) => {
+            result.errors = format!("missing reference: {}", ref_file.display());
+        }
+    }
+
+    result
+}
29 tests/suite/math/symbols.typ (new file)
@@ -0,0 +1,29 @@
// Test math symbol edge cases.

--- math-symbol-basic ---
#let sym = symbol("s", ("basic", "s"))
#test($sym.basic$, $#"s"$)

--- math-symbol-underscore ---
#let sym = symbol("s", ("test_underscore", "s"))
// Error: 6-10 unknown symbol modifier
$sym.test_underscore$

--- math-symbol-dash ---
#let sym = symbol("s", ("test-dash", "s"))
// Error: 6-10 unknown symbol modifier
$sym.test-dash$

--- math-symbol-double ---
#let sym = symbol("s", ("test.basic", "s"))
#test($sym.test.basic$, $#"s"$)

--- math-symbol-double-underscore ---
#let sym = symbol("s", ("one.test_underscore", "s"))
// Error: 10-14 unknown symbol modifier
$sym.one.test_underscore$

--- math-symbol-double-dash ---
#let sym = symbol("s", ("one.test-dash", "s"))
// Error: 10-14 unknown symbol modifier
$sym.one.test-dash$
@@ -38,7 +38,7 @@ multiline.
--- heading-trailing-whitespace ---
// Whether headings contain trailing whitespace with or without comments/labels.
// Labels are special cased to immediately end headings in the parser, but also
-// have unique whitespace behavior.
+// #strike[have unique whitespace behavior] Now their behavior is consistent!

#let join(..xs) = xs.pos().join()
#let head(h) = heading(depth: 1, h)
@@ -49,19 +49,20 @@ multiline.
#test(head[h], [= h<a>])
#test(head[h], [= h/**/<b>])

-// Label behaves differently than normal trailing space and comment.
-#test(head(join[h][ ]), [= h ])
-#test(head(join[h][ ]), [= h /**/])
+// #strike[Label behaves differently than normal trailing space and comment.]
+// Now they behave the same!
+#test(join(head[h])[ ], [= h ])
+#test(join(head[h])[ ], [= h /**/])
+#test(join(head[h])[ ], [= h <c>])

// Combinations.
-#test(head(join[h][ ][ ]), [= h /**/ ])
+#test(join(head[h])[ ][ ], [= h /**/ ])
+#test(join(head[h])[ ][ ], [= h <d> ])
-#test(head(join[h][ ]), [= h /**/<e>])
+#test(join(head[h])[ ], [= h /**/<e>])
+#test(join(head[h])[ ], [= h/**/ <f>])

-// The first space attaches, but not the second
-#test(join(head(join[h][ ]))[ ], [= h /**/ <g>])
+// #strike[The first space attaches, but not the second] Now neither attaches!
+#test(join(head(join[h]))[ ][ ], [= h /**/ <g>])

--- heading-leading-whitespace ---
// Test that leading whitespace and comments don't matter.
@@ -34,6 +34,51 @@ _Shopping list_
- C
- D

+--- list-indent-trivia-nesting ---
+// Test indent nesting behavior with odd trivia (comments and spaces).
+
+#let indented = [
+- a
+ /**/- b
+/**/ - c
+ /*spanning
+multiple
+lines */ - d
+- e
+/**/ - f
+/**/ - g
+]
+// Current behavior is that list columns are based on the first non-whitespace
+// element in their line, so the block comments here determine the column the
+// list starts at.
+
+#let item = list.item
+#let manual = {
+    [ ]
+    item({
+        [a]
+        [ ]
+        item[b]
+        [ ]; [ ]
+        item({
+            [c]
+            [ ]; [ ]
+            item[d]
+        })
+        [ ]
+        item({
+            [e]
+            [ ]; [ ]
+            item[f]
+            [ ]; [ ]
+            item[g]
+        })
+    })
+    [ ]
+}
+
+#test(indented, manual)
+
--- list-tabs ---
// This works because tabs are used consistently.
- A with 1 tab
@@ -135,6 +135,9 @@
// Error: 2-3 unexpected closing brace
#}

+--- single-right-bracket ---
+]
+
--- content-block-in-markup-scope ---
// Content blocks also create a scope.
#[#let x = 1]