Refactor Parser (#5310)

Laurenz 2024-11-04 10:17:49 +01:00 committed by GitHub
commit cb1aad3a0c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 1120 additions and 705 deletions

Cargo.lock generated
View File

@@ -3018,6 +3018,7 @@ dependencies = [
  "typst-pdf",
  "typst-render",
  "typst-svg",
+ "typst-syntax",
  "unscanny",
  "walkdir",
 ]

View File

@@ -0,0 +1,40 @@
# typst-syntax

Welcome to the Typst Syntax crate! This crate manages the syntactical structure
of Typst through a few core responsibilities: assigning source file IDs,
parsing Typst syntax, creating an Abstract Syntax Tree (AST), initializing
source "spans" (for linking AST elements to their outputs in a document), and
syntax highlighting.

Below are quick descriptions of the files you might be editing if you find
yourself here :)

- `lexer.rs`: The lexical foundation of the parser, which converts a string of
  characters into tokens.
- `parser.rs`: The main parser definition, preparing a Concrete Syntax Tree
  made of nested vectors of `SyntaxNode`s.
- `reparser.rs`: The algorithm for reparsing the minimal required amount of
  source text for efficient incremental compilation.
- `ast.rs`: The conversion layer between the Concrete Syntax Tree of the
  parser and the Abstract Syntax Tree used for code evaluation.
- `node.rs` & `span.rs`: The underlying data structure for the Concrete Syntax
  Tree and the definitions of source spans used for efficiently pointing to a
  syntax node in things like diagnostics.
- `kind.rs` & `set.rs`: An enum of all syntactical tokens and nodes, and a
  bit-set data structure for sets of `SyntaxKind`s.
- `highlight.rs`: Extraction of syntax highlighting information from the
  Concrete Syntax Tree (and output as HTML).
- `path.rs`, `file.rs`, `package.rs`: The system for interning project and
  package paths as unique file IDs and resolving them in a virtual filesystem
  (not actually for _opening_ files).

The structure of the parser is largely adapted from Rust Analyzer. Their
[documentation][ra] is a good reference for a number of the design decisions
around the parser and AST.

The reparsing algorithm is explained in Section 4 of [Martin's thesis][thesis]
(though it has changed a bit since then).

[ra]: https://github.com/rust-lang/rust-analyzer/blob/master/docs/dev/syntax.md
[thesis]: https://www.researchgate.net/publication/364622490_Fast_Typesetting_with_Incremental_Compilation
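To make the crate overview above concrete, here is a minimal sketch (editorial, not part of this commit) of the flow from source text through the Concrete Syntax Tree to the AST; it assumes only the crate's public `parse` entry point and the `ast` module:

```rust
use typst_syntax::ast::{self, AstNode};
use typst_syntax::{parse, SyntaxKind};

fn main() {
    // `parse` runs the lexer and parser, yielding a lossless Concrete
    // Syntax Tree: its text is exactly the input.
    let root = parse("= Heading\nSome *strong* text.");
    assert_eq!(root.kind(), SyntaxKind::Markup);
    assert_eq!(root.clone().into_text().as_str(), "= Heading\nSome *strong* text.");

    // `ast.rs` is the typed view over the same untyped tree.
    let markup: ast::Markup = root.cast().expect("root is markup");
    for expr in markup.exprs() {
        println!("{:?}", expr.to_untyped().kind());
    }
}
```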

View File

@@ -4,20 +4,18 @@ use unicode_script::{Script, UnicodeScript};
 use unicode_segmentation::UnicodeSegmentation;
 use unscanny::Scanner;

-use crate::{SyntaxError, SyntaxKind};
+use crate::{SyntaxError, SyntaxKind, SyntaxNode};

-/// Splits up a string of source code into tokens.
+/// An iterator over a source code string which returns tokens.
 #[derive(Clone)]
 pub(super) struct Lexer<'s> {
-    /// The underlying scanner.
+    /// The scanner: contains the underlying string and location as a "cursor".
     s: Scanner<'s>,
     /// The mode the lexer is in. This determines which kinds of tokens it
     /// produces.
     mode: LexMode,
     /// Whether the last token contained a newline.
     newline: bool,
-    /// The state held by raw line lexing.
-    raw: Vec<(SyntaxKind, usize)>,
     /// An error for the last token.
     error: Option<SyntaxError>,
 }
@@ -31,8 +29,6 @@ pub(super) enum LexMode {
     Math,
     /// Keywords, literals and operators.
     Code,
-    /// The contents of a raw block.
-    Raw,
 }

 impl<'s> Lexer<'s> {
@@ -44,7 +40,6 @@ impl<'s> Lexer<'s> {
             mode,
             newline: false,
             error: None,
-            raw: Vec::new(),
         }
     }
@@ -74,9 +69,11 @@ impl<'s> Lexer<'s> {
         self.newline
     }

-    /// Take out the last error, if any.
-    pub fn take_error(&mut self) -> Option<SyntaxError> {
-        self.error.take()
+    /// The number of characters until the most recent newline from an index.
+    pub fn column(&self, index: usize) -> usize {
+        let mut s = self.s; // Make a new temporary scanner (cheap).
+        s.jump(index);
+        s.before().chars().rev().take_while(|&c| !is_newline(c)).count()
     }
 }
@@ -97,21 +94,14 @@ impl Lexer<'_> {
 /// Shared methods with all [`LexMode`].
 impl Lexer<'_> {
-    /// Proceed to the next token and return its [`SyntaxKind`]. Note the
-    /// token could be a [trivia](SyntaxKind::is_trivia).
-    pub fn next(&mut self) -> SyntaxKind {
-        if self.mode == LexMode::Raw {
-            let Some((kind, end)) = self.raw.pop() else {
-                return SyntaxKind::End;
-            };
-            self.s.jump(end);
-            return kind;
-        }
-
+    /// Return the next token in our text. Returns both the [`SyntaxNode`]
+    /// and the raw [`SyntaxKind`] to make it more ergonomic to check the kind.
+    pub fn next(&mut self) -> (SyntaxKind, SyntaxNode) {
+        debug_assert!(self.error.is_none());
+        let start = self.s.cursor();
         self.newline = false;
-        self.error = None;
-        let start = self.s.cursor();
-        match self.s.eat() {
+        let kind = match self.s.eat() {
             Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
             Some('/') if self.s.eat_if('/') => self.line_comment(),
             Some('/') if self.s.eat_if('*') => self.block_comment(),
@@ -123,22 +113,32 @@ impl Lexer<'_> {
                 );
                 kind
             }
+            Some('`') if self.mode != LexMode::Math => return self.raw(),
             Some(c) => match self.mode {
                 LexMode::Markup => self.markup(start, c),
-                LexMode::Math => self.math(start, c),
+                LexMode::Math => match self.math(start, c) {
+                    (kind, None) => kind,
+                    (kind, Some(node)) => return (kind, node),
+                },
                 LexMode::Code => self.code(start, c),
-                LexMode::Raw => unreachable!(),
             },
             None => SyntaxKind::End,
-        }
+        };
+
+        let text = self.s.from(start);
+        let node = match self.error.take() {
+            Some(error) => SyntaxNode::error(error, text),
+            None => SyntaxNode::leaf(kind, text),
+        };
+        (kind, node)
     }

     /// Eat whitespace characters greedily.
     fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
         let more = self.s.eat_while(|c| is_space(c, self.mode));
         let newlines = match c {
+            // Optimize eating a single space.
             ' ' if more.is_empty() => 0,
             _ => count_newlines(self.s.from(start)),
         };
@@ -187,7 +187,6 @@ impl Lexer<'_> {
     fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
         match c {
             '\\' => self.backslash(),
-            '`' => self.raw(),
             'h' if self.s.eat_if("ttp://") => self.link(),
             'h' if self.s.eat_if("ttps://") => self.link(),
             '<' if self.s.at(is_id_continue) => self.label(),
@@ -252,9 +251,10 @@ impl Lexer<'_> {
         }
     }

-    fn raw(&mut self) -> SyntaxKind {
+    /// Lex an entire raw segment at once. This is a convenience to avoid going
+    /// to and from the parser for each raw section.
+    fn raw(&mut self) -> (SyntaxKind, SyntaxNode) {
         let start = self.s.cursor() - 1;
-        self.raw.clear();

         // Determine number of opening backticks.
         let mut backticks = 1;
@@ -264,9 +264,11 @@ impl Lexer<'_> {
         // Special case for ``.
         if backticks == 2 {
-            self.push_raw(SyntaxKind::RawDelim);
-            self.s.jump(start + 1);
-            return SyntaxKind::RawDelim;
+            let nodes = vec![
+                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
+                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
+            ];
+            return (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes));
         }

         // Find end of raw text.
@@ -275,43 +277,55 @@ impl Lexer<'_> {
             match self.s.eat() {
                 Some('`') => found += 1,
                 Some(_) => found = 0,
-                None => break,
+                None => {
+                    let msg = SyntaxError::new("unclosed raw text");
+                    let error = SyntaxNode::error(msg, self.s.from(start));
+                    return (SyntaxKind::Error, error);
+                }
             }
         }
-
-        if found != backticks {
-            return self.error("unclosed raw text");
-        }
-
         let end = self.s.cursor();
-        if backticks >= 3 {
-            self.blocky_raw(start, end, backticks);
-        } else {
-            self.inline_raw(start, end, backticks);
-        }

-        // Closing delimiter.
-        self.push_raw(SyntaxKind::RawDelim);
+        let mut nodes = Vec::with_capacity(3); // Will have at least 3.

-        // The saved tokens will be removed in reverse.
-        self.raw.reverse();
+        // A closure for pushing a node onto our raw vector. Assumes the caller
+        // will move the scanner to the next location at each step.
+        let mut prev_start = start;
+        let mut push_raw = |kind, s: &Scanner| {
+            nodes.push(SyntaxNode::leaf(kind, s.from(prev_start)));
+            prev_start = s.cursor();
+        };

         // Opening delimiter.
         self.s.jump(start + backticks);
-        SyntaxKind::RawDelim
+        push_raw(SyntaxKind::RawDelim, &self.s);
+
+        if backticks >= 3 {
+            self.blocky_raw(end - backticks, &mut push_raw);
+        } else {
+            self.inline_raw(end - backticks, &mut push_raw);
+        }
+
+        // Closing delimiter.
+        self.s.jump(end);
+        push_raw(SyntaxKind::RawDelim, &self.s);
+
+        (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes))
     }

-    fn blocky_raw(&mut self, start: usize, end: usize, backticks: usize) {
+    fn blocky_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
+    where
+        F: FnMut(SyntaxKind, &Scanner),
+    {
         // Language tag.
-        self.s.jump(start + backticks);
         if self.s.eat_if(is_id_start) {
             self.s.eat_while(is_id_continue);
-            self.push_raw(SyntaxKind::RawLang);
+            push_raw(SyntaxKind::RawLang, &self.s);
         }

         // Determine inner content between backticks.
         self.s.eat_if(' ');
-        let inner = self.s.to(end - backticks);
+        let inner = self.s.to(inner_end);

         // Determine dedent level.
         let mut lines = split_newlines(inner);
@@ -357,41 +371,32 @@ impl Lexer<'_> {
             let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
             self.s.eat_newline();
             self.s.advance(offset);
-            self.push_raw(SyntaxKind::RawTrimmed);
+            push_raw(SyntaxKind::RawTrimmed, &self.s);
             self.s.advance(line.len() - offset);
-            self.push_raw(SyntaxKind::Text);
+            push_raw(SyntaxKind::Text, &self.s);
         }

         // Add final trimmed.
-        if self.s.cursor() < end - backticks {
-            self.s.jump(end - backticks);
-            self.push_raw(SyntaxKind::RawTrimmed);
+        if self.s.cursor() < inner_end {
+            self.s.jump(inner_end);
+            push_raw(SyntaxKind::RawTrimmed, &self.s);
         }
-        self.s.jump(end);
     }

-    fn inline_raw(&mut self, start: usize, end: usize, backticks: usize) {
-        self.s.jump(start + backticks);
-
-        while self.s.cursor() < end - backticks {
+    fn inline_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
+    where
+        F: FnMut(SyntaxKind, &Scanner),
+    {
+        while self.s.cursor() < inner_end {
             if self.s.at(is_newline) {
-                self.push_raw(SyntaxKind::Text);
+                push_raw(SyntaxKind::Text, &self.s);
                 self.s.eat_newline();
-                self.push_raw(SyntaxKind::RawTrimmed);
+                push_raw(SyntaxKind::RawTrimmed, &self.s);
                 continue;
             }
             self.s.eat();
         }
-        self.push_raw(SyntaxKind::Text);
-        self.s.jump(end);
-    }
-
-    /// Push the current cursor that marks the end of a raw segment of
-    /// the given `kind`.
-    fn push_raw(&mut self, kind: SyntaxKind) {
-        let end = self.s.cursor();
-        self.raw.push((kind, end));
+        push_raw(SyntaxKind::Text, &self.s);
     }

     fn link(&mut self) -> SyntaxKind {
@@ -512,8 +517,8 @@ impl Lexer<'_> {
 /// Math.
 impl Lexer<'_> {
-    fn math(&mut self, start: usize, c: char) -> SyntaxKind {
-        match c {
+    fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
+        let kind = match c {
             '\\' => self.backslash(),
             '"' => self.string(),
@@ -566,11 +571,41 @@ impl Lexer<'_> {
             // Identifiers.
             c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
                 self.s.eat_while(is_math_id_continue);
-                SyntaxKind::MathIdent
+                let (kind, node) = self.math_ident_or_field(start);
+                return (kind, Some(node));
             }

             // Other math atoms.
             _ => self.math_text(start, c),
-        }
+        };
+        (kind, None)
+    }
+
+    /// Parse a single `MathIdent` or an entire `FieldAccess`.
+    fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
+        let mut kind = SyntaxKind::MathIdent;
+        let mut node = SyntaxNode::leaf(kind, self.s.from(start));
+        while let Some(ident) = self.maybe_dot_ident() {
+            kind = SyntaxKind::FieldAccess;
+            let field_children = vec![
+                node,
+                SyntaxNode::leaf(SyntaxKind::Dot, '.'),
+                SyntaxNode::leaf(SyntaxKind::Ident, ident),
+            ];
+            node = SyntaxNode::inner(kind, field_children);
+        }
+        (kind, node)
+    }
+
+    /// If at a dot and a math identifier, eat and return the identifier.
+    fn maybe_dot_ident(&mut self) -> Option<&str> {
+        if self.s.scout(1).is_some_and(is_math_id_start) && self.s.eat_if('.') {
+            let ident_start = self.s.cursor();
+            self.s.eat();
+            self.s.eat_while(is_math_id_continue);
+            Some(self.s.from(ident_start))
+        } else {
+            None
+        }
     }
 }
@@ -599,7 +634,6 @@ impl Lexer<'_> {
 impl Lexer<'_> {
     fn code(&mut self, start: usize, c: char) -> SyntaxKind {
         match c {
-            '`' => self.raw(),
             '<' if self.s.at(is_id_continue) => self.label(),
             '0'..='9' => self.number(start, c),
             '.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
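Since the parser.rs diff is suppressed below, a standalone sketch (editorial, not from this commit) may help illustrate what the lexer changes amount to: raw text now reaches the parser as a single finished `Raw` subtree with `RawDelim`/`RawLang`/`RawTrimmed`/`Text` children, built entirely inside `Lexer::raw`, instead of being replayed token-by-token through the removed `LexMode::Raw`. Assuming the crate's public `parse` entry point:

```rust
use typst_syntax::parse;

fn main() {
    // The lexer hands the parser one pre-built subtree for the raw segment,
    // so the parser never has to re-enter a raw lexing mode.
    let root = parse("`let x = 1`");
    let raw = root.children().next().expect("one child");
    println!("{raw:#?}"); // Raw: RawDelim ("`"), Text, RawDelim ("`")
}
```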

File diff suppressed because it is too large

View File

@@ -157,19 +157,13 @@ fn try_reparse(
     let new_range = shifted..shifted + new_len;
     let at_end = end == children.len();

-    // Stop parsing early if this kind is encountered.
-    let stop_kind = match parent_kind {
-        Some(_) => SyntaxKind::RightBracket,
-        None => SyntaxKind::End,
-    };
-
     // Reparse!
     let reparsed = reparse_markup(
         text,
         new_range.clone(),
         &mut at_start,
         &mut nesting,
-        |kind| kind == stop_kind,
+        parent_kind.is_none(),
     );

     if let Some(newborns) = reparsed {

View File

@@ -58,6 +58,7 @@ pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return);
 pub const MATH_EXPR: SyntaxSet = syntax_set!(
     Hash,
     MathIdent,
+    FieldAccess,
     Text,
     MathShorthand,
     Linebreak,
@@ -104,7 +105,7 @@ pub const ATOMIC_CODE_PRIMARY: SyntaxSet = syntax_set!(
     Numeric,
     Str,
     Label,
-    RawDelim,
+    Raw,
 );

 /// Syntax kinds that are unary operators.
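For context on the `SyntaxSet` edits above (which track the lexer's switch from emitting `RawDelim` tokens to a single `Raw` node): `set.rs` implements the bit-set over `SyntaxKind`s mentioned in the new README. Below is an editorial sketch of that idea with simplified names and hypothetical discriminants, not the crate's actual definitions:

```rust
/// Each kind maps to one bit of a `u128`, so sets of kinds can be built and
/// queried in O(1), even in `const` contexts.
#[derive(Clone, Copy)]
struct KindSet(u128);

impl KindSet {
    const EMPTY: Self = Self(0);

    const fn add(self, kind: u8) -> Self {
        Self(self.0 | (1u128 << kind))
    }

    const fn contains(self, kind: u8) -> bool {
        self.0 & (1u128 << kind) != 0
    }
}

// Hypothetical discriminants standing in for `SyntaxKind::Raw` and
// `SyntaxKind::RawDelim`.
const RAW: u8 = 40;
const RAW_DELIM: u8 = 41;

// Mirrors the hunk above: `Raw` replaces `RawDelim` in the set.
const ATOMIC_CODE_PRIMARY: KindSet = KindSet::EMPTY.add(RAW);

fn main() {
    assert!(ATOMIC_CODE_PRIMARY.contains(RAW));
    assert!(!ATOMIC_CODE_PRIMARY.contains(RAW_DELIM));
}
```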

View File

@@ -11,14 +11,32 @@ name = "tests"
 path = "src/tests.rs"
 harness = false

+[features]
+# Allow just compiling the parser when only testing typst-syntax. To do so,
+# pass '--no-default-features' to 'cargo test'.
+default = [
+    # "typst-syntax" intentionally not present
+    "typst",
+    "typst-assets",
+    "typst-dev-assets",
+    "typst-library",
+    "typst-pdf",
+    "typst-render",
+    "typst-svg",
+]
+
 [dependencies]
-typst = { workspace = true }
-typst-assets = { workspace = true, features = ["fonts"] }
-typst-dev-assets = { workspace = true }
-typst-library = { workspace = true }
-typst-pdf = { workspace = true }
-typst-render = { workspace = true }
-typst-svg = { workspace = true }
+typst-syntax = { workspace = true }
+# Mark other Typst crates as optional so we can use '--no-default-features'
+# to decrease compile times for parser testing.
+typst = { workspace = true, optional = true }
+typst-assets = { workspace = true, features = ["fonts"], optional = true }
+typst-dev-assets = { workspace = true, optional = true }
+typst-library = { workspace = true, optional = true }
+typst-pdf = { workspace = true, optional = true }
+typst-render = { workspace = true, optional = true }
+typst-svg = { workspace = true, optional = true }
 clap = { workspace = true }
 comemo = { workspace = true }
 ecow = { workspace = true }

Binary file not shown (new reference image, 118 B).

View File

@@ -43,7 +43,9 @@ pub struct CliArguments {
     /// Runs SVG export.
     #[arg(long)]
     pub svg: bool,
-    /// Displays the syntax tree.
+    /// Displays the syntax tree before running tests.
+    ///
+    /// Note: This is ignored if using '--parser-compare'.
     #[arg(long)]
     pub syntax: bool,
     /// Displays only one line per test, hiding details about failures.
@@ -55,6 +57,29 @@ pub struct CliArguments {
     /// How many threads to spawn when running the tests.
     #[arg(short = 'j', long)]
     pub num_threads: Option<usize>,
+    /// Changes testing behavior for debugging the parser: With no argument,
+    /// outputs the concrete syntax trees of tests as files in
+    /// 'tests/store/syntax/'. With a directory as argument, will treat it as a
+    /// reference of correct syntax tree files and will print which output
+    /// syntax trees differ (viewing the diffs is on you).
+    ///
+    /// This overrides the normal testing system. It parses, but does not run,
+    /// the test suite.
+    ///
+    /// If `cargo test` is run with `--no-default-features`, then compiling will
+    /// not include Typst's core crates, only typst-syntax, greatly speeding up
+    /// debugging when changing the parser.
+    ///
+    /// You can generate a correct reference directory by running on a known
+    /// good commit and copying the generated outputs to a new directory.
+    /// `_things` may be a good location as it is in the top-level gitignore.
+    ///
+    /// You can view diffs in VS Code with: `code --diff <ref_dir>/<test>.syntax
+    /// tests/store/syntax/<test>.syntax`
+    #[arg(long)]
+    pub parser_compare: Option<Option<PathBuf>>,
+    // ^ I'm not using a subcommand here because then test patterns don't parse
+    // how you would expect and I'm too lazy to try to fix it.
 }

 impl CliArguments {
impl CliArguments { impl CliArguments {

View File

@@ -6,8 +6,8 @@ use std::str::FromStr;
 use std::sync::LazyLock;

 use ecow::{eco_format, EcoString};
-use typst::syntax::package::PackageVersion;
-use typst::syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
+use typst_syntax::package::PackageVersion;
+use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
 use unscanny::Scanner;

 /// Collects all tests from all files.

View File

@@ -2,7 +2,16 @@ use std::io::{self, IsTerminal, StderrLock, Write};
 use std::time::{Duration, Instant};

 use crate::collect::Test;
-use crate::run::TestResult;
+
+/// The result of running a single test.
+pub struct TestResult {
+    /// The error log for this test. If empty, the test passed.
+    pub errors: String,
+    /// The info log for this test.
+    pub infos: String,
+    /// Whether the image was mismatched.
+    pub mismatched_image: bool,
+}

 /// Receives status updates by individual test runs.
 pub struct Logger<'a> {
@@ -58,7 +67,7 @@ impl<'a> Logger<'a> {
             }
         };

-        if result.is_ok() {
+        if result.errors.is_empty() {
             self.passed += 1;
         } else {
             self.failed += 1;

View File

@@ -12,6 +12,7 @@ use typst::WorldExt;
 use typst_pdf::PdfOptions;

 use crate::collect::{FileSize, NoteKind, Test};
+use crate::logger::TestResult;
 use crate::world::TestWorld;

 /// Runs a single test.
@@ -21,23 +22,6 @@ pub fn run(test: &Test) -> TestResult {
     Runner::new(test).run()
 }

-/// The result of running a single test.
-pub struct TestResult {
-    /// The error log for this test. If empty, the test passed.
-    pub errors: String,
-    /// The info log for this test.
-    pub infos: String,
-    /// Whether the image was mismatched.
-    pub mismatched_image: bool,
-}
-
-impl TestResult {
-    /// Whether the test passed.
-    pub fn is_ok(&self) -> bool {
-        self.errors.is_empty()
-    }
-}
-
 /// Write a line to a log sink, defaulting to the test's error log.
 macro_rules! log {
     (into: $sink:expr, $($tts:tt)*) => {

View File

@@ -1,13 +1,19 @@
 //! Typst's test runner.

+#![cfg_attr(not(feature = "default"), allow(dead_code, unused_imports))]
+
 mod args;
 mod collect;
-mod custom;
 mod logger;
+#[cfg(feature = "default")]
+mod custom;
+#[cfg(feature = "default")]
 mod run;
+#[cfg(feature = "default")]
 mod world;

-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::sync::LazyLock;
 use std::time::Duration;
@@ -16,7 +22,8 @@ use parking_lot::Mutex;
 use rayon::iter::{ParallelBridge, ParallelIterator};

 use crate::args::{CliArguments, Command};
-use crate::logger::Logger;
+use crate::collect::Test;
+use crate::logger::{Logger, TestResult};

 /// The parsed command line arguments.
 static ARGS: LazyLock<CliArguments> = LazyLock::new(CliArguments::parse);
@@ -27,6 +34,9 @@ const SUITE_PATH: &str = "tests/suite";
 /// The directory where the full test results are stored.
 const STORE_PATH: &str = "tests/store";

+/// The directory where syntax trees are stored.
+const SYNTAX_PATH: &str = "tests/store/syntax";
+
 /// The directory where the reference images are stored.
 const REF_PATH: &str = "tests/ref";
@@ -89,6 +99,21 @@ fn test() {
         return;
     }

+    let parser_dirs = ARGS.parser_compare.clone().map(create_syntax_store);
+    #[cfg(not(feature = "default"))]
+    let parser_dirs = parser_dirs.or_else(|| Some(create_syntax_store(None)));
+
+    let runner = |test: &Test| {
+        if let Some((live_path, ref_path)) = &parser_dirs {
+            run_parser_test(test, live_path, ref_path)
+        } else {
+            #[cfg(feature = "default")]
+            return run::run(test);
+            #[cfg(not(feature = "default"))]
+            unreachable!();
+        }
+    };
+
     // Run the tests.
     let logger = Mutex::new(Logger::new(selected, skipped));
     std::thread::scope(|scope| {
@@ -112,7 +137,7 @@ fn test() {
         // to `typst::utils::Deferred` yielding.
         tests.iter().par_bridge().for_each(|test| {
             logger.lock().start(test);
-            let result = std::panic::catch_unwind(|| run::run(test));
+            let result = std::panic::catch_unwind(|| runner(test));
             logger.lock().end(test, result);
         });
@@ -142,3 +167,46 @@ fn undangle() {
         }
     }
 }
+
+fn create_syntax_store(ref_path: Option<PathBuf>) -> (&'static Path, Option<PathBuf>) {
+    if ref_path.as_ref().is_some_and(|p| !p.exists()) {
+        eprintln!("syntax reference path doesn't exist");
+        std::process::exit(1);
+    }
+    let live_path = Path::new(SYNTAX_PATH);
+    std::fs::remove_dir_all(live_path).ok();
+    std::fs::create_dir_all(live_path).unwrap();
+    (live_path, ref_path)
+}
+
+fn run_parser_test(
+    test: &Test,
+    live_path: &Path,
+    ref_path: &Option<PathBuf>,
+) -> TestResult {
+    let mut result = TestResult {
+        errors: String::new(),
+        infos: String::new(),
+        mismatched_image: false,
+    };
+
+    let syntax_file = live_path.join(format!("{}.syntax", test.name));
+    let tree = format!("{:#?}\n", test.source.root());
+    std::fs::write(syntax_file, &tree).unwrap();
+
+    let Some(ref_path) = ref_path else { return result };
+    let ref_file = ref_path.join(format!("{}.syntax", test.name));
+    match std::fs::read_to_string(&ref_file) {
+        Ok(ref_tree) => {
+            if tree != ref_tree {
+                result.errors = "differs".to_string();
+            }
+        }
+        Err(_) => {
+            result.errors = format!("missing reference: {}", ref_file.display());
+        }
+    }
+    result
+}

View File

@@ -0,0 +1,29 @@
// Test math symbol edge cases.
--- math-symbol-basic ---
#let sym = symbol("s", ("basic", "s"))
#test($sym.basic$, $#"s"$)
--- math-symbol-underscore ---
#let sym = symbol("s", ("test_underscore", "s"))
// Error: 6-10 unknown symbol modifier
$sym.test_underscore$
--- math-symbol-dash ---
#let sym = symbol("s", ("test-dash", "s"))
// Error: 6-10 unknown symbol modifier
$sym.test-dash$
--- math-symbol-double ---
#let sym = symbol("s", ("test.basic", "s"))
#test($sym.test.basic$, $#"s"$)
--- math-symbol-double-underscore ---
#let sym = symbol("s", ("one.test_underscore", "s"))
// Error: 10-14 unknown symbol modifier
$sym.one.test_underscore$
--- math-symbol-double-dash ---
#let sym = symbol("s", ("one.test-dash", "s"))
// Error: 10-14 unknown symbol modifier
$sym.one.test-dash$

View File

@@ -38,7 +38,7 @@ multiline.
 --- heading-trailing-whitespace ---
 // Whether headings contain trailing whitespace with or without comments/labels.
 // Labels are special cased to immediately end headings in the parser, but also
-// have unique whitespace behavior.
+// #strike[have unique whitespace behavior] Now their behavior is consistent!

 #let join(..xs) = xs.pos().join()
 #let head(h) = heading(depth: 1, h)
@@ -49,19 +49,20 @@ multiline.
 #test(head[h], [= h<a>])
 #test(head[h], [= h/**/<b>])

-// Label behaves differently than normal trailing space and comment.
-#test(head(join[h][ ]), [= h ])
-#test(head(join[h][ ]), [= h /**/])
+// #strike[Label behaves differently than normal trailing space and comment.]
+// Now they behave the same!
+#test(join(head[h])[ ], [= h ])
+#test(join(head[h])[ ], [= h /**/])
 #test(join(head[h])[ ], [= h <c>])

 // Combinations.
-#test(head(join[h][ ][ ]), [= h /**/ ])
+#test(join(head[h])[ ][ ], [= h /**/ ])
 #test(join(head[h])[ ][ ], [= h <d> ])
-#test(head(join[h][ ]), [= h /**/<e>])
+#test(join(head[h])[ ], [= h /**/<e>])
 #test(join(head[h])[ ], [= h/**/ <f>])

-// The first space attaches, but not the second
-#test(join(head(join[h][ ]))[ ], [= h /**/ <g>])
+// #strike[The first space attaches, but not the second] Now neither attaches!
+#test(join(head(join[h]))[ ][ ], [= h /**/ <g>])

 --- heading-leading-whitespace ---
 // Test that leading whitespace and comments don't matter.

View File

@@ -34,6 +34,51 @@ _Shopping list_
 - C
 - D

+--- list-indent-trivia-nesting ---
+// Test indent nesting behavior with odd trivia (comments and spaces).
+#let indented = [
+- a
+ /**/- b
+ /**/ - c
+ /*spanning
+ multiple
+ lines */ - d
+ - e
+ /**/ - f
+ /**/ - g
+]
+
+// Current behavior is that list columns are based on the first non-whitespace
+// element in their line, so the block comments here determine the column the
+// list starts at.
+#let item = list.item
+#let manual = {
+  [ ]
+  item({
+    [a]
+    [ ]
+    item[b]
+    [ ]; [ ]
+    item({
+      [c]
+      [ ]; [ ]
+      item[d]
+    })
+    [ ]
+    item({
+      [e]
+      [ ]; [ ]
+      item[f]
+      [ ]; [ ]
+      item[g]
+    })
+  })
+  [ ]
+}
+
+#test(indented, manual)
+
 --- list-tabs ---
 // This works because tabs are used consistently.
 - A with 1 tab

View File

@@ -135,6 +135,9 @@
 // Error: 2-3 unexpected closing brace
 #}

+--- single-right-bracket ---
+]
+
 --- content-block-in-markup-scope ---
 // Content blocks also create a scope.
 #[#let x = 1]