mirror of https://github.com/typst/typst
synced 2025-05-13 20:46:23 +08:00
Refactor Parser (#5310)
This commit is contained in: commit cb1aad3a0c
1 Cargo.lock (generated)
@@ -3018,6 +3018,7 @@ dependencies = [
 "typst-pdf",
 "typst-render",
 "typst-svg",
+ "typst-syntax",
 "unscanny",
 "walkdir",
]
40 crates/typst-syntax/README.md (new file)
@@ -0,0 +1,40 @@
# typst-syntax

Welcome to the Typst Syntax crate! This crate manages the syntactical structure
of Typst by holding some core abstractions: assigning source file IDs, parsing
Typst syntax, creating an Abstract Syntax Tree (AST), initializing source
"spans" (for linking AST elements to their outputs in a document), and syntax
highlighting.

Below are quick descriptions of the files you might be editing if you find
yourself here :)

- `lexer.rs`: The lexical foundation of the parser, which converts a string of
  characters into tokens.
- `parser.rs`: The main parser definition, preparing a Concrete Syntax Tree
  made of nested vectors of `SyntaxNode`s.
- `reparser.rs`: The algorithm for reparsing the minimal required amount of
  source text for efficient incremental compilation.
- `ast.rs`: The conversion layer between the Concrete Syntax Tree of the parser
  and the Abstract Syntax Tree used for code evaluation.
- `node.rs` & `span.rs`: The underlying data structure for the Concrete Syntax
  Tree and the definitions of source spans used for efficiently pointing to a
  syntax node in things like diagnostics.
- `kind.rs` & `set.rs`: An enum of all syntactical tokens and nodes, and a
  bit-set data structure for sets of `SyntaxKind`s.
- `highlight.rs`: Extraction of syntax highlighting information from the
  Concrete Syntax Tree (with output as HTML).
- `path.rs`, `file.rs`, `package.rs`: The system for interning project and
  package paths as unique file IDs and resolving them in a virtual filesystem
  (not actually for _opening_ files).

The structure of the parser is largely adapted from Rust Analyzer. Their
[documentation][ra] is a good reference for a number of the design decisions
around the parser and AST.

The reparsing algorithm is explained in Section 4 of [Martin's thesis][thesis]
(though it has changed a bit since then).

[ra]: https://github.com/rust-lang/rust-analyzer/blob/master/docs/dev/syntax.md
[thesis]: https://www.researchgate.net/publication/364622490_Fast_Typesetting_with_Incremental_Compilation
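For orientation, here is an editor's sketch (not part of this commit) of the crate's primary entry point: parsing a string into a lossless Concrete Syntax Tree. The `typst_syntax::parse` function and `SyntaxNode::into_text` are assumed from the crate's public API; check the crate docs to confirm.

    // Parse Typst markup and inspect the resulting syntax tree.
    fn main() {
        let src = "= Heading\nSome *bold* text.";
        let root = typst_syntax::parse(src);
        // The CST is lossless: reassembling its text reproduces the input.
        assert_eq!(root.clone().into_text(), src);
        println!("{root:#?}");
    }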
@@ -4,20 +4,18 @@ use unicode_script::{Script, UnicodeScript};
use unicode_segmentation::UnicodeSegmentation;
use unscanny::Scanner;

-use crate::{SyntaxError, SyntaxKind};
+use crate::{SyntaxError, SyntaxKind, SyntaxNode};

-/// Splits up a string of source code into tokens.
+/// An iterator over a source code string which returns tokens.
#[derive(Clone)]
pub(super) struct Lexer<'s> {
-    /// The underlying scanner.
+    /// The scanner: contains the underlying string and location as a "cursor".
    s: Scanner<'s>,
    /// The mode the lexer is in. This determines which kinds of tokens it
    /// produces.
    mode: LexMode,
    /// Whether the last token contained a newline.
    newline: bool,
-    /// The state held by raw line lexing.
-    raw: Vec<(SyntaxKind, usize)>,
    /// An error for the last token.
    error: Option<SyntaxError>,
}
@@ -31,8 +29,6 @@ pub(super) enum LexMode
    Math,
    /// Keywords, literals and operators.
    Code,
-    /// The contents of a raw block.
-    Raw,
}

impl<'s> Lexer<'s> {
@@ -44,7 +40,6 @@ impl<'s> Lexer<'s> {
            mode,
            newline: false,
            error: None,
-            raw: Vec::new(),
        }
    }

@@ -74,9 +69,11 @@ impl<'s> Lexer<'s> {
        self.newline
    }

-    /// Take out the last error, if any.
-    pub fn take_error(&mut self) -> Option<SyntaxError> {
-        self.error.take()
+    /// The number of characters until the most recent newline from an index.
+    pub fn column(&self, index: usize) -> usize {
+        let mut s = self.s; // Make a new temporary scanner (cheap).
+        s.jump(index);
+        s.before().chars().rev().take_while(|&c| !is_newline(c)).count()
    }
}

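Editor's aside, a worked example of the new `column` method: for the source "ab\ncd", `column(4)` jumps a temporary scanner to byte 4, scans backwards over "ab\nc", and counts one character before hitting the newline, so the reported column is 1.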
@@ -97,21 +94,14 @@ impl Lexer<'_> {

/// Shared methods with all [`LexMode`].
impl Lexer<'_> {
-    /// Proceed to the next token and return its [`SyntaxKind`]. Note the
-    /// token could be a [trivia](SyntaxKind::is_trivia).
-    pub fn next(&mut self) -> SyntaxKind {
-        if self.mode == LexMode::Raw {
-            let Some((kind, end)) = self.raw.pop() else {
-                return SyntaxKind::End;
-            };
-            self.s.jump(end);
-            return kind;
-        }
+    /// Return the next token in our text. Returns both the [`SyntaxNode`]
+    /// and the raw [`SyntaxKind`] to make it more ergonomic to check the kind.
+    pub fn next(&mut self) -> (SyntaxKind, SyntaxNode) {
+        debug_assert!(self.error.is_none());
+        let start = self.s.cursor();

        self.newline = false;
-        self.error = None;
-        let start = self.s.cursor();
-        match self.s.eat() {
+        let kind = match self.s.eat() {
            Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
            Some('/') if self.s.eat_if('/') => self.line_comment(),
            Some('/') if self.s.eat_if('*') => self.block_comment(),
@@ -123,22 +113,32 @@ impl Lexer<'_> {
                );
                kind
            }

+            Some('`') if self.mode != LexMode::Math => return self.raw(),
            Some(c) => match self.mode {
                LexMode::Markup => self.markup(start, c),
-                LexMode::Math => self.math(start, c),
+                LexMode::Math => match self.math(start, c) {
+                    (kind, None) => kind,
+                    (kind, Some(node)) => return (kind, node),
+                },
                LexMode::Code => self.code(start, c),
-                LexMode::Raw => unreachable!(),
            },

            None => SyntaxKind::End,
-        }
+        };

+        let text = self.s.from(start);
+        let node = match self.error.take() {
+            Some(error) => SyntaxNode::error(error, text),
+            None => SyntaxNode::leaf(kind, text),
+        };
+        (kind, node)
    }

    /// Eat whitespace characters greedily.
    fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
        let more = self.s.eat_while(|c| is_space(c, self.mode));
        let newlines = match c {
            // Optimize eating a single space.
            ' ' if more.is_empty() => 0,
            _ => count_newlines(self.s.from(start)),
        };
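To illustrate the refactored interface (an editor's sketch using names from this diff; `Lexer` is crate-internal, so this is illustrative rather than independently runnable): callers now drain the lexer until `SyntaxKind::End` and collect ready-made nodes, with any lexing error already baked into the returned `SyntaxNode`.

    // Illustrative only: drain the refactored lexer into finished nodes.
    fn drain(mut lexer: Lexer) -> Vec<SyntaxNode> {
        let mut nodes = Vec::new();
        loop {
            let (kind, node) = lexer.next();
            if kind == SyntaxKind::End {
                break;
            }
            // Errors were already converted into error nodes by `next`.
            nodes.push(node);
        }
        nodes
    }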
@@ -187,7 +187,6 @@ impl Lexer<'_> {
    fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
        match c {
            '\\' => self.backslash(),
-            '`' => self.raw(),
            'h' if self.s.eat_if("ttp://") => self.link(),
            'h' if self.s.eat_if("ttps://") => self.link(),
            '<' if self.s.at(is_id_continue) => self.label(),
@@ -252,9 +251,10 @@ impl Lexer<'_> {
        }
    }

-    fn raw(&mut self) -> SyntaxKind {
+    /// Lex an entire raw segment at once. This is a convenience to avoid going
+    /// to and from the parser for each raw section.
+    fn raw(&mut self) -> (SyntaxKind, SyntaxNode) {
        let start = self.s.cursor() - 1;
-        self.raw.clear();

        // Determine number of opening backticks.
        let mut backticks = 1;
@@ -264,9 +264,11 @@ impl Lexer<'_> {

        // Special case for ``.
        if backticks == 2 {
-            self.push_raw(SyntaxKind::RawDelim);
-            self.s.jump(start + 1);
-            return SyntaxKind::RawDelim;
+            let nodes = vec![
+                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
+                SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
+            ];
+            return (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes));
        }

        // Find end of raw text.
@@ -275,43 +277,55 @@ impl Lexer<'_> {
            match self.s.eat() {
                Some('`') => found += 1,
                Some(_) => found = 0,
-                None => break,
+                None => {
+                    let msg = SyntaxError::new("unclosed raw text");
+                    let error = SyntaxNode::error(msg, self.s.from(start));
+                    return (SyntaxKind::Error, error);
+                }
            }
        }

-        if found != backticks {
-            return self.error("unclosed raw text");
-        }

        let end = self.s.cursor();
-        if backticks >= 3 {
-            self.blocky_raw(start, end, backticks);
-        } else {
-            self.inline_raw(start, end, backticks);
-        }

-        // Closing delimiter.
-        self.push_raw(SyntaxKind::RawDelim);
+        let mut nodes = Vec::with_capacity(3); // Will have at least 3.

-        // The saved tokens will be removed in reverse.
-        self.raw.reverse();
+        // A closure for pushing a node onto our raw vector. Assumes the caller
+        // will move the scanner to the next location at each step.
+        let mut prev_start = start;
+        let mut push_raw = |kind, s: &Scanner| {
+            nodes.push(SyntaxNode::leaf(kind, s.from(prev_start)));
+            prev_start = s.cursor();
+        };

        // Opening delimiter.
        self.s.jump(start + backticks);
-        SyntaxKind::RawDelim
+        push_raw(SyntaxKind::RawDelim, &self.s);
+
+        if backticks >= 3 {
+            self.blocky_raw(end - backticks, &mut push_raw);
+        } else {
+            self.inline_raw(end - backticks, &mut push_raw);
+        }
+
+        // Closing delimiter.
+        self.s.jump(end);
+        push_raw(SyntaxKind::RawDelim, &self.s);
+
+        (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes))
    }

-    fn blocky_raw(&mut self, start: usize, end: usize, backticks: usize) {
+    fn blocky_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
+    where
+        F: FnMut(SyntaxKind, &Scanner),
+    {
        // Language tag.
-        self.s.jump(start + backticks);
        if self.s.eat_if(is_id_start) {
            self.s.eat_while(is_id_continue);
-            self.push_raw(SyntaxKind::RawLang);
+            push_raw(SyntaxKind::RawLang, &self.s);
        }

        // Determine inner content between backticks.
        self.s.eat_if(' ');
-        let inner = self.s.to(end - backticks);
+        let inner = self.s.to(inner_end);

        // Determine dedent level.
        let mut lines = split_newlines(inner);
@@ -357,41 +371,32 @@ impl Lexer<'_> {
            let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
            self.s.eat_newline();
            self.s.advance(offset);
-            self.push_raw(SyntaxKind::RawTrimmed);
+            push_raw(SyntaxKind::RawTrimmed, &self.s);
            self.s.advance(line.len() - offset);
-            self.push_raw(SyntaxKind::Text);
+            push_raw(SyntaxKind::Text, &self.s);
        }

        // Add final trimmed.
-        if self.s.cursor() < end - backticks {
-            self.s.jump(end - backticks);
-            self.push_raw(SyntaxKind::RawTrimmed);
+        if self.s.cursor() < inner_end {
+            self.s.jump(inner_end);
+            push_raw(SyntaxKind::RawTrimmed, &self.s);
        }
-        self.s.jump(end);
    }

-    fn inline_raw(&mut self, start: usize, end: usize, backticks: usize) {
-        self.s.jump(start + backticks);
-
-        while self.s.cursor() < end - backticks {
+    fn inline_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
+    where
+        F: FnMut(SyntaxKind, &Scanner),
+    {
+        while self.s.cursor() < inner_end {
            if self.s.at(is_newline) {
-                self.push_raw(SyntaxKind::Text);
+                push_raw(SyntaxKind::Text, &self.s);
                self.s.eat_newline();
-                self.push_raw(SyntaxKind::RawTrimmed);
+                push_raw(SyntaxKind::RawTrimmed, &self.s);
                continue;
            }
            self.s.eat();
        }
-        self.push_raw(SyntaxKind::Text);
-
-        self.s.jump(end);
-    }
-
-    /// Push the current cursor that marks the end of a raw segment of
-    /// the given `kind`.
-    fn push_raw(&mut self, kind: SyntaxKind) {
-        let end = self.s.cursor();
-        self.raw.push((kind, end));
+        push_raw(SyntaxKind::Text, &self.s);
    }

    fn link(&mut self) -> SyntaxKind {
@@ -512,8 +517,8 @@ impl Lexer<'_> {

/// Math.
impl Lexer<'_> {
-    fn math(&mut self, start: usize, c: char) -> SyntaxKind {
-        match c {
+    fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
+        let kind = match c {
            '\\' => self.backslash(),
            '"' => self.string(),

@@ -566,11 +571,41 @@ impl Lexer<'_> {
            // Identifiers.
            c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
                self.s.eat_while(is_math_id_continue);
-                SyntaxKind::MathIdent
+                let (kind, node) = self.math_ident_or_field(start);
+                return (kind, Some(node));
            }

            // Other math atoms.
            _ => self.math_text(start, c),
-        }
+        };
+        (kind, None)
    }

+    /// Parse a single `MathIdent` or an entire `FieldAccess`.
+    fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
+        let mut kind = SyntaxKind::MathIdent;
+        let mut node = SyntaxNode::leaf(kind, self.s.from(start));
+        while let Some(ident) = self.maybe_dot_ident() {
+            kind = SyntaxKind::FieldAccess;
+            let field_children = vec![
+                node,
+                SyntaxNode::leaf(SyntaxKind::Dot, '.'),
+                SyntaxNode::leaf(SyntaxKind::Ident, ident),
+            ];
+            node = SyntaxNode::inner(kind, field_children);
+        }
+        (kind, node)
+    }

+    /// If at a dot and a math identifier, eat and return the identifier.
+    fn maybe_dot_ident(&mut self) -> Option<&str> {
+        if self.s.scout(1).is_some_and(is_math_id_start) && self.s.eat_if('.') {
+            let ident_start = self.s.cursor();
+            self.s.eat();
+            self.s.eat_while(is_math_id_continue);
+            Some(self.s.from(ident_start))
+        } else {
+            None
+        }
+    }

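An editor's illustration (assumed, not part of the diff): for the math input `a.b.c`, `math_ident_or_field` loops twice, wrapping the previous node each time, and produces a left-nested tree roughly like:

    FieldAccess
    ├── FieldAccess
    │   ├── MathIdent "a"
    │   ├── Dot "."
    │   └── Ident "b"
    ├── Dot "."
    └── Ident "c"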
@@ -599,7 +634,6 @@ impl Lexer<'_> {
impl Lexer<'_> {
    fn code(&mut self, start: usize, c: char) -> SyntaxKind {
        match c {
-            '`' => self.raw(),
            '<' if self.s.at(is_id_continue) => self.label(),
            '0'..='9' => self.number(start, c),
            '.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
File diff suppressed because it is too large
@@ -157,19 +157,13 @@ fn try_reparse(
        let new_range = shifted..shifted + new_len;
        let at_end = end == children.len();

-        // Stop parsing early if this kind is encountered.
-        let stop_kind = match parent_kind {
-            Some(_) => SyntaxKind::RightBracket,
-            None => SyntaxKind::End,
-        };
-
        // Reparse!
        let reparsed = reparse_markup(
            text,
            new_range.clone(),
            &mut at_start,
            &mut nesting,
-            |kind| kind == stop_kind,
            parent_kind.is_none(),
        );

        if let Some(newborns) = reparsed {
@@ -58,6 +58,7 @@ pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return)
pub const MATH_EXPR: SyntaxSet = syntax_set!(
    Hash,
    MathIdent,
+    FieldAccess,
    Text,
    MathShorthand,
    Linebreak,
@@ -104,7 +105,7 @@ pub const ATOMIC_CODE_PRIMARY: SyntaxSet = syntax_set!(
    Numeric,
    Str,
    Label,
-    RawDelim,
+    Raw,
);

/// Syntax kinds that are unary operators.
@@ -11,14 +11,32 @@ name = "tests"
path = "src/tests.rs"
harness = false

+[features]
+# Allow just compiling the parser when only testing typst-syntax. To do so,
+# pass '--no-default-features' to 'cargo test'.
+default = [
+    # "typst-syntax" intentionally not present
+    "typst",
+    "typst-assets",
+    "typst-dev-assets",
+    "typst-library",
+    "typst-pdf",
+    "typst-render",
+    "typst-svg",
+]
+
[dependencies]
-typst = { workspace = true }
-typst-assets = { workspace = true, features = ["fonts"] }
-typst-dev-assets = { workspace = true }
-typst-library = { workspace = true }
-typst-pdf = { workspace = true }
-typst-render = { workspace = true }
-typst-svg = { workspace = true }
typst-syntax = { workspace = true }
+# Mark other Typst crates as optional so we can use '--no-default-features'
+# to decrease compile times for parser testing.
+typst = { workspace = true, optional = true }
+typst-assets = { workspace = true, features = ["fonts"], optional = true }
+typst-dev-assets = { workspace = true, optional = true }
+typst-library = { workspace = true, optional = true }
+typst-pdf = { workspace = true, optional = true }
+typst-render = { workspace = true, optional = true }
+typst-svg = { workspace = true, optional = true }
clap = { workspace = true }
comemo = { workspace = true }
ecow = { workspace = true }
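Editor's note: with the core crates marked optional as above, a parser-only build compiles just typst-syntax plus the small harness dependencies; per the comments in this file, that is requested by passing '--no-default-features' to 'cargo test' (the exact invocation depends on your workspace setup).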
BIN tests/ref/single-right-bracket.png (new binary file, 118 B; not shown)
@@ -43,7 +43,9 @@ pub struct CliArguments {
    /// Runs SVG export.
    #[arg(long)]
    pub svg: bool,
-    /// Displays the syntax tree.
+    /// Displays the syntax tree before running tests.
+    ///
+    /// Note: This is ignored if using '--parser-compare'.
    #[arg(long)]
    pub syntax: bool,
    /// Displays only one line per test, hiding details about failures.
@@ -55,6 +57,29 @@ pub struct CliArguments {
    /// How many threads to spawn when running the tests.
    #[arg(short = 'j', long)]
    pub num_threads: Option<usize>,
+    /// Changes testing behavior for debugging the parser: With no argument,
+    /// outputs the concrete syntax trees of tests as files in
+    /// 'tests/store/syntax/'. With a directory as argument, will treat it as a
+    /// reference of correct syntax tree files and will print which output
+    /// syntax trees differ (viewing the diffs is on you).
+    ///
+    /// This overrides the normal testing system. It parses the test suite,
+    /// but does not run it.
+    ///
+    /// If `cargo test` is run with `--no-default-features`, then compiling will
+    /// not include Typst's core crates, only typst-syntax, greatly speeding up
+    /// debugging when changing the parser.
+    ///
+    /// You can generate a correct reference directory by running on a known
+    /// good commit and copying the generated outputs to a new directory.
+    /// `_things` may be a good location, as it is in the top-level gitignore.
+    ///
+    /// You can view diffs in VS Code with: `code --diff <ref_dir>/<test>.syntax
+    /// tests/store/syntax/<test>.syntax`
+    #[arg(long)]
+    pub parser_compare: Option<Option<PathBuf>>,
+    // ^ I'm not using a subcommand here because then test patterns don't parse
+    // how you would expect, and I'm too lazy to try to fix it.
}

impl CliArguments {
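An editor's sketch of the debugging workflow these flags enable (the exact invocations are inferred from the docs above, so treat them as assumptions): on a known-good commit, run `cargo test --no-default-features -- --parser-compare` and copy `tests/store/syntax/` to, say, `_things/good-syntax/`; then, on your working branch, run `cargo test --no-default-features -- --parser-compare=_things/good-syntax` and diff any trees it reports as differing.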
@@ -6,8 +6,8 @@ use std::str::FromStr;
use std::sync::LazyLock;

use ecow::{eco_format, EcoString};
-use typst::syntax::package::PackageVersion;
-use typst::syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
+use typst_syntax::package::PackageVersion;
+use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
use unscanny::Scanner;

/// Collects all tests from all files.
@@ -2,7 +2,16 @@ use std::io::{self, IsTerminal, StderrLock, Write};
use std::time::{Duration, Instant};

use crate::collect::Test;
-use crate::run::TestResult;

+/// The result of running a single test.
+pub struct TestResult {
+    /// The error log for this test. If empty, the test passed.
+    pub errors: String,
+    /// The info log for this test.
+    pub infos: String,
+    /// Whether the image was mismatched.
+    pub mismatched_image: bool,
+}

/// Receives status updates by individual test runs.
pub struct Logger<'a> {
@@ -58,7 +67,7 @@ impl<'a> Logger<'a> {
            }
        };

-        if result.is_ok() {
+        if result.errors.is_empty() {
            self.passed += 1;
        } else {
            self.failed += 1;
@@ -12,6 +12,7 @@ use typst::WorldExt;
use typst_pdf::PdfOptions;

use crate::collect::{FileSize, NoteKind, Test};
+use crate::logger::TestResult;
use crate::world::TestWorld;

/// Runs a single test.
@@ -21,23 +22,6 @@ pub fn run(test: &Test) -> TestResult {
    Runner::new(test).run()
}

-/// The result of running a single test.
-pub struct TestResult {
-    /// The error log for this test. If empty, the test passed.
-    pub errors: String,
-    /// The info log for this test.
-    pub infos: String,
-    /// Whether the image was mismatched.
-    pub mismatched_image: bool,
-}
-
-impl TestResult {
-    /// Whether the test passed.
-    pub fn is_ok(&self) -> bool {
-        self.errors.is_empty()
-    }
-}
-
/// Write a line to a log sink, defaulting to the test's error log.
macro_rules! log {
    (into: $sink:expr, $($tts:tt)*) => {
@@ -1,13 +1,19 @@
//! Typst's test runner.

+#![cfg_attr(not(feature = "default"), allow(dead_code, unused_imports))]
+
mod args;
mod collect;
-mod custom;
mod logger;

+#[cfg(feature = "default")]
+mod custom;
+#[cfg(feature = "default")]
mod run;
+#[cfg(feature = "default")]
mod world;

-use std::path::Path;
+use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use std::time::Duration;

@@ -16,7 +22,8 @@ use parking_lot::Mutex;
use rayon::iter::{ParallelBridge, ParallelIterator};

use crate::args::{CliArguments, Command};
-use crate::logger::Logger;
+use crate::collect::Test;
+use crate::logger::{Logger, TestResult};

/// The parsed command line arguments.
static ARGS: LazyLock<CliArguments> = LazyLock::new(CliArguments::parse);
@@ -27,6 +34,9 @@ const SUITE_PATH: &str = "tests/suite";
/// The directory where the full test results are stored.
const STORE_PATH: &str = "tests/store";

+/// The directory where syntax trees are stored.
+const SYNTAX_PATH: &str = "tests/store/syntax";
+
/// The directory where the reference images are stored.
const REF_PATH: &str = "tests/ref";

@@ -89,6 +99,21 @@ fn test() {
        return;
    }

+    let parser_dirs = ARGS.parser_compare.clone().map(create_syntax_store);
+    #[cfg(not(feature = "default"))]
+    let parser_dirs = parser_dirs.or_else(|| Some(create_syntax_store(None)));
+
+    let runner = |test: &Test| {
+        if let Some((live_path, ref_path)) = &parser_dirs {
+            run_parser_test(test, live_path, ref_path)
+        } else {
+            #[cfg(feature = "default")]
+            return run::run(test);
+            #[cfg(not(feature = "default"))]
+            unreachable!();
+        }
+    };
+
    // Run the tests.
    let logger = Mutex::new(Logger::new(selected, skipped));
    std::thread::scope(|scope| {
@@ -112,7 +137,7 @@ fn test() {
        // to `typst::utils::Deferred` yielding.
        tests.iter().par_bridge().for_each(|test| {
            logger.lock().start(test);
-            let result = std::panic::catch_unwind(|| run::run(test));
+            let result = std::panic::catch_unwind(|| runner(test));
            logger.lock().end(test, result);
        });

@@ -142,3 +167,46 @@ fn undangle() {
        }
    }
}

+fn create_syntax_store(ref_path: Option<PathBuf>) -> (&'static Path, Option<PathBuf>) {
+    if ref_path.as_ref().is_some_and(|p| !p.exists()) {
+        eprintln!("syntax reference path doesn't exist");
+        std::process::exit(1);
+    }
+
+    let live_path = Path::new(SYNTAX_PATH);
+    std::fs::remove_dir_all(live_path).ok();
+    std::fs::create_dir_all(live_path).unwrap();
+    (live_path, ref_path)
+}
+
+fn run_parser_test(
+    test: &Test,
+    live_path: &Path,
+    ref_path: &Option<PathBuf>,
+) -> TestResult {
+    let mut result = TestResult {
+        errors: String::new(),
+        infos: String::new(),
+        mismatched_image: false,
+    };
+
+    let syntax_file = live_path.join(format!("{}.syntax", test.name));
+    let tree = format!("{:#?}\n", test.source.root());
+    std::fs::write(syntax_file, &tree).unwrap();
+
+    let Some(ref_path) = ref_path else { return result };
+    let ref_file = ref_path.join(format!("{}.syntax", test.name));
+    match std::fs::read_to_string(&ref_file) {
+        Ok(ref_tree) => {
+            if tree != ref_tree {
+                result.errors = "differs".to_string();
+            }
+        }
+        Err(_) => {
+            result.errors = format!("missing reference: {}", ref_file.display());
+        }
+    }
+
+    result
+}
29 tests/suite/math/symbols.typ (new file)
@@ -0,0 +1,29 @@
// Test math symbol edge cases.

--- math-symbol-basic ---
#let sym = symbol("s", ("basic", "s"))
#test($sym.basic$, $#"s"$)

--- math-symbol-underscore ---
#let sym = symbol("s", ("test_underscore", "s"))
// Error: 6-10 unknown symbol modifier
$sym.test_underscore$

--- math-symbol-dash ---
#let sym = symbol("s", ("test-dash", "s"))
// Error: 6-10 unknown symbol modifier
$sym.test-dash$

--- math-symbol-double ---
#let sym = symbol("s", ("test.basic", "s"))
#test($sym.test.basic$, $#"s"$)

--- math-symbol-double-underscore ---
#let sym = symbol("s", ("one.test_underscore", "s"))
// Error: 10-14 unknown symbol modifier
$sym.one.test_underscore$

--- math-symbol-double-dash ---
#let sym = symbol("s", ("one.test-dash", "s"))
// Error: 10-14 unknown symbol modifier
$sym.one.test-dash$
@@ -38,7 +38,7 @@ multiline.
--- heading-trailing-whitespace ---
// Whether headings contain trailing whitespace with or without comments/labels.
// Labels are special cased to immediately end headings in the parser, but also
-// have unique whitespace behavior.
+// #strike[have unique whitespace behavior] Now their behavior is consistent!

#let join(..xs) = xs.pos().join()
#let head(h) = heading(depth: 1, h)
@@ -49,19 +49,20 @@ multiline.
#test(head[h], [= h<a>])
#test(head[h], [= h/**/<b>])

-// Label behaves differently than normal trailing space and comment.
-#test(head(join[h][ ]), [= h ])
-#test(head(join[h][ ]), [= h /**/])
+// #strike[Label behaves differently than normal trailing space and comment.]
+// Now they behave the same!
+#test(join(head[h])[ ], [= h ])
+#test(join(head[h])[ ], [= h /**/])
+#test(join(head[h])[ ], [= h <c>])

// Combinations.
-#test(head(join[h][ ][ ]), [= h /**/ ])
+#test(join(head[h])[ ][ ], [= h /**/ ])
+#test(join(head[h])[ ][ ], [= h <d> ])
-#test(head(join[h][ ]), [= h /**/<e>])
+#test(join(head[h])[ ], [= h /**/<e>])
+#test(join(head[h])[ ], [= h/**/ <f>])

-// The first space attaches, but not the second
-#test(join(head(join[h][ ]))[ ], [= h /**/ <g>])
+// #strike[The first space attaches, but not the second] Now neither attaches!
+#test(join(head(join[h]))[ ][ ], [= h /**/ <g>])

--- heading-leading-whitespace ---
// Test that leading whitespace and comments don't matter.
@@ -34,6 +34,51 @@ _Shopping list_
- C
- D

+--- list-indent-trivia-nesting ---
+// Test indent nesting behavior with odd trivia (comments and spaces).
+
+#let indented = [
+- a
+ /**/- b
+/**/ - c
+ /*spanning
+multiple
+lines */ - d
+- e
+/**/ - f
+/**/ - g
+]
+// Current behavior is that list columns are based on the first non-whitespace
+// element in their line, so the block comments here determine the column the
+// list starts at.
+
+#let item = list.item
+#let manual = {
+    [ ]
+    item({
+        [a]
+        [ ]
+        item[b]
+        [ ]; [ ]
+        item({
+            [c]
+            [ ]; [ ]
+            item[d]
+        })
+        [ ]
+        item({
+            [e]
+            [ ]; [ ]
+            item[f]
+            [ ]; [ ]
+            item[g]
+        })
+    })
+    [ ]
+}
+
+#test(indented, manual)
+
--- list-tabs ---
// This works because tabs are used consistently.
- A with 1 tab
@@ -135,6 +135,9 @@
// Error: 2-3 unexpected closing brace
#}

+--- single-right-bracket ---
+]
+
--- content-block-in-markup-scope ---
// Content blocks also create a scope.
#[#let x = 1]