mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Refactor Parser (#5310)
This commit is contained in:
commit
cb1aad3a0c
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3018,6 +3018,7 @@ dependencies = [
|
|||||||
"typst-pdf",
|
"typst-pdf",
|
||||||
"typst-render",
|
"typst-render",
|
||||||
"typst-svg",
|
"typst-svg",
|
||||||
|
"typst-syntax",
|
||||||
"unscanny",
|
"unscanny",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
]
|
]
|
||||||
|
40
crates/typst-syntax/README.md
Normal file
40
crates/typst-syntax/README.md
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# typst-syntax
|
||||||
|
|
||||||
|
Welcome to the Typst Syntax crate! This crate manages the syntactical structure
|
||||||
|
of Typst by holding some core abstractions like assigning source file ids,
|
||||||
|
parsing Typst syntax, creating an Abstract Syntax Tree (AST), initializing
|
||||||
|
source "spans" (for linking AST elements to their outputs in a document), and
|
||||||
|
syntax highlighting.
|
||||||
|
|
||||||
|
Below are quick descriptions of the files you might be editing if you find
|
||||||
|
yourself here :)
|
||||||
|
|
||||||
|
- `lexer.rs`: The lexical foundation of the parser, which converts a string of
|
||||||
|
characters into tokens.
|
||||||
|
- `parser.rs`: The main parser definition, preparing a Concrete Syntax Tree made
|
||||||
|
of nested vectors of `SyntaxNode`s.
|
||||||
|
- `reparser.rs`: The algorithm for reparsing the minimal required amount of
|
||||||
|
source text for efficient incremental compilation.
|
||||||
|
- `ast.rs`: The conversion layer between the Concrete Syntax Tree of the parser
|
||||||
|
and the Abstract Syntax Tree used for code evaluation.
|
||||||
|
- `node.rs` & `span.rs`: The underlying data structure for the Concrete Syntax
|
||||||
|
Tree and the definitions of source spans used for efficiently pointing to a
|
||||||
|
syntax node in things like diagnostics.
|
||||||
|
- `kind.rs` & `set.rs`: An enum with all syntactical tokens and nodes and
|
||||||
|
a bit-set data structure for sets of `SyntaxKind`s.
|
||||||
|
- `highlight.rs`: Extraction of syntax highlighting information out of the
|
||||||
|
Concrete Syntax Tree (and outputting as HTML).
|
||||||
|
- `path.rs`, `file.rs`, `package.rs`: The system for interning project and
|
||||||
|
package paths as unique file IDs and resolving them in a virtual filesystem
|
||||||
|
(not actually for _opening_ files).
|
||||||
|
|
||||||
|
The structure of the parser is largely adapted from Rust Analyzer. Their
|
||||||
|
[documentation][ra] is a good reference for a number of the design decisions
|
||||||
|
around the parser and AST.
|
||||||
|
|
||||||
|
The reparsing algorithm is explained in Section 4 of [Martin's thesis][thesis]
|
||||||
|
(though it has changed a bit since).
|
||||||
|
|
||||||
|
[ra]: https://github.com/rust-lang/rust-analyzer/blob/master/docs/dev/syntax.md
|
||||||
|
[thesis]:
|
||||||
|
https://www.researchgate.net/publication/364622490_Fast_Typesetting_with_Incremental_Compilation
|
@ -4,20 +4,18 @@ use unicode_script::{Script, UnicodeScript};
|
|||||||
use unicode_segmentation::UnicodeSegmentation;
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
use unscanny::Scanner;
|
use unscanny::Scanner;
|
||||||
|
|
||||||
use crate::{SyntaxError, SyntaxKind};
|
use crate::{SyntaxError, SyntaxKind, SyntaxNode};
|
||||||
|
|
||||||
/// Splits up a string of source code into tokens.
|
/// An iterator over a source code string which returns tokens.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub(super) struct Lexer<'s> {
|
pub(super) struct Lexer<'s> {
|
||||||
/// The underlying scanner.
|
/// The scanner: contains the underlying string and location as a "cursor".
|
||||||
s: Scanner<'s>,
|
s: Scanner<'s>,
|
||||||
/// The mode the lexer is in. This determines which kinds of tokens it
|
/// The mode the lexer is in. This determines which kinds of tokens it
|
||||||
/// produces.
|
/// produces.
|
||||||
mode: LexMode,
|
mode: LexMode,
|
||||||
/// Whether the last token contained a newline.
|
/// Whether the last token contained a newline.
|
||||||
newline: bool,
|
newline: bool,
|
||||||
/// The state held by raw line lexing.
|
|
||||||
raw: Vec<(SyntaxKind, usize)>,
|
|
||||||
/// An error for the last token.
|
/// An error for the last token.
|
||||||
error: Option<SyntaxError>,
|
error: Option<SyntaxError>,
|
||||||
}
|
}
|
||||||
@ -31,8 +29,6 @@ pub(super) enum LexMode {
|
|||||||
Math,
|
Math,
|
||||||
/// Keywords, literals and operators.
|
/// Keywords, literals and operators.
|
||||||
Code,
|
Code,
|
||||||
/// The contents of a raw block.
|
|
||||||
Raw,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Lexer<'s> {
|
impl<'s> Lexer<'s> {
|
||||||
@ -44,7 +40,6 @@ impl<'s> Lexer<'s> {
|
|||||||
mode,
|
mode,
|
||||||
newline: false,
|
newline: false,
|
||||||
error: None,
|
error: None,
|
||||||
raw: Vec::new(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -74,9 +69,11 @@ impl<'s> Lexer<'s> {
|
|||||||
self.newline
|
self.newline
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Take out the last error, if any.
|
/// The number of characters until the most recent newline from an index.
|
||||||
pub fn take_error(&mut self) -> Option<SyntaxError> {
|
pub fn column(&self, index: usize) -> usize {
|
||||||
self.error.take()
|
let mut s = self.s; // Make a new temporary scanner (cheap).
|
||||||
|
s.jump(index);
|
||||||
|
s.before().chars().rev().take_while(|&c| !is_newline(c)).count()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,21 +94,14 @@ impl Lexer<'_> {
|
|||||||
|
|
||||||
/// Shared methods with all [`LexMode`].
|
/// Shared methods with all [`LexMode`].
|
||||||
impl Lexer<'_> {
|
impl Lexer<'_> {
|
||||||
/// Proceed to the next token and return its [`SyntaxKind`]. Note the
|
/// Return the next token in our text. Returns both the [`SyntaxNode`]
|
||||||
/// token could be a [trivia](SyntaxKind::is_trivia).
|
/// and the raw [`SyntaxKind`] to make it more ergonomic to check the kind.
|
||||||
pub fn next(&mut self) -> SyntaxKind {
|
pub fn next(&mut self) -> (SyntaxKind, SyntaxNode) {
|
||||||
if self.mode == LexMode::Raw {
|
debug_assert!(self.error.is_none());
|
||||||
let Some((kind, end)) = self.raw.pop() else {
|
let start = self.s.cursor();
|
||||||
return SyntaxKind::End;
|
|
||||||
};
|
|
||||||
self.s.jump(end);
|
|
||||||
return kind;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.newline = false;
|
self.newline = false;
|
||||||
self.error = None;
|
let kind = match self.s.eat() {
|
||||||
let start = self.s.cursor();
|
|
||||||
match self.s.eat() {
|
|
||||||
Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
|
Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
|
||||||
Some('/') if self.s.eat_if('/') => self.line_comment(),
|
Some('/') if self.s.eat_if('/') => self.line_comment(),
|
||||||
Some('/') if self.s.eat_if('*') => self.block_comment(),
|
Some('/') if self.s.eat_if('*') => self.block_comment(),
|
||||||
@ -123,22 +113,32 @@ impl Lexer<'_> {
|
|||||||
);
|
);
|
||||||
kind
|
kind
|
||||||
}
|
}
|
||||||
|
Some('`') if self.mode != LexMode::Math => return self.raw(),
|
||||||
Some(c) => match self.mode {
|
Some(c) => match self.mode {
|
||||||
LexMode::Markup => self.markup(start, c),
|
LexMode::Markup => self.markup(start, c),
|
||||||
LexMode::Math => self.math(start, c),
|
LexMode::Math => match self.math(start, c) {
|
||||||
|
(kind, None) => kind,
|
||||||
|
(kind, Some(node)) => return (kind, node),
|
||||||
|
},
|
||||||
LexMode::Code => self.code(start, c),
|
LexMode::Code => self.code(start, c),
|
||||||
LexMode::Raw => unreachable!(),
|
|
||||||
},
|
},
|
||||||
|
|
||||||
None => SyntaxKind::End,
|
None => SyntaxKind::End,
|
||||||
}
|
};
|
||||||
|
|
||||||
|
let text = self.s.from(start);
|
||||||
|
let node = match self.error.take() {
|
||||||
|
Some(error) => SyntaxNode::error(error, text),
|
||||||
|
None => SyntaxNode::leaf(kind, text),
|
||||||
|
};
|
||||||
|
(kind, node)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Eat whitespace characters greedily.
|
/// Eat whitespace characters greedily.
|
||||||
fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
|
fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
|
||||||
let more = self.s.eat_while(|c| is_space(c, self.mode));
|
let more = self.s.eat_while(|c| is_space(c, self.mode));
|
||||||
let newlines = match c {
|
let newlines = match c {
|
||||||
|
// Optimize eating a single space.
|
||||||
' ' if more.is_empty() => 0,
|
' ' if more.is_empty() => 0,
|
||||||
_ => count_newlines(self.s.from(start)),
|
_ => count_newlines(self.s.from(start)),
|
||||||
};
|
};
|
||||||
@ -187,7 +187,6 @@ impl Lexer<'_> {
|
|||||||
fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
|
fn markup(&mut self, start: usize, c: char) -> SyntaxKind {
|
||||||
match c {
|
match c {
|
||||||
'\\' => self.backslash(),
|
'\\' => self.backslash(),
|
||||||
'`' => self.raw(),
|
|
||||||
'h' if self.s.eat_if("ttp://") => self.link(),
|
'h' if self.s.eat_if("ttp://") => self.link(),
|
||||||
'h' if self.s.eat_if("ttps://") => self.link(),
|
'h' if self.s.eat_if("ttps://") => self.link(),
|
||||||
'<' if self.s.at(is_id_continue) => self.label(),
|
'<' if self.s.at(is_id_continue) => self.label(),
|
||||||
@ -252,9 +251,10 @@ impl Lexer<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn raw(&mut self) -> SyntaxKind {
|
/// Lex an entire raw segment at once. This is a convenience to avoid going
|
||||||
|
/// to and from the parser for each raw section.
|
||||||
|
fn raw(&mut self) -> (SyntaxKind, SyntaxNode) {
|
||||||
let start = self.s.cursor() - 1;
|
let start = self.s.cursor() - 1;
|
||||||
self.raw.clear();
|
|
||||||
|
|
||||||
// Determine number of opening backticks.
|
// Determine number of opening backticks.
|
||||||
let mut backticks = 1;
|
let mut backticks = 1;
|
||||||
@ -264,9 +264,11 @@ impl Lexer<'_> {
|
|||||||
|
|
||||||
// Special case for ``.
|
// Special case for ``.
|
||||||
if backticks == 2 {
|
if backticks == 2 {
|
||||||
self.push_raw(SyntaxKind::RawDelim);
|
let nodes = vec![
|
||||||
self.s.jump(start + 1);
|
SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
|
||||||
return SyntaxKind::RawDelim;
|
SyntaxNode::leaf(SyntaxKind::RawDelim, "`"),
|
||||||
|
];
|
||||||
|
return (SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find end of raw text.
|
// Find end of raw text.
|
||||||
@ -275,43 +277,55 @@ impl Lexer<'_> {
|
|||||||
match self.s.eat() {
|
match self.s.eat() {
|
||||||
Some('`') => found += 1,
|
Some('`') => found += 1,
|
||||||
Some(_) => found = 0,
|
Some(_) => found = 0,
|
||||||
None => break,
|
None => {
|
||||||
|
let msg = SyntaxError::new("unclosed raw text");
|
||||||
|
let error = SyntaxNode::error(msg, self.s.from(start));
|
||||||
|
return (SyntaxKind::Error, error);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if found != backticks {
|
|
||||||
return self.error("unclosed raw text");
|
|
||||||
}
|
|
||||||
|
|
||||||
let end = self.s.cursor();
|
let end = self.s.cursor();
|
||||||
if backticks >= 3 {
|
|
||||||
self.blocky_raw(start, end, backticks);
|
|
||||||
} else {
|
|
||||||
self.inline_raw(start, end, backticks);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Closing delimiter.
|
let mut nodes = Vec::with_capacity(3); // Will have at least 3.
|
||||||
self.push_raw(SyntaxKind::RawDelim);
|
|
||||||
|
|
||||||
// The saved tokens will be removed in reverse.
|
// A closure for pushing a node onto our raw vector. Assumes the caller
|
||||||
self.raw.reverse();
|
// will move the scanner to the next location at each step.
|
||||||
|
let mut prev_start = start;
|
||||||
|
let mut push_raw = |kind, s: &Scanner| {
|
||||||
|
nodes.push(SyntaxNode::leaf(kind, s.from(prev_start)));
|
||||||
|
prev_start = s.cursor();
|
||||||
|
};
|
||||||
|
|
||||||
// Opening delimiter.
|
// Opening delimiter.
|
||||||
self.s.jump(start + backticks);
|
self.s.jump(start + backticks);
|
||||||
SyntaxKind::RawDelim
|
push_raw(SyntaxKind::RawDelim, &self.s);
|
||||||
|
|
||||||
|
if backticks >= 3 {
|
||||||
|
self.blocky_raw(end - backticks, &mut push_raw);
|
||||||
|
} else {
|
||||||
|
self.inline_raw(end - backticks, &mut push_raw);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Closing delimiter.
|
||||||
|
self.s.jump(end);
|
||||||
|
push_raw(SyntaxKind::RawDelim, &self.s);
|
||||||
|
|
||||||
|
(SyntaxKind::Raw, SyntaxNode::inner(SyntaxKind::Raw, nodes))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn blocky_raw(&mut self, start: usize, end: usize, backticks: usize) {
|
fn blocky_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
|
||||||
|
where
|
||||||
|
F: FnMut(SyntaxKind, &Scanner),
|
||||||
|
{
|
||||||
// Language tag.
|
// Language tag.
|
||||||
self.s.jump(start + backticks);
|
|
||||||
if self.s.eat_if(is_id_start) {
|
if self.s.eat_if(is_id_start) {
|
||||||
self.s.eat_while(is_id_continue);
|
self.s.eat_while(is_id_continue);
|
||||||
self.push_raw(SyntaxKind::RawLang);
|
push_raw(SyntaxKind::RawLang, &self.s);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine inner content between backticks.
|
// Determine inner content between backticks.
|
||||||
self.s.eat_if(' ');
|
self.s.eat_if(' ');
|
||||||
let inner = self.s.to(end - backticks);
|
let inner = self.s.to(inner_end);
|
||||||
|
|
||||||
// Determine dedent level.
|
// Determine dedent level.
|
||||||
let mut lines = split_newlines(inner);
|
let mut lines = split_newlines(inner);
|
||||||
@ -357,41 +371,32 @@ impl Lexer<'_> {
|
|||||||
let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
|
let offset: usize = line.chars().take(dedent).map(char::len_utf8).sum();
|
||||||
self.s.eat_newline();
|
self.s.eat_newline();
|
||||||
self.s.advance(offset);
|
self.s.advance(offset);
|
||||||
self.push_raw(SyntaxKind::RawTrimmed);
|
push_raw(SyntaxKind::RawTrimmed, &self.s);
|
||||||
self.s.advance(line.len() - offset);
|
self.s.advance(line.len() - offset);
|
||||||
self.push_raw(SyntaxKind::Text);
|
push_raw(SyntaxKind::Text, &self.s);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add final trimmed.
|
// Add final trimmed.
|
||||||
if self.s.cursor() < end - backticks {
|
if self.s.cursor() < inner_end {
|
||||||
self.s.jump(end - backticks);
|
self.s.jump(inner_end);
|
||||||
self.push_raw(SyntaxKind::RawTrimmed);
|
push_raw(SyntaxKind::RawTrimmed, &self.s);
|
||||||
}
|
}
|
||||||
self.s.jump(end);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn inline_raw(&mut self, start: usize, end: usize, backticks: usize) {
|
fn inline_raw<F>(&mut self, inner_end: usize, mut push_raw: F)
|
||||||
self.s.jump(start + backticks);
|
where
|
||||||
|
F: FnMut(SyntaxKind, &Scanner),
|
||||||
while self.s.cursor() < end - backticks {
|
{
|
||||||
|
while self.s.cursor() < inner_end {
|
||||||
if self.s.at(is_newline) {
|
if self.s.at(is_newline) {
|
||||||
self.push_raw(SyntaxKind::Text);
|
push_raw(SyntaxKind::Text, &self.s);
|
||||||
self.s.eat_newline();
|
self.s.eat_newline();
|
||||||
self.push_raw(SyntaxKind::RawTrimmed);
|
push_raw(SyntaxKind::RawTrimmed, &self.s);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
self.s.eat();
|
self.s.eat();
|
||||||
}
|
}
|
||||||
self.push_raw(SyntaxKind::Text);
|
push_raw(SyntaxKind::Text, &self.s);
|
||||||
|
|
||||||
self.s.jump(end);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Push the current cursor that marks the end of a raw segment of
|
|
||||||
/// the given `kind`.
|
|
||||||
fn push_raw(&mut self, kind: SyntaxKind) {
|
|
||||||
let end = self.s.cursor();
|
|
||||||
self.raw.push((kind, end));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn link(&mut self) -> SyntaxKind {
|
fn link(&mut self) -> SyntaxKind {
|
||||||
@ -512,8 +517,8 @@ impl Lexer<'_> {
|
|||||||
|
|
||||||
/// Math.
|
/// Math.
|
||||||
impl Lexer<'_> {
|
impl Lexer<'_> {
|
||||||
fn math(&mut self, start: usize, c: char) -> SyntaxKind {
|
fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
|
||||||
match c {
|
let kind = match c {
|
||||||
'\\' => self.backslash(),
|
'\\' => self.backslash(),
|
||||||
'"' => self.string(),
|
'"' => self.string(),
|
||||||
|
|
||||||
@ -566,11 +571,41 @@ impl Lexer<'_> {
|
|||||||
// Identifiers.
|
// Identifiers.
|
||||||
c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
|
c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
|
||||||
self.s.eat_while(is_math_id_continue);
|
self.s.eat_while(is_math_id_continue);
|
||||||
SyntaxKind::MathIdent
|
let (kind, node) = self.math_ident_or_field(start);
|
||||||
|
return (kind, Some(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Other math atoms.
|
// Other math atoms.
|
||||||
_ => self.math_text(start, c),
|
_ => self.math_text(start, c),
|
||||||
|
};
|
||||||
|
(kind, None)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a single `MathIdent` or an entire `FieldAccess`.
|
||||||
|
fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
|
||||||
|
let mut kind = SyntaxKind::MathIdent;
|
||||||
|
let mut node = SyntaxNode::leaf(kind, self.s.from(start));
|
||||||
|
while let Some(ident) = self.maybe_dot_ident() {
|
||||||
|
kind = SyntaxKind::FieldAccess;
|
||||||
|
let field_children = vec![
|
||||||
|
node,
|
||||||
|
SyntaxNode::leaf(SyntaxKind::Dot, '.'),
|
||||||
|
SyntaxNode::leaf(SyntaxKind::Ident, ident),
|
||||||
|
];
|
||||||
|
node = SyntaxNode::inner(kind, field_children);
|
||||||
|
}
|
||||||
|
(kind, node)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// If at a dot and a math identifier, eat and return the identifier.
|
||||||
|
fn maybe_dot_ident(&mut self) -> Option<&str> {
|
||||||
|
if self.s.scout(1).is_some_and(is_math_id_start) && self.s.eat_if('.') {
|
||||||
|
let ident_start = self.s.cursor();
|
||||||
|
self.s.eat();
|
||||||
|
self.s.eat_while(is_math_id_continue);
|
||||||
|
Some(self.s.from(ident_start))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -599,7 +634,6 @@ impl Lexer<'_> {
|
|||||||
impl Lexer<'_> {
|
impl Lexer<'_> {
|
||||||
fn code(&mut self, start: usize, c: char) -> SyntaxKind {
|
fn code(&mut self, start: usize, c: char) -> SyntaxKind {
|
||||||
match c {
|
match c {
|
||||||
'`' => self.raw(),
|
|
||||||
'<' if self.s.at(is_id_continue) => self.label(),
|
'<' if self.s.at(is_id_continue) => self.label(),
|
||||||
'0'..='9' => self.number(start, c),
|
'0'..='9' => self.number(start, c),
|
||||||
'.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
|
'.' if self.s.at(char::is_ascii_digit) => self.number(start, c),
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -157,19 +157,13 @@ fn try_reparse(
|
|||||||
let new_range = shifted..shifted + new_len;
|
let new_range = shifted..shifted + new_len;
|
||||||
let at_end = end == children.len();
|
let at_end = end == children.len();
|
||||||
|
|
||||||
// Stop parsing early if this kind is encountered.
|
|
||||||
let stop_kind = match parent_kind {
|
|
||||||
Some(_) => SyntaxKind::RightBracket,
|
|
||||||
None => SyntaxKind::End,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Reparse!
|
// Reparse!
|
||||||
let reparsed = reparse_markup(
|
let reparsed = reparse_markup(
|
||||||
text,
|
text,
|
||||||
new_range.clone(),
|
new_range.clone(),
|
||||||
&mut at_start,
|
&mut at_start,
|
||||||
&mut nesting,
|
&mut nesting,
|
||||||
|kind| kind == stop_kind,
|
parent_kind.is_none(),
|
||||||
);
|
);
|
||||||
|
|
||||||
if let Some(newborns) = reparsed {
|
if let Some(newborns) = reparsed {
|
||||||
|
@ -58,6 +58,7 @@ pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return)
|
|||||||
pub const MATH_EXPR: SyntaxSet = syntax_set!(
|
pub const MATH_EXPR: SyntaxSet = syntax_set!(
|
||||||
Hash,
|
Hash,
|
||||||
MathIdent,
|
MathIdent,
|
||||||
|
FieldAccess,
|
||||||
Text,
|
Text,
|
||||||
MathShorthand,
|
MathShorthand,
|
||||||
Linebreak,
|
Linebreak,
|
||||||
@ -104,7 +105,7 @@ pub const ATOMIC_CODE_PRIMARY: SyntaxSet = syntax_set!(
|
|||||||
Numeric,
|
Numeric,
|
||||||
Str,
|
Str,
|
||||||
Label,
|
Label,
|
||||||
RawDelim,
|
Raw,
|
||||||
);
|
);
|
||||||
|
|
||||||
/// Syntax kinds that are unary operators.
|
/// Syntax kinds that are unary operators.
|
||||||
|
@ -11,14 +11,32 @@ name = "tests"
|
|||||||
path = "src/tests.rs"
|
path = "src/tests.rs"
|
||||||
harness = false
|
harness = false
|
||||||
|
|
||||||
|
[features]
|
||||||
|
# Allow just compiling the parser when only testing typst-syntax. To do so,
|
||||||
|
# pass '--no-default-features' to 'cargo test'.
|
||||||
|
default = [
|
||||||
|
# "typst-syntax" intentionally not present
|
||||||
|
"typst",
|
||||||
|
"typst-assets",
|
||||||
|
"typst-dev-assets",
|
||||||
|
"typst-library",
|
||||||
|
"typst-pdf",
|
||||||
|
"typst-render",
|
||||||
|
"typst-svg",
|
||||||
|
"typst-svg",
|
||||||
|
]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
typst = { workspace = true }
|
typst-syntax = { workspace = true }
|
||||||
typst-assets = { workspace = true, features = ["fonts"] }
|
# Mark other Typst crates as optional so we can use '--no-default-features'
|
||||||
typst-dev-assets = { workspace = true }
|
# to decrease compile times for parser testing.
|
||||||
typst-library = { workspace = true }
|
typst = { workspace = true, optional = true }
|
||||||
typst-pdf = { workspace = true }
|
typst-assets = { workspace = true, features = ["fonts"], optional = true }
|
||||||
typst-render = { workspace = true }
|
typst-dev-assets = { workspace = true, optional = true }
|
||||||
typst-svg = { workspace = true }
|
typst-library = { workspace = true, optional = true }
|
||||||
|
typst-pdf = { workspace = true, optional = true }
|
||||||
|
typst-render = { workspace = true, optional = true }
|
||||||
|
typst-svg = { workspace = true, optional = true }
|
||||||
clap = { workspace = true }
|
clap = { workspace = true }
|
||||||
comemo = { workspace = true }
|
comemo = { workspace = true }
|
||||||
ecow = { workspace = true }
|
ecow = { workspace = true }
|
||||||
|
BIN
tests/ref/single-right-bracket.png
Normal file
BIN
tests/ref/single-right-bracket.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 118 B |
@ -43,7 +43,9 @@ pub struct CliArguments {
|
|||||||
/// Runs SVG export.
|
/// Runs SVG export.
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub svg: bool,
|
pub svg: bool,
|
||||||
/// Displays the syntax tree.
|
/// Displays the syntax tree before running tests.
|
||||||
|
///
|
||||||
|
/// Note: This is ignored if using '--parser-compare'.
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub syntax: bool,
|
pub syntax: bool,
|
||||||
/// Displays only one line per test, hiding details about failures.
|
/// Displays only one line per test, hiding details about failures.
|
||||||
@ -55,6 +57,29 @@ pub struct CliArguments {
|
|||||||
/// How many threads to spawn when running the tests.
|
/// How many threads to spawn when running the tests.
|
||||||
#[arg(short = 'j', long)]
|
#[arg(short = 'j', long)]
|
||||||
pub num_threads: Option<usize>,
|
pub num_threads: Option<usize>,
|
||||||
|
/// Changes testing behavior for debugging the parser: With no argument,
|
||||||
|
/// outputs the concrete syntax trees of tests as files in
|
||||||
|
/// 'tests/store/syntax/'. With a directory as argument, will treat it as a
|
||||||
|
/// reference of correct syntax tree files and will print which output
|
||||||
|
/// syntax trees differ (viewing the diffs is on you).
|
||||||
|
///
|
||||||
|
/// This overrides the normal testing system. It parses, but does not run
|
||||||
|
/// the test suite.
|
||||||
|
///
|
||||||
|
/// If `cargo test` is run with `--no-default-features`, then compiling will
|
||||||
|
/// not include Typst's core crates, only typst-syntax, greatly speeding up
|
||||||
|
/// debugging when changing the parser.
|
||||||
|
///
|
||||||
|
/// You can generate a correct reference directory by running on a known
|
||||||
|
/// good commit and copying the generated outputs to a new directory.
|
||||||
|
/// `_things` may be a good location as it is in the top-level gitignore.
|
||||||
|
///
|
||||||
|
/// You can view diffs in VS Code with: `code --diff <ref_dir>/<test>.syntax
|
||||||
|
/// tests/store/syntax/<test>.syntax`
|
||||||
|
#[arg(long)]
|
||||||
|
pub parser_compare: Option<Option<PathBuf>>,
|
||||||
|
// ^ I'm not using a subcommand here because then test patterns don't parse
|
||||||
|
// how you would expect and I'm too lazy to try to fix it.
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CliArguments {
|
impl CliArguments {
|
||||||
|
@ -6,8 +6,8 @@ use std::str::FromStr;
|
|||||||
use std::sync::LazyLock;
|
use std::sync::LazyLock;
|
||||||
|
|
||||||
use ecow::{eco_format, EcoString};
|
use ecow::{eco_format, EcoString};
|
||||||
use typst::syntax::package::PackageVersion;
|
use typst_syntax::package::PackageVersion;
|
||||||
use typst::syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
|
use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
|
||||||
use unscanny::Scanner;
|
use unscanny::Scanner;
|
||||||
|
|
||||||
/// Collects all tests from all files.
|
/// Collects all tests from all files.
|
||||||
|
@ -2,7 +2,16 @@ use std::io::{self, IsTerminal, StderrLock, Write};
|
|||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
use crate::collect::Test;
|
use crate::collect::Test;
|
||||||
use crate::run::TestResult;
|
|
||||||
|
/// The result of running a single test.
|
||||||
|
pub struct TestResult {
|
||||||
|
/// The error log for this test. If empty, the test passed.
|
||||||
|
pub errors: String,
|
||||||
|
/// The info log for this test.
|
||||||
|
pub infos: String,
|
||||||
|
/// Whether the image was mismatched.
|
||||||
|
pub mismatched_image: bool,
|
||||||
|
}
|
||||||
|
|
||||||
/// Receives status updates by individual test runs.
|
/// Receives status updates by individual test runs.
|
||||||
pub struct Logger<'a> {
|
pub struct Logger<'a> {
|
||||||
@ -58,7 +67,7 @@ impl<'a> Logger<'a> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if result.is_ok() {
|
if result.errors.is_empty() {
|
||||||
self.passed += 1;
|
self.passed += 1;
|
||||||
} else {
|
} else {
|
||||||
self.failed += 1;
|
self.failed += 1;
|
||||||
|
@ -12,6 +12,7 @@ use typst::WorldExt;
|
|||||||
use typst_pdf::PdfOptions;
|
use typst_pdf::PdfOptions;
|
||||||
|
|
||||||
use crate::collect::{FileSize, NoteKind, Test};
|
use crate::collect::{FileSize, NoteKind, Test};
|
||||||
|
use crate::logger::TestResult;
|
||||||
use crate::world::TestWorld;
|
use crate::world::TestWorld;
|
||||||
|
|
||||||
/// Runs a single test.
|
/// Runs a single test.
|
||||||
@ -21,23 +22,6 @@ pub fn run(test: &Test) -> TestResult {
|
|||||||
Runner::new(test).run()
|
Runner::new(test).run()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The result of running a single test.
|
|
||||||
pub struct TestResult {
|
|
||||||
/// The error log for this test. If empty, the test passed.
|
|
||||||
pub errors: String,
|
|
||||||
/// The info log for this test.
|
|
||||||
pub infos: String,
|
|
||||||
/// Whether the image was mismatched.
|
|
||||||
pub mismatched_image: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TestResult {
|
|
||||||
/// Whether the test passed.
|
|
||||||
pub fn is_ok(&self) -> bool {
|
|
||||||
self.errors.is_empty()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Write a line to a log sink, defaulting to the test's error log.
|
/// Write a line to a log sink, defaulting to the test's error log.
|
||||||
macro_rules! log {
|
macro_rules! log {
|
||||||
(into: $sink:expr, $($tts:tt)*) => {
|
(into: $sink:expr, $($tts:tt)*) => {
|
||||||
|
@ -1,13 +1,19 @@
|
|||||||
//! Typst's test runner.
|
//! Typst's test runner.
|
||||||
|
|
||||||
|
#![cfg_attr(not(feature = "default"), allow(dead_code, unused_imports))]
|
||||||
|
|
||||||
mod args;
|
mod args;
|
||||||
mod collect;
|
mod collect;
|
||||||
mod custom;
|
|
||||||
mod logger;
|
mod logger;
|
||||||
|
|
||||||
|
#[cfg(feature = "default")]
|
||||||
|
mod custom;
|
||||||
|
#[cfg(feature = "default")]
|
||||||
mod run;
|
mod run;
|
||||||
|
#[cfg(feature = "default")]
|
||||||
mod world;
|
mod world;
|
||||||
|
|
||||||
use std::path::Path;
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::LazyLock;
|
use std::sync::LazyLock;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
@ -16,7 +22,8 @@ use parking_lot::Mutex;
|
|||||||
use rayon::iter::{ParallelBridge, ParallelIterator};
|
use rayon::iter::{ParallelBridge, ParallelIterator};
|
||||||
|
|
||||||
use crate::args::{CliArguments, Command};
|
use crate::args::{CliArguments, Command};
|
||||||
use crate::logger::Logger;
|
use crate::collect::Test;
|
||||||
|
use crate::logger::{Logger, TestResult};
|
||||||
|
|
||||||
/// The parsed command line arguments.
|
/// The parsed command line arguments.
|
||||||
static ARGS: LazyLock<CliArguments> = LazyLock::new(CliArguments::parse);
|
static ARGS: LazyLock<CliArguments> = LazyLock::new(CliArguments::parse);
|
||||||
@ -27,6 +34,9 @@ const SUITE_PATH: &str = "tests/suite";
|
|||||||
/// The directory where the full test results are stored.
|
/// The directory where the full test results are stored.
|
||||||
const STORE_PATH: &str = "tests/store";
|
const STORE_PATH: &str = "tests/store";
|
||||||
|
|
||||||
|
/// The directory where syntax trees are stored.
|
||||||
|
const SYNTAX_PATH: &str = "tests/store/syntax";
|
||||||
|
|
||||||
/// The directory where the reference images are stored.
|
/// The directory where the reference images are stored.
|
||||||
const REF_PATH: &str = "tests/ref";
|
const REF_PATH: &str = "tests/ref";
|
||||||
|
|
||||||
@ -89,6 +99,21 @@ fn test() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let parser_dirs = ARGS.parser_compare.clone().map(create_syntax_store);
|
||||||
|
#[cfg(not(feature = "default"))]
|
||||||
|
let parser_dirs = parser_dirs.or_else(|| Some(create_syntax_store(None)));
|
||||||
|
|
||||||
|
let runner = |test: &Test| {
|
||||||
|
if let Some((live_path, ref_path)) = &parser_dirs {
|
||||||
|
run_parser_test(test, live_path, ref_path)
|
||||||
|
} else {
|
||||||
|
#[cfg(feature = "default")]
|
||||||
|
return run::run(test);
|
||||||
|
#[cfg(not(feature = "default"))]
|
||||||
|
unreachable!();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Run the tests.
|
// Run the tests.
|
||||||
let logger = Mutex::new(Logger::new(selected, skipped));
|
let logger = Mutex::new(Logger::new(selected, skipped));
|
||||||
std::thread::scope(|scope| {
|
std::thread::scope(|scope| {
|
||||||
@ -112,7 +137,7 @@ fn test() {
|
|||||||
// to `typst::utils::Deferred` yielding.
|
// to `typst::utils::Deferred` yielding.
|
||||||
tests.iter().par_bridge().for_each(|test| {
|
tests.iter().par_bridge().for_each(|test| {
|
||||||
logger.lock().start(test);
|
logger.lock().start(test);
|
||||||
let result = std::panic::catch_unwind(|| run::run(test));
|
let result = std::panic::catch_unwind(|| runner(test));
|
||||||
logger.lock().end(test, result);
|
logger.lock().end(test, result);
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -142,3 +167,46 @@ fn undangle() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn create_syntax_store(ref_path: Option<PathBuf>) -> (&'static Path, Option<PathBuf>) {
|
||||||
|
if ref_path.as_ref().is_some_and(|p| !p.exists()) {
|
||||||
|
eprintln!("syntax reference path doesn't exist");
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
let live_path = Path::new(SYNTAX_PATH);
|
||||||
|
std::fs::remove_dir_all(live_path).ok();
|
||||||
|
std::fs::create_dir_all(live_path).unwrap();
|
||||||
|
(live_path, ref_path)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_parser_test(
|
||||||
|
test: &Test,
|
||||||
|
live_path: &Path,
|
||||||
|
ref_path: &Option<PathBuf>,
|
||||||
|
) -> TestResult {
|
||||||
|
let mut result = TestResult {
|
||||||
|
errors: String::new(),
|
||||||
|
infos: String::new(),
|
||||||
|
mismatched_image: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
let syntax_file = live_path.join(format!("{}.syntax", test.name));
|
||||||
|
let tree = format!("{:#?}\n", test.source.root());
|
||||||
|
std::fs::write(syntax_file, &tree).unwrap();
|
||||||
|
|
||||||
|
let Some(ref_path) = ref_path else { return result };
|
||||||
|
let ref_file = ref_path.join(format!("{}.syntax", test.name));
|
||||||
|
match std::fs::read_to_string(&ref_file) {
|
||||||
|
Ok(ref_tree) => {
|
||||||
|
if tree != ref_tree {
|
||||||
|
result.errors = "differs".to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
result.errors = format!("missing reference: {}", ref_file.display());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
29
tests/suite/math/symbols.typ
Normal file
29
tests/suite/math/symbols.typ
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
// Test math symbol edge cases.
|
||||||
|
|
||||||
|
--- math-symbol-basic ---
|
||||||
|
#let sym = symbol("s", ("basic", "s"))
|
||||||
|
#test($sym.basic$, $#"s"$)
|
||||||
|
|
||||||
|
--- math-symbol-underscore ---
|
||||||
|
#let sym = symbol("s", ("test_underscore", "s"))
|
||||||
|
// Error: 6-10 unknown symbol modifier
|
||||||
|
$sym.test_underscore$
|
||||||
|
|
||||||
|
--- math-symbol-dash ---
|
||||||
|
#let sym = symbol("s", ("test-dash", "s"))
|
||||||
|
// Error: 6-10 unknown symbol modifier
|
||||||
|
$sym.test-dash$
|
||||||
|
|
||||||
|
--- math-symbol-double ---
|
||||||
|
#let sym = symbol("s", ("test.basic", "s"))
|
||||||
|
#test($sym.test.basic$, $#"s"$)
|
||||||
|
|
||||||
|
--- math-symbol-double-underscore ---
|
||||||
|
#let sym = symbol("s", ("one.test_underscore", "s"))
|
||||||
|
// Error: 10-14 unknown symbol modifier
|
||||||
|
$sym.one.test_underscore$
|
||||||
|
|
||||||
|
--- math-symbol-double-dash ---
|
||||||
|
#let sym = symbol("s", ("one.test-dash", "s"))
|
||||||
|
// Error: 10-14 unknown symbol modifier
|
||||||
|
$sym.one.test-dash$
|
@ -38,7 +38,7 @@ multiline.
|
|||||||
--- heading-trailing-whitespace ---
|
--- heading-trailing-whitespace ---
|
||||||
// Whether headings contain trailing whitespace with or without comments/labels.
|
// Whether headings contain trailing whitespace with or without comments/labels.
|
||||||
// Labels are special cased to immediately end headings in the parser, but also
|
// Labels are special cased to immediately end headings in the parser, but also
|
||||||
// have unique whitespace behavior.
|
// #strike[have unique whitespace behavior] Now their behavior is consistent!
|
||||||
|
|
||||||
#let join(..xs) = xs.pos().join()
|
#let join(..xs) = xs.pos().join()
|
||||||
#let head(h) = heading(depth: 1, h)
|
#let head(h) = heading(depth: 1, h)
|
||||||
@ -49,19 +49,20 @@ multiline.
|
|||||||
#test(head[h], [= h<a>])
|
#test(head[h], [= h<a>])
|
||||||
#test(head[h], [= h/**/<b>])
|
#test(head[h], [= h/**/<b>])
|
||||||
|
|
||||||
// Label behaves differently than normal trailing space and comment.
|
// #strike[Label behaves differently than normal trailing space and comment.]
|
||||||
#test(head(join[h][ ]), [= h ])
|
// Now they behave the same!
|
||||||
#test(head(join[h][ ]), [= h /**/])
|
#test(join(head[h])[ ], [= h ])
|
||||||
|
#test(join(head[h])[ ], [= h /**/])
|
||||||
#test(join(head[h])[ ], [= h <c>])
|
#test(join(head[h])[ ], [= h <c>])
|
||||||
|
|
||||||
// Combinations.
|
// Combinations.
|
||||||
#test(head(join[h][ ][ ]), [= h /**/ ])
|
#test(join(head[h])[ ][ ], [= h /**/ ])
|
||||||
#test(join(head[h])[ ][ ], [= h <d> ])
|
#test(join(head[h])[ ][ ], [= h <d> ])
|
||||||
#test(head(join[h][ ]), [= h /**/<e>])
|
#test(join(head[h])[ ], [= h /**/<e>])
|
||||||
#test(join(head[h])[ ], [= h/**/ <f>])
|
#test(join(head[h])[ ], [= h/**/ <f>])
|
||||||
|
|
||||||
// The first space attaches, but not the second
|
// #strike[The first space attaches, but not the second] Now neither attaches!
|
||||||
#test(join(head(join[h][ ]))[ ], [= h /**/ <g>])
|
#test(join(head(join[h]))[ ][ ], [= h /**/ <g>])
|
||||||
|
|
||||||
--- heading-leading-whitespace ---
|
--- heading-leading-whitespace ---
|
||||||
// Test that leading whitespace and comments don't matter.
|
// Test that leading whitespace and comments don't matter.
|
||||||
|
@ -34,6 +34,51 @@ _Shopping list_
|
|||||||
- C
|
- C
|
||||||
- D
|
- D
|
||||||
|
|
||||||
|
--- list-indent-trivia-nesting ---
|
||||||
|
// Test indent nesting behavior with odd trivia (comments and spaces).
|
||||||
|
|
||||||
|
#let indented = [
|
||||||
|
- a
|
||||||
|
/**/- b
|
||||||
|
/**/ - c
|
||||||
|
/*spanning
|
||||||
|
multiple
|
||||||
|
lines */ - d
|
||||||
|
- e
|
||||||
|
/**/ - f
|
||||||
|
/**/ - g
|
||||||
|
]
|
||||||
|
// Current behavior is that list columns are based on the first non-whitespace
|
||||||
|
// element in their line, so the block comments here determine the column the
|
||||||
|
// list starts at
|
||||||
|
|
||||||
|
#let item = list.item
|
||||||
|
#let manual = {
|
||||||
|
[ ]
|
||||||
|
item({
|
||||||
|
[a]
|
||||||
|
[ ]
|
||||||
|
item[b]
|
||||||
|
[ ]; [ ]
|
||||||
|
item({
|
||||||
|
[c]
|
||||||
|
[ ]; [ ]
|
||||||
|
item[d]
|
||||||
|
})
|
||||||
|
[ ]
|
||||||
|
item({
|
||||||
|
[e]
|
||||||
|
[ ]; [ ]
|
||||||
|
item[f]
|
||||||
|
[ ]; [ ]
|
||||||
|
item[g]
|
||||||
|
})
|
||||||
|
})
|
||||||
|
[ ]
|
||||||
|
}
|
||||||
|
|
||||||
|
#test(indented, manual)
|
||||||
|
|
||||||
--- list-tabs ---
|
--- list-tabs ---
|
||||||
// This works because tabs are used consistently.
|
// This works because tabs are used consistently.
|
||||||
- A with 1 tab
|
- A with 1 tab
|
||||||
|
@ -135,6 +135,9 @@
|
|||||||
// Error: 2-3 unexpected closing brace
|
// Error: 2-3 unexpected closing brace
|
||||||
#}
|
#}
|
||||||
|
|
||||||
|
--- single-right-bracket ---
|
||||||
|
]
|
||||||
|
|
||||||
--- content-block-in-markup-scope ---
|
--- content-block-in-markup-scope ---
|
||||||
// Content blocks also create a scope.
|
// Content blocks also create a scope.
|
||||||
#[#let x = 1]
|
#[#let x = 1]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user