Better math argument parsing (#5008)

This commit is contained in:
Max 2025-01-09 10:49:06 +00:00 committed by GitHub
parent e2b37fef33
commit be6629c7cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 308 additions and 80 deletions

View File

@ -685,8 +685,7 @@ mod tests {
// Named-params.
test(s, "$ foo(bar: y) $", &["foo"]);
// This should be updated when we improve named-param parsing:
test(s, "$ foo(x-y: 1, bar-z: 2) $", &["bar", "foo"]);
test(s, "$ foo(x-y: 1, bar-z: 2) $", &["foo"]);
// Field access in math.
test(s, "$ foo.bar $", &["foo"]);

View File

@ -82,8 +82,9 @@ use crate::text::TextElem;
/// - Within them, Typst is still in "math mode". Thus, you can write math
/// directly into them, but need to use hash syntax to pass code expressions
/// (except for strings, which are available in the math syntax).
/// - They support positional and named arguments, but don't support trailing
/// content blocks and argument spreading.
/// - They support positional and named arguments, as well as argument
/// spreading.
/// - They don't support trailing content blocks.
/// - They provide additional syntax for 2-dimensional argument lists. The
/// semicolon (`;`) merges preceding arguments separated by commas into an
/// array argument.
@ -92,6 +93,7 @@ use crate::text::TextElem;
/// $ frac(a^2, 2) $
/// $ vec(1, 2, delim: "[") $
/// $ mat(1, 2; 3, 4) $
/// $ mat(..#range(1, 5).chunks(2)) $
/// $ lim_x =
/// op("lim", limits: #true)_x $
/// ```

View File

@ -616,6 +616,11 @@ impl Lexer<'_> {
'~' if self.s.eat_if('>') => SyntaxKind::MathShorthand,
'*' | '-' | '~' => SyntaxKind::MathShorthand,
'.' => SyntaxKind::Dot,
',' => SyntaxKind::Comma,
';' => SyntaxKind::Semicolon,
')' => SyntaxKind::RightParen,
'#' => SyntaxKind::Hash,
'_' => SyntaxKind::Underscore,
'$' => SyntaxKind::Dollar,
@ -685,6 +690,45 @@ impl Lexer<'_> {
}
SyntaxKind::Text
}
/// Handle named arguments in math function call.
pub fn maybe_math_named_arg(&mut self, start: usize) -> Option<SyntaxNode> {
let cursor = self.s.cursor();
self.s.jump(start);
if self.s.eat_if(is_id_start) {
self.s.eat_while(is_id_continue);
// Check that a colon directly follows the identifier, and not the
// `:=` or `::=` math shorthands.
if self.s.at(':') && !self.s.at(":=") && !self.s.at("::=") {
// Check that the identifier is not just `_`.
let node = if self.s.from(start) != "_" {
SyntaxNode::leaf(SyntaxKind::Ident, self.s.from(start))
} else {
let msg = SyntaxError::new("expected identifier, found underscore");
SyntaxNode::error(msg, self.s.from(start))
};
return Some(node);
}
}
self.s.jump(cursor);
None
}
/// Tries to lex the `..` of a spread argument in a math function call.
///
/// Scanning restarts at `start`. The two dots count as spread syntax only
/// when they are followed by neither a space (or the end of input) nor a
/// third dot, since `...` is a math shorthand.
///
/// Returns `Some` with a `Dots` leaf on success, leaving the scanner right
/// after the `..`. Returns `None` otherwise, with the scanner moved back to
/// where it was before the call.
pub fn maybe_math_spread_arg(&mut self, start: usize) -> Option<SyntaxNode> {
    let saved = self.s.cursor();
    self.s.jump(start);

    // `..` must be glued to what it spreads, and must not be part of `...`.
    let is_spread = self.s.eat_if("..") && !(self.space_or_end() || self.s.at('.'));
    if is_spread {
        Some(SyntaxNode::leaf(SyntaxKind::Dots, self.s.from(start)))
    } else {
        self.s.jump(saved);
        None
    }
}
}
/// Code.

View File

@ -217,16 +217,20 @@ fn math(p: &mut Parser, stop_set: SyntaxSet) {
p.wrap(m, SyntaxKind::Math);
}
/// Parses a sequence of math expressions.
fn math_exprs(p: &mut Parser, stop_set: SyntaxSet) {
/// Parses a sequence of math expressions. Returns the number of expressions
/// parsed.
fn math_exprs(p: &mut Parser, stop_set: SyntaxSet) -> usize {
debug_assert!(stop_set.contains(SyntaxKind::End));
let mut count = 0;
while !p.at_set(stop_set) {
if p.at_set(set::MATH_EXPR) {
math_expr(p);
count += 1;
} else {
p.unexpected();
}
}
count
}
/// Parses a single math expression: This includes math elements like
@ -254,6 +258,13 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
}
}
SyntaxKind::Dot
| SyntaxKind::Comma
| SyntaxKind::Semicolon
| SyntaxKind::RightParen => {
p.convert_and_eat(SyntaxKind::Text);
}
SyntaxKind::Text | SyntaxKind::MathShorthand => {
continuable = matches!(
math_class(p.current_text()),
@ -398,7 +409,13 @@ fn math_delimited(p: &mut Parser) {
while !p.at_set(syntax_set!(Dollar, End)) {
if math_class(p.current_text()) == Some(MathClass::Closing) {
p.wrap(m2, SyntaxKind::Math);
p.eat();
// We could be at the shorthand `|]`, which shouldn't be converted
// to a `Text` kind.
if p.at(SyntaxKind::RightParen) {
p.convert_and_eat(SyntaxKind::Text);
} else {
p.eat();
}
p.wrap(m, SyntaxKind::MathDelimited);
return;
}
@ -455,94 +472,90 @@ fn math_args(p: &mut Parser) {
let m = p.marker();
p.convert_and_eat(SyntaxKind::LeftParen);
let mut namable = true;
let mut named = None;
let mut positional = true;
let mut has_arrays = false;
let mut array = p.marker();
let mut arg = p.marker();
// The number of math expressions per argument.
let mut count = 0;
while !p.at_set(syntax_set!(Dollar, End)) {
if namable
&& (p.at(SyntaxKind::MathIdent) || p.at(SyntaxKind::Text))
&& p.text[p.current_end()..].starts_with(':')
{
p.convert_and_eat(SyntaxKind::Ident);
p.convert_and_eat(SyntaxKind::Colon);
named = Some(arg);
arg = p.marker();
array = p.marker();
}
let mut maybe_array_start = p.marker();
let mut seen = HashSet::new();
while !p.at_set(syntax_set!(End, Dollar, RightParen)) {
positional = math_arg(p, &mut seen);
match p.current_text() {
")" => break,
";" => {
maybe_wrap_in_math(p, arg, count, named);
p.wrap(array, SyntaxKind::Array);
p.convert_and_eat(SyntaxKind::Semicolon);
array = p.marker();
arg = p.marker();
count = 0;
namable = true;
named = None;
has_arrays = true;
continue;
}
"," => {
maybe_wrap_in_math(p, arg, count, named);
p.convert_and_eat(SyntaxKind::Comma);
arg = p.marker();
count = 0;
namable = true;
if named.is_some() {
array = p.marker();
named = None;
match p.current() {
SyntaxKind::Comma => {
p.eat();
if !positional {
maybe_array_start = p.marker();
}
continue;
}
_ => {}
}
SyntaxKind::Semicolon => {
if !positional {
maybe_array_start = p.marker();
}
if p.at_set(set::MATH_EXPR) {
math_expr(p);
count += 1;
} else {
p.unexpected();
}
namable = false;
}
if arg != p.marker() {
maybe_wrap_in_math(p, arg, count, named);
if named.is_some() {
array = p.marker();
// Parses an array: `a, b, c;`.
// The semicolon merges preceding arguments separated by commas
// into an array argument.
p.wrap(maybe_array_start, SyntaxKind::Array);
p.eat();
maybe_array_start = p.marker();
has_arrays = true;
}
SyntaxKind::End | SyntaxKind::Dollar | SyntaxKind::RightParen => {}
_ => p.expected("comma or semicolon"),
}
}
if has_arrays && array != p.marker() {
p.wrap(array, SyntaxKind::Array);
}
if p.at(SyntaxKind::Text) && p.current_text() == ")" {
p.convert_and_eat(SyntaxKind::RightParen);
} else {
p.expected("closing paren");
p.balanced = false;
// Check if we need to wrap the preceding arguments in an array.
if maybe_array_start != p.marker() && has_arrays && positional {
p.wrap(maybe_array_start, SyntaxKind::Array);
}
p.expect_closing_delimiter(m, SyntaxKind::RightParen);
p.wrap(m, SyntaxKind::Args);
}
/// Wrap math function arguments to join adjacent math content or create an
/// empty 'Math' node for when we have 0 args.
/// Parses a single argument in a math argument list.
///
/// We don't wrap when `count == 1`, since wrapping would change the type of the
/// expression from potentially non-content to content. Ex: `$ func(#12pt) $`
/// would change the type from size to content if wrapped.
fn maybe_wrap_in_math(p: &mut Parser, arg: Marker, count: usize, named: Option<Marker>) {
/// Returns whether the parsed argument was positional or not.
fn math_arg<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>) -> bool {
let m = p.marker();
let start = p.current_start();
if p.at(SyntaxKind::Dot) {
// Parses a spread argument: `..args`.
if let Some(spread) = p.lexer.maybe_math_spread_arg(start) {
p.token.node = spread;
p.eat();
math_expr(p);
p.wrap(m, SyntaxKind::Spread);
return true;
}
}
let mut positional = true;
if p.at_set(syntax_set!(Text, MathIdent, Underscore)) {
// Parses a named argument: `thickness: #12pt`.
if let Some(named) = p.lexer.maybe_math_named_arg(start) {
p.token.node = named;
let text = p.current_text();
p.eat();
p.convert_and_eat(SyntaxKind::Colon);
if !seen.insert(text) {
p[m].convert_to_error(eco_format!("duplicate argument: {text}"));
}
positional = false;
}
}
// Parses a normal positional argument.
let arg = p.marker();
let count = math_exprs(p, syntax_set!(End, Dollar, Comma, Semicolon, RightParen));
if count == 0 {
// Named argument requires a value.
if !positional {
p.expected("expression");
}
// Flush trivia so that the new empty Math node will be wrapped _inside_
// any `SyntaxKind::Array` elements created in `math_args`.
// (And if we don't follow by wrapping in an array, it has no effect.)
@ -553,13 +566,19 @@ fn maybe_wrap_in_math(p: &mut Parser, arg: Marker, count: usize, named: Option<M
p.flush_trivia();
}
// Wrap math function arguments to join adjacent math content or create an
// empty 'Math' node for when we have 0 args. We don't wrap when
// `count == 1`, since wrapping would change the type of the expression
// from potentially non-content to content. Ex: `$ func(#12pt) $` would
// change the type from size to content if wrapped.
if count != 1 {
p.wrap(arg, SyntaxKind::Math);
}
if let Some(m) = named {
if !positional {
p.wrap(m, SyntaxKind::Named);
}
positional
}
/// Parses the contents of a code block.

View File

@ -59,6 +59,10 @@ pub const MATH_EXPR: SyntaxSet = syntax_set!(
Hash,
MathIdent,
FieldAccess,
Dot,
Comma,
Semicolon,
RightParen,
Text,
MathShorthand,
Linebreak,

Binary file not shown.

After

Width:  |  Height:  |  Size: 526 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 119 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 489 B

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1017 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

View File

@ -8,6 +8,112 @@ $ pi(a,) $
$ pi(a,b) $
$ pi(a,b,) $
--- math-call-unclosed-func ---
#let func(x) = x
// Error: 6-7 unclosed delimiter
$func(a$
--- math-call-unclosed-non-func ---
// Error: 5-6 unclosed delimiter
$sin(x$
--- math-call-named-args ---
#let func1(my: none) = my
#let func2(_my: none) = _my
#let func3(my-body: none) = my-body
#let func4(_my-body: none) = _my-body
#let func5(m: none) = m
$ func1(my: a) $
$ func2(_my: a) $
$ func3(my-body: a) $
$ func4(_my-body: a) $
$ func5(m: a) $
$ func5(m: sigma : f) $
$ func5(m: sigma:pi) $
--- math-call-named-args-no-expr ---
#let func(m: none) = m
// Error: 10 expected expression
$ func(m: ) $
--- math-call-named-args-duplicate ---
#let func(my: none) = my
// Error: 15-17 duplicate argument: my
$ func(my: a, my: b) $
--- math-call-named-args-shorthand-clash-1 ---
#let func(m: none) = m
// Error: 18-21 unexpected argument
$func(m: =) func(m:=)$
--- math-call-named-args-shorthand-clash-2 ---
#let func(m: none) = m
// Error: 41-45 unexpected argument
$func(m::) func(m: :=) func(m:: =) func(m::=)$
--- math-call-named-single-underscore ---
#let func(x) = x
// Error: 8-9 expected identifier, found underscore
$ func(_: a) $
--- math-call-named-single-char-error ---
#let func(m: none) = m
// Error: 8-13 unexpected argument
$ func(m : a) $
--- math-call-named-args-repr ---
#let args(..body) = body
#let check(it, r) = test-repr(it.body.text, r)
#check($args(_a: a)$, "arguments(_a: [a])")
#check($args(_a-b: a)$, "arguments(_a-b: [a])")
#check($args(a-b: a)$, "arguments(a-b: [a])")
#check($args(a-b-c: a)$, "arguments(a-b-c: [a])")
#check($args(a--c: a)$, "arguments(a--c: [a])")
#check($args(a: a-b)$, "arguments(a: sequence([a], [], [b]))")
#check($args(a-b: a-b)$, "arguments(a-b: sequence([a], [], [b]))")
#check($args(a-b)$, "arguments(sequence([a], [], [b]))")
--- math-call-spread-content-error ---
#let args(..body) = body
// Error: 7-16 cannot spread content
$args(..(a + b))$
--- math-call-spread-multiple-exprs ---
#let args(..body) = body
// Error: 10 expected comma or semicolon
$args(..a + b)$
--- math-call-spread-unexpected-dots ---
#let args(..body) = body
// Error: 8-10 unexpected dots
$args(#..range(1, 5).chunks(2))$
--- math-call-spread-shorthand-clash ---
#let func(body) = body
$func(...)$
--- math-call-spread-repr ---
#let args(..body) = body
#let check(it, r) = test-repr(it.body.text, r)
#check($args(..#range(0, 4).chunks(2))$, "arguments((0, 1), (2, 3))")
#check($#args(range(1, 5).chunks(2))$, "arguments(((1, 2), (3, 4)))")
#check($#args(..range(1, 5).chunks(2))$, "arguments((1, 2), (3, 4))")
#check($args(#(..range(2, 6).chunks(2)))$, "arguments(((2, 3), (4, 5)))")
#let nums = range(0, 4).chunks(2)
#check($args(..nums)$, "arguments((0, 1), (2, 3))")
#check($args(..nums;)$, "arguments(((0, 1), (2, 3)))")
#check($args(..nums, ..nums)$, "arguments((0, 1), (2, 3), (0, 1), (2, 3))")
#check($args(..nums, 4, 5)$, "arguments((0, 1), (2, 3), [4], [5])")
#check($args(..nums, ..#range(4, 6))$, "arguments((0, 1), (2, 3), 4, 5)")
#check($args(..nums, #range(4, 6))$, "arguments((0, 1), (2, 3), (4, 5))")
#check($args(..nums, 1, 2; 3, 4)$, "arguments(((0, 1), (2, 3), [1], [2]), ([3], [4]))")
#check($args(1, 2; ..nums)$, "arguments(([1], [2]), ((0, 1), (2, 3)))")
#check($args(1, 2; 3, 4)$, "arguments(([1], [2]), ([3], [4]))")
#check($args(1, 2; 3, 4; ..#range(5, 7))$, "arguments(([1], [2]), ([3], [4]), (5, 6))")
#check($args(1, 2; 3, 4, ..#range(5, 7))$, "arguments(([1], [2]), ([3], [4], 5, 6))")
#check($args(1, 2; 3, 4, ..#range(5, 7);)$, "arguments(([1], [2]), ([3], [4], 5, 6))")
#check($args(1, 2; 3, 4, ..#range(5, 7),)$, "arguments(([1], [2]), ([3], [4], 5, 6))")
--- math-call-repr ---
#let args(..body) = body
#let check(it, r) = test-repr(it.body.text, r)
@ -35,6 +141,34 @@ $ mat(#"code"; "wins") $
#check($args(a,b;c)$, "arguments(([a], [b]), ([c],))")
#check($args(a,b;c,d;e,f)$, "arguments(([a], [b]), ([c], [d]), ([e], [f]))")
--- math-call-2d-named-repr ---
#let args(..body) = (body.pos(), body.named())
#let check(it, r) = test-repr(it.body.text, r)
#check($args(a: b)$, "((), (a: [b]))")
#check($args(1, 2; 3, 4)$, "((([1], [2]), ([3], [4])), (:))")
#check($args(a: b, 1, 2; 3, 4)$, "((([1], [2]), ([3], [4])), (a: [b]))")
#check($args(1, a: b, 2; 3, 4)$, "(([1], ([2],), ([3], [4])), (a: [b]))")
#check($args(1, 2, a: b; 3, 4)$, "(([1], [2], (), ([3], [4])), (a: [b]))")
#check($args(1, 2; a: b, 3, 4)$, "((([1], [2]), ([3], [4])), (a: [b]))")
#check($args(1, 2; 3, a: b, 4)$, "((([1], [2]), [3], ([4],)), (a: [b]))")
#check($args(1, 2; 3, 4, a: b)$, "((([1], [2]), [3], [4]), (a: [b]))")
#check($args(a: b, 1, 2, 3, c: d)$, "(([1], [2], [3]), (a: [b], c: [d]))")
#check($args(1, 2, 3; a: b)$, "((([1], [2], [3]),), (a: [b]))")
#check($args(a-b: a,, e:f;; d)$, "(([], (), ([],), ([d],)), (a-b: [a], e: [f]))")
#check($args(a: b, ..#range(0, 4))$, "((0, 1, 2, 3), (a: [b]))")
--- math-call-2d-escape-repr ---
#let args(..body) = body
#let check(it, r) = test-repr(it.body.text, r)
#check($args(a\;b)$, "arguments(sequence([a], [;], [b]))")
#check($args(a\,b;c)$, "arguments((sequence([a], [,], [b]),), ([c],))")
#check($args(b\;c\,d;e)$, "arguments((sequence([b], [;], [c], [,], [d]),), ([e],))")
#check($args(a\: b)$, "arguments(sequence([a], [:], [ ], [b]))")
#check($args(a : b)$, "arguments(sequence([a], [ ], [:], [ ], [b]))")
#check($args(\..a)$, "arguments(sequence([.], [.], [a]))")
#check($args(.. a)$, "arguments(sequence([.], [.], [ ], [a]))")
#check($args(a..b)$, "arguments(sequence([a], [.], [.], [b]))")
--- math-call-2d-repr-structure ---
#let args(..body) = body
#let check(it, r) = test-repr(it.body.text, r)

View File

@ -54,6 +54,30 @@ $ a + mat(delim: #none, 1, 2; 3, 4) + b $
$ mat(1, 2; 3, 4; delim: "[") $,
)
--- math-mat-spread ---
// Test argument spreading in matrix.
$ mat(..#range(1, 5).chunks(2))
mat(#(..range(2).map(_ => range(2)))) $
#let nums = ((1,) * 5).intersperse(0).chunks(3)
$ mat(..nums, delim: "[") $
--- math-mat-spread-1d ---
$ mat(..#range(1, 5) ; 1, ..#range(2, 5))
mat(..#range(1, 3), ..#range(3, 5) ; ..#range(1, 4), 4) $
--- math-mat-spread-2d ---
#let nums = range(0, 2).map(i => (i, i+1))
$ mat(..nums, delim: "|",)
mat(..nums; delim: "|",) $
$ mat(..nums) mat(..nums;) \
mat(..nums;,) mat(..nums,) $
--- math-mat-spread-expected-array-error ---
#let nums = range(0, 2).map(i => (i, i+1))
// Error: 15-16 expected array, found content
$ mat(..nums, 0, 1) $
--- math-mat-gap ---
#set math.mat(gap: 1em)
$ mat(1, 2; 3, 4) $
@ -61,6 +85,8 @@ $ mat(1, 2; 3, 4) $
--- math-mat-gaps ---
#set math.mat(row-gap: 1em, column-gap: 2em)
$ mat(1, 2; 3, 4) $
$ mat(column-gap: #1em, 1, 2; 3, 4)
mat(row-gap: #2em, 1, 2; 3, 4) $
--- math-mat-augment ---
// Test matrix line drawing (augmentation).