Parse braced expressions and bracketed calls in headers 🗳

- Refactors the tokenizer to be lazy: It does not emit pre-parsed function tokens, but instead allows its mode to be changed. The modes are tracked on a stack to allow nested compute/typesetting (pop/push).
- Introduces delimited groups into the parser, which make it easy to parse delimited expressions without handling the delimiters in the parsing code for the group's content. A group is started with `start_group`. When reaching the group's end (matching delimiter) the eat and peek methods will simply return `None` instead of the delimiter, stopping the content parser and bubbling up the call stack until `end_group` is called to clear up the situation.
This commit is contained in:
Laurenz 2020-08-17 16:25:09 +02:00
parent 8a80503188
commit 3cbca56a71
6 changed files with 396 additions and 388 deletions

View File

@ -1,18 +1,17 @@
use criterion::{criterion_group, criterion_main, Criterion};
use typstc::syntax::parsing::parse;
use typstc::syntax::span::Pos;
// 28 not too dense lines.
const COMA: &str = include_str!("../tests/coma.typ");
fn parsing_benchmark(c: &mut Criterion) {
c.bench_function("parse-coma-28-lines", |b| {
b.iter(|| parse(COMA, Pos::ZERO))
b.iter(|| parse(COMA))
});
let long = COMA.repeat(100);
c.bench_function("parse-coma-2800-lines", |b| {
b.iter(|| parse(&long, Pos::ZERO))
b.iter(|| parse(&long))
});
}

View File

@ -270,7 +270,7 @@ impl<V> SpannedEntry<V> {
/// Create an entry with the same span for key and value.
pub fn val(val: Spanned<V>) -> Self {
Self { key: Span::ZERO, val }
Self { key: val.span, val }
}
/// Convert from `&SpannedEntry<T>` to `SpannedEntry<&T>`

View File

@ -123,7 +123,7 @@ impl<'a> TreeLayouter<'a> {
..self.ctx
}).await;
self.feedback.extend_offset(pass.feedback, call.span.start);
self.feedback.extend(pass.feedback);
if let Value::Commands(commands) = pass.output {
for command in commands {

View File

@ -87,7 +87,7 @@ impl Typesetter {
/// Parse source code into a syntax tree.
pub fn parse(&self, src: &str) -> Pass<SyntaxTree> {
parse(src, Pos::ZERO)
parse(src)
}
/// Layout a syntax tree and return the produced layout.

View File

@ -12,94 +12,110 @@ use super::tree::{CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr};
use super::Ident;
/// Parse a string of source code.
///
/// All spans in the resulting tree and feedback are offset by the given
/// `offset` position. This is used to make spans of a function body relative to
/// the start of the function as a whole as opposed to the start of the
/// function's body.
pub fn parse(src: &str, offset: Pos) -> Pass<SyntaxTree> {
let mut tree = SyntaxTree::new();
let mut par = SyntaxTree::new();
let mut feedback = Feedback::new();
for token in Tokens::new(src, offset, TokenMode::Body) {
let span = token.span;
let node = match token.v {
// Starting from two newlines counts as a paragraph break, a single
// newline does not.
Token::Space(newlines) => if newlines < 2 {
SyntaxNode::Spacing
} else {
// End the current paragraph if it is not empty.
if let (Some(first), Some(last)) = (par.first(), par.last()) {
let span = Span::merge(first.span, last.span);
let node = SyntaxNode::Par(std::mem::take(&mut par));
tree.push(Spanned::new(node, span));
}
continue;
}
Token::Function { header, body, terminated } => {
let parsed = FuncParser::new(header, body).parse();
feedback.extend_offset(parsed.feedback, span.start);
if !terminated {
error!(@feedback, Span::at(span.end), "expected closing bracket");
}
SyntaxNode::Call(parsed.output)
}
Token::Star => SyntaxNode::ToggleBolder,
Token::Underscore => SyntaxNode::ToggleItalic,
Token::Backslash => SyntaxNode::Linebreak,
Token::Raw { raw, terminated } => {
if !terminated {
error!(@feedback, Span::at(span.end), "expected backtick");
}
SyntaxNode::Raw(unescape_raw(raw))
}
Token::Text(text) => SyntaxNode::Text(text.to_string()),
Token::LineComment(_) | Token::BlockComment(_) => continue,
unexpected => {
error!(@feedback, span, "unexpected {}", unexpected.name());
continue;
}
};
par.push(Spanned::new(node, span));
}
if let (Some(first), Some(last)) = (par.first(), par.last()) {
let span = Span::merge(first.span, last.span);
let node = SyntaxNode::Par(par);
tree.push(Spanned::new(node, span));
}
Pass::new(tree, feedback)
pub fn parse(src: &str) -> Pass<SyntaxTree> {
Parser::new(src).parse()
}
struct FuncParser<'s> {
struct Parser<'s> {
tokens: Tokens<'s>,
peeked: Option<Option<Spanned<Token<'s>>>>,
body: Option<Spanned<&'s str>>,
delimiters: Vec<(Pos, Token<'static>)>,
feedback: Feedback,
}
impl<'s> FuncParser<'s> {
fn new(header: &'s str, body: Option<Spanned<&'s str>>) -> Self {
impl<'s> Parser<'s> {
fn new(src: &'s str) -> Self {
Self {
// Start at column 1 because the opening bracket is also part of
// the function, but not part of the `header` string.
tokens: Tokens::new(header, Pos::new(0, 1), TokenMode::Header),
tokens: Tokens::new(src, TokenMode::Body),
peeked: None,
body,
delimiters: vec![],
feedback: Feedback::new(),
}
}
fn parse(mut self) -> Pass<CallExpr> {
let after_bracket = self.pos();
fn parse(mut self) -> Pass<SyntaxTree> {
let tree = self.parse_body_contents();
Pass::new(tree, self.feedback)
}
}
// Typesetting content.
impl Parser<'_> {
    /// Parse typesetting content into a syntax tree.
    ///
    /// Tokens are consumed until `peek` yields `None`, which happens at the
    /// end of the source or at the closing delimiter of an open group. Nodes
    /// are collected into paragraphs; whitespace with two or more newlines
    /// finishes the current paragraph.
    fn parse_body_contents(&mut self) -> SyntaxTree {
        let mut tree = SyntaxTree::new();
        let mut par = SyntaxTree::new();

        while let Some(token) = self.peek() {
            let node = match token.v {
                // A single newline does not break the paragraph.
                Token::Space(newlines) if newlines < 2 => {
                    self.with_span(SyntaxNode::Spacing)
                }

                // Starting from two newlines counts as a paragraph break.
                Token::Space(_) => {
                    // Only a non-empty paragraph is moved into the tree.
                    if let (Some(first), Some(last)) = (par.first(), par.last()) {
                        let span = Span::merge(first.span, last.span);
                        let finished = std::mem::take(&mut par);
                        tree.push(Spanned::new(SyntaxNode::Par(finished), span));
                    }

                    self.eat();
                    continue;
                }

                // Comments produce no node.
                Token::LineComment(_) | Token::BlockComment(_) => {
                    self.eat();
                    continue;
                }

                Token::LeftBracket => {
                    self.parse_bracket_call().map(SyntaxNode::Call)
                }

                Token::Star => self.with_span(SyntaxNode::ToggleBolder),
                Token::Underscore => self.with_span(SyntaxNode::ToggleItalic),
                Token::Backslash => self.with_span(SyntaxNode::Linebreak),

                Token::Raw { raw, terminated } => {
                    if !terminated {
                        error!(
                            @self.feedback, Span::at(token.span.end),
                            "expected backtick",
                        );
                    }

                    self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
                }

                Token::Text(text) => {
                    let owned = text.to_string();
                    self.with_span(SyntaxNode::Text(owned))
                }

                // Anything else is consumed and reported.
                unexpected => {
                    self.eat();
                    error!(
                        @self.feedback, token.span,
                        "unexpected {}", unexpected.name(),
                    );
                    continue;
                }
            };

            par.push(node);
        }

        // Move whatever is left of the last paragraph into the tree.
        if let (Some(first), Some(last)) = (par.first(), par.last()) {
            let span = Span::merge(first.span, last.span);
            tree.push(Spanned::new(SyntaxNode::Par(par), span));
        }

        tree
    }
}
// Function calls.
impl Parser<'_> {
fn parse_bracket_call(&mut self) -> Spanned<CallExpr> {
self.start_group(Delimiter::Bracket);
self.tokens.push_mode(TokenMode::Header);
let after_bracket = self.pos();
self.skip_white();
let name = self.parse_ident().unwrap_or_else(|| {
self.expected_found_or_at("function name", after_bracket);
@ -107,36 +123,105 @@ impl<'s> FuncParser<'s> {
});
self.skip_white();
let mut args = match self.eat().map(Spanned::value) {
Some(Token::Colon) => self.parse_table(false).0.v,
let mut args = match self.eatv() {
Some(Token::Colon) => self.parse_table_contents().0,
Some(_) => {
self.expected_at("colon", name.span.end);
while self.eat().is_some() {}
TableExpr::new()
}
None => TableExpr::new(),
};
if let Some(body) = self.body {
args.push(SpannedEntry::val(body.map(|src| {
let parsed = parse(src, body.span.start);
self.feedback.extend(parsed.feedback);
Expr::Tree(parsed.output)
})));
self.tokens.pop_mode();
let mut span = self.end_group();
if self.check(Token::LeftBracket) {
self.start_group(Delimiter::Bracket);
self.tokens.push_mode(TokenMode::Body);
let body = self.parse_body_contents();
self.tokens.pop_mode();
let body_span = self.end_group();
let expr = Expr::Tree(body);
args.push(SpannedEntry::val(Spanned::new(expr, body_span)));
span.expand(body_span);
}
Pass::new(CallExpr { name, args }, self.feedback)
Spanned::new(CallExpr { name, args }, span)
}
/// Parse the parenthesized arguments of a call to the already parsed
/// `name`.
///
/// The resulting span reaches from the name to the end of the paren group.
fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
    self.start_group(Delimiter::Paren);
    let (args, _) = self.parse_table_contents();
    let group_span = self.end_group();

    let call_span = Span::merge(name.span, group_span);
    Spanned::new(CallExpr { name, args }, call_span)
}
}
// Parsing expressions and values
impl FuncParser<'_> {
fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
self.peek().and_then(|token| match token.v {
Token::Ident(id) => self.eat_span(Ident(id.to_string())),
_ => None,
})
}
// Tables.
impl Parser<'_> {
    /// Parse the entries of a table until `eof` reports that no further
    /// token is available.
    ///
    /// Returns the table together with a flag telling whether it can be
    /// coerced into a single value: the flag is `true` only if the table is
    /// non-empty, no keyed entry was inserted and no entry was followed by
    /// further tokens.
    fn parse_table_contents(&mut self) -> (TableExpr, bool) {
        let mut table = TableExpr::new();
        let mut comma_and_keyless = true;

        loop {
            self.skip_white();
            if self.eof() {
                break;
            }

            let (key, val) = match self.parse_ident() {
                // An identifier may start a keyed entry, a paren call or be
                // a plain identifier value.
                Some(ident) => {
                    self.skip_white();

                    match self.peekv() {
                        Some(Token::Equals) => {
                            self.eat();
                            self.skip_white();

                            let value = try_opt_or!(self.parse_expr(), {
                                self.expected("value");
                                continue;
                            });

                            (Some(ident), value)
                        }

                        Some(Token::LeftParen) => {
                            let call = self.parse_paren_call(ident);
                            (None, call.map(Expr::Call))
                        }

                        _ => (None, ident.map(Expr::Ident)),
                    }
                }

                // Otherwise this has to be a keyless expression.
                None => {
                    let value = try_opt_or!(self.parse_expr(), {
                        self.expected("value");
                        continue;
                    });

                    (None, value)
                }
            };

            // Remember where a missing comma would have to be reported.
            let behind = val.span.end;

            match key {
                Some(key) => {
                    comma_and_keyless = false;
                    table.insert(key.v.0, SpannedEntry::new(key.span, val));
                    self.feedback.decorations
                        .push(Spanned::new(Decoration::TableKey, key.span));
                }
                None => table.push(SpannedEntry::val(val)),
            }

            self.skip_white();
            if self.eof() {
                break;
            }

            self.expect_at(Token::Comma, behind);
            comma_and_keyless = false;
        }

        let coercable = comma_and_keyless && !table.is_empty();
        (table, coercable)
    }
}
// Expressions and values.
impl Parser<'_> {
fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
self.parse_binops("summand", Self::parse_term, |token| match token {
Token::Plus => Some(Expr::Add),
@ -206,37 +291,37 @@ impl FuncParser<'_> {
fn parse_value(&mut self) -> Option<Spanned<Expr>> {
let Spanned { v: token, span } = self.peek()?;
match token {
Some(match token {
// This could be a function call or an identifier.
Token::Ident(id) => {
let name = Spanned::new(Ident(id.to_string()), span);
self.eat();
self.skip_white();
Some(if self.check(Token::LeftParen) {
self.parse_func_call(name).map(|call| Expr::Call(call))
if self.check(Token::LeftParen) {
self.parse_paren_call(name).map(|call| Expr::Call(call))
} else {
name.map(|id| Expr::Ident(id))
})
}
}
Token::Str { string, terminated } => {
if !terminated {
self.expected_at("quote", span.end);
}
self.eat_span(Expr::Str(unescape_string(string)))
self.with_span(Expr::Str(unescape_string(string)))
}
Token::Bool(b) => self.eat_span(Expr::Bool(b)),
Token::Number(n) => self.eat_span(Expr::Number(n)),
Token::Length(s) => self.eat_span(Expr::Length(s)),
Token::Bool(b) => self.with_span(Expr::Bool(b)),
Token::Number(n) => self.with_span(Expr::Number(n)),
Token::Length(s) => self.with_span(Expr::Length(s)),
Token::Hex(s) => {
if let Ok(color) = RgbaColor::from_str(s) {
self.eat_span(Expr::Color(color))
self.with_span(Expr::Color(color))
} else {
// Heal color by assuming black.
error!(@self.feedback, span, "invalid color");
let healed = RgbaColor::new_healed(0, 0, 0, 255);
self.eat_span(Expr::Color(healed))
self.with_span(Expr::Color(healed))
}
}
@ -244,128 +329,54 @@ impl FuncParser<'_> {
// a table in any case and coerce the table into a value if it is
// coercable (length 1 and no trailing comma).
Token::LeftParen => {
let (table, coercable) = self.parse_table(true);
Some(if coercable {
table.map(|v| {
v.into_values()
.next()
.expect("table is coercable").val.v
})
self.start_group(Delimiter::Paren);
let (table, coercable) = self.parse_table_contents();
let span = self.end_group();
let expr = if coercable {
table.into_values()
.next()
.expect("table is coercable").val.v
} else {
table.map(|tab| Expr::Table(tab))
})
Expr::Table(table)
};
Spanned::new(expr, span)
}
// This is a content expression.
Token::LeftBrace => {
self.start_group(Delimiter::Brace);
self.tokens.push_mode(TokenMode::Body);
let tree = self.parse_body_contents();
self.tokens.pop_mode();
let span = self.end_group();
Spanned::new(Expr::Tree(tree), span)
}
// This is a bracketed function call.
Token::LeftBracket => {
let call = self.parse_bracket_call();
let tree = vec![call.map(|c| SyntaxNode::Call(c))];
Spanned::new(Expr::Tree(tree), span)
}
_ => return None,
})
}
fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
self.peek().and_then(|token| match token.v {
Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))),
_ => None,
}
}
fn parse_func_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
let args = self.parse_table(true).0;
let span = Span::merge(name.span, args.span);
Spanned::new(CallExpr { name, args: args.v }, span)
}
/// Set `parens` to true, when this should expect an opening paren and stop
/// at the balanced closing paren (this is the case for normal tables and
/// round-paren function calls). Set it to false, when this is used to parse
/// the top-level function arguments.
///
/// The returned boolean tells you whether the table can be coerced into an
/// expression (this is the case when it's length 1 and has no trailing
/// comma).
fn parse_table(&mut self, parens: bool) -> (Spanned<TableExpr>, bool) {
let start = self.pos();
if parens {
self.assert(Token::LeftParen);
}
let mut table = TableExpr::new();
let mut coercable = true;
loop {
self.skip_white();
if self.eof() || (parens && self.check(Token::RightParen)) {
break;
}
let behind_arg;
if let Some(ident) = self.parse_ident() {
// This could be a keyword argument, a function call or a simple
// identifier.
self.skip_white();
if self.check_eat(Token::Equals).is_some() {
self.skip_white();
let key = ident;
self.feedback.decorations
.push(Spanned::new(Decoration::TableKey, key.span));
let val = try_opt_or!(self.parse_expr(), {
self.expected("value");
continue;
});
coercable = false;
behind_arg = val.span.end;
table.insert(key.v.0, SpannedEntry::new(key.span, val));
} else if self.check(Token::LeftParen) {
let call = self.parse_func_call(ident);
let expr = call.map(|call| Expr::Call(call));
behind_arg = expr.span.end;
table.push(SpannedEntry::val(expr));
} else {
let expr = ident.map(|id| Expr::Ident(id));
behind_arg = expr.span.end;
table.push(SpannedEntry::val(expr));
}
} else {
// It's a positional argument.
let expr = try_opt_or!(self.parse_expr(), {
self.expected("value");
continue;
});
behind_arg = expr.span.end;
table.push(SpannedEntry::val(expr));
}
self.skip_white();
if self.eof() || (parens && self.check(Token::RightParen)) {
break;
}
self.expect_at(Token::Comma, behind_arg);
coercable = false;
}
if parens {
self.expect(Token::RightParen);
}
coercable = coercable && !table.is_empty();
let end = self.pos();
(Spanned::new(table, Span::new(start, end)), coercable)
})
}
}
// Error handling
impl FuncParser<'_> {
fn expect(&mut self, token: Token<'_>) -> bool {
if self.check(token) {
self.eat();
true
} else {
self.expected(token.name());
false
}
}
// Error handling.
impl Parser<'_> {
fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
if self.check(token) {
self.eat();
@ -400,40 +411,58 @@ impl FuncParser<'_> {
}
}
// Parsing primitives
impl<'s> FuncParser<'s> {
fn skip_white(&mut self) {
loop {
match self.peek().map(Spanned::value) {
Some(Token::Space(_))
| Some(Token::LineComment(_))
| Some(Token::BlockComment(_)) => { self.eat(); }
_ => break,
// Parsing primitives.
impl<'s> Parser<'s> {
/// Start a delimited group.
///
/// The next token must be the opening token of `delimiter`; it is consumed
/// (this is asserted). The position before it and the matching closing
/// token are pushed onto the delimiter stack.
fn start_group(&mut self, delimiter: Delimiter) {
    let entry = (self.pos(), delimiter.end());
    self.assert(delimiter.start());
    self.delimiters.push(entry);
}
/// End the innermost group and return its span.
///
/// The group's content must have been parsed to its end, i.e. `peek` has to
/// return `None` (this is asserted). If the pending token is the group's
/// closing delimiter it is consumed; otherwise an error is reported at the
/// current position and the pending token is left untouched.
///
/// Panics if no group was started.
fn end_group(&mut self) -> Span {
    assert_eq!(self.peek(), None, "unfinished group");

    let (start, end_token) = self.delimiters.pop()
        .expect("group was not started");

    // The call to `peek` above has filled the peek slot, so the outer
    // option is always `Some` here.
    let closing = self.peeked.unwrap().filter(|token| token.v == end_token);

    if let Some(token) = closing {
        self.peeked = None;
        return Span::new(start, token.span.end);
    }

    let end = self.pos();
    error!(
        @self.feedback, Span::at(end),
        "expected {}", end_token.name(),
    );
    Span::new(start, end)
}
fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
self.peeked.take().unwrap_or_else(|| self.tokens.next())
/// Consume tokens for as long as the next one is a space or a comment.
fn skip_white(&mut self) {
    loop {
        match self.peekv() {
            Some(Token::Space(_))
            | Some(Token::LineComment(_))
            | Some(Token::BlockComment(_)) => {
                self.eat();
            }
            _ => break,
        }
    }
}
fn eat_span<T>(&mut self, v: T) -> Option<Spanned<T>> {
self.eat().map(|spanned| spanned.map(|_| v))
fn eatv(&mut self) -> Option<Token<'s>> {
self.eat().map(Spanned::value)
}
fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
let tokens = &mut self.tokens;
*self.peeked.get_or_insert_with(|| tokens.next())
fn peekv(&mut self) -> Option<Token<'s>> {
self.peek().map(Spanned::value)
}
fn assert(&mut self, token: Token<'_>) {
assert!(self.check_eat(token).is_some());
}
fn check(&mut self, token: Token<'_>) -> bool {
self.peek().map(Spanned::value) == Some(token)
}
fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
if self.check(token) {
self.eat()
@ -442,10 +471,39 @@ impl<'s> FuncParser<'s> {
}
}
fn check(&mut self, token: Token<'_>) -> bool {
self.peekv() == Some(token)
}
fn with_span<T>(&mut self, v: T) -> Spanned<T> {
let span = self.eat().expect("expected token").span;
Spanned::new(v, span)
}
fn eof(&mut self) -> bool {
self.peek().is_none()
}
/// Consume and return the next token.
///
/// Returns `None` without consuming anything whenever `peek` returns
/// `None`.
fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
    let next = self.peek();
    if next.is_some() {
        self.peeked = None;
    }
    next
}
/// Look at the next token without consuming it.
///
/// Returns `None` when the tokens are exhausted or when the next token is
/// the closing delimiter of any group that is currently open.
fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
    // Fetch the next token only if none is buffered yet.
    if self.peeked.is_none() {
        self.peeked = Some(self.tokens.next());
    }

    let token = self.peeked.flatten()?;

    // A closing delimiter that matches an open group ends that group.
    let closes_group = Delimiter::is_delimiter(token.v)
        && self.delimiters.iter().rev().any(|&(_, end)| token.v == end);

    if closes_group {
        None
    } else {
        Some(token)
    }
}
fn pos(&self) -> Pos {
self.peeked
.flatten()
@ -454,6 +512,38 @@ impl<'s> FuncParser<'s> {
}
}
/// The kinds of delimiter pairs that can enclose a group.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum Delimiter {
    /// Parentheses.
    Paren,
    /// Brackets.
    Bracket,
    /// Braces.
    Brace,
}

impl Delimiter {
    /// Whether the token is the closing token of any delimiter kind.
    fn is_delimiter(token: Token<'_>) -> bool {
        match token {
            Token::RightParen | Token::RightBracket | Token::RightBrace => true,
            _ => false,
        }
    }

    /// The token that opens a group of this kind.
    fn start(self) -> Token<'static> {
        match self {
            Delimiter::Paren => Token::LeftParen,
            Delimiter::Bracket => Token::LeftBracket,
            Delimiter::Brace => Token::LeftBrace,
        }
    }

    /// The token that closes a group of this kind.
    fn end(self) -> Token<'static> {
        match self {
            Delimiter::Paren => Token::RightParen,
            Delimiter::Bracket => Token::RightBracket,
            Delimiter::Brace => Token::RightBrace,
        }
    }
}
fn unescape_string(string: &str) -> String {
let mut iter = string.chars();
let mut out = String::with_capacity(string.len());
@ -608,7 +698,7 @@ mod tests {
macro_rules! test {
(@spans=$spans:expr, $src:expr => $($tts:tt)*) => {
let exp = Tree![@$($tts)*];
let pass = parse($src, Pos::ZERO);
let pass = parse($src);
check($src, exp, pass.output, $spans);
};
}
@ -624,7 +714,7 @@ mod tests {
macro_rules! e {
($src:expr => $($tts:tt)*) => {
let exp = vec![$($tts)*];
let pass = parse($src, Pos::ZERO);
let pass = parse($src);
let found = pass.feedback.diagnostics.iter()
.map(|s| s.as_ref().map(|e| e.message.as_str()))
.collect::<Vec<_>>();
@ -636,7 +726,7 @@ mod tests {
macro_rules! d {
($src:expr => $($tts:tt)*) => {
let exp = vec![$($tts)*];
let pass = parse($src, Pos::ZERO);
let pass = parse($src);
check($src, exp, pass.feedback.decorations, true);
};
}
@ -717,6 +807,15 @@ mod tests {
e!("[val : 12, /* \n */ 14]" => );
}
#[test]
fn test_parse_groups() {
// A closing paren where the function name is expected, followed by the
// end of the source without a closing bracket.
e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"),
s(0,2, 0,2, "expected closing bracket"));
// A closing bracket inside an open brace group: the missing closing brace
// is reported and the brace after the bracket is reported as unexpected.
e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"),
s(0,5, 0,6, "unexpected closing brace"));
}
#[test]
fn test_parse_function_names() {
// No closing bracket.
@ -760,19 +859,29 @@ mod tests {
t!("[val: 1][*Hi*]" => P![F!("val"; Num(1.0), Tree![P![B, T("Hi"), B]])]);
e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment"));
// Raw in body.
t!("[val][`Hi]`" => P![F!("val"; Tree![P![R!["Hi]"]]])]);
e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket"));
// Crazy.
t!("[v][[v][v][v]]" => P![F!("v"; Tree![P![
F!("v"; Tree![P![T("v")]]), F!("v")
]])]);
// Spanned.
ts!(" [box][Oh my]" => s(0,0, 0,13, P![
s(0,0, 0,1, S),
s(0,1, 0,13, F!(s(0,1, 0,4, "box");
s(0,6, 0,11, Tree![s(0,6, 0,11, P![
s(0,6, 0,8, T("Oh")), s(0,8, 0,9, S), s(0,9, 0,11, T("my"))
s(0,1, 0,13, F!(s(0,2, 0,5, "box");
s(0,6, 0,13, Tree![s(0,7, 0,12, P![
s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my"))
])])
))
]));
}
#[test]
fn test_parse_simple_values() {
fn test_parse_values() {
// Simple.
v!("_" => Id("_"));
v!("name" => Id("name"));
v!("α" => Id("α"));
@ -787,6 +896,12 @@ mod tests {
v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00)));
v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string"));
// Content.
v!("{_hi_}" => Tree![P![I, T("hi"), I]]);
e!("[val: {_hi_}]" => );
v!("[hi]" => Tree![F!["hi"]]);
e!("[val: [hi]]" => );
// Healed colors.
v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff)));
e!("[val: #12345]" => s(0,6, 0,12, "invalid color"));
@ -925,7 +1040,7 @@ mod tests {
v!("(\x07 abc,)" => Table![Id("abc")]);
e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token"));
e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma"));
e!("[val: [hi]]" => s(0,6, 0,10, "expected value, found function"));
e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren"));
// Expected comma.
v!("(true false)" => Table![Bool(true), Bool(false)]);

View File

@ -22,27 +22,10 @@ pub enum Token<'s> {
/// can contain nested block comments.
BlockComment(&'s str),
/// A function invocation.
Function {
/// The header string:
/// ```typst
/// [header: args][body]
/// ^^^^^^^^^^^^
/// ```
header: &'s str,
/// The spanned body string:
/// ```typst
/// [header][hello *world*]
/// ^^^^^^^^^^^^^
/// ^-- The span is relative to right before this bracket
/// ```
body: Option<Spanned<&'s str>>,
/// Whether the last closing bracket was present.
/// - `[func]` or `[func][body]` => terminated
/// - `[func` or `[func][body` => not terminated
terminated: bool,
},
/// A left bracket starting a function invocation or body: `[`.
LeftBracket,
/// A right bracket ending a function invocation or body: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
@ -119,7 +102,8 @@ impl<'s> Token<'s> {
Space(_) => "space",
LineComment(_) => "line comment",
BlockComment(_) => "block comment",
Function { .. } => "function",
LeftBracket => "opening bracket",
RightBracket => "closing bracket",
LeftParen => "opening paren",
RightParen => "closing paren",
LeftBrace => "opening brace",
@ -141,7 +125,6 @@ impl<'s> Token<'s> {
Backslash => "backslash",
Raw { .. } => "raw text",
Text(_) => "text",
Invalid("]") => "closing bracket",
Invalid("*/") => "end of block comment",
Invalid(_) => "invalid token",
}
@ -152,8 +135,9 @@ impl<'s> Token<'s> {
#[derive(Debug)]
pub struct Tokens<'s> {
src: &'s str,
mode: TokenMode,
iter: Peekable<Chars<'s>>,
mode: TokenMode,
stack: Vec<TokenMode>,
pos: Pos,
index: usize,
}
@ -172,16 +156,29 @@ impl<'s> Tokens<'s> {
///
/// The first token's span starts at the zero position.
pub fn new(src: &'s str, offset: Pos, mode: TokenMode) -> Self {
pub fn new(src: &'s str, mode: TokenMode) -> Self {
Self {
src,
mode,
iter: src.chars().peekable(),
pos: offset,
mode,
stack: vec![],
pos: Pos::ZERO,
index: 0,
}
}
/// Switch to the given token mode, saving the previous mode on the stack.
pub fn push_mode(&mut self, mode: TokenMode) {
    let previous = std::mem::replace(&mut self.mode, mode);
    self.stack.push(previous);
}
/// Restore the token mode that was saved by the most recent `push_mode`.
///
/// # Panics
/// Panics if there is no mode on the stack.
pub fn pop_mode(&mut self) {
    let restored = self.stack.pop().expect("no pushed mode");
    self.mode = restored;
}
/// The index in the string at which the last token ends and next token will
/// start.
pub fn index(&self) -> usize {
@ -212,15 +209,15 @@ impl<'s> Iterator for Tokens<'s> {
// Whitespace.
c if c.is_whitespace() => self.read_whitespace(start),
// Functions.
'[' => self.read_function(start),
']' => Invalid("]"),
// Functions and blocks.
'[' => LeftBracket,
']' => RightBracket,
'{' => LeftBrace,
'}' => RightBrace,
// Syntactic elements in function headers.
'(' if self.mode == Header => LeftParen,
')' if self.mode == Header => RightParen,
'{' if self.mode == Header => LeftBrace,
'}' if self.mode == Header => RightBrace,
':' if self.mode == Header => Colon,
',' if self.mode == Header => Comma,
'=' if self.mode == Header => Equals,
@ -322,52 +319,6 @@ impl<'s> Tokens<'s> {
Space(end.line - start.line)
}
fn read_function(&mut self, start: Pos) -> Token<'s> {
let (header, terminated) = self.read_function_part(Header);
self.eat();
if self.peek() != Some('[') {
return Function { header, body: None, terminated };
}
self.eat();
let body_start = self.pos() - start;
let (body, terminated) = self.read_function_part(Body);
let body_end = self.pos() - start;
let span = Span::new(body_start, body_end);
self.eat();
Function { header, body: Some(Spanned { v: body, span }), terminated }
}
fn read_function_part(&mut self, mode: TokenMode) -> (&'s str, bool) {
let start = self.index();
let mut terminated = false;
while let Some(n) = self.peek() {
if n == ']' {
terminated = true;
break;
}
self.eat();
match n {
'[' => { self.read_function(Pos::ZERO); }
'/' if self.peek() == Some('/') => { self.read_line_comment(); }
'/' if self.peek() == Some('*') => { self.read_block_comment(); }
'"' if mode == Header => { self.read_string(); }
'`' if mode == Body => { self.read_raw(); }
'\\' => { self.eat(); }
_ => {}
}
}
let end = self.index();
(&self.src[start..end], terminated)
}
fn read_string(&mut self) -> Token<'s> {
let (string, terminated) = self.read_until_unescaped('"');
Str { string, terminated }
@ -540,6 +491,7 @@ mod tests {
use Token::{
Space as S,
LineComment as LC, BlockComment as BC,
LeftBracket as L, RightBracket as R,
LeftParen as LP, RightParen as RP,
LeftBrace as LB, RightBrace as RB,
Ident as Id,
@ -557,25 +509,12 @@ mod tests {
fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } }
fn Raw(raw: &str, terminated: bool) -> Token { Token::Raw { raw, terminated } }
macro_rules! F {
($h:expr, None, $t:expr) => {
Token::Function { header: $h, body: None, terminated: $t }
};
($h:expr, $b:expr, $t:expr) => {
Token::Function {
header: $h,
body: Some(Into::<Spanned<&str>>::into($b)),
terminated: $t,
}
};
}
macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
macro_rules! test {
(@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
let found = Tokens::new($src, Pos::ZERO, $mode).collect::<Vec<_>>();
let found = Tokens::new($src, $mode).collect::<Vec<_>>();
check($src, exp, found, $spans);
}
}
@ -616,7 +555,7 @@ mod tests {
fn tokenize_body_only_tokens() {
t!(Body, "_*" => Underscore, Star);
t!(Body, "***" => Star, Star, Star);
t!(Body, "[func]*bold*" => F!("func", None, true), Star, T("bold"), Star);
t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
t!(Body, "`raw`" => Raw("raw", true));
t!(Body, "`[func]`" => Raw("[func]", true));
@ -674,50 +613,6 @@ mod tests {
t!(Header, "\"🌎\"" => Str("🌎", true));
}
#[test]
fn tokenize_functions() {
t!(Body, "a[f]" => T("a"), F!("f", None, true));
t!(Body, "[f]a" => F!("f", None, true), T("a"));
t!(Body, "\n\n[f][ ]" => S(2), F!("f", " ", true));
t!(Body, "abc [f][ ]a" => T("abc"), S(0), F!("f", " ", true), T("a"));
t!(Body, "[f: [=][*]]" => F!("f: [=][*]", None, true));
t!(Body, "[_][[,],]," => F!("_", "[,],", true), T(","));
t!(Body, "[=][=][=]" => F!("=", "=", true), F!("=", None, true));
t!(Body, "[=][[=][=][=]]" => F!("=", "[=][=][=]", true));
t!(Header, "[" => F!("", None, false));
t!(Header, "]" => Invalid("]"));
}
#[test]
fn tokenize_correct_end_of_function() {
// End of function with strings and carets in headers
t!(Body, r#"[f: "]"# => F!(r#"f: "]"#, None, false));
t!(Body, "[f: \"s\"]" => F!("f: \"s\"", None, true));
t!(Body, r#"[f: \"\"\"]"# => F!(r#"f: \"\"\""#, None, true));
t!(Body, "[f: `]" => F!("f: `", None, true));
// End of function with strings and carets in bodies
t!(Body, "[f][\"]" => F!("f", s(0,4, 0,5, "\""), true));
t!(Body, r#"[f][\"]"# => F!("f", s(0,4, 0,6, r#"\""#), true));
t!(Body, "[f][`]" => F!("f", s(0,4, 0,6, "`]"), false));
t!(Body, "[f][\\`]" => F!("f", s(0,4, 0,6, "\\`"), true));
t!(Body, "[f][`raw`]" => F!("f", s(0,4, 0,9, "`raw`"), true));
t!(Body, "[f][`raw]" => F!("f", s(0,4, 0,9, "`raw]"), false));
t!(Body, "[f][`raw]`]" => F!("f", s(0,4, 0,10, "`raw]`"), true));
t!(Body, "[f][`\\`]" => F!("f", s(0,4, 0,8, "`\\`]"), false));
t!(Body, "[f][`\\\\`]" => F!("f", s(0,4, 0,8, "`\\\\`"), true));
// End of function with comments
t!(Body, "[f][/*]" => F!("f", s(0,4, 0,7, "/*]"), false));
t!(Body, "[f][/*`*/]" => F!("f", s(0,4, 0,9, "/*`*/"), true));
t!(Body, "[f: //]\n]" => F!("f: //]\n", None, true));
t!(Body, "[f: \"//]\n]" => F!("f: \"//]\n]", None, false));
// End of function with escaped brackets
t!(Body, "[f][\\]]" => F!("f", s(0,4, 0,6, "\\]"), true));
t!(Body, "[f][\\[]" => F!("f", s(0,4, 0,6, "\\["), true));
}
#[test]
fn tokenize_escaped_symbols() {
t!(Body, r"\\" => T(r"\"));
@ -746,7 +641,6 @@ mod tests {
fn tokenize_with_spans() {
ts!(Body, "hello" => s(0,0, 0,5, T("hello")));
ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c")));
ts!(Body, "[x = \"(1)\"]*" => s(0,0, 0,11, F!("x = \"(1)\"", None, true)), s(0,11, 0,12, Star));
ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f")));
ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore));
ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0)));