mirror of
https://github.com/typst/typst
synced 2025-05-13 20:46:23 +08:00
Parse braced expressions and bracketed calls in headers 🗳
- Refactors the tokenizer to be lazy: It does not emit pre-parsed function tokens, but instead allows its mode to be changed. The modes are tracked on a stack to allow nested compute/typesetting (pop/push).
- Introduces delimited groups into the parser, which make it easy to parse delimited expressions without handling the delimiters in the parsing code for the group's content. A group is started with `start_group`. When reaching the group's end (matching delimiter), the `eat` and `peek` methods will simply return `None` instead of the delimiter, stopping the content parser and bubbling up the call stack until `end_group` is called to clear up the situation.
This commit is contained in:
parent
8a80503188
commit
3cbca56a71
@ -1,18 +1,17 @@
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use typstc::syntax::parsing::parse;
|
||||
use typstc::syntax::span::Pos;
|
||||
|
||||
// 28 not too dense lines.
|
||||
const COMA: &str = include_str!("../tests/coma.typ");
|
||||
|
||||
fn parsing_benchmark(c: &mut Criterion) {
|
||||
c.bench_function("parse-coma-28-lines", |b| {
|
||||
b.iter(|| parse(COMA, Pos::ZERO))
|
||||
b.iter(|| parse(COMA))
|
||||
});
|
||||
|
||||
let long = COMA.repeat(100);
|
||||
c.bench_function("parse-coma-2800-lines", |b| {
|
||||
b.iter(|| parse(&long, Pos::ZERO))
|
||||
b.iter(|| parse(&long))
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -270,7 +270,7 @@ impl<V> SpannedEntry<V> {
|
||||
|
||||
/// Create an entry with the same span for key and value.
|
||||
pub fn val(val: Spanned<V>) -> Self {
|
||||
Self { key: Span::ZERO, val }
|
||||
Self { key: val.span, val }
|
||||
}
|
||||
|
||||
/// Convert from `&SpannedEntry<T>` to `SpannedEntry<&T>`
|
||||
|
@ -123,7 +123,7 @@ impl<'a> TreeLayouter<'a> {
|
||||
..self.ctx
|
||||
}).await;
|
||||
|
||||
self.feedback.extend_offset(pass.feedback, call.span.start);
|
||||
self.feedback.extend(pass.feedback);
|
||||
|
||||
if let Value::Commands(commands) = pass.output {
|
||||
for command in commands {
|
||||
|
@ -87,7 +87,7 @@ impl Typesetter {
|
||||
|
||||
/// Parse source code into a syntax tree.
|
||||
pub fn parse(&self, src: &str) -> Pass<SyntaxTree> {
|
||||
parse(src, Pos::ZERO)
|
||||
parse(src)
|
||||
}
|
||||
|
||||
/// Layout a syntax tree and return the produced layout.
|
||||
|
@ -12,94 +12,110 @@ use super::tree::{CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr};
|
||||
use super::Ident;
|
||||
|
||||
/// Parse a string of source code.
|
||||
///
|
||||
/// All spans in the resulting tree and feedback are offset by the given
|
||||
/// `offset` position. This is used to make spans of a function body relative to
|
||||
/// the start of the function as a whole as opposed to the start of the
|
||||
/// function's body.
|
||||
pub fn parse(src: &str, offset: Pos) -> Pass<SyntaxTree> {
|
||||
let mut tree = SyntaxTree::new();
|
||||
let mut par = SyntaxTree::new();
|
||||
let mut feedback = Feedback::new();
|
||||
|
||||
for token in Tokens::new(src, offset, TokenMode::Body) {
|
||||
let span = token.span;
|
||||
let node = match token.v {
|
||||
// Starting from two newlines counts as a paragraph break, a single
|
||||
// newline does not.
|
||||
Token::Space(newlines) => if newlines < 2 {
|
||||
SyntaxNode::Spacing
|
||||
} else {
|
||||
// End the current paragraph if it is not empty.
|
||||
if let (Some(first), Some(last)) = (par.first(), par.last()) {
|
||||
let span = Span::merge(first.span, last.span);
|
||||
let node = SyntaxNode::Par(std::mem::take(&mut par));
|
||||
tree.push(Spanned::new(node, span));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
Token::Function { header, body, terminated } => {
|
||||
let parsed = FuncParser::new(header, body).parse();
|
||||
feedback.extend_offset(parsed.feedback, span.start);
|
||||
if !terminated {
|
||||
error!(@feedback, Span::at(span.end), "expected closing bracket");
|
||||
}
|
||||
SyntaxNode::Call(parsed.output)
|
||||
}
|
||||
|
||||
Token::Star => SyntaxNode::ToggleBolder,
|
||||
Token::Underscore => SyntaxNode::ToggleItalic,
|
||||
Token::Backslash => SyntaxNode::Linebreak,
|
||||
Token::Raw { raw, terminated } => {
|
||||
if !terminated {
|
||||
error!(@feedback, Span::at(span.end), "expected backtick");
|
||||
}
|
||||
SyntaxNode::Raw(unescape_raw(raw))
|
||||
}
|
||||
Token::Text(text) => SyntaxNode::Text(text.to_string()),
|
||||
|
||||
Token::LineComment(_) | Token::BlockComment(_) => continue,
|
||||
unexpected => {
|
||||
error!(@feedback, span, "unexpected {}", unexpected.name());
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
par.push(Spanned::new(node, span));
|
||||
}
|
||||
|
||||
if let (Some(first), Some(last)) = (par.first(), par.last()) {
|
||||
let span = Span::merge(first.span, last.span);
|
||||
let node = SyntaxNode::Par(par);
|
||||
tree.push(Spanned::new(node, span));
|
||||
}
|
||||
|
||||
Pass::new(tree, feedback)
|
||||
pub fn parse(src: &str) -> Pass<SyntaxTree> {
|
||||
Parser::new(src).parse()
|
||||
}
|
||||
|
||||
struct FuncParser<'s> {
|
||||
struct Parser<'s> {
|
||||
tokens: Tokens<'s>,
|
||||
peeked: Option<Option<Spanned<Token<'s>>>>,
|
||||
body: Option<Spanned<&'s str>>,
|
||||
delimiters: Vec<(Pos, Token<'static>)>,
|
||||
feedback: Feedback,
|
||||
}
|
||||
|
||||
impl<'s> FuncParser<'s> {
|
||||
fn new(header: &'s str, body: Option<Spanned<&'s str>>) -> Self {
|
||||
impl<'s> Parser<'s> {
|
||||
fn new(src: &'s str) -> Self {
|
||||
Self {
|
||||
// Start at column 1 because the opening bracket is also part of
|
||||
// the function, but not part of the `header` string.
|
||||
tokens: Tokens::new(header, Pos::new(0, 1), TokenMode::Header),
|
||||
tokens: Tokens::new(src, TokenMode::Body),
|
||||
peeked: None,
|
||||
body,
|
||||
delimiters: vec![],
|
||||
feedback: Feedback::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse(mut self) -> Pass<CallExpr> {
|
||||
let after_bracket = self.pos();
|
||||
fn parse(mut self) -> Pass<SyntaxTree> {
|
||||
let tree = self.parse_body_contents();
|
||||
Pass::new(tree, self.feedback)
|
||||
}
|
||||
}
|
||||
|
||||
// Typesetting content.
|
||||
impl Parser<'_> {
|
||||
fn parse_body_contents(&mut self) -> SyntaxTree {
|
||||
let mut tree = SyntaxTree::new();
|
||||
let mut par = SyntaxTree::new();
|
||||
|
||||
while let Some(token) = self.peek() {
|
||||
par.push(match token.v {
|
||||
// Starting from two newlines counts as a paragraph break, a single
|
||||
// newline does not.
|
||||
Token::Space(newlines) => if newlines < 2 {
|
||||
self.with_span(SyntaxNode::Spacing)
|
||||
} else {
|
||||
// End the current paragraph if it is not empty.
|
||||
if let (Some(first), Some(last)) = (par.first(), par.last()) {
|
||||
let span = Span::merge(first.span, last.span);
|
||||
let node = SyntaxNode::Par(std::mem::take(&mut par));
|
||||
tree.push(Spanned::new(node, span));
|
||||
}
|
||||
self.eat();
|
||||
continue;
|
||||
}
|
||||
Token::LineComment(_) | Token::BlockComment(_) => {
|
||||
self.eat();
|
||||
continue
|
||||
}
|
||||
|
||||
Token::LeftBracket => {
|
||||
self.parse_bracket_call().map(|c| SyntaxNode::Call(c))
|
||||
}
|
||||
|
||||
Token::Star => self.with_span(SyntaxNode::ToggleBolder),
|
||||
Token::Underscore => self.with_span(SyntaxNode::ToggleItalic),
|
||||
Token::Backslash => self.with_span(SyntaxNode::Linebreak),
|
||||
|
||||
Token::Raw { raw, terminated } => {
|
||||
if !terminated {
|
||||
error!(
|
||||
@self.feedback, Span::at(token.span.end),
|
||||
"expected backtick",
|
||||
);
|
||||
}
|
||||
self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
|
||||
}
|
||||
|
||||
Token::Text(text) => {
|
||||
self.with_span(SyntaxNode::Text(text.to_string()))
|
||||
}
|
||||
|
||||
unexpected => {
|
||||
self.eat();
|
||||
error!(
|
||||
@self.feedback, token.span,
|
||||
"unexpected {}", unexpected.name(),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if let (Some(first), Some(last)) = (par.first(), par.last()) {
|
||||
let span = Span::merge(first.span, last.span);
|
||||
let node = SyntaxNode::Par(par);
|
||||
tree.push(Spanned::new(node, span));
|
||||
}
|
||||
|
||||
tree
|
||||
}
|
||||
}
|
||||
|
||||
// Function calls.
|
||||
impl Parser<'_> {
|
||||
fn parse_bracket_call(&mut self) -> Spanned<CallExpr> {
|
||||
self.start_group(Delimiter::Bracket);
|
||||
self.tokens.push_mode(TokenMode::Header);
|
||||
|
||||
let after_bracket = self.pos();
|
||||
self.skip_white();
|
||||
let name = self.parse_ident().unwrap_or_else(|| {
|
||||
self.expected_found_or_at("function name", after_bracket);
|
||||
@ -107,36 +123,105 @@ impl<'s> FuncParser<'s> {
|
||||
});
|
||||
|
||||
self.skip_white();
|
||||
let mut args = match self.eat().map(Spanned::value) {
|
||||
Some(Token::Colon) => self.parse_table(false).0.v,
|
||||
let mut args = match self.eatv() {
|
||||
Some(Token::Colon) => self.parse_table_contents().0,
|
||||
Some(_) => {
|
||||
self.expected_at("colon", name.span.end);
|
||||
while self.eat().is_some() {}
|
||||
TableExpr::new()
|
||||
}
|
||||
None => TableExpr::new(),
|
||||
};
|
||||
|
||||
if let Some(body) = self.body {
|
||||
args.push(SpannedEntry::val(body.map(|src| {
|
||||
let parsed = parse(src, body.span.start);
|
||||
self.feedback.extend(parsed.feedback);
|
||||
Expr::Tree(parsed.output)
|
||||
})));
|
||||
self.tokens.pop_mode();
|
||||
let mut span = self.end_group();
|
||||
|
||||
if self.check(Token::LeftBracket) {
|
||||
self.start_group(Delimiter::Bracket);
|
||||
self.tokens.push_mode(TokenMode::Body);
|
||||
|
||||
let body = self.parse_body_contents();
|
||||
|
||||
self.tokens.pop_mode();
|
||||
let body_span = self.end_group();
|
||||
|
||||
let expr = Expr::Tree(body);
|
||||
args.push(SpannedEntry::val(Spanned::new(expr, body_span)));
|
||||
span.expand(body_span);
|
||||
}
|
||||
|
||||
Pass::new(CallExpr { name, args }, self.feedback)
|
||||
Spanned::new(CallExpr { name, args }, span)
|
||||
}
|
||||
|
||||
fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
|
||||
self.start_group(Delimiter::Paren);
|
||||
let args = self.parse_table_contents().0;
|
||||
let args_span = self.end_group();
|
||||
let span = Span::merge(name.span, args_span);
|
||||
Spanned::new(CallExpr { name, args }, span)
|
||||
}
|
||||
}
|
||||
|
||||
// Parsing expressions and values
|
||||
impl FuncParser<'_> {
|
||||
fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
|
||||
self.peek().and_then(|token| match token.v {
|
||||
Token::Ident(id) => self.eat_span(Ident(id.to_string())),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
// Tables.
|
||||
impl Parser<'_> {
|
||||
fn parse_table_contents(&mut self) -> (TableExpr, bool) {
|
||||
let mut table = TableExpr::new();
|
||||
let mut comma_and_keyless = true;
|
||||
|
||||
while { self.skip_white(); !self.eof() } {
|
||||
let (key, val) = if let Some(ident) = self.parse_ident() {
|
||||
self.skip_white();
|
||||
|
||||
match self.peekv() {
|
||||
Some(Token::Equals) => {
|
||||
self.eat();
|
||||
self.skip_white();
|
||||
|
||||
(Some(ident), try_opt_or!(self.parse_expr(), {
|
||||
self.expected("value");
|
||||
continue;
|
||||
}))
|
||||
}
|
||||
|
||||
Some(Token::LeftParen) => {
|
||||
let call = self.parse_paren_call(ident);
|
||||
(None, call.map(|c| Expr::Call(c)))
|
||||
}
|
||||
|
||||
_ => (None, ident.map(|id| Expr::Ident(id)))
|
||||
}
|
||||
} else {
|
||||
(None, try_opt_or!(self.parse_expr(), {
|
||||
self.expected("value");
|
||||
continue;
|
||||
}))
|
||||
};
|
||||
|
||||
let behind = val.span.end;
|
||||
if let Some(key) = key {
|
||||
comma_and_keyless = false;
|
||||
table.insert(key.v.0, SpannedEntry::new(key.span, val));
|
||||
self.feedback.decorations
|
||||
.push(Spanned::new(Decoration::TableKey, key.span));
|
||||
} else {
|
||||
table.push(SpannedEntry::val(val));
|
||||
}
|
||||
|
||||
if { self.skip_white(); self.eof() } {
|
||||
break;
|
||||
}
|
||||
|
||||
self.expect_at(Token::Comma, behind);
|
||||
comma_and_keyless = false;
|
||||
}
|
||||
|
||||
let coercable = comma_and_keyless && !table.is_empty();
|
||||
(table, coercable)
|
||||
}
|
||||
}
|
||||
|
||||
// Expressions and values.
|
||||
impl Parser<'_> {
|
||||
fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
|
||||
self.parse_binops("summand", Self::parse_term, |token| match token {
|
||||
Token::Plus => Some(Expr::Add),
|
||||
@ -206,37 +291,37 @@ impl FuncParser<'_> {
|
||||
|
||||
fn parse_value(&mut self) -> Option<Spanned<Expr>> {
|
||||
let Spanned { v: token, span } = self.peek()?;
|
||||
match token {
|
||||
Some(match token {
|
||||
// This could be a function call or an identifier.
|
||||
Token::Ident(id) => {
|
||||
let name = Spanned::new(Ident(id.to_string()), span);
|
||||
self.eat();
|
||||
self.skip_white();
|
||||
Some(if self.check(Token::LeftParen) {
|
||||
self.parse_func_call(name).map(|call| Expr::Call(call))
|
||||
if self.check(Token::LeftParen) {
|
||||
self.parse_paren_call(name).map(|call| Expr::Call(call))
|
||||
} else {
|
||||
name.map(|id| Expr::Ident(id))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Token::Str { string, terminated } => {
|
||||
if !terminated {
|
||||
self.expected_at("quote", span.end);
|
||||
}
|
||||
self.eat_span(Expr::Str(unescape_string(string)))
|
||||
self.with_span(Expr::Str(unescape_string(string)))
|
||||
}
|
||||
|
||||
Token::Bool(b) => self.eat_span(Expr::Bool(b)),
|
||||
Token::Number(n) => self.eat_span(Expr::Number(n)),
|
||||
Token::Length(s) => self.eat_span(Expr::Length(s)),
|
||||
Token::Bool(b) => self.with_span(Expr::Bool(b)),
|
||||
Token::Number(n) => self.with_span(Expr::Number(n)),
|
||||
Token::Length(s) => self.with_span(Expr::Length(s)),
|
||||
Token::Hex(s) => {
|
||||
if let Ok(color) = RgbaColor::from_str(s) {
|
||||
self.eat_span(Expr::Color(color))
|
||||
self.with_span(Expr::Color(color))
|
||||
} else {
|
||||
// Heal color by assuming black.
|
||||
error!(@self.feedback, span, "invalid color");
|
||||
let healed = RgbaColor::new_healed(0, 0, 0, 255);
|
||||
self.eat_span(Expr::Color(healed))
|
||||
self.with_span(Expr::Color(healed))
|
||||
}
|
||||
}
|
||||
|
||||
@ -244,128 +329,54 @@ impl FuncParser<'_> {
|
||||
// a table in any case and coerce the table into a value if it is
|
||||
// coercable (length 1 and no trailing comma).
|
||||
Token::LeftParen => {
|
||||
let (table, coercable) = self.parse_table(true);
|
||||
Some(if coercable {
|
||||
table.map(|v| {
|
||||
v.into_values()
|
||||
.next()
|
||||
.expect("table is coercable").val.v
|
||||
})
|
||||
self.start_group(Delimiter::Paren);
|
||||
let (table, coercable) = self.parse_table_contents();
|
||||
let span = self.end_group();
|
||||
|
||||
let expr = if coercable {
|
||||
table.into_values()
|
||||
.next()
|
||||
.expect("table is coercable").val.v
|
||||
} else {
|
||||
table.map(|tab| Expr::Table(tab))
|
||||
})
|
||||
Expr::Table(table)
|
||||
};
|
||||
|
||||
Spanned::new(expr, span)
|
||||
}
|
||||
|
||||
// This is a content expression.
|
||||
Token::LeftBrace => {
|
||||
self.start_group(Delimiter::Brace);
|
||||
self.tokens.push_mode(TokenMode::Body);
|
||||
|
||||
let tree = self.parse_body_contents();
|
||||
|
||||
self.tokens.pop_mode();
|
||||
let span = self.end_group();
|
||||
Spanned::new(Expr::Tree(tree), span)
|
||||
}
|
||||
|
||||
// This is a bracketed function call.
|
||||
Token::LeftBracket => {
|
||||
let call = self.parse_bracket_call();
|
||||
let tree = vec![call.map(|c| SyntaxNode::Call(c))];
|
||||
Spanned::new(Expr::Tree(tree), span)
|
||||
}
|
||||
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
|
||||
self.peek().and_then(|token| match token.v {
|
||||
Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_func_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
|
||||
let args = self.parse_table(true).0;
|
||||
let span = Span::merge(name.span, args.span);
|
||||
Spanned::new(CallExpr { name, args: args.v }, span)
|
||||
}
|
||||
|
||||
/// Set `parens` to true, when this should expect an opening paren and stop
|
||||
/// at the balanced closing paren (this is the case for normal tables and
|
||||
/// round-paren function calls). Set it to false, when this is used to parse
|
||||
/// the top-level function arguments.
|
||||
///
|
||||
/// The returned boolean tells you whether the table can be coerced into an
|
||||
/// expression (this is the case when it's length 1 and has no trailing
|
||||
/// comma).
|
||||
fn parse_table(&mut self, parens: bool) -> (Spanned<TableExpr>, bool) {
|
||||
let start = self.pos();
|
||||
if parens {
|
||||
self.assert(Token::LeftParen);
|
||||
}
|
||||
|
||||
let mut table = TableExpr::new();
|
||||
let mut coercable = true;
|
||||
|
||||
loop {
|
||||
self.skip_white();
|
||||
if self.eof() || (parens && self.check(Token::RightParen)) {
|
||||
break;
|
||||
}
|
||||
|
||||
let behind_arg;
|
||||
|
||||
if let Some(ident) = self.parse_ident() {
|
||||
// This could be a keyword argument, a function call or a simple
|
||||
// identifier.
|
||||
self.skip_white();
|
||||
|
||||
if self.check_eat(Token::Equals).is_some() {
|
||||
self.skip_white();
|
||||
|
||||
let key = ident;
|
||||
self.feedback.decorations
|
||||
.push(Spanned::new(Decoration::TableKey, key.span));
|
||||
|
||||
let val = try_opt_or!(self.parse_expr(), {
|
||||
self.expected("value");
|
||||
continue;
|
||||
});
|
||||
|
||||
coercable = false;
|
||||
behind_arg = val.span.end;
|
||||
table.insert(key.v.0, SpannedEntry::new(key.span, val));
|
||||
|
||||
} else if self.check(Token::LeftParen) {
|
||||
let call = self.parse_func_call(ident);
|
||||
let expr = call.map(|call| Expr::Call(call));
|
||||
|
||||
behind_arg = expr.span.end;
|
||||
table.push(SpannedEntry::val(expr));
|
||||
} else {
|
||||
let expr = ident.map(|id| Expr::Ident(id));
|
||||
|
||||
behind_arg = expr.span.end;
|
||||
table.push(SpannedEntry::val(expr));
|
||||
}
|
||||
} else {
|
||||
// It's a positional argument.
|
||||
let expr = try_opt_or!(self.parse_expr(), {
|
||||
self.expected("value");
|
||||
continue;
|
||||
});
|
||||
behind_arg = expr.span.end;
|
||||
table.push(SpannedEntry::val(expr));
|
||||
}
|
||||
|
||||
self.skip_white();
|
||||
if self.eof() || (parens && self.check(Token::RightParen)) {
|
||||
break;
|
||||
}
|
||||
|
||||
self.expect_at(Token::Comma, behind_arg);
|
||||
coercable = false;
|
||||
}
|
||||
|
||||
if parens {
|
||||
self.expect(Token::RightParen);
|
||||
}
|
||||
|
||||
coercable = coercable && !table.is_empty();
|
||||
|
||||
let end = self.pos();
|
||||
(Spanned::new(table, Span::new(start, end)), coercable)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Error handling
|
||||
impl FuncParser<'_> {
|
||||
fn expect(&mut self, token: Token<'_>) -> bool {
|
||||
if self.check(token) {
|
||||
self.eat();
|
||||
true
|
||||
} else {
|
||||
self.expected(token.name());
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
// Error handling.
|
||||
impl Parser<'_> {
|
||||
fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
|
||||
if self.check(token) {
|
||||
self.eat();
|
||||
@ -400,40 +411,58 @@ impl FuncParser<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
// Parsing primitives
|
||||
impl<'s> FuncParser<'s> {
|
||||
fn skip_white(&mut self) {
|
||||
loop {
|
||||
match self.peek().map(Spanned::value) {
|
||||
Some(Token::Space(_))
|
||||
| Some(Token::LineComment(_))
|
||||
| Some(Token::BlockComment(_)) => { self.eat(); }
|
||||
_ => break,
|
||||
// Parsing primitives.
|
||||
impl<'s> Parser<'s> {
|
||||
fn start_group(&mut self, delimiter: Delimiter) {
|
||||
let start = self.pos();
|
||||
self.assert(delimiter.start());
|
||||
self.delimiters.push((start, delimiter.end()));
|
||||
}
|
||||
|
||||
fn end_group(&mut self) -> Span {
|
||||
assert_eq!(self.peek(), None, "unfinished group");
|
||||
let (start, end_token) = self.delimiters.pop()
|
||||
.expect("group was not started");
|
||||
|
||||
match self.peeked.unwrap() {
|
||||
Some(token) if token.v == end_token => {
|
||||
self.peeked = None;
|
||||
Span::new(start, token.span.end)
|
||||
}
|
||||
_ => {
|
||||
let end = self.pos();
|
||||
error!(
|
||||
@self.feedback, Span::at(end),
|
||||
"expected {}", end_token.name(),
|
||||
);
|
||||
Span::new(start, end)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
|
||||
self.peeked.take().unwrap_or_else(|| self.tokens.next())
|
||||
fn skip_white(&mut self) {
|
||||
while matches!(
|
||||
self.peekv(),
|
||||
Some(Token::Space(_)) |
|
||||
Some(Token::LineComment(_)) |
|
||||
Some(Token::BlockComment(_))
|
||||
) {
|
||||
self.eat();
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_span<T>(&mut self, v: T) -> Option<Spanned<T>> {
|
||||
self.eat().map(|spanned| spanned.map(|_| v))
|
||||
fn eatv(&mut self) -> Option<Token<'s>> {
|
||||
self.eat().map(Spanned::value)
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
|
||||
let tokens = &mut self.tokens;
|
||||
*self.peeked.get_or_insert_with(|| tokens.next())
|
||||
fn peekv(&mut self) -> Option<Token<'s>> {
|
||||
self.peek().map(Spanned::value)
|
||||
}
|
||||
|
||||
fn assert(&mut self, token: Token<'_>) {
|
||||
assert!(self.check_eat(token).is_some());
|
||||
}
|
||||
|
||||
fn check(&mut self, token: Token<'_>) -> bool {
|
||||
self.peek().map(Spanned::value) == Some(token)
|
||||
}
|
||||
|
||||
fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
|
||||
if self.check(token) {
|
||||
self.eat()
|
||||
@ -442,10 +471,39 @@ impl<'s> FuncParser<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
fn check(&mut self, token: Token<'_>) -> bool {
|
||||
self.peekv() == Some(token)
|
||||
}
|
||||
|
||||
fn with_span<T>(&mut self, v: T) -> Spanned<T> {
|
||||
let span = self.eat().expect("expected token").span;
|
||||
Spanned::new(v, span)
|
||||
}
|
||||
|
||||
fn eof(&mut self) -> bool {
|
||||
self.peek().is_none()
|
||||
}
|
||||
|
||||
fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
|
||||
let token = self.peek()?;
|
||||
self.peeked = None;
|
||||
Some(token)
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
|
||||
let tokens = &mut self.tokens;
|
||||
let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?;
|
||||
|
||||
// Check for unclosed groups.
|
||||
if Delimiter::is_delimiter(token.v) {
|
||||
if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
Some(token)
|
||||
}
|
||||
|
||||
fn pos(&self) -> Pos {
|
||||
self.peeked
|
||||
.flatten()
|
||||
@ -454,6 +512,38 @@ impl<'s> FuncParser<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
enum Delimiter {
|
||||
Paren,
|
||||
Bracket,
|
||||
Brace,
|
||||
}
|
||||
|
||||
impl Delimiter {
|
||||
fn is_delimiter(token: Token<'_>) -> bool {
|
||||
matches!(
|
||||
token,
|
||||
Token::RightParen | Token::RightBracket | Token::RightBrace
|
||||
)
|
||||
}
|
||||
|
||||
fn start(self) -> Token<'static> {
|
||||
match self {
|
||||
Self::Paren => Token::LeftParen,
|
||||
Self::Bracket => Token::LeftBracket,
|
||||
Self::Brace => Token::LeftBrace,
|
||||
}
|
||||
}
|
||||
|
||||
fn end(self) -> Token<'static> {
|
||||
match self {
|
||||
Self::Paren => Token::RightParen,
|
||||
Self::Bracket => Token::RightBracket,
|
||||
Self::Brace => Token::RightBrace,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn unescape_string(string: &str) -> String {
|
||||
let mut iter = string.chars();
|
||||
let mut out = String::with_capacity(string.len());
|
||||
@ -608,7 +698,7 @@ mod tests {
|
||||
macro_rules! test {
|
||||
(@spans=$spans:expr, $src:expr => $($tts:tt)*) => {
|
||||
let exp = Tree![@$($tts)*];
|
||||
let pass = parse($src, Pos::ZERO);
|
||||
let pass = parse($src);
|
||||
check($src, exp, pass.output, $spans);
|
||||
};
|
||||
}
|
||||
@ -624,7 +714,7 @@ mod tests {
|
||||
macro_rules! e {
|
||||
($src:expr => $($tts:tt)*) => {
|
||||
let exp = vec![$($tts)*];
|
||||
let pass = parse($src, Pos::ZERO);
|
||||
let pass = parse($src);
|
||||
let found = pass.feedback.diagnostics.iter()
|
||||
.map(|s| s.as_ref().map(|e| e.message.as_str()))
|
||||
.collect::<Vec<_>>();
|
||||
@ -636,7 +726,7 @@ mod tests {
|
||||
macro_rules! d {
|
||||
($src:expr => $($tts:tt)*) => {
|
||||
let exp = vec![$($tts)*];
|
||||
let pass = parse($src, Pos::ZERO);
|
||||
let pass = parse($src);
|
||||
check($src, exp, pass.feedback.decorations, true);
|
||||
};
|
||||
}
|
||||
@ -717,6 +807,15 @@ mod tests {
|
||||
e!("[val : 12, /* \n */ 14]" => );
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_groups() {
|
||||
e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"),
|
||||
s(0,2, 0,2, "expected closing bracket"));
|
||||
|
||||
e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"),
|
||||
s(0,5, 0,6, "unexpected closing brace"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_function_names() {
|
||||
// No closing bracket.
|
||||
@ -760,19 +859,29 @@ mod tests {
|
||||
t!("[val: 1][*Hi*]" => P![F!("val"; Num(1.0), Tree![P![B, T("Hi"), B]])]);
|
||||
e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment"));
|
||||
|
||||
// Raw in body.
|
||||
t!("[val][`Hi]`" => P![F!("val"; Tree![P![R!["Hi]"]]])]);
|
||||
e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket"));
|
||||
|
||||
// Crazy.
|
||||
t!("[v][[v][v][v]]" => P![F!("v"; Tree![P![
|
||||
F!("v"; Tree![P![T("v")]]), F!("v")
|
||||
]])]);
|
||||
|
||||
// Spanned.
|
||||
ts!(" [box][Oh my]" => s(0,0, 0,13, P![
|
||||
s(0,0, 0,1, S),
|
||||
s(0,1, 0,13, F!(s(0,1, 0,4, "box");
|
||||
s(0,6, 0,11, Tree![s(0,6, 0,11, P![
|
||||
s(0,6, 0,8, T("Oh")), s(0,8, 0,9, S), s(0,9, 0,11, T("my"))
|
||||
s(0,1, 0,13, F!(s(0,2, 0,5, "box");
|
||||
s(0,6, 0,13, Tree![s(0,7, 0,12, P![
|
||||
s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my"))
|
||||
])])
|
||||
))
|
||||
]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_simple_values() {
|
||||
fn test_parse_values() {
|
||||
// Simple.
|
||||
v!("_" => Id("_"));
|
||||
v!("name" => Id("name"));
|
||||
v!("α" => Id("α"));
|
||||
@ -787,6 +896,12 @@ mod tests {
|
||||
v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00)));
|
||||
v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string"));
|
||||
|
||||
// Content.
|
||||
v!("{_hi_}" => Tree![P![I, T("hi"), I]]);
|
||||
e!("[val: {_hi_}]" => );
|
||||
v!("[hi]" => Tree![F!["hi"]]);
|
||||
e!("[val: [hi]]" => );
|
||||
|
||||
// Healed colors.
|
||||
v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff)));
|
||||
e!("[val: #12345]" => s(0,6, 0,12, "invalid color"));
|
||||
@ -925,7 +1040,7 @@ mod tests {
|
||||
v!("(\x07 abc,)" => Table![Id("abc")]);
|
||||
e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token"));
|
||||
e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma"));
|
||||
e!("[val: [hi]]" => s(0,6, 0,10, "expected value, found function"));
|
||||
e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren"));
|
||||
|
||||
// Expected comma.
|
||||
v!("(true false)" => Table![Bool(true), Bool(false)]);
|
||||
|
@ -22,27 +22,10 @@ pub enum Token<'s> {
|
||||
/// can contain nested block comments.
|
||||
BlockComment(&'s str),
|
||||
|
||||
/// A function invocation.
|
||||
Function {
|
||||
/// The header string:
|
||||
/// ```typst
|
||||
/// [header: args][body]
|
||||
/// ^^^^^^^^^^^^
|
||||
/// ```
|
||||
header: &'s str,
|
||||
/// The spanned body string:
|
||||
/// ```typst
|
||||
/// [header][hello *world*]
|
||||
/// ^^^^^^^^^^^^^
|
||||
/// ^-- The span is relative to right before this bracket
|
||||
/// ```
|
||||
body: Option<Spanned<&'s str>>,
|
||||
/// Whether the last closing bracket was present.
|
||||
/// - `[func]` or `[func][body]` => terminated
|
||||
/// - `[func` or `[func][body` => not terminated
|
||||
terminated: bool,
|
||||
},
|
||||
|
||||
/// A left bracket starting a function invocation or body: `[`.
|
||||
LeftBracket,
|
||||
/// A right bracket ending a function invocation or body: `]`.
|
||||
RightBracket,
|
||||
/// A left parenthesis in a function header: `(`.
|
||||
LeftParen,
|
||||
/// A right parenthesis in a function header: `)`.
|
||||
@ -119,7 +102,8 @@ impl<'s> Token<'s> {
|
||||
Space(_) => "space",
|
||||
LineComment(_) => "line comment",
|
||||
BlockComment(_) => "block comment",
|
||||
Function { .. } => "function",
|
||||
LeftBracket => "opening bracket",
|
||||
RightBracket => "closing bracket",
|
||||
LeftParen => "opening paren",
|
||||
RightParen => "closing paren",
|
||||
LeftBrace => "opening brace",
|
||||
@ -141,7 +125,6 @@ impl<'s> Token<'s> {
|
||||
Backslash => "backslash",
|
||||
Raw { .. } => "raw text",
|
||||
Text(_) => "text",
|
||||
Invalid("]") => "closing bracket",
|
||||
Invalid("*/") => "end of block comment",
|
||||
Invalid(_) => "invalid token",
|
||||
}
|
||||
@ -152,8 +135,9 @@ impl<'s> Token<'s> {
|
||||
#[derive(Debug)]
|
||||
pub struct Tokens<'s> {
|
||||
src: &'s str,
|
||||
mode: TokenMode,
|
||||
iter: Peekable<Chars<'s>>,
|
||||
mode: TokenMode,
|
||||
stack: Vec<TokenMode>,
|
||||
pos: Pos,
|
||||
index: usize,
|
||||
}
|
||||
@ -172,16 +156,29 @@ impl<'s> Tokens<'s> {
|
||||
///
|
||||
/// The first token's span starts at the given `offset` position instead of
|
||||
/// the zero position.
|
||||
pub fn new(src: &'s str, offset: Pos, mode: TokenMode) -> Self {
|
||||
pub fn new(src: &'s str, mode: TokenMode) -> Self {
|
||||
Self {
|
||||
src,
|
||||
mode,
|
||||
iter: src.chars().peekable(),
|
||||
pos: offset,
|
||||
mode,
|
||||
stack: vec![],
|
||||
pos: Pos::ZERO,
|
||||
index: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Change the token mode and push the old one on a stack.
|
||||
pub fn push_mode(&mut self, mode: TokenMode) {
|
||||
self.stack.push(self.mode);
|
||||
self.mode = mode;
|
||||
}
|
||||
|
||||
/// Pop the old token mode from the stack. This panics if there is no mode
|
||||
/// on the stack.
|
||||
pub fn pop_mode(&mut self) {
|
||||
self.mode = self.stack.pop().expect("no pushed mode");
|
||||
}
|
||||
|
||||
/// The index in the string at which the last token ends and next token will
|
||||
/// start.
|
||||
pub fn index(&self) -> usize {
|
||||
@ -212,15 +209,15 @@ impl<'s> Iterator for Tokens<'s> {
|
||||
// Whitespace.
|
||||
c if c.is_whitespace() => self.read_whitespace(start),
|
||||
|
||||
// Functions.
|
||||
'[' => self.read_function(start),
|
||||
']' => Invalid("]"),
|
||||
// Functions and blocks.
|
||||
'[' => LeftBracket,
|
||||
']' => RightBracket,
|
||||
'{' => LeftBrace,
|
||||
'}' => RightBrace,
|
||||
|
||||
// Syntactic elements in function headers.
|
||||
'(' if self.mode == Header => LeftParen,
|
||||
')' if self.mode == Header => RightParen,
|
||||
'{' if self.mode == Header => LeftBrace,
|
||||
'}' if self.mode == Header => RightBrace,
|
||||
':' if self.mode == Header => Colon,
|
||||
',' if self.mode == Header => Comma,
|
||||
'=' if self.mode == Header => Equals,
|
||||
@ -322,52 +319,6 @@ impl<'s> Tokens<'s> {
|
||||
Space(end.line - start.line)
|
||||
}
|
||||
|
||||
fn read_function(&mut self, start: Pos) -> Token<'s> {
|
||||
let (header, terminated) = self.read_function_part(Header);
|
||||
self.eat();
|
||||
|
||||
if self.peek() != Some('[') {
|
||||
return Function { header, body: None, terminated };
|
||||
}
|
||||
|
||||
self.eat();
|
||||
|
||||
let body_start = self.pos() - start;
|
||||
let (body, terminated) = self.read_function_part(Body);
|
||||
let body_end = self.pos() - start;
|
||||
let span = Span::new(body_start, body_end);
|
||||
|
||||
self.eat();
|
||||
|
||||
Function { header, body: Some(Spanned { v: body, span }), terminated }
|
||||
}
|
||||
|
||||
fn read_function_part(&mut self, mode: TokenMode) -> (&'s str, bool) {
|
||||
let start = self.index();
|
||||
let mut terminated = false;
|
||||
|
||||
while let Some(n) = self.peek() {
|
||||
if n == ']' {
|
||||
terminated = true;
|
||||
break;
|
||||
}
|
||||
|
||||
self.eat();
|
||||
match n {
|
||||
'[' => { self.read_function(Pos::ZERO); }
|
||||
'/' if self.peek() == Some('/') => { self.read_line_comment(); }
|
||||
'/' if self.peek() == Some('*') => { self.read_block_comment(); }
|
||||
'"' if mode == Header => { self.read_string(); }
|
||||
'`' if mode == Body => { self.read_raw(); }
|
||||
'\\' => { self.eat(); }
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let end = self.index();
|
||||
(&self.src[start..end], terminated)
|
||||
}
|
||||
|
||||
fn read_string(&mut self) -> Token<'s> {
|
||||
let (string, terminated) = self.read_until_unescaped('"');
|
||||
Str { string, terminated }
|
||||
@ -540,6 +491,7 @@ mod tests {
|
||||
use Token::{
|
||||
Space as S,
|
||||
LineComment as LC, BlockComment as BC,
|
||||
LeftBracket as L, RightBracket as R,
|
||||
LeftParen as LP, RightParen as RP,
|
||||
LeftBrace as LB, RightBrace as RB,
|
||||
Ident as Id,
|
||||
@ -557,25 +509,12 @@ mod tests {
|
||||
/// Test shorthand that builds a `Token::Str` from its two fields.
fn Str(string: &str, terminated: bool) -> Token {
    Token::Str { terminated, string }
}
|
||||
/// Test shorthand that builds a `Token::Raw` from its two fields.
fn Raw(raw: &str, terminated: bool) -> Token {
    Token::Raw { terminated, raw }
}
|
||||
|
||||
/// Test shorthand that builds a `Token::Function`.
///
/// `F!(header, None, terminated)` yields a function without a body. Any
/// other second argument is converted into a `Spanned<&str>` and used as
/// the body. The `None` arm must come first, because the literal `None`
/// would otherwise also match the general expression arm.
macro_rules! F {
    ($header:expr, None, $terminated:expr) => {
        Token::Function {
            header: $header,
            body: None,
            terminated: $terminated,
        }
    };
    ($header:expr, $body:expr, $terminated:expr) => {
        Token::Function {
            header: $header,
            body: Some(<_ as Into<Spanned<&str>>>::into($body)),
            terminated: $terminated,
        }
    };
}
|
||||
|
||||
/// Forward all arguments to `test!` with the spans flag set to `false`
/// (presumably span comparison is skipped; see `check`).
macro_rules! t {
    ($($rest:tt)*) => {
        test!(@spans=false, $($rest)*)
    };
}
|
||||
/// Forward all arguments to `test!` with the spans flag set to `true`
/// (presumably spans are compared as well; see `check`).
macro_rules! ts {
    ($($rest:tt)*) => {
        test!(@spans=true, $($rest)*)
    };
}
|
||||
/// Tokenize `$src` in the given mode and compare the result against the
/// expected tokens.
///
/// Every expected token is converted into a `Spanned<Token>`. The `$spans`
/// flag is handed through to `check` unchanged.
///
/// The tokenizer is constructed with `Tokens::new($src, $mode)`. The earlier
/// three-argument call that also passed `Pos::ZERO` was a stale duplicate of
/// the `found` binding and has been dropped.
macro_rules! test {
    (@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
        let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
        let found = Tokens::new($src, $mode).collect::<Vec<_>>();
        check($src, exp, found, $spans);
    }
}
|
||||
@ -616,7 +555,7 @@ mod tests {
|
||||
fn tokenize_body_only_tokens() {
|
||||
t!(Body, "_*" => Underscore, Star);
|
||||
t!(Body, "***" => Star, Star, Star);
|
||||
t!(Body, "[func]*bold*" => F!("func", None, true), Star, T("bold"), Star);
|
||||
t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
|
||||
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
|
||||
t!(Body, "`raw`" => Raw("raw", true));
|
||||
t!(Body, "`[func]`" => Raw("[func]", true));
|
||||
@ -674,50 +613,6 @@ mod tests {
|
||||
t!(Header, "\"🌎\"" => Str("🌎", true));
|
||||
}
|
||||
|
||||
/// Function tokens: with and without bodies, next to text and whitespace,
/// nested, and unterminated.
#[test]
fn tokenize_functions() {
    // Functions directly adjacent to plain text.
    t!(Body, "a[f]" => T("a"), F!("f", None, true));
    t!(Body, "[f]a" => F!("f", None, true), T("a"));

    // Functions with a body, preceded by whitespace or text.
    t!(Body, "\n\n[f][ ]" => S(2), F!("f", " ", true));
    t!(
        Body, "abc [f][ ]a" =>
        T("abc"), S(0), F!("f", " ", true), T("a")
    );

    // Brackets nested inside a header or body stay part of that header or body.
    t!(Body, "[f: [=][*]]" => F!("f: [=][*]", None, true));
    t!(Body, "[_][[,],]," => F!("_", "[,],", true), T(","));

    // Only the first bracket pair after a header is taken as the body.
    t!(Body, "[=][=][=]" => F!("=", "=", true), F!("=", None, true));
    t!(Body, "[=][[=][=][=]]" => F!("=", "[=][=][=]", true));

    // A lone opening bracket is an unterminated function; a lone closing
    // bracket is invalid.
    t!(Header, "[" => F!("", None, false));
    t!(Header, "]" => Invalid("]"));
}
|
||||
|
||||
/// The closing bracket of a function must be found correctly even when
/// strings, raw text, comments or escapes contain bracket characters.
#[test]
fn tokenize_correct_end_of_function() {
    // Strings and backticks in headers: a string can hide the bracket, a
    // backtick cannot.
    t!(Body, r#"[f: "]"# => F!(r#"f: "]"#, None, false));
    t!(Body, "[f: \"s\"]" => F!("f: \"s\"", None, true));
    t!(Body, r#"[f: \"\"\"]"# => F!(r#"f: \"\"\""#, None, true));
    t!(Body, "[f: `]" => F!("f: `", None, true));

    // Strings and backticks in bodies: raw text can hide the bracket, a
    // quote cannot.
    t!(Body, "[f][\"]" => F!("f", s(0,4, 0,5, "\""), true));
    t!(Body, r#"[f][\"]"# => F!("f", s(0,4, 0,6, r#"\""#), true));
    t!(Body, "[f][`]" => F!("f", s(0,4, 0,6, "`]"), false));
    t!(Body, "[f][\\`]" => F!("f", s(0,4, 0,6, "\\`"), true));
    t!(Body, "[f][`raw`]" => F!("f", s(0,4, 0,9, "`raw`"), true));
    t!(Body, "[f][`raw]" => F!("f", s(0,4, 0,9, "`raw]"), false));
    t!(Body, "[f][`raw]`]" => F!("f", s(0,4, 0,10, "`raw]`"), true));
    t!(Body, "[f][`\\`]" => F!("f", s(0,4, 0,8, "`\\`]"), false));
    t!(Body, "[f][`\\\\`]" => F!("f", s(0,4, 0,8, "`\\\\`"), true));

    // Comments: an unclosed block comment or an open string swallows the
    // bracket, a line comment only hides it until the line ends.
    t!(Body, "[f][/*]" => F!("f", s(0,4, 0,7, "/*]"), false));
    t!(Body, "[f][/*`*/]" => F!("f", s(0,4, 0,9, "/*`*/"), true));
    t!(Body, "[f: //]\n]" => F!("f: //]\n", None, true));
    t!(Body, "[f: \"//]\n]" => F!("f: \"//]\n]", None, false));

    // Escaped brackets do not open or close anything.
    t!(Body, "[f][\\]]" => F!("f", s(0,4, 0,6, "\\]"), true));
    t!(Body, "[f][\\[]" => F!("f", s(0,4, 0,6, "\\["), true));
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_escaped_symbols() {
|
||||
t!(Body, r"\\" => T(r"\"));
|
||||
@ -746,7 +641,6 @@ mod tests {
|
||||
fn tokenize_with_spans() {
|
||||
ts!(Body, "hello" => s(0,0, 0,5, T("hello")));
|
||||
ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c")));
|
||||
ts!(Body, "[x = \"(1)\"]*" => s(0,0, 0,11, F!("x = \"(1)\"", None, true)), s(0,11, 0,12, Star));
|
||||
ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f")));
|
||||
ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore));
|
||||
ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0)));
|
||||
|
Loading…
x
Reference in New Issue
Block a user