Parse braced expressions and bracketed calls in headers 🗳

- Refactors the tokenizer to be lazy: It does not emit pre-parsed function tokens, but instead allows its mode to be changed. The modes are tracked on a stack to allow nested compute/typesetting (pop/push).
- Introduces delimited groups into the parser, which make it easy to parse delimited expressions without handling the delimiters in the parsing code for the group's content. A group is started with `start_group`. When reaching the group's end (matching delimiter), the eat and peek methods will simply return `None` instead of the delimiter, stopping the content parser and bubbling up the call stack until `end_group` is called, which consumes the closing delimiter or reports an error if it is missing.
This commit is contained in:
Laurenz 2020-08-17 16:25:09 +02:00
parent 8a80503188
commit 3cbca56a71
6 changed files with 396 additions and 388 deletions

View File

@ -1,18 +1,17 @@
use criterion::{criterion_group, criterion_main, Criterion}; use criterion::{criterion_group, criterion_main, Criterion};
use typstc::syntax::parsing::parse; use typstc::syntax::parsing::parse;
use typstc::syntax::span::Pos;
// 28 not too dense lines. // 28 not too dense lines.
const COMA: &str = include_str!("../tests/coma.typ"); const COMA: &str = include_str!("../tests/coma.typ");
fn parsing_benchmark(c: &mut Criterion) { fn parsing_benchmark(c: &mut Criterion) {
c.bench_function("parse-coma-28-lines", |b| { c.bench_function("parse-coma-28-lines", |b| {
b.iter(|| parse(COMA, Pos::ZERO)) b.iter(|| parse(COMA))
}); });
let long = COMA.repeat(100); let long = COMA.repeat(100);
c.bench_function("parse-coma-2800-lines", |b| { c.bench_function("parse-coma-2800-lines", |b| {
b.iter(|| parse(&long, Pos::ZERO)) b.iter(|| parse(&long))
}); });
} }

View File

@ -270,7 +270,7 @@ impl<V> SpannedEntry<V> {
/// Create an entry with the same span for key and value. /// Create an entry with the same span for key and value.
pub fn val(val: Spanned<V>) -> Self { pub fn val(val: Spanned<V>) -> Self {
Self { key: Span::ZERO, val } Self { key: val.span, val }
} }
/// Convert from `&SpannedEntry<T>` to `SpannedEntry<&T>` /// Convert from `&SpannedEntry<T>` to `SpannedEntry<&T>`

View File

@ -123,7 +123,7 @@ impl<'a> TreeLayouter<'a> {
..self.ctx ..self.ctx
}).await; }).await;
self.feedback.extend_offset(pass.feedback, call.span.start); self.feedback.extend(pass.feedback);
if let Value::Commands(commands) = pass.output { if let Value::Commands(commands) = pass.output {
for command in commands { for command in commands {

View File

@ -87,7 +87,7 @@ impl Typesetter {
/// Parse source code into a syntax tree. /// Parse source code into a syntax tree.
pub fn parse(&self, src: &str) -> Pass<SyntaxTree> { pub fn parse(&self, src: &str) -> Pass<SyntaxTree> {
parse(src, Pos::ZERO) parse(src)
} }
/// Layout a syntax tree and return the produced layout. /// Layout a syntax tree and return the produced layout.

View File

@ -12,23 +12,45 @@ use super::tree::{CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr};
use super::Ident; use super::Ident;
/// Parse a string of source code. /// Parse a string of source code.
/// pub fn parse(src: &str) -> Pass<SyntaxTree> {
/// All spans in the resulting tree and feedback are offset by the given Parser::new(src).parse()
/// `offset` position. This is used to make spans of a function body relative to }
/// the start of the function as a whole as opposed to the start of the
/// function's body. struct Parser<'s> {
pub fn parse(src: &str, offset: Pos) -> Pass<SyntaxTree> { tokens: Tokens<'s>,
peeked: Option<Option<Spanned<Token<'s>>>>,
delimiters: Vec<(Pos, Token<'static>)>,
feedback: Feedback,
}
impl<'s> Parser<'s> {
fn new(src: &'s str) -> Self {
Self {
tokens: Tokens::new(src, TokenMode::Body),
peeked: None,
delimiters: vec![],
feedback: Feedback::new(),
}
}
fn parse(mut self) -> Pass<SyntaxTree> {
let tree = self.parse_body_contents();
Pass::new(tree, self.feedback)
}
}
// Typesetting content.
impl Parser<'_> {
fn parse_body_contents(&mut self) -> SyntaxTree {
let mut tree = SyntaxTree::new(); let mut tree = SyntaxTree::new();
let mut par = SyntaxTree::new(); let mut par = SyntaxTree::new();
let mut feedback = Feedback::new();
for token in Tokens::new(src, offset, TokenMode::Body) { while let Some(token) = self.peek() {
let span = token.span; par.push(match token.v {
let node = match token.v {
// Starting from two newlines counts as a paragraph break, a single // Starting from two newlines counts as a paragraph break, a single
// newline does not. // newline does not.
Token::Space(newlines) => if newlines < 2 { Token::Space(newlines) => if newlines < 2 {
SyntaxNode::Spacing self.with_span(SyntaxNode::Spacing)
} else { } else {
// End the current paragraph if it is not empty. // End the current paragraph if it is not empty.
if let (Some(first), Some(last)) = (par.first(), par.last()) { if let (Some(first), Some(last)) = (par.first(), par.last()) {
@ -36,37 +58,45 @@ pub fn parse(src: &str, offset: Pos) -> Pass<SyntaxTree> {
let node = SyntaxNode::Par(std::mem::take(&mut par)); let node = SyntaxNode::Par(std::mem::take(&mut par));
tree.push(Spanned::new(node, span)); tree.push(Spanned::new(node, span));
} }
self.eat();
continue; continue;
} }
Token::LineComment(_) | Token::BlockComment(_) => {
Token::Function { header, body, terminated } => { self.eat();
let parsed = FuncParser::new(header, body).parse(); continue
feedback.extend_offset(parsed.feedback, span.start);
if !terminated {
error!(@feedback, Span::at(span.end), "expected closing bracket");
}
SyntaxNode::Call(parsed.output)
} }
Token::Star => SyntaxNode::ToggleBolder, Token::LeftBracket => {
Token::Underscore => SyntaxNode::ToggleItalic, self.parse_bracket_call().map(|c| SyntaxNode::Call(c))
Token::Backslash => SyntaxNode::Linebreak, }
Token::Star => self.with_span(SyntaxNode::ToggleBolder),
Token::Underscore => self.with_span(SyntaxNode::ToggleItalic),
Token::Backslash => self.with_span(SyntaxNode::Linebreak),
Token::Raw { raw, terminated } => { Token::Raw { raw, terminated } => {
if !terminated { if !terminated {
error!(@feedback, Span::at(span.end), "expected backtick"); error!(
@self.feedback, Span::at(token.span.end),
"expected backtick",
);
} }
SyntaxNode::Raw(unescape_raw(raw)) self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
}
Token::Text(text) => {
self.with_span(SyntaxNode::Text(text.to_string()))
} }
Token::Text(text) => SyntaxNode::Text(text.to_string()),
Token::LineComment(_) | Token::BlockComment(_) => continue,
unexpected => { unexpected => {
error!(@feedback, span, "unexpected {}", unexpected.name()); self.eat();
error!(
@self.feedback, token.span,
"unexpected {}", unexpected.name(),
);
continue; continue;
} }
}; });
par.push(Spanned::new(node, span));
} }
if let (Some(first), Some(last)) = (par.first(), par.last()) { if let (Some(first), Some(last)) = (par.first(), par.last()) {
@ -75,31 +105,17 @@ pub fn parse(src: &str, offset: Pos) -> Pass<SyntaxTree> {
tree.push(Spanned::new(node, span)); tree.push(Spanned::new(node, span));
} }
Pass::new(tree, feedback) tree
}
struct FuncParser<'s> {
tokens: Tokens<'s>,
peeked: Option<Option<Spanned<Token<'s>>>>,
body: Option<Spanned<&'s str>>,
feedback: Feedback,
}
impl<'s> FuncParser<'s> {
fn new(header: &'s str, body: Option<Spanned<&'s str>>) -> Self {
Self {
// Start at column 1 because the opening bracket is also part of
// the function, but not part of the `header` string.
tokens: Tokens::new(header, Pos::new(0, 1), TokenMode::Header),
peeked: None,
body,
feedback: Feedback::new(),
} }
} }
fn parse(mut self) -> Pass<CallExpr> { // Function calls.
impl Parser<'_> {
fn parse_bracket_call(&mut self) -> Spanned<CallExpr> {
self.start_group(Delimiter::Bracket);
self.tokens.push_mode(TokenMode::Header);
let after_bracket = self.pos(); let after_bracket = self.pos();
self.skip_white(); self.skip_white();
let name = self.parse_ident().unwrap_or_else(|| { let name = self.parse_ident().unwrap_or_else(|| {
self.expected_found_or_at("function name", after_bracket); self.expected_found_or_at("function name", after_bracket);
@ -107,36 +123,105 @@ impl<'s> FuncParser<'s> {
}); });
self.skip_white(); self.skip_white();
let mut args = match self.eat().map(Spanned::value) { let mut args = match self.eatv() {
Some(Token::Colon) => self.parse_table(false).0.v, Some(Token::Colon) => self.parse_table_contents().0,
Some(_) => { Some(_) => {
self.expected_at("colon", name.span.end); self.expected_at("colon", name.span.end);
while self.eat().is_some() {}
TableExpr::new() TableExpr::new()
} }
None => TableExpr::new(), None => TableExpr::new(),
}; };
if let Some(body) = self.body { self.tokens.pop_mode();
args.push(SpannedEntry::val(body.map(|src| { let mut span = self.end_group();
let parsed = parse(src, body.span.start);
self.feedback.extend(parsed.feedback); if self.check(Token::LeftBracket) {
Expr::Tree(parsed.output) self.start_group(Delimiter::Bracket);
}))); self.tokens.push_mode(TokenMode::Body);
let body = self.parse_body_contents();
self.tokens.pop_mode();
let body_span = self.end_group();
let expr = Expr::Tree(body);
args.push(SpannedEntry::val(Spanned::new(expr, body_span)));
span.expand(body_span);
} }
Pass::new(CallExpr { name, args }, self.feedback) Spanned::new(CallExpr { name, args }, span)
}
fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
self.start_group(Delimiter::Paren);
let args = self.parse_table_contents().0;
let args_span = self.end_group();
let span = Span::merge(name.span, args_span);
Spanned::new(CallExpr { name, args }, span)
} }
} }
// Parsing expressions and values // Tables.
impl FuncParser<'_> { impl Parser<'_> {
fn parse_ident(&mut self) -> Option<Spanned<Ident>> { fn parse_table_contents(&mut self) -> (TableExpr, bool) {
self.peek().and_then(|token| match token.v { let mut table = TableExpr::new();
Token::Ident(id) => self.eat_span(Ident(id.to_string())), let mut comma_and_keyless = true;
_ => None,
}) while { self.skip_white(); !self.eof() } {
let (key, val) = if let Some(ident) = self.parse_ident() {
self.skip_white();
match self.peekv() {
Some(Token::Equals) => {
self.eat();
self.skip_white();
(Some(ident), try_opt_or!(self.parse_expr(), {
self.expected("value");
continue;
}))
} }
Some(Token::LeftParen) => {
let call = self.parse_paren_call(ident);
(None, call.map(|c| Expr::Call(c)))
}
_ => (None, ident.map(|id| Expr::Ident(id)))
}
} else {
(None, try_opt_or!(self.parse_expr(), {
self.expected("value");
continue;
}))
};
let behind = val.span.end;
if let Some(key) = key {
comma_and_keyless = false;
table.insert(key.v.0, SpannedEntry::new(key.span, val));
self.feedback.decorations
.push(Spanned::new(Decoration::TableKey, key.span));
} else {
table.push(SpannedEntry::val(val));
}
if { self.skip_white(); self.eof() } {
break;
}
self.expect_at(Token::Comma, behind);
comma_and_keyless = false;
}
let coercable = comma_and_keyless && !table.is_empty();
(table, coercable)
}
}
// Expressions and values.
impl Parser<'_> {
fn parse_expr(&mut self) -> Option<Spanned<Expr>> { fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
self.parse_binops("summand", Self::parse_term, |token| match token { self.parse_binops("summand", Self::parse_term, |token| match token {
Token::Plus => Some(Expr::Add), Token::Plus => Some(Expr::Add),
@ -206,37 +291,37 @@ impl FuncParser<'_> {
fn parse_value(&mut self) -> Option<Spanned<Expr>> { fn parse_value(&mut self) -> Option<Spanned<Expr>> {
let Spanned { v: token, span } = self.peek()?; let Spanned { v: token, span } = self.peek()?;
match token { Some(match token {
// This could be a function call or an identifier. // This could be a function call or an identifier.
Token::Ident(id) => { Token::Ident(id) => {
let name = Spanned::new(Ident(id.to_string()), span); let name = Spanned::new(Ident(id.to_string()), span);
self.eat(); self.eat();
self.skip_white(); self.skip_white();
Some(if self.check(Token::LeftParen) { if self.check(Token::LeftParen) {
self.parse_func_call(name).map(|call| Expr::Call(call)) self.parse_paren_call(name).map(|call| Expr::Call(call))
} else { } else {
name.map(|id| Expr::Ident(id)) name.map(|id| Expr::Ident(id))
}) }
} }
Token::Str { string, terminated } => { Token::Str { string, terminated } => {
if !terminated { if !terminated {
self.expected_at("quote", span.end); self.expected_at("quote", span.end);
} }
self.eat_span(Expr::Str(unescape_string(string))) self.with_span(Expr::Str(unescape_string(string)))
} }
Token::Bool(b) => self.eat_span(Expr::Bool(b)), Token::Bool(b) => self.with_span(Expr::Bool(b)),
Token::Number(n) => self.eat_span(Expr::Number(n)), Token::Number(n) => self.with_span(Expr::Number(n)),
Token::Length(s) => self.eat_span(Expr::Length(s)), Token::Length(s) => self.with_span(Expr::Length(s)),
Token::Hex(s) => { Token::Hex(s) => {
if let Ok(color) = RgbaColor::from_str(s) { if let Ok(color) = RgbaColor::from_str(s) {
self.eat_span(Expr::Color(color)) self.with_span(Expr::Color(color))
} else { } else {
// Heal color by assuming black. // Heal color by assuming black.
error!(@self.feedback, span, "invalid color"); error!(@self.feedback, span, "invalid color");
let healed = RgbaColor::new_healed(0, 0, 0, 255); let healed = RgbaColor::new_healed(0, 0, 0, 255);
self.eat_span(Expr::Color(healed)) self.with_span(Expr::Color(healed))
} }
} }
@ -244,128 +329,54 @@ impl FuncParser<'_> {
// a table in any case and coerce the table into a value if it is // a table in any case and coerce the table into a value if it is
// coercable (length 1 and no trailing comma). // coercable (length 1 and no trailing comma).
Token::LeftParen => { Token::LeftParen => {
let (table, coercable) = self.parse_table(true); self.start_group(Delimiter::Paren);
Some(if coercable { let (table, coercable) = self.parse_table_contents();
table.map(|v| { let span = self.end_group();
v.into_values()
let expr = if coercable {
table.into_values()
.next() .next()
.expect("table is coercable").val.v .expect("table is coercable").val.v
})
} else { } else {
table.map(|tab| Expr::Table(tab)) Expr::Table(table)
};
Spanned::new(expr, span)
}
// This is a content expression.
Token::LeftBrace => {
self.start_group(Delimiter::Brace);
self.tokens.push_mode(TokenMode::Body);
let tree = self.parse_body_contents();
self.tokens.pop_mode();
let span = self.end_group();
Spanned::new(Expr::Tree(tree), span)
}
// This is a bracketed function call.
Token::LeftBracket => {
let call = self.parse_bracket_call();
let tree = vec![call.map(|c| SyntaxNode::Call(c))];
Spanned::new(Expr::Tree(tree), span)
}
_ => return None,
}) })
} }
fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
self.peek().and_then(|token| match token.v {
Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))),
_ => None, _ => None,
})
} }
} }
fn parse_func_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> { // Error handling.
let args = self.parse_table(true).0; impl Parser<'_> {
let span = Span::merge(name.span, args.span);
Spanned::new(CallExpr { name, args: args.v }, span)
}
/// Set `parens` to true, when this should expect an opening paren and stop
/// at the balanced closing paren (this is the case for normal tables and
/// round-paren function calls). Set it to false, when this is used to parse
/// the top-level function arguments.
///
/// The returned boolean tells you whether the table can be coerced into an
/// expression (this is the case when it's length 1 and has no trailing
/// comma).
fn parse_table(&mut self, parens: bool) -> (Spanned<TableExpr>, bool) {
let start = self.pos();
if parens {
self.assert(Token::LeftParen);
}
let mut table = TableExpr::new();
let mut coercable = true;
loop {
self.skip_white();
if self.eof() || (parens && self.check(Token::RightParen)) {
break;
}
let behind_arg;
if let Some(ident) = self.parse_ident() {
// This could be a keyword argument, a function call or a simple
// identifier.
self.skip_white();
if self.check_eat(Token::Equals).is_some() {
self.skip_white();
let key = ident;
self.feedback.decorations
.push(Spanned::new(Decoration::TableKey, key.span));
let val = try_opt_or!(self.parse_expr(), {
self.expected("value");
continue;
});
coercable = false;
behind_arg = val.span.end;
table.insert(key.v.0, SpannedEntry::new(key.span, val));
} else if self.check(Token::LeftParen) {
let call = self.parse_func_call(ident);
let expr = call.map(|call| Expr::Call(call));
behind_arg = expr.span.end;
table.push(SpannedEntry::val(expr));
} else {
let expr = ident.map(|id| Expr::Ident(id));
behind_arg = expr.span.end;
table.push(SpannedEntry::val(expr));
}
} else {
// It's a positional argument.
let expr = try_opt_or!(self.parse_expr(), {
self.expected("value");
continue;
});
behind_arg = expr.span.end;
table.push(SpannedEntry::val(expr));
}
self.skip_white();
if self.eof() || (parens && self.check(Token::RightParen)) {
break;
}
self.expect_at(Token::Comma, behind_arg);
coercable = false;
}
if parens {
self.expect(Token::RightParen);
}
coercable = coercable && !table.is_empty();
let end = self.pos();
(Spanned::new(table, Span::new(start, end)), coercable)
}
}
// Error handling
impl FuncParser<'_> {
fn expect(&mut self, token: Token<'_>) -> bool {
if self.check(token) {
self.eat();
true
} else {
self.expected(token.name());
false
}
}
fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool { fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
if self.check(token) { if self.check(token) {
self.eat(); self.eat();
@ -400,40 +411,58 @@ impl FuncParser<'_> {
} }
} }
// Parsing primitives // Parsing primitives.
impl<'s> FuncParser<'s> { impl<'s> Parser<'s> {
fn start_group(&mut self, delimiter: Delimiter) {
let start = self.pos();
self.assert(delimiter.start());
self.delimiters.push((start, delimiter.end()));
}
fn end_group(&mut self) -> Span {
assert_eq!(self.peek(), None, "unfinished group");
let (start, end_token) = self.delimiters.pop()
.expect("group was not started");
match self.peeked.unwrap() {
Some(token) if token.v == end_token => {
self.peeked = None;
Span::new(start, token.span.end)
}
_ => {
let end = self.pos();
error!(
@self.feedback, Span::at(end),
"expected {}", end_token.name(),
);
Span::new(start, end)
}
}
}
fn skip_white(&mut self) { fn skip_white(&mut self) {
loop { while matches!(
match self.peek().map(Spanned::value) { self.peekv(),
Some(Token::Space(_)) Some(Token::Space(_)) |
| Some(Token::LineComment(_)) Some(Token::LineComment(_)) |
| Some(Token::BlockComment(_)) => { self.eat(); } Some(Token::BlockComment(_))
_ => break, ) {
} self.eat();
} }
} }
fn eat(&mut self) -> Option<Spanned<Token<'s>>> { fn eatv(&mut self) -> Option<Token<'s>> {
self.peeked.take().unwrap_or_else(|| self.tokens.next()) self.eat().map(Spanned::value)
} }
fn eat_span<T>(&mut self, v: T) -> Option<Spanned<T>> { fn peekv(&mut self) -> Option<Token<'s>> {
self.eat().map(|spanned| spanned.map(|_| v)) self.peek().map(Spanned::value)
}
fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
let tokens = &mut self.tokens;
*self.peeked.get_or_insert_with(|| tokens.next())
} }
fn assert(&mut self, token: Token<'_>) { fn assert(&mut self, token: Token<'_>) {
assert!(self.check_eat(token).is_some()); assert!(self.check_eat(token).is_some());
} }
fn check(&mut self, token: Token<'_>) -> bool {
self.peek().map(Spanned::value) == Some(token)
}
fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> { fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
if self.check(token) { if self.check(token) {
self.eat() self.eat()
@ -442,10 +471,39 @@ impl<'s> FuncParser<'s> {
} }
} }
fn check(&mut self, token: Token<'_>) -> bool {
self.peekv() == Some(token)
}
fn with_span<T>(&mut self, v: T) -> Spanned<T> {
let span = self.eat().expect("expected token").span;
Spanned::new(v, span)
}
fn eof(&mut self) -> bool { fn eof(&mut self) -> bool {
self.peek().is_none() self.peek().is_none()
} }
fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
let token = self.peek()?;
self.peeked = None;
Some(token)
}
fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
let tokens = &mut self.tokens;
let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?;
// Check for unclosed groups.
if Delimiter::is_delimiter(token.v) {
if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) {
return None;
}
}
Some(token)
}
fn pos(&self) -> Pos { fn pos(&self) -> Pos {
self.peeked self.peeked
.flatten() .flatten()
@ -454,6 +512,38 @@ impl<'s> FuncParser<'s> {
} }
} }
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum Delimiter {
Paren,
Bracket,
Brace,
}
impl Delimiter {
fn is_delimiter(token: Token<'_>) -> bool {
matches!(
token,
Token::RightParen | Token::RightBracket | Token::RightBrace
)
}
fn start(self) -> Token<'static> {
match self {
Self::Paren => Token::LeftParen,
Self::Bracket => Token::LeftBracket,
Self::Brace => Token::LeftBrace,
}
}
fn end(self) -> Token<'static> {
match self {
Self::Paren => Token::RightParen,
Self::Bracket => Token::RightBracket,
Self::Brace => Token::RightBrace,
}
}
}
fn unescape_string(string: &str) -> String { fn unescape_string(string: &str) -> String {
let mut iter = string.chars(); let mut iter = string.chars();
let mut out = String::with_capacity(string.len()); let mut out = String::with_capacity(string.len());
@ -608,7 +698,7 @@ mod tests {
macro_rules! test { macro_rules! test {
(@spans=$spans:expr, $src:expr => $($tts:tt)*) => { (@spans=$spans:expr, $src:expr => $($tts:tt)*) => {
let exp = Tree![@$($tts)*]; let exp = Tree![@$($tts)*];
let pass = parse($src, Pos::ZERO); let pass = parse($src);
check($src, exp, pass.output, $spans); check($src, exp, pass.output, $spans);
}; };
} }
@ -624,7 +714,7 @@ mod tests {
macro_rules! e { macro_rules! e {
($src:expr => $($tts:tt)*) => { ($src:expr => $($tts:tt)*) => {
let exp = vec![$($tts)*]; let exp = vec![$($tts)*];
let pass = parse($src, Pos::ZERO); let pass = parse($src);
let found = pass.feedback.diagnostics.iter() let found = pass.feedback.diagnostics.iter()
.map(|s| s.as_ref().map(|e| e.message.as_str())) .map(|s| s.as_ref().map(|e| e.message.as_str()))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
@ -636,7 +726,7 @@ mod tests {
macro_rules! d { macro_rules! d {
($src:expr => $($tts:tt)*) => { ($src:expr => $($tts:tt)*) => {
let exp = vec![$($tts)*]; let exp = vec![$($tts)*];
let pass = parse($src, Pos::ZERO); let pass = parse($src);
check($src, exp, pass.feedback.decorations, true); check($src, exp, pass.feedback.decorations, true);
}; };
} }
@ -717,6 +807,15 @@ mod tests {
e!("[val : 12, /* \n */ 14]" => ); e!("[val : 12, /* \n */ 14]" => );
} }
#[test]
fn test_parse_groups() {
e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"),
s(0,2, 0,2, "expected closing bracket"));
e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"),
s(0,5, 0,6, "unexpected closing brace"));
}
#[test] #[test]
fn test_parse_function_names() { fn test_parse_function_names() {
// No closing bracket. // No closing bracket.
@ -760,19 +859,29 @@ mod tests {
t!("[val: 1][*Hi*]" => P![F!("val"; Num(1.0), Tree![P![B, T("Hi"), B]])]); t!("[val: 1][*Hi*]" => P![F!("val"; Num(1.0), Tree![P![B, T("Hi"), B]])]);
e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment")); e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment"));
// Raw in body.
t!("[val][`Hi]`" => P![F!("val"; Tree![P![R!["Hi]"]]])]);
e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket"));
// Crazy.
t!("[v][[v][v][v]]" => P![F!("v"; Tree![P![
F!("v"; Tree![P![T("v")]]), F!("v")
]])]);
// Spanned. // Spanned.
ts!(" [box][Oh my]" => s(0,0, 0,13, P![ ts!(" [box][Oh my]" => s(0,0, 0,13, P![
s(0,0, 0,1, S), s(0,0, 0,1, S),
s(0,1, 0,13, F!(s(0,1, 0,4, "box"); s(0,1, 0,13, F!(s(0,2, 0,5, "box");
s(0,6, 0,11, Tree![s(0,6, 0,11, P![ s(0,6, 0,13, Tree![s(0,7, 0,12, P![
s(0,6, 0,8, T("Oh")), s(0,8, 0,9, S), s(0,9, 0,11, T("my")) s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my"))
])]) ])])
)) ))
])); ]));
} }
#[test] #[test]
fn test_parse_simple_values() { fn test_parse_values() {
// Simple.
v!("_" => Id("_")); v!("_" => Id("_"));
v!("name" => Id("name")); v!("name" => Id("name"));
v!("α" => Id("α")); v!("α" => Id("α"));
@ -787,6 +896,12 @@ mod tests {
v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00))); v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00)));
v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string")); v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string"));
// Content.
v!("{_hi_}" => Tree![P![I, T("hi"), I]]);
e!("[val: {_hi_}]" => );
v!("[hi]" => Tree![F!["hi"]]);
e!("[val: [hi]]" => );
// Healed colors. // Healed colors.
v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff))); v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff)));
e!("[val: #12345]" => s(0,6, 0,12, "invalid color")); e!("[val: #12345]" => s(0,6, 0,12, "invalid color"));
@ -925,7 +1040,7 @@ mod tests {
v!("(\x07 abc,)" => Table![Id("abc")]); v!("(\x07 abc,)" => Table![Id("abc")]);
e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token")); e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token"));
e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma")); e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma"));
e!("[val: [hi]]" => s(0,6, 0,10, "expected value, found function")); e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren"));
// Expected comma. // Expected comma.
v!("(true false)" => Table![Bool(true), Bool(false)]); v!("(true false)" => Table![Bool(true), Bool(false)]);

View File

@ -22,27 +22,10 @@ pub enum Token<'s> {
/// can contain nested block comments. /// can contain nested block comments.
BlockComment(&'s str), BlockComment(&'s str),
/// A function invocation. /// A left bracket starting a function invocation or body: `[`.
Function { LeftBracket,
/// The header string: /// A right bracket ending a function invocation or body: `]`.
/// ```typst RightBracket,
/// [header: args][body]
/// ^^^^^^^^^^^^
/// ```
header: &'s str,
/// The spanned body string:
/// ```typst
/// [header][hello *world*]
/// ^^^^^^^^^^^^^
/// ^-- The span is relative to right before this bracket
/// ```
body: Option<Spanned<&'s str>>,
/// Whether the last closing bracket was present.
/// - `[func]` or `[func][body]` => terminated
/// - `[func` or `[func][body` => not terminated
terminated: bool,
},
/// A left parenthesis in a function header: `(`. /// A left parenthesis in a function header: `(`.
LeftParen, LeftParen,
/// A right parenthesis in a function header: `)`. /// A right parenthesis in a function header: `)`.
@ -119,7 +102,8 @@ impl<'s> Token<'s> {
Space(_) => "space", Space(_) => "space",
LineComment(_) => "line comment", LineComment(_) => "line comment",
BlockComment(_) => "block comment", BlockComment(_) => "block comment",
Function { .. } => "function", LeftBracket => "opening bracket",
RightBracket => "closing bracket",
LeftParen => "opening paren", LeftParen => "opening paren",
RightParen => "closing paren", RightParen => "closing paren",
LeftBrace => "opening brace", LeftBrace => "opening brace",
@ -141,7 +125,6 @@ impl<'s> Token<'s> {
Backslash => "backslash", Backslash => "backslash",
Raw { .. } => "raw text", Raw { .. } => "raw text",
Text(_) => "text", Text(_) => "text",
Invalid("]") => "closing bracket",
Invalid("*/") => "end of block comment", Invalid("*/") => "end of block comment",
Invalid(_) => "invalid token", Invalid(_) => "invalid token",
} }
@ -152,8 +135,9 @@ impl<'s> Token<'s> {
#[derive(Debug)] #[derive(Debug)]
pub struct Tokens<'s> { pub struct Tokens<'s> {
src: &'s str, src: &'s str,
mode: TokenMode,
iter: Peekable<Chars<'s>>, iter: Peekable<Chars<'s>>,
mode: TokenMode,
stack: Vec<TokenMode>,
pos: Pos, pos: Pos,
index: usize, index: usize,
} }
@ -172,16 +156,29 @@ impl<'s> Tokens<'s> {
/// ///
/// The first token's span starts an the given `offset` position instead of /// The first token's span starts an the given `offset` position instead of
/// the zero position. /// the zero position.
pub fn new(src: &'s str, offset: Pos, mode: TokenMode) -> Self { pub fn new(src: &'s str, mode: TokenMode) -> Self {
Self { Self {
src, src,
mode,
iter: src.chars().peekable(), iter: src.chars().peekable(),
pos: offset, mode,
stack: vec![],
pos: Pos::ZERO,
index: 0, index: 0,
} }
} }
/// Change the token mode and push the old one on a stack.
pub fn push_mode(&mut self, mode: TokenMode) {
self.stack.push(self.mode);
self.mode = mode;
}
/// Pop the old token mode from the stack. This panics if there is no mode
/// on the stack.
pub fn pop_mode(&mut self) {
self.mode = self.stack.pop().expect("no pushed mode");
}
/// The index in the string at which the last token ends and next token will /// The index in the string at which the last token ends and next token will
/// start. /// start.
pub fn index(&self) -> usize { pub fn index(&self) -> usize {
@ -212,15 +209,15 @@ impl<'s> Iterator for Tokens<'s> {
// Whitespace. // Whitespace.
c if c.is_whitespace() => self.read_whitespace(start), c if c.is_whitespace() => self.read_whitespace(start),
// Functions. // Functions and blocks.
'[' => self.read_function(start), '[' => LeftBracket,
']' => Invalid("]"), ']' => RightBracket,
'{' => LeftBrace,
'}' => RightBrace,
// Syntactic elements in function headers. // Syntactic elements in function headers.
'(' if self.mode == Header => LeftParen, '(' if self.mode == Header => LeftParen,
')' if self.mode == Header => RightParen, ')' if self.mode == Header => RightParen,
'{' if self.mode == Header => LeftBrace,
'}' if self.mode == Header => RightBrace,
':' if self.mode == Header => Colon, ':' if self.mode == Header => Colon,
',' if self.mode == Header => Comma, ',' if self.mode == Header => Comma,
'=' if self.mode == Header => Equals, '=' if self.mode == Header => Equals,
@ -322,52 +319,6 @@ impl<'s> Tokens<'s> {
Space(end.line - start.line) Space(end.line - start.line)
} }
fn read_function(&mut self, start: Pos) -> Token<'s> {
let (header, terminated) = self.read_function_part(Header);
self.eat();
if self.peek() != Some('[') {
return Function { header, body: None, terminated };
}
self.eat();
let body_start = self.pos() - start;
let (body, terminated) = self.read_function_part(Body);
let body_end = self.pos() - start;
let span = Span::new(body_start, body_end);
self.eat();
Function { header, body: Some(Spanned { v: body, span }), terminated }
}
fn read_function_part(&mut self, mode: TokenMode) -> (&'s str, bool) {
let start = self.index();
let mut terminated = false;
while let Some(n) = self.peek() {
if n == ']' {
terminated = true;
break;
}
self.eat();
match n {
'[' => { self.read_function(Pos::ZERO); }
'/' if self.peek() == Some('/') => { self.read_line_comment(); }
'/' if self.peek() == Some('*') => { self.read_block_comment(); }
'"' if mode == Header => { self.read_string(); }
'`' if mode == Body => { self.read_raw(); }
'\\' => { self.eat(); }
_ => {}
}
}
let end = self.index();
(&self.src[start..end], terminated)
}
fn read_string(&mut self) -> Token<'s> { fn read_string(&mut self) -> Token<'s> {
let (string, terminated) = self.read_until_unescaped('"'); let (string, terminated) = self.read_until_unescaped('"');
Str { string, terminated } Str { string, terminated }
@ -540,6 +491,7 @@ mod tests {
use Token::{ use Token::{
Space as S, Space as S,
LineComment as LC, BlockComment as BC, LineComment as LC, BlockComment as BC,
LeftBracket as L, RightBracket as R,
LeftParen as LP, RightParen as RP, LeftParen as LP, RightParen as RP,
LeftBrace as LB, RightBrace as RB, LeftBrace as LB, RightBrace as RB,
Ident as Id, Ident as Id,
@ -557,25 +509,12 @@ mod tests {
fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } } fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } }
fn Raw(raw: &str, terminated: bool) -> Token { Token::Raw { raw, terminated } } fn Raw(raw: &str, terminated: bool) -> Token { Token::Raw { raw, terminated } }
/// Shorthand for building an expected `Token::Function` in tests.
///
/// Usage: `F!(header, body, terminated)`. Passing the literal token `None`
/// as the body yields a function without a body; any other expression is
/// converted into a `Spanned<&str>` and wrapped in `Some`.
macro_rules! F {
    // Body-less function. This arm must come first so that a literal `None`
    // is not picked up by the general arm below.
    ($header:expr, None, $terminated:expr) => {
        Token::Function { header: $header, body: None, terminated: $terminated }
    };
    // Function with a body.
    ($header:expr, $body:expr, $terminated:expr) => {
        Token::Function {
            header: $header,
            body: Some(Into::<Spanned<&str>>::into($body)),
            terminated: $terminated,
        }
    };
}
macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} } macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} } macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
macro_rules! test { macro_rules! test {
(@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => { (@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
let exp = vec![$(Into::<Spanned<Token>>::into($token)),*]; let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
let found = Tokens::new($src, Pos::ZERO, $mode).collect::<Vec<_>>(); let found = Tokens::new($src, $mode).collect::<Vec<_>>();
check($src, exp, found, $spans); check($src, exp, found, $spans);
} }
} }
@ -616,7 +555,7 @@ mod tests {
fn tokenize_body_only_tokens() { fn tokenize_body_only_tokens() {
t!(Body, "_*" => Underscore, Star); t!(Body, "_*" => Underscore, Star);
t!(Body, "***" => Star, Star, Star); t!(Body, "***" => Star, Star, Star);
t!(Body, "[func]*bold*" => F!("func", None, true), Star, T("bold"), Star); t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there")); t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
t!(Body, "`raw`" => Raw("raw", true)); t!(Body, "`raw`" => Raw("raw", true));
t!(Body, "`[func]`" => Raw("[func]", true)); t!(Body, "`[func]`" => Raw("[func]", true));
@ -674,50 +613,6 @@ mod tests {
t!(Header, "\"🌎\"" => Str("🌎", true)); t!(Header, "\"🌎\"" => Str("🌎", true));
} }
/// Checks that bracketed functions are emitted as single `Function` tokens
/// carrying the header text and, where present, the body text.
#[test]
fn tokenize_functions() {
// A function directly next to plain text, without a body.
t!(Body, "a[f]" => T("a"), F!("f", None, true));
t!(Body, "[f]a" => F!("f", None, true), T("a"));
// A bracket pair directly after the header is taken as the body.
t!(Body, "\n\n[f][ ]" => S(2), F!("f", " ", true));
t!(Body, "abc [f][ ]a" => T("abc"), S(0), F!("f", " ", true), T("a"));
// Nested brackets inside the header stay part of the header text.
t!(Body, "[f: [=][*]]" => F!("f: [=][*]", None, true));
// Nested brackets inside the body stay part of the body text.
t!(Body, "[_][[,],]," => F!("_", "[,],", true), T(","));
// Only one body per function: the third bracket pair starts a new function.
t!(Body, "[=][=][=]" => F!("=", "=", true), F!("=", None, true));
t!(Body, "[=][[=][=][=]]" => F!("=", "[=][=][=]", true));
// In header mode a lone `[` gives an unterminated function with an empty
// header, and a lone `]` is reported as invalid.
t!(Header, "[" => F!("", None, false));
t!(Header, "]" => Invalid("]"));
}
/// Checks where a function's header or body ends when the closing bracket
/// appears inside strings, raw text, comments or escape sequences.
#[test]
fn tokenize_correct_end_of_function() {
// Headers: a string hides a `]` inside it (an unclosed string leaves the
// function unterminated), while a backtick has no special meaning.
t!(Body, r#"[f: "]"# => F!(r#"f: "]"#, None, false));
t!(Body, "[f: \"s\"]" => F!("f: \"s\"", None, true));
t!(Body, r#"[f: \"\"\"]"# => F!(r#"f: \"\"\""#, None, true));
t!(Body, "[f: `]" => F!("f: `", None, true));
// Bodies: a double quote has no special meaning, while backtick-delimited
// raw text hides a `]` inside it unless the backtick is escaped.
t!(Body, "[f][\"]" => F!("f", s(0,4, 0,5, "\""), true));
t!(Body, r#"[f][\"]"# => F!("f", s(0,4, 0,6, r#"\""#), true));
t!(Body, "[f][`]" => F!("f", s(0,4, 0,6, "`]"), false));
t!(Body, "[f][\\`]" => F!("f", s(0,4, 0,6, "\\`"), true));
t!(Body, "[f][`raw`]" => F!("f", s(0,4, 0,9, "`raw`"), true));
t!(Body, "[f][`raw]" => F!("f", s(0,4, 0,9, "`raw]"), false));
t!(Body, "[f][`raw]`]" => F!("f", s(0,4, 0,10, "`raw]`"), true));
t!(Body, "[f][`\\`]" => F!("f", s(0,4, 0,8, "`\\`]"), false));
t!(Body, "[f][`\\\\`]" => F!("f", s(0,4, 0,8, "`\\\\`"), true));
// Comments hide a `]` inside them; a line comment ends at the newline, and a
// `//` inside a header string does not start a comment.
t!(Body, "[f][/*]" => F!("f", s(0,4, 0,7, "/*]"), false));
t!(Body, "[f][/*`*/]" => F!("f", s(0,4, 0,9, "/*`*/"), true));
t!(Body, "[f: //]\n]" => F!("f: //]\n", None, true));
t!(Body, "[f: \"//]\n]" => F!("f: \"//]\n]", None, false));
// Backslash-escaped brackets neither close nor open a function.
t!(Body, "[f][\\]]" => F!("f", s(0,4, 0,6, "\\]"), true));
t!(Body, "[f][\\[]" => F!("f", s(0,4, 0,6, "\\["), true));
}
#[test] #[test]
fn tokenize_escaped_symbols() { fn tokenize_escaped_symbols() {
t!(Body, r"\\" => T(r"\")); t!(Body, r"\\" => T(r"\"));
@ -746,7 +641,6 @@ mod tests {
fn tokenize_with_spans() { fn tokenize_with_spans() {
ts!(Body, "hello" => s(0,0, 0,5, T("hello"))); ts!(Body, "hello" => s(0,0, 0,5, T("hello")));
ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c"))); ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c")));
ts!(Body, "[x = \"(1)\"]*" => s(0,0, 0,11, F!("x = \"(1)\"", None, true)), s(0,11, 0,12, Star));
ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f"))); ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f")));
ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore)); ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore));
ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0))); ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0)));