mirror of
https://github.com/typst/typst
synced 2025-05-14 17:15:28 +08:00
Parse braced expressions and bracketed calls in headers 🗳
- Refactors the tokenizer to be lazy: It does not emit pre-parsed function tokens, but instead allows its mode to be changed. The modes are tracked on a stack to allow nested compute/typesetting (pop/push). - Introduces delimited groups into the parser, which make it easy to parse delimited expressions without handling the delimiters in the parsing code for the group's content. A group is started with `start_group`. When reaching the group's end (matching delimiter), the eat and peek methods will simply return `None` instead of the delimiter, stopping the content parser and bubbling up the call stack until `end_group` is called to clear up the situation.
This commit is contained in:
parent
8a80503188
commit
3cbca56a71
@ -1,18 +1,17 @@
|
|||||||
use criterion::{criterion_group, criterion_main, Criterion};
|
use criterion::{criterion_group, criterion_main, Criterion};
|
||||||
use typstc::syntax::parsing::parse;
|
use typstc::syntax::parsing::parse;
|
||||||
use typstc::syntax::span::Pos;
|
|
||||||
|
|
||||||
// 28 not too dense lines.
|
// 28 not too dense lines.
|
||||||
const COMA: &str = include_str!("../tests/coma.typ");
|
const COMA: &str = include_str!("../tests/coma.typ");
|
||||||
|
|
||||||
fn parsing_benchmark(c: &mut Criterion) {
|
fn parsing_benchmark(c: &mut Criterion) {
|
||||||
c.bench_function("parse-coma-28-lines", |b| {
|
c.bench_function("parse-coma-28-lines", |b| {
|
||||||
b.iter(|| parse(COMA, Pos::ZERO))
|
b.iter(|| parse(COMA))
|
||||||
});
|
});
|
||||||
|
|
||||||
let long = COMA.repeat(100);
|
let long = COMA.repeat(100);
|
||||||
c.bench_function("parse-coma-2800-lines", |b| {
|
c.bench_function("parse-coma-2800-lines", |b| {
|
||||||
b.iter(|| parse(&long, Pos::ZERO))
|
b.iter(|| parse(&long))
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -270,7 +270,7 @@ impl<V> SpannedEntry<V> {
|
|||||||
|
|
||||||
/// Create an entry with the same span for key and value.
|
/// Create an entry with the same span for key and value.
|
||||||
pub fn val(val: Spanned<V>) -> Self {
|
pub fn val(val: Spanned<V>) -> Self {
|
||||||
Self { key: Span::ZERO, val }
|
Self { key: val.span, val }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Convert from `&SpannedEntry<T>` to `SpannedEntry<&T>`
|
/// Convert from `&SpannedEntry<T>` to `SpannedEntry<&T>`
|
||||||
|
@ -123,7 +123,7 @@ impl<'a> TreeLayouter<'a> {
|
|||||||
..self.ctx
|
..self.ctx
|
||||||
}).await;
|
}).await;
|
||||||
|
|
||||||
self.feedback.extend_offset(pass.feedback, call.span.start);
|
self.feedback.extend(pass.feedback);
|
||||||
|
|
||||||
if let Value::Commands(commands) = pass.output {
|
if let Value::Commands(commands) = pass.output {
|
||||||
for command in commands {
|
for command in commands {
|
||||||
|
@ -87,7 +87,7 @@ impl Typesetter {
|
|||||||
|
|
||||||
/// Parse source code into a syntax tree.
|
/// Parse source code into a syntax tree.
|
||||||
pub fn parse(&self, src: &str) -> Pass<SyntaxTree> {
|
pub fn parse(&self, src: &str) -> Pass<SyntaxTree> {
|
||||||
parse(src, Pos::ZERO)
|
parse(src)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Layout a syntax tree and return the produced layout.
|
/// Layout a syntax tree and return the produced layout.
|
||||||
|
@ -12,23 +12,45 @@ use super::tree::{CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr};
|
|||||||
use super::Ident;
|
use super::Ident;
|
||||||
|
|
||||||
/// Parse a string of source code.
|
/// Parse a string of source code.
|
||||||
///
|
pub fn parse(src: &str) -> Pass<SyntaxTree> {
|
||||||
/// All spans in the resulting tree and feedback are offset by the given
|
Parser::new(src).parse()
|
||||||
/// `offset` position. This is used to make spans of a function body relative to
|
}
|
||||||
/// the start of the function as a whole as opposed to the start of the
|
|
||||||
/// function's body.
|
struct Parser<'s> {
|
||||||
pub fn parse(src: &str, offset: Pos) -> Pass<SyntaxTree> {
|
tokens: Tokens<'s>,
|
||||||
|
peeked: Option<Option<Spanned<Token<'s>>>>,
|
||||||
|
delimiters: Vec<(Pos, Token<'static>)>,
|
||||||
|
feedback: Feedback,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> Parser<'s> {
|
||||||
|
fn new(src: &'s str) -> Self {
|
||||||
|
Self {
|
||||||
|
tokens: Tokens::new(src, TokenMode::Body),
|
||||||
|
peeked: None,
|
||||||
|
delimiters: vec![],
|
||||||
|
feedback: Feedback::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse(mut self) -> Pass<SyntaxTree> {
|
||||||
|
let tree = self.parse_body_contents();
|
||||||
|
Pass::new(tree, self.feedback)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Typesetting content.
|
||||||
|
impl Parser<'_> {
|
||||||
|
fn parse_body_contents(&mut self) -> SyntaxTree {
|
||||||
let mut tree = SyntaxTree::new();
|
let mut tree = SyntaxTree::new();
|
||||||
let mut par = SyntaxTree::new();
|
let mut par = SyntaxTree::new();
|
||||||
let mut feedback = Feedback::new();
|
|
||||||
|
|
||||||
for token in Tokens::new(src, offset, TokenMode::Body) {
|
while let Some(token) = self.peek() {
|
||||||
let span = token.span;
|
par.push(match token.v {
|
||||||
let node = match token.v {
|
|
||||||
// Starting from two newlines counts as a paragraph break, a single
|
// Starting from two newlines counts as a paragraph break, a single
|
||||||
// newline does not.
|
// newline does not.
|
||||||
Token::Space(newlines) => if newlines < 2 {
|
Token::Space(newlines) => if newlines < 2 {
|
||||||
SyntaxNode::Spacing
|
self.with_span(SyntaxNode::Spacing)
|
||||||
} else {
|
} else {
|
||||||
// End the current paragraph if it is not empty.
|
// End the current paragraph if it is not empty.
|
||||||
if let (Some(first), Some(last)) = (par.first(), par.last()) {
|
if let (Some(first), Some(last)) = (par.first(), par.last()) {
|
||||||
@ -36,37 +58,45 @@ pub fn parse(src: &str, offset: Pos) -> Pass<SyntaxTree> {
|
|||||||
let node = SyntaxNode::Par(std::mem::take(&mut par));
|
let node = SyntaxNode::Par(std::mem::take(&mut par));
|
||||||
tree.push(Spanned::new(node, span));
|
tree.push(Spanned::new(node, span));
|
||||||
}
|
}
|
||||||
|
self.eat();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Token::LineComment(_) | Token::BlockComment(_) => {
|
||||||
Token::Function { header, body, terminated } => {
|
self.eat();
|
||||||
let parsed = FuncParser::new(header, body).parse();
|
continue
|
||||||
feedback.extend_offset(parsed.feedback, span.start);
|
|
||||||
if !terminated {
|
|
||||||
error!(@feedback, Span::at(span.end), "expected closing bracket");
|
|
||||||
}
|
|
||||||
SyntaxNode::Call(parsed.output)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Token::Star => SyntaxNode::ToggleBolder,
|
Token::LeftBracket => {
|
||||||
Token::Underscore => SyntaxNode::ToggleItalic,
|
self.parse_bracket_call().map(|c| SyntaxNode::Call(c))
|
||||||
Token::Backslash => SyntaxNode::Linebreak,
|
}
|
||||||
|
|
||||||
|
Token::Star => self.with_span(SyntaxNode::ToggleBolder),
|
||||||
|
Token::Underscore => self.with_span(SyntaxNode::ToggleItalic),
|
||||||
|
Token::Backslash => self.with_span(SyntaxNode::Linebreak),
|
||||||
|
|
||||||
Token::Raw { raw, terminated } => {
|
Token::Raw { raw, terminated } => {
|
||||||
if !terminated {
|
if !terminated {
|
||||||
error!(@feedback, Span::at(span.end), "expected backtick");
|
error!(
|
||||||
|
@self.feedback, Span::at(token.span.end),
|
||||||
|
"expected backtick",
|
||||||
|
);
|
||||||
}
|
}
|
||||||
SyntaxNode::Raw(unescape_raw(raw))
|
self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
|
||||||
|
}
|
||||||
|
|
||||||
|
Token::Text(text) => {
|
||||||
|
self.with_span(SyntaxNode::Text(text.to_string()))
|
||||||
}
|
}
|
||||||
Token::Text(text) => SyntaxNode::Text(text.to_string()),
|
|
||||||
|
|
||||||
Token::LineComment(_) | Token::BlockComment(_) => continue,
|
|
||||||
unexpected => {
|
unexpected => {
|
||||||
error!(@feedback, span, "unexpected {}", unexpected.name());
|
self.eat();
|
||||||
|
error!(
|
||||||
|
@self.feedback, token.span,
|
||||||
|
"unexpected {}", unexpected.name(),
|
||||||
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
};
|
});
|
||||||
|
|
||||||
par.push(Spanned::new(node, span));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if let (Some(first), Some(last)) = (par.first(), par.last()) {
|
if let (Some(first), Some(last)) = (par.first(), par.last()) {
|
||||||
@ -75,31 +105,17 @@ pub fn parse(src: &str, offset: Pos) -> Pass<SyntaxTree> {
|
|||||||
tree.push(Spanned::new(node, span));
|
tree.push(Spanned::new(node, span));
|
||||||
}
|
}
|
||||||
|
|
||||||
Pass::new(tree, feedback)
|
tree
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct FuncParser<'s> {
|
// Function calls.
|
||||||
tokens: Tokens<'s>,
|
impl Parser<'_> {
|
||||||
peeked: Option<Option<Spanned<Token<'s>>>>,
|
fn parse_bracket_call(&mut self) -> Spanned<CallExpr> {
|
||||||
body: Option<Spanned<&'s str>>,
|
self.start_group(Delimiter::Bracket);
|
||||||
feedback: Feedback,
|
self.tokens.push_mode(TokenMode::Header);
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> FuncParser<'s> {
|
|
||||||
fn new(header: &'s str, body: Option<Spanned<&'s str>>) -> Self {
|
|
||||||
Self {
|
|
||||||
// Start at column 1 because the opening bracket is also part of
|
|
||||||
// the function, but not part of the `header` string.
|
|
||||||
tokens: Tokens::new(header, Pos::new(0, 1), TokenMode::Header),
|
|
||||||
peeked: None,
|
|
||||||
body,
|
|
||||||
feedback: Feedback::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse(mut self) -> Pass<CallExpr> {
|
|
||||||
let after_bracket = self.pos();
|
let after_bracket = self.pos();
|
||||||
|
|
||||||
self.skip_white();
|
self.skip_white();
|
||||||
let name = self.parse_ident().unwrap_or_else(|| {
|
let name = self.parse_ident().unwrap_or_else(|| {
|
||||||
self.expected_found_or_at("function name", after_bracket);
|
self.expected_found_or_at("function name", after_bracket);
|
||||||
@ -107,36 +123,105 @@ impl<'s> FuncParser<'s> {
|
|||||||
});
|
});
|
||||||
|
|
||||||
self.skip_white();
|
self.skip_white();
|
||||||
let mut args = match self.eat().map(Spanned::value) {
|
let mut args = match self.eatv() {
|
||||||
Some(Token::Colon) => self.parse_table(false).0.v,
|
Some(Token::Colon) => self.parse_table_contents().0,
|
||||||
Some(_) => {
|
Some(_) => {
|
||||||
self.expected_at("colon", name.span.end);
|
self.expected_at("colon", name.span.end);
|
||||||
|
while self.eat().is_some() {}
|
||||||
TableExpr::new()
|
TableExpr::new()
|
||||||
}
|
}
|
||||||
None => TableExpr::new(),
|
None => TableExpr::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(body) = self.body {
|
self.tokens.pop_mode();
|
||||||
args.push(SpannedEntry::val(body.map(|src| {
|
let mut span = self.end_group();
|
||||||
let parsed = parse(src, body.span.start);
|
|
||||||
self.feedback.extend(parsed.feedback);
|
if self.check(Token::LeftBracket) {
|
||||||
Expr::Tree(parsed.output)
|
self.start_group(Delimiter::Bracket);
|
||||||
})));
|
self.tokens.push_mode(TokenMode::Body);
|
||||||
|
|
||||||
|
let body = self.parse_body_contents();
|
||||||
|
|
||||||
|
self.tokens.pop_mode();
|
||||||
|
let body_span = self.end_group();
|
||||||
|
|
||||||
|
let expr = Expr::Tree(body);
|
||||||
|
args.push(SpannedEntry::val(Spanned::new(expr, body_span)));
|
||||||
|
span.expand(body_span);
|
||||||
}
|
}
|
||||||
|
|
||||||
Pass::new(CallExpr { name, args }, self.feedback)
|
Spanned::new(CallExpr { name, args }, span)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_paren_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
|
||||||
|
self.start_group(Delimiter::Paren);
|
||||||
|
let args = self.parse_table_contents().0;
|
||||||
|
let args_span = self.end_group();
|
||||||
|
let span = Span::merge(name.span, args_span);
|
||||||
|
Spanned::new(CallExpr { name, args }, span)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parsing expressions and values
|
// Tables.
|
||||||
impl FuncParser<'_> {
|
impl Parser<'_> {
|
||||||
fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
|
fn parse_table_contents(&mut self) -> (TableExpr, bool) {
|
||||||
self.peek().and_then(|token| match token.v {
|
let mut table = TableExpr::new();
|
||||||
Token::Ident(id) => self.eat_span(Ident(id.to_string())),
|
let mut comma_and_keyless = true;
|
||||||
_ => None,
|
|
||||||
})
|
while { self.skip_white(); !self.eof() } {
|
||||||
|
let (key, val) = if let Some(ident) = self.parse_ident() {
|
||||||
|
self.skip_white();
|
||||||
|
|
||||||
|
match self.peekv() {
|
||||||
|
Some(Token::Equals) => {
|
||||||
|
self.eat();
|
||||||
|
self.skip_white();
|
||||||
|
|
||||||
|
(Some(ident), try_opt_or!(self.parse_expr(), {
|
||||||
|
self.expected("value");
|
||||||
|
continue;
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Some(Token::LeftParen) => {
|
||||||
|
let call = self.parse_paren_call(ident);
|
||||||
|
(None, call.map(|c| Expr::Call(c)))
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => (None, ident.map(|id| Expr::Ident(id)))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
(None, try_opt_or!(self.parse_expr(), {
|
||||||
|
self.expected("value");
|
||||||
|
continue;
|
||||||
|
}))
|
||||||
|
};
|
||||||
|
|
||||||
|
let behind = val.span.end;
|
||||||
|
if let Some(key) = key {
|
||||||
|
comma_and_keyless = false;
|
||||||
|
table.insert(key.v.0, SpannedEntry::new(key.span, val));
|
||||||
|
self.feedback.decorations
|
||||||
|
.push(Spanned::new(Decoration::TableKey, key.span));
|
||||||
|
} else {
|
||||||
|
table.push(SpannedEntry::val(val));
|
||||||
|
}
|
||||||
|
|
||||||
|
if { self.skip_white(); self.eof() } {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.expect_at(Token::Comma, behind);
|
||||||
|
comma_and_keyless = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let coercable = comma_and_keyless && !table.is_empty();
|
||||||
|
(table, coercable)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expressions and values.
|
||||||
|
impl Parser<'_> {
|
||||||
fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
|
fn parse_expr(&mut self) -> Option<Spanned<Expr>> {
|
||||||
self.parse_binops("summand", Self::parse_term, |token| match token {
|
self.parse_binops("summand", Self::parse_term, |token| match token {
|
||||||
Token::Plus => Some(Expr::Add),
|
Token::Plus => Some(Expr::Add),
|
||||||
@ -206,37 +291,37 @@ impl FuncParser<'_> {
|
|||||||
|
|
||||||
fn parse_value(&mut self) -> Option<Spanned<Expr>> {
|
fn parse_value(&mut self) -> Option<Spanned<Expr>> {
|
||||||
let Spanned { v: token, span } = self.peek()?;
|
let Spanned { v: token, span } = self.peek()?;
|
||||||
match token {
|
Some(match token {
|
||||||
// This could be a function call or an identifier.
|
// This could be a function call or an identifier.
|
||||||
Token::Ident(id) => {
|
Token::Ident(id) => {
|
||||||
let name = Spanned::new(Ident(id.to_string()), span);
|
let name = Spanned::new(Ident(id.to_string()), span);
|
||||||
self.eat();
|
self.eat();
|
||||||
self.skip_white();
|
self.skip_white();
|
||||||
Some(if self.check(Token::LeftParen) {
|
if self.check(Token::LeftParen) {
|
||||||
self.parse_func_call(name).map(|call| Expr::Call(call))
|
self.parse_paren_call(name).map(|call| Expr::Call(call))
|
||||||
} else {
|
} else {
|
||||||
name.map(|id| Expr::Ident(id))
|
name.map(|id| Expr::Ident(id))
|
||||||
})
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Token::Str { string, terminated } => {
|
Token::Str { string, terminated } => {
|
||||||
if !terminated {
|
if !terminated {
|
||||||
self.expected_at("quote", span.end);
|
self.expected_at("quote", span.end);
|
||||||
}
|
}
|
||||||
self.eat_span(Expr::Str(unescape_string(string)))
|
self.with_span(Expr::Str(unescape_string(string)))
|
||||||
}
|
}
|
||||||
|
|
||||||
Token::Bool(b) => self.eat_span(Expr::Bool(b)),
|
Token::Bool(b) => self.with_span(Expr::Bool(b)),
|
||||||
Token::Number(n) => self.eat_span(Expr::Number(n)),
|
Token::Number(n) => self.with_span(Expr::Number(n)),
|
||||||
Token::Length(s) => self.eat_span(Expr::Length(s)),
|
Token::Length(s) => self.with_span(Expr::Length(s)),
|
||||||
Token::Hex(s) => {
|
Token::Hex(s) => {
|
||||||
if let Ok(color) = RgbaColor::from_str(s) {
|
if let Ok(color) = RgbaColor::from_str(s) {
|
||||||
self.eat_span(Expr::Color(color))
|
self.with_span(Expr::Color(color))
|
||||||
} else {
|
} else {
|
||||||
// Heal color by assuming black.
|
// Heal color by assuming black.
|
||||||
error!(@self.feedback, span, "invalid color");
|
error!(@self.feedback, span, "invalid color");
|
||||||
let healed = RgbaColor::new_healed(0, 0, 0, 255);
|
let healed = RgbaColor::new_healed(0, 0, 0, 255);
|
||||||
self.eat_span(Expr::Color(healed))
|
self.with_span(Expr::Color(healed))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -244,128 +329,54 @@ impl FuncParser<'_> {
|
|||||||
// a table in any case and coerce the table into a value if it is
|
// a table in any case and coerce the table into a value if it is
|
||||||
// coercable (length 1 and no trailing comma).
|
// coercable (length 1 and no trailing comma).
|
||||||
Token::LeftParen => {
|
Token::LeftParen => {
|
||||||
let (table, coercable) = self.parse_table(true);
|
self.start_group(Delimiter::Paren);
|
||||||
Some(if coercable {
|
let (table, coercable) = self.parse_table_contents();
|
||||||
table.map(|v| {
|
let span = self.end_group();
|
||||||
v.into_values()
|
|
||||||
|
let expr = if coercable {
|
||||||
|
table.into_values()
|
||||||
.next()
|
.next()
|
||||||
.expect("table is coercable").val.v
|
.expect("table is coercable").val.v
|
||||||
})
|
|
||||||
} else {
|
} else {
|
||||||
table.map(|tab| Expr::Table(tab))
|
Expr::Table(table)
|
||||||
|
};
|
||||||
|
|
||||||
|
Spanned::new(expr, span)
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is a content expression.
|
||||||
|
Token::LeftBrace => {
|
||||||
|
self.start_group(Delimiter::Brace);
|
||||||
|
self.tokens.push_mode(TokenMode::Body);
|
||||||
|
|
||||||
|
let tree = self.parse_body_contents();
|
||||||
|
|
||||||
|
self.tokens.pop_mode();
|
||||||
|
let span = self.end_group();
|
||||||
|
Spanned::new(Expr::Tree(tree), span)
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is a bracketed function call.
|
||||||
|
Token::LeftBracket => {
|
||||||
|
let call = self.parse_bracket_call();
|
||||||
|
let tree = vec![call.map(|c| SyntaxNode::Call(c))];
|
||||||
|
Spanned::new(Expr::Tree(tree), span)
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => return None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_ident(&mut self) -> Option<Spanned<Ident>> {
|
||||||
|
self.peek().and_then(|token| match token.v {
|
||||||
|
Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
})
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_func_call(&mut self, name: Spanned<Ident>) -> Spanned<CallExpr> {
|
|
||||||
let args = self.parse_table(true).0;
|
|
||||||
let span = Span::merge(name.span, args.span);
|
|
||||||
Spanned::new(CallExpr { name, args: args.v }, span)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set `parens` to true, when this should expect an opening paren and stop
|
|
||||||
/// at the balanced closing paren (this is the case for normal tables and
|
|
||||||
/// round-paren function calls). Set it to false, when this is used to parse
|
|
||||||
/// the top-level function arguments.
|
|
||||||
///
|
|
||||||
/// The returned boolean tells you whether the table can be coerced into an
|
|
||||||
/// expression (this is the case when it's length 1 and has no trailing
|
|
||||||
/// comma).
|
|
||||||
fn parse_table(&mut self, parens: bool) -> (Spanned<TableExpr>, bool) {
|
|
||||||
let start = self.pos();
|
|
||||||
if parens {
|
|
||||||
self.assert(Token::LeftParen);
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut table = TableExpr::new();
|
|
||||||
let mut coercable = true;
|
|
||||||
|
|
||||||
loop {
|
|
||||||
self.skip_white();
|
|
||||||
if self.eof() || (parens && self.check(Token::RightParen)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
let behind_arg;
|
|
||||||
|
|
||||||
if let Some(ident) = self.parse_ident() {
|
|
||||||
// This could be a keyword argument, a function call or a simple
|
|
||||||
// identifier.
|
|
||||||
self.skip_white();
|
|
||||||
|
|
||||||
if self.check_eat(Token::Equals).is_some() {
|
|
||||||
self.skip_white();
|
|
||||||
|
|
||||||
let key = ident;
|
|
||||||
self.feedback.decorations
|
|
||||||
.push(Spanned::new(Decoration::TableKey, key.span));
|
|
||||||
|
|
||||||
let val = try_opt_or!(self.parse_expr(), {
|
|
||||||
self.expected("value");
|
|
||||||
continue;
|
|
||||||
});
|
|
||||||
|
|
||||||
coercable = false;
|
|
||||||
behind_arg = val.span.end;
|
|
||||||
table.insert(key.v.0, SpannedEntry::new(key.span, val));
|
|
||||||
|
|
||||||
} else if self.check(Token::LeftParen) {
|
|
||||||
let call = self.parse_func_call(ident);
|
|
||||||
let expr = call.map(|call| Expr::Call(call));
|
|
||||||
|
|
||||||
behind_arg = expr.span.end;
|
|
||||||
table.push(SpannedEntry::val(expr));
|
|
||||||
} else {
|
|
||||||
let expr = ident.map(|id| Expr::Ident(id));
|
|
||||||
|
|
||||||
behind_arg = expr.span.end;
|
|
||||||
table.push(SpannedEntry::val(expr));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// It's a positional argument.
|
|
||||||
let expr = try_opt_or!(self.parse_expr(), {
|
|
||||||
self.expected("value");
|
|
||||||
continue;
|
|
||||||
});
|
|
||||||
behind_arg = expr.span.end;
|
|
||||||
table.push(SpannedEntry::val(expr));
|
|
||||||
}
|
|
||||||
|
|
||||||
self.skip_white();
|
|
||||||
if self.eof() || (parens && self.check(Token::RightParen)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.expect_at(Token::Comma, behind_arg);
|
|
||||||
coercable = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if parens {
|
|
||||||
self.expect(Token::RightParen);
|
|
||||||
}
|
|
||||||
|
|
||||||
coercable = coercable && !table.is_empty();
|
|
||||||
|
|
||||||
let end = self.pos();
|
|
||||||
(Spanned::new(table, Span::new(start, end)), coercable)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Error handling
|
// Error handling.
|
||||||
impl FuncParser<'_> {
|
impl Parser<'_> {
|
||||||
fn expect(&mut self, token: Token<'_>) -> bool {
|
|
||||||
if self.check(token) {
|
|
||||||
self.eat();
|
|
||||||
true
|
|
||||||
} else {
|
|
||||||
self.expected(token.name());
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
|
fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool {
|
||||||
if self.check(token) {
|
if self.check(token) {
|
||||||
self.eat();
|
self.eat();
|
||||||
@ -400,40 +411,58 @@ impl FuncParser<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parsing primitives
|
// Parsing primitives.
|
||||||
impl<'s> FuncParser<'s> {
|
impl<'s> Parser<'s> {
|
||||||
|
fn start_group(&mut self, delimiter: Delimiter) {
|
||||||
|
let start = self.pos();
|
||||||
|
self.assert(delimiter.start());
|
||||||
|
self.delimiters.push((start, delimiter.end()));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_group(&mut self) -> Span {
|
||||||
|
assert_eq!(self.peek(), None, "unfinished group");
|
||||||
|
let (start, end_token) = self.delimiters.pop()
|
||||||
|
.expect("group was not started");
|
||||||
|
|
||||||
|
match self.peeked.unwrap() {
|
||||||
|
Some(token) if token.v == end_token => {
|
||||||
|
self.peeked = None;
|
||||||
|
Span::new(start, token.span.end)
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let end = self.pos();
|
||||||
|
error!(
|
||||||
|
@self.feedback, Span::at(end),
|
||||||
|
"expected {}", end_token.name(),
|
||||||
|
);
|
||||||
|
Span::new(start, end)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn skip_white(&mut self) {
|
fn skip_white(&mut self) {
|
||||||
loop {
|
while matches!(
|
||||||
match self.peek().map(Spanned::value) {
|
self.peekv(),
|
||||||
Some(Token::Space(_))
|
Some(Token::Space(_)) |
|
||||||
| Some(Token::LineComment(_))
|
Some(Token::LineComment(_)) |
|
||||||
| Some(Token::BlockComment(_)) => { self.eat(); }
|
Some(Token::BlockComment(_))
|
||||||
_ => break,
|
) {
|
||||||
}
|
self.eat();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
|
fn eatv(&mut self) -> Option<Token<'s>> {
|
||||||
self.peeked.take().unwrap_or_else(|| self.tokens.next())
|
self.eat().map(Spanned::value)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eat_span<T>(&mut self, v: T) -> Option<Spanned<T>> {
|
fn peekv(&mut self) -> Option<Token<'s>> {
|
||||||
self.eat().map(|spanned| spanned.map(|_| v))
|
self.peek().map(Spanned::value)
|
||||||
}
|
|
||||||
|
|
||||||
fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
|
|
||||||
let tokens = &mut self.tokens;
|
|
||||||
*self.peeked.get_or_insert_with(|| tokens.next())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn assert(&mut self, token: Token<'_>) {
|
fn assert(&mut self, token: Token<'_>) {
|
||||||
assert!(self.check_eat(token).is_some());
|
assert!(self.check_eat(token).is_some());
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check(&mut self, token: Token<'_>) -> bool {
|
|
||||||
self.peek().map(Spanned::value) == Some(token)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
|
fn check_eat(&mut self, token: Token<'_>) -> Option<Spanned<Token<'s>>> {
|
||||||
if self.check(token) {
|
if self.check(token) {
|
||||||
self.eat()
|
self.eat()
|
||||||
@ -442,10 +471,39 @@ impl<'s> FuncParser<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn check(&mut self, token: Token<'_>) -> bool {
|
||||||
|
self.peekv() == Some(token)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn with_span<T>(&mut self, v: T) -> Spanned<T> {
|
||||||
|
let span = self.eat().expect("expected token").span;
|
||||||
|
Spanned::new(v, span)
|
||||||
|
}
|
||||||
|
|
||||||
fn eof(&mut self) -> bool {
|
fn eof(&mut self) -> bool {
|
||||||
self.peek().is_none()
|
self.peek().is_none()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
|
||||||
|
let token = self.peek()?;
|
||||||
|
self.peeked = None;
|
||||||
|
Some(token)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
|
||||||
|
let tokens = &mut self.tokens;
|
||||||
|
let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?;
|
||||||
|
|
||||||
|
// Check for unclosed groups.
|
||||||
|
if Delimiter::is_delimiter(token.v) {
|
||||||
|
if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(token)
|
||||||
|
}
|
||||||
|
|
||||||
fn pos(&self) -> Pos {
|
fn pos(&self) -> Pos {
|
||||||
self.peeked
|
self.peeked
|
||||||
.flatten()
|
.flatten()
|
||||||
@ -454,6 +512,38 @@ impl<'s> FuncParser<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||||
|
enum Delimiter {
|
||||||
|
Paren,
|
||||||
|
Bracket,
|
||||||
|
Brace,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Delimiter {
|
||||||
|
fn is_delimiter(token: Token<'_>) -> bool {
|
||||||
|
matches!(
|
||||||
|
token,
|
||||||
|
Token::RightParen | Token::RightBracket | Token::RightBrace
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start(self) -> Token<'static> {
|
||||||
|
match self {
|
||||||
|
Self::Paren => Token::LeftParen,
|
||||||
|
Self::Bracket => Token::LeftBracket,
|
||||||
|
Self::Brace => Token::LeftBrace,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end(self) -> Token<'static> {
|
||||||
|
match self {
|
||||||
|
Self::Paren => Token::RightParen,
|
||||||
|
Self::Bracket => Token::RightBracket,
|
||||||
|
Self::Brace => Token::RightBrace,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn unescape_string(string: &str) -> String {
|
fn unescape_string(string: &str) -> String {
|
||||||
let mut iter = string.chars();
|
let mut iter = string.chars();
|
||||||
let mut out = String::with_capacity(string.len());
|
let mut out = String::with_capacity(string.len());
|
||||||
@ -608,7 +698,7 @@ mod tests {
|
|||||||
macro_rules! test {
|
macro_rules! test {
|
||||||
(@spans=$spans:expr, $src:expr => $($tts:tt)*) => {
|
(@spans=$spans:expr, $src:expr => $($tts:tt)*) => {
|
||||||
let exp = Tree![@$($tts)*];
|
let exp = Tree![@$($tts)*];
|
||||||
let pass = parse($src, Pos::ZERO);
|
let pass = parse($src);
|
||||||
check($src, exp, pass.output, $spans);
|
check($src, exp, pass.output, $spans);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -624,7 +714,7 @@ mod tests {
|
|||||||
macro_rules! e {
|
macro_rules! e {
|
||||||
($src:expr => $($tts:tt)*) => {
|
($src:expr => $($tts:tt)*) => {
|
||||||
let exp = vec![$($tts)*];
|
let exp = vec![$($tts)*];
|
||||||
let pass = parse($src, Pos::ZERO);
|
let pass = parse($src);
|
||||||
let found = pass.feedback.diagnostics.iter()
|
let found = pass.feedback.diagnostics.iter()
|
||||||
.map(|s| s.as_ref().map(|e| e.message.as_str()))
|
.map(|s| s.as_ref().map(|e| e.message.as_str()))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
@ -636,7 +726,7 @@ mod tests {
|
|||||||
macro_rules! d {
|
macro_rules! d {
|
||||||
($src:expr => $($tts:tt)*) => {
|
($src:expr => $($tts:tt)*) => {
|
||||||
let exp = vec![$($tts)*];
|
let exp = vec![$($tts)*];
|
||||||
let pass = parse($src, Pos::ZERO);
|
let pass = parse($src);
|
||||||
check($src, exp, pass.feedback.decorations, true);
|
check($src, exp, pass.feedback.decorations, true);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -717,6 +807,15 @@ mod tests {
|
|||||||
e!("[val : 12, /* \n */ 14]" => );
|
e!("[val : 12, /* \n */ 14]" => );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_groups() {
|
||||||
|
e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"),
|
||||||
|
s(0,2, 0,2, "expected closing bracket"));
|
||||||
|
|
||||||
|
e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"),
|
||||||
|
s(0,5, 0,6, "unexpected closing brace"));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parse_function_names() {
|
fn test_parse_function_names() {
|
||||||
// No closing bracket.
|
// No closing bracket.
|
||||||
@ -760,19 +859,29 @@ mod tests {
|
|||||||
t!("[val: 1][*Hi*]" => P![F!("val"; Num(1.0), Tree![P![B, T("Hi"), B]])]);
|
t!("[val: 1][*Hi*]" => P![F!("val"; Num(1.0), Tree![P![B, T("Hi"), B]])]);
|
||||||
e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment"));
|
e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment"));
|
||||||
|
|
||||||
|
// Raw in body.
|
||||||
|
t!("[val][`Hi]`" => P![F!("val"; Tree![P![R!["Hi]"]]])]);
|
||||||
|
e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket"));
|
||||||
|
|
||||||
|
// Crazy.
|
||||||
|
t!("[v][[v][v][v]]" => P![F!("v"; Tree![P![
|
||||||
|
F!("v"; Tree![P![T("v")]]), F!("v")
|
||||||
|
]])]);
|
||||||
|
|
||||||
// Spanned.
|
// Spanned.
|
||||||
ts!(" [box][Oh my]" => s(0,0, 0,13, P![
|
ts!(" [box][Oh my]" => s(0,0, 0,13, P![
|
||||||
s(0,0, 0,1, S),
|
s(0,0, 0,1, S),
|
||||||
s(0,1, 0,13, F!(s(0,1, 0,4, "box");
|
s(0,1, 0,13, F!(s(0,2, 0,5, "box");
|
||||||
s(0,6, 0,11, Tree![s(0,6, 0,11, P![
|
s(0,6, 0,13, Tree![s(0,7, 0,12, P![
|
||||||
s(0,6, 0,8, T("Oh")), s(0,8, 0,9, S), s(0,9, 0,11, T("my"))
|
s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my"))
|
||||||
])])
|
])])
|
||||||
))
|
))
|
||||||
]));
|
]));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parse_simple_values() {
|
fn test_parse_values() {
|
||||||
|
// Simple.
|
||||||
v!("_" => Id("_"));
|
v!("_" => Id("_"));
|
||||||
v!("name" => Id("name"));
|
v!("name" => Id("name"));
|
||||||
v!("α" => Id("α"));
|
v!("α" => Id("α"));
|
||||||
@ -787,6 +896,12 @@ mod tests {
|
|||||||
v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00)));
|
v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00)));
|
||||||
v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string"));
|
v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string"));
|
||||||
|
|
||||||
|
// Content.
|
||||||
|
v!("{_hi_}" => Tree![P![I, T("hi"), I]]);
|
||||||
|
e!("[val: {_hi_}]" => );
|
||||||
|
v!("[hi]" => Tree![F!["hi"]]);
|
||||||
|
e!("[val: [hi]]" => );
|
||||||
|
|
||||||
// Healed colors.
|
// Healed colors.
|
||||||
v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff)));
|
v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff)));
|
||||||
e!("[val: #12345]" => s(0,6, 0,12, "invalid color"));
|
e!("[val: #12345]" => s(0,6, 0,12, "invalid color"));
|
||||||
@ -925,7 +1040,7 @@ mod tests {
|
|||||||
v!("(\x07 abc,)" => Table![Id("abc")]);
|
v!("(\x07 abc,)" => Table![Id("abc")]);
|
||||||
e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token"));
|
e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token"));
|
||||||
e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma"));
|
e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma"));
|
||||||
e!("[val: [hi]]" => s(0,6, 0,10, "expected value, found function"));
|
e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren"));
|
||||||
|
|
||||||
// Expected comma.
|
// Expected comma.
|
||||||
v!("(true false)" => Table![Bool(true), Bool(false)]);
|
v!("(true false)" => Table![Bool(true), Bool(false)]);
|
||||||
|
@ -22,27 +22,10 @@ pub enum Token<'s> {
|
|||||||
/// can contain nested block comments.
|
/// can contain nested block comments.
|
||||||
BlockComment(&'s str),
|
BlockComment(&'s str),
|
||||||
|
|
||||||
/// A function invocation.
|
/// A left bracket starting a function invocation or body: `[`.
|
||||||
Function {
|
LeftBracket,
|
||||||
/// The header string:
|
/// A right bracket ending a function invocation or body: `]`.
|
||||||
/// ```typst
|
RightBracket,
|
||||||
/// [header: args][body]
|
|
||||||
/// ^^^^^^^^^^^^
|
|
||||||
/// ```
|
|
||||||
header: &'s str,
|
|
||||||
/// The spanned body string:
|
|
||||||
/// ```typst
|
|
||||||
/// [header][hello *world*]
|
|
||||||
/// ^^^^^^^^^^^^^
|
|
||||||
/// ^-- The span is relative to right before this bracket
|
|
||||||
/// ```
|
|
||||||
body: Option<Spanned<&'s str>>,
|
|
||||||
/// Whether the last closing bracket was present.
|
|
||||||
/// - `[func]` or `[func][body]` => terminated
|
|
||||||
/// - `[func` or `[func][body` => not terminated
|
|
||||||
terminated: bool,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// A left parenthesis in a function header: `(`.
|
/// A left parenthesis in a function header: `(`.
|
||||||
LeftParen,
|
LeftParen,
|
||||||
/// A right parenthesis in a function header: `)`.
|
/// A right parenthesis in a function header: `)`.
|
||||||
@ -119,7 +102,8 @@ impl<'s> Token<'s> {
|
|||||||
Space(_) => "space",
|
Space(_) => "space",
|
||||||
LineComment(_) => "line comment",
|
LineComment(_) => "line comment",
|
||||||
BlockComment(_) => "block comment",
|
BlockComment(_) => "block comment",
|
||||||
Function { .. } => "function",
|
LeftBracket => "opening bracket",
|
||||||
|
RightBracket => "closing bracket",
|
||||||
LeftParen => "opening paren",
|
LeftParen => "opening paren",
|
||||||
RightParen => "closing paren",
|
RightParen => "closing paren",
|
||||||
LeftBrace => "opening brace",
|
LeftBrace => "opening brace",
|
||||||
@ -141,7 +125,6 @@ impl<'s> Token<'s> {
|
|||||||
Backslash => "backslash",
|
Backslash => "backslash",
|
||||||
Raw { .. } => "raw text",
|
Raw { .. } => "raw text",
|
||||||
Text(_) => "text",
|
Text(_) => "text",
|
||||||
Invalid("]") => "closing bracket",
|
|
||||||
Invalid("*/") => "end of block comment",
|
Invalid("*/") => "end of block comment",
|
||||||
Invalid(_) => "invalid token",
|
Invalid(_) => "invalid token",
|
||||||
}
|
}
|
||||||
@ -152,8 +135,9 @@ impl<'s> Token<'s> {
|
|||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Tokens<'s> {
|
pub struct Tokens<'s> {
|
||||||
src: &'s str,
|
src: &'s str,
|
||||||
mode: TokenMode,
|
|
||||||
iter: Peekable<Chars<'s>>,
|
iter: Peekable<Chars<'s>>,
|
||||||
|
mode: TokenMode,
|
||||||
|
stack: Vec<TokenMode>,
|
||||||
pos: Pos,
|
pos: Pos,
|
||||||
index: usize,
|
index: usize,
|
||||||
}
|
}
|
||||||
@ -172,16 +156,29 @@ impl<'s> Tokens<'s> {
|
|||||||
///
|
///
|
||||||
/// The first token's span starts at the given `offset` position instead of
|
/// The first token's span starts at the given `offset` position instead of
|
||||||
/// the zero position.
|
/// the zero position.
|
||||||
pub fn new(src: &'s str, offset: Pos, mode: TokenMode) -> Self {
|
pub fn new(src: &'s str, mode: TokenMode) -> Self {
|
||||||
Self {
|
Self {
|
||||||
src,
|
src,
|
||||||
mode,
|
|
||||||
iter: src.chars().peekable(),
|
iter: src.chars().peekable(),
|
||||||
pos: offset,
|
mode,
|
||||||
|
stack: vec![],
|
||||||
|
pos: Pos::ZERO,
|
||||||
index: 0,
|
index: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Change the token mode and push the old one on a stack.
|
||||||
|
pub fn push_mode(&mut self, mode: TokenMode) {
|
||||||
|
self.stack.push(self.mode);
|
||||||
|
self.mode = mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pop the old token mode from the stack. This panics if there is no mode
|
||||||
|
/// on the stack.
|
||||||
|
pub fn pop_mode(&mut self) {
|
||||||
|
self.mode = self.stack.pop().expect("no pushed mode");
|
||||||
|
}
|
||||||
|
|
||||||
/// The index in the string at which the last token ends and next token will
|
/// The index in the string at which the last token ends and next token will
|
||||||
/// start.
|
/// start.
|
||||||
pub fn index(&self) -> usize {
|
pub fn index(&self) -> usize {
|
||||||
@ -212,15 +209,15 @@ impl<'s> Iterator for Tokens<'s> {
|
|||||||
// Whitespace.
|
// Whitespace.
|
||||||
c if c.is_whitespace() => self.read_whitespace(start),
|
c if c.is_whitespace() => self.read_whitespace(start),
|
||||||
|
|
||||||
// Functions.
|
// Functions and blocks.
|
||||||
'[' => self.read_function(start),
|
'[' => LeftBracket,
|
||||||
']' => Invalid("]"),
|
']' => RightBracket,
|
||||||
|
'{' => LeftBrace,
|
||||||
|
'}' => RightBrace,
|
||||||
|
|
||||||
// Syntactic elements in function headers.
|
// Syntactic elements in function headers.
|
||||||
'(' if self.mode == Header => LeftParen,
|
'(' if self.mode == Header => LeftParen,
|
||||||
')' if self.mode == Header => RightParen,
|
')' if self.mode == Header => RightParen,
|
||||||
'{' if self.mode == Header => LeftBrace,
|
|
||||||
'}' if self.mode == Header => RightBrace,
|
|
||||||
':' if self.mode == Header => Colon,
|
':' if self.mode == Header => Colon,
|
||||||
',' if self.mode == Header => Comma,
|
',' if self.mode == Header => Comma,
|
||||||
'=' if self.mode == Header => Equals,
|
'=' if self.mode == Header => Equals,
|
||||||
@ -322,52 +319,6 @@ impl<'s> Tokens<'s> {
|
|||||||
Space(end.line - start.line)
|
Space(end.line - start.line)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_function(&mut self, start: Pos) -> Token<'s> {
|
|
||||||
let (header, terminated) = self.read_function_part(Header);
|
|
||||||
self.eat();
|
|
||||||
|
|
||||||
if self.peek() != Some('[') {
|
|
||||||
return Function { header, body: None, terminated };
|
|
||||||
}
|
|
||||||
|
|
||||||
self.eat();
|
|
||||||
|
|
||||||
let body_start = self.pos() - start;
|
|
||||||
let (body, terminated) = self.read_function_part(Body);
|
|
||||||
let body_end = self.pos() - start;
|
|
||||||
let span = Span::new(body_start, body_end);
|
|
||||||
|
|
||||||
self.eat();
|
|
||||||
|
|
||||||
Function { header, body: Some(Spanned { v: body, span }), terminated }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_function_part(&mut self, mode: TokenMode) -> (&'s str, bool) {
|
|
||||||
let start = self.index();
|
|
||||||
let mut terminated = false;
|
|
||||||
|
|
||||||
while let Some(n) = self.peek() {
|
|
||||||
if n == ']' {
|
|
||||||
terminated = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.eat();
|
|
||||||
match n {
|
|
||||||
'[' => { self.read_function(Pos::ZERO); }
|
|
||||||
'/' if self.peek() == Some('/') => { self.read_line_comment(); }
|
|
||||||
'/' if self.peek() == Some('*') => { self.read_block_comment(); }
|
|
||||||
'"' if mode == Header => { self.read_string(); }
|
|
||||||
'`' if mode == Body => { self.read_raw(); }
|
|
||||||
'\\' => { self.eat(); }
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let end = self.index();
|
|
||||||
(&self.src[start..end], terminated)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_string(&mut self) -> Token<'s> {
|
fn read_string(&mut self) -> Token<'s> {
|
||||||
let (string, terminated) = self.read_until_unescaped('"');
|
let (string, terminated) = self.read_until_unescaped('"');
|
||||||
Str { string, terminated }
|
Str { string, terminated }
|
||||||
@ -540,6 +491,7 @@ mod tests {
|
|||||||
use Token::{
|
use Token::{
|
||||||
Space as S,
|
Space as S,
|
||||||
LineComment as LC, BlockComment as BC,
|
LineComment as LC, BlockComment as BC,
|
||||||
|
LeftBracket as L, RightBracket as R,
|
||||||
LeftParen as LP, RightParen as RP,
|
LeftParen as LP, RightParen as RP,
|
||||||
LeftBrace as LB, RightBrace as RB,
|
LeftBrace as LB, RightBrace as RB,
|
||||||
Ident as Id,
|
Ident as Id,
|
||||||
@ -557,25 +509,12 @@ mod tests {
|
|||||||
fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } }
|
fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } }
|
||||||
fn Raw(raw: &str, terminated: bool) -> Token { Token::Raw { raw, terminated } }
|
fn Raw(raw: &str, terminated: bool) -> Token { Token::Raw { raw, terminated } }
|
||||||
|
|
||||||
macro_rules! F {
|
|
||||||
($h:expr, None, $t:expr) => {
|
|
||||||
Token::Function { header: $h, body: None, terminated: $t }
|
|
||||||
};
|
|
||||||
($h:expr, $b:expr, $t:expr) => {
|
|
||||||
Token::Function {
|
|
||||||
header: $h,
|
|
||||||
body: Some(Into::<Spanned<&str>>::into($b)),
|
|
||||||
terminated: $t,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
|
macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
|
||||||
macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
|
macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
|
||||||
macro_rules! test {
|
macro_rules! test {
|
||||||
(@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
|
(@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => {
|
||||||
let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
|
let exp = vec![$(Into::<Spanned<Token>>::into($token)),*];
|
||||||
let found = Tokens::new($src, Pos::ZERO, $mode).collect::<Vec<_>>();
|
let found = Tokens::new($src, $mode).collect::<Vec<_>>();
|
||||||
check($src, exp, found, $spans);
|
check($src, exp, found, $spans);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -616,7 +555,7 @@ mod tests {
|
|||||||
fn tokenize_body_only_tokens() {
|
fn tokenize_body_only_tokens() {
|
||||||
t!(Body, "_*" => Underscore, Star);
|
t!(Body, "_*" => Underscore, Star);
|
||||||
t!(Body, "***" => Star, Star, Star);
|
t!(Body, "***" => Star, Star, Star);
|
||||||
t!(Body, "[func]*bold*" => F!("func", None, true), Star, T("bold"), Star);
|
t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
|
||||||
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
|
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
|
||||||
t!(Body, "`raw`" => Raw("raw", true));
|
t!(Body, "`raw`" => Raw("raw", true));
|
||||||
t!(Body, "`[func]`" => Raw("[func]", true));
|
t!(Body, "`[func]`" => Raw("[func]", true));
|
||||||
@ -674,50 +613,6 @@ mod tests {
|
|||||||
t!(Header, "\"🌎\"" => Str("🌎", true));
|
t!(Header, "\"🌎\"" => Str("🌎", true));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn tokenize_functions() {
|
|
||||||
t!(Body, "a[f]" => T("a"), F!("f", None, true));
|
|
||||||
t!(Body, "[f]a" => F!("f", None, true), T("a"));
|
|
||||||
t!(Body, "\n\n[f][ ]" => S(2), F!("f", " ", true));
|
|
||||||
t!(Body, "abc [f][ ]a" => T("abc"), S(0), F!("f", " ", true), T("a"));
|
|
||||||
t!(Body, "[f: [=][*]]" => F!("f: [=][*]", None, true));
|
|
||||||
t!(Body, "[_][[,],]," => F!("_", "[,],", true), T(","));
|
|
||||||
t!(Body, "[=][=][=]" => F!("=", "=", true), F!("=", None, true));
|
|
||||||
t!(Body, "[=][[=][=][=]]" => F!("=", "[=][=][=]", true));
|
|
||||||
t!(Header, "[" => F!("", None, false));
|
|
||||||
t!(Header, "]" => Invalid("]"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn tokenize_correct_end_of_function() {
|
|
||||||
// End of function with strings and carets in headers
|
|
||||||
t!(Body, r#"[f: "]"# => F!(r#"f: "]"#, None, false));
|
|
||||||
t!(Body, "[f: \"s\"]" => F!("f: \"s\"", None, true));
|
|
||||||
t!(Body, r#"[f: \"\"\"]"# => F!(r#"f: \"\"\""#, None, true));
|
|
||||||
t!(Body, "[f: `]" => F!("f: `", None, true));
|
|
||||||
|
|
||||||
// End of function with strings and carets in bodies
|
|
||||||
t!(Body, "[f][\"]" => F!("f", s(0,4, 0,5, "\""), true));
|
|
||||||
t!(Body, r#"[f][\"]"# => F!("f", s(0,4, 0,6, r#"\""#), true));
|
|
||||||
t!(Body, "[f][`]" => F!("f", s(0,4, 0,6, "`]"), false));
|
|
||||||
t!(Body, "[f][\\`]" => F!("f", s(0,4, 0,6, "\\`"), true));
|
|
||||||
t!(Body, "[f][`raw`]" => F!("f", s(0,4, 0,9, "`raw`"), true));
|
|
||||||
t!(Body, "[f][`raw]" => F!("f", s(0,4, 0,9, "`raw]"), false));
|
|
||||||
t!(Body, "[f][`raw]`]" => F!("f", s(0,4, 0,10, "`raw]`"), true));
|
|
||||||
t!(Body, "[f][`\\`]" => F!("f", s(0,4, 0,8, "`\\`]"), false));
|
|
||||||
t!(Body, "[f][`\\\\`]" => F!("f", s(0,4, 0,8, "`\\\\`"), true));
|
|
||||||
|
|
||||||
// End of function with comments
|
|
||||||
t!(Body, "[f][/*]" => F!("f", s(0,4, 0,7, "/*]"), false));
|
|
||||||
t!(Body, "[f][/*`*/]" => F!("f", s(0,4, 0,9, "/*`*/"), true));
|
|
||||||
t!(Body, "[f: //]\n]" => F!("f: //]\n", None, true));
|
|
||||||
t!(Body, "[f: \"//]\n]" => F!("f: \"//]\n]", None, false));
|
|
||||||
|
|
||||||
// End of function with escaped brackets
|
|
||||||
t!(Body, "[f][\\]]" => F!("f", s(0,4, 0,6, "\\]"), true));
|
|
||||||
t!(Body, "[f][\\[]" => F!("f", s(0,4, 0,6, "\\["), true));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenize_escaped_symbols() {
|
fn tokenize_escaped_symbols() {
|
||||||
t!(Body, r"\\" => T(r"\"));
|
t!(Body, r"\\" => T(r"\"));
|
||||||
@ -746,7 +641,6 @@ mod tests {
|
|||||||
fn tokenize_with_spans() {
|
fn tokenize_with_spans() {
|
||||||
ts!(Body, "hello" => s(0,0, 0,5, T("hello")));
|
ts!(Body, "hello" => s(0,0, 0,5, T("hello")));
|
||||||
ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c")));
|
ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c")));
|
||||||
ts!(Body, "[x = \"(1)\"]*" => s(0,0, 0,11, F!("x = \"(1)\"", None, true)), s(0,11, 0,12, Star));
|
|
||||||
ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f")));
|
ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f")));
|
||||||
ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore));
|
ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore));
|
||||||
ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0)));
|
ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0)));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user