From 3cbca56a7195bb2a7996530d584300d697c11dc8 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Mon, 17 Aug 2020 16:25:09 +0200 Subject: [PATCH] =?UTF-8?q?Parse=20braced=20expressions=20and=20bracketed?= =?UTF-8?q?=20calls=20in=20headers=20=F0=9F=97=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Refactors the tokenizer to be lazy: It does not emit pre-parsed function tokens, but instead allows its mode to be changed. The modes are tracked on a stack to allow nested compute/typesetting (pop/push). - Introduces delimited groups into the parser, which make it easy to parse delimited expressions without handling the delimiters in the parsing code for the group's content. A group is started with `start_group`. When reaching the group's end (matching delimiter) the eat and peek methods will simply return `None` instead of the delimiter, stopping the content parser and bubbling up the call stack until `end_group` is called to clear up the situation. --- benches/bench_parsing.rs | 5 +- src/compute/table.rs | 2 +- src/layout/tree.rs | 2 +- src/lib.rs | 2 +- src/syntax/parsing.rs | 603 +++++++++++++++++++++++---------------- src/syntax/tokens.rs | 170 +++-------- 6 files changed, 396 insertions(+), 388 deletions(-) diff --git a/benches/bench_parsing.rs b/benches/bench_parsing.rs index a3a17a84e..4a8a7eb2d 100644 --- a/benches/bench_parsing.rs +++ b/benches/bench_parsing.rs @@ -1,18 +1,17 @@ use criterion::{criterion_group, criterion_main, Criterion}; use typstc::syntax::parsing::parse; -use typstc::syntax::span::Pos; // 28 not too dense lines. 
const COMA: &str = include_str!("../tests/coma.typ"); fn parsing_benchmark(c: &mut Criterion) { c.bench_function("parse-coma-28-lines", |b| { - b.iter(|| parse(COMA, Pos::ZERO)) + b.iter(|| parse(COMA)) }); let long = COMA.repeat(100); c.bench_function("parse-coma-2800-lines", |b| { - b.iter(|| parse(&long, Pos::ZERO)) + b.iter(|| parse(&long)) }); } diff --git a/src/compute/table.rs b/src/compute/table.rs index f11eacfce..75effd60a 100644 --- a/src/compute/table.rs +++ b/src/compute/table.rs @@ -270,7 +270,7 @@ impl SpannedEntry { /// Create an entry with the same span for key and value. pub fn val(val: Spanned) -> Self { - Self { key: Span::ZERO, val } + Self { key: val.span, val } } /// Convert from `&SpannedEntry` to `SpannedEntry<&T>` diff --git a/src/layout/tree.rs b/src/layout/tree.rs index 39e111bdf..092ba5828 100644 --- a/src/layout/tree.rs +++ b/src/layout/tree.rs @@ -123,7 +123,7 @@ impl<'a> TreeLayouter<'a> { ..self.ctx }).await; - self.feedback.extend_offset(pass.feedback, call.span.start); + self.feedback.extend(pass.feedback); if let Value::Commands(commands) = pass.output { for command in commands { diff --git a/src/lib.rs b/src/lib.rs index e30e41b2e..301960348 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -87,7 +87,7 @@ impl Typesetter { /// Parse source code into a syntax tree. pub fn parse(&self, src: &str) -> Pass { - parse(src, Pos::ZERO) + parse(src) } /// Layout a syntax tree and return the produced layout. diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs index 8ed778e15..8dd567d35 100644 --- a/src/syntax/parsing.rs +++ b/src/syntax/parsing.rs @@ -12,94 +12,110 @@ use super::tree::{CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr}; use super::Ident; /// Parse a string of source code. -/// -/// All spans in the resulting tree and feedback are offset by the given -/// `offset` position. 
This is used to make spans of a function body relative to -/// the start of the function as a whole as opposed to the start of the -/// function's body. -pub fn parse(src: &str, offset: Pos) -> Pass { - let mut tree = SyntaxTree::new(); - let mut par = SyntaxTree::new(); - let mut feedback = Feedback::new(); - - for token in Tokens::new(src, offset, TokenMode::Body) { - let span = token.span; - let node = match token.v { - // Starting from two newlines counts as a paragraph break, a single - // newline does not. - Token::Space(newlines) => if newlines < 2 { - SyntaxNode::Spacing - } else { - // End the current paragraph if it is not empty. - if let (Some(first), Some(last)) = (par.first(), par.last()) { - let span = Span::merge(first.span, last.span); - let node = SyntaxNode::Par(std::mem::take(&mut par)); - tree.push(Spanned::new(node, span)); - } - continue; - } - - Token::Function { header, body, terminated } => { - let parsed = FuncParser::new(header, body).parse(); - feedback.extend_offset(parsed.feedback, span.start); - if !terminated { - error!(@feedback, Span::at(span.end), "expected closing bracket"); - } - SyntaxNode::Call(parsed.output) - } - - Token::Star => SyntaxNode::ToggleBolder, - Token::Underscore => SyntaxNode::ToggleItalic, - Token::Backslash => SyntaxNode::Linebreak, - Token::Raw { raw, terminated } => { - if !terminated { - error!(@feedback, Span::at(span.end), "expected backtick"); - } - SyntaxNode::Raw(unescape_raw(raw)) - } - Token::Text(text) => SyntaxNode::Text(text.to_string()), - - Token::LineComment(_) | Token::BlockComment(_) => continue, - unexpected => { - error!(@feedback, span, "unexpected {}", unexpected.name()); - continue; - } - }; - - par.push(Spanned::new(node, span)); - } - - if let (Some(first), Some(last)) = (par.first(), par.last()) { - let span = Span::merge(first.span, last.span); - let node = SyntaxNode::Par(par); - tree.push(Spanned::new(node, span)); - } - - Pass::new(tree, feedback) +pub fn parse(src: &str) -> Pass 
{ + Parser::new(src).parse() } -struct FuncParser<'s> { +struct Parser<'s> { tokens: Tokens<'s>, peeked: Option>>>, - body: Option>, + delimiters: Vec<(Pos, Token<'static>)>, feedback: Feedback, } -impl<'s> FuncParser<'s> { - fn new(header: &'s str, body: Option>) -> Self { +impl<'s> Parser<'s> { + fn new(src: &'s str) -> Self { Self { - // Start at column 1 because the opening bracket is also part of - // the function, but not part of the `header` string. - tokens: Tokens::new(header, Pos::new(0, 1), TokenMode::Header), + tokens: Tokens::new(src, TokenMode::Body), peeked: None, - body, + delimiters: vec![], feedback: Feedback::new(), } } - fn parse(mut self) -> Pass { - let after_bracket = self.pos(); + fn parse(mut self) -> Pass { + let tree = self.parse_body_contents(); + Pass::new(tree, self.feedback) + } +} +// Typesetting content. +impl Parser<'_> { + fn parse_body_contents(&mut self) -> SyntaxTree { + let mut tree = SyntaxTree::new(); + let mut par = SyntaxTree::new(); + + while let Some(token) = self.peek() { + par.push(match token.v { + // Starting from two newlines counts as a paragraph break, a single + // newline does not. + Token::Space(newlines) => if newlines < 2 { + self.with_span(SyntaxNode::Spacing) + } else { + // End the current paragraph if it is not empty. 
+ if let (Some(first), Some(last)) = (par.first(), par.last()) { + let span = Span::merge(first.span, last.span); + let node = SyntaxNode::Par(std::mem::take(&mut par)); + tree.push(Spanned::new(node, span)); + } + self.eat(); + continue; + } + Token::LineComment(_) | Token::BlockComment(_) => { + self.eat(); + continue + } + + Token::LeftBracket => { + self.parse_bracket_call().map(|c| SyntaxNode::Call(c)) + } + + Token::Star => self.with_span(SyntaxNode::ToggleBolder), + Token::Underscore => self.with_span(SyntaxNode::ToggleItalic), + Token::Backslash => self.with_span(SyntaxNode::Linebreak), + + Token::Raw { raw, terminated } => { + if !terminated { + error!( + @self.feedback, Span::at(token.span.end), + "expected backtick", + ); + } + self.with_span(SyntaxNode::Raw(unescape_raw(raw))) + } + + Token::Text(text) => { + self.with_span(SyntaxNode::Text(text.to_string())) + } + + unexpected => { + self.eat(); + error!( + @self.feedback, token.span, + "unexpected {}", unexpected.name(), + ); + continue; + } + }); + } + + if let (Some(first), Some(last)) = (par.first(), par.last()) { + let span = Span::merge(first.span, last.span); + let node = SyntaxNode::Par(par); + tree.push(Spanned::new(node, span)); + } + + tree + } +} + +// Function calls. 
+impl Parser<'_> { + fn parse_bracket_call(&mut self) -> Spanned { + self.start_group(Delimiter::Bracket); + self.tokens.push_mode(TokenMode::Header); + + let after_bracket = self.pos(); self.skip_white(); let name = self.parse_ident().unwrap_or_else(|| { self.expected_found_or_at("function name", after_bracket); @@ -107,36 +123,105 @@ impl<'s> FuncParser<'s> { }); self.skip_white(); - let mut args = match self.eat().map(Spanned::value) { - Some(Token::Colon) => self.parse_table(false).0.v, + let mut args = match self.eatv() { + Some(Token::Colon) => self.parse_table_contents().0, Some(_) => { self.expected_at("colon", name.span.end); + while self.eat().is_some() {} TableExpr::new() } None => TableExpr::new(), }; - if let Some(body) = self.body { - args.push(SpannedEntry::val(body.map(|src| { - let parsed = parse(src, body.span.start); - self.feedback.extend(parsed.feedback); - Expr::Tree(parsed.output) - }))); + self.tokens.pop_mode(); + let mut span = self.end_group(); + + if self.check(Token::LeftBracket) { + self.start_group(Delimiter::Bracket); + self.tokens.push_mode(TokenMode::Body); + + let body = self.parse_body_contents(); + + self.tokens.pop_mode(); + let body_span = self.end_group(); + + let expr = Expr::Tree(body); + args.push(SpannedEntry::val(Spanned::new(expr, body_span))); + span.expand(body_span); } - Pass::new(CallExpr { name, args }, self.feedback) + Spanned::new(CallExpr { name, args }, span) + } + + fn parse_paren_call(&mut self, name: Spanned) -> Spanned { + self.start_group(Delimiter::Paren); + let args = self.parse_table_contents().0; + let args_span = self.end_group(); + let span = Span::merge(name.span, args_span); + Spanned::new(CallExpr { name, args }, span) } } -// Parsing expressions and values -impl FuncParser<'_> { - fn parse_ident(&mut self) -> Option> { - self.peek().and_then(|token| match token.v { - Token::Ident(id) => self.eat_span(Ident(id.to_string())), - _ => None, - }) - } +// Tables. 
+impl Parser<'_> { + fn parse_table_contents(&mut self) -> (TableExpr, bool) { + let mut table = TableExpr::new(); + let mut comma_and_keyless = true; + while { self.skip_white(); !self.eof() } { + let (key, val) = if let Some(ident) = self.parse_ident() { + self.skip_white(); + + match self.peekv() { + Some(Token::Equals) => { + self.eat(); + self.skip_white(); + + (Some(ident), try_opt_or!(self.parse_expr(), { + self.expected("value"); + continue; + })) + } + + Some(Token::LeftParen) => { + let call = self.parse_paren_call(ident); + (None, call.map(|c| Expr::Call(c))) + } + + _ => (None, ident.map(|id| Expr::Ident(id))) + } + } else { + (None, try_opt_or!(self.parse_expr(), { + self.expected("value"); + continue; + })) + }; + + let behind = val.span.end; + if let Some(key) = key { + comma_and_keyless = false; + table.insert(key.v.0, SpannedEntry::new(key.span, val)); + self.feedback.decorations + .push(Spanned::new(Decoration::TableKey, key.span)); + } else { + table.push(SpannedEntry::val(val)); + } + + if { self.skip_white(); self.eof() } { + break; + } + + self.expect_at(Token::Comma, behind); + comma_and_keyless = false; + } + + let coercable = comma_and_keyless && !table.is_empty(); + (table, coercable) + } +} + +// Expressions and values. +impl Parser<'_> { fn parse_expr(&mut self) -> Option> { self.parse_binops("summand", Self::parse_term, |token| match token { Token::Plus => Some(Expr::Add), @@ -206,37 +291,37 @@ impl FuncParser<'_> { fn parse_value(&mut self) -> Option> { let Spanned { v: token, span } = self.peek()?; - match token { + Some(match token { // This could be a function call or an identifier. 
Token::Ident(id) => { let name = Spanned::new(Ident(id.to_string()), span); self.eat(); self.skip_white(); - Some(if self.check(Token::LeftParen) { - self.parse_func_call(name).map(|call| Expr::Call(call)) + if self.check(Token::LeftParen) { + self.parse_paren_call(name).map(|call| Expr::Call(call)) } else { name.map(|id| Expr::Ident(id)) - }) + } } Token::Str { string, terminated } => { if !terminated { self.expected_at("quote", span.end); } - self.eat_span(Expr::Str(unescape_string(string))) + self.with_span(Expr::Str(unescape_string(string))) } - Token::Bool(b) => self.eat_span(Expr::Bool(b)), - Token::Number(n) => self.eat_span(Expr::Number(n)), - Token::Length(s) => self.eat_span(Expr::Length(s)), + Token::Bool(b) => self.with_span(Expr::Bool(b)), + Token::Number(n) => self.with_span(Expr::Number(n)), + Token::Length(s) => self.with_span(Expr::Length(s)), Token::Hex(s) => { if let Ok(color) = RgbaColor::from_str(s) { - self.eat_span(Expr::Color(color)) + self.with_span(Expr::Color(color)) } else { // Heal color by assuming black. error!(@self.feedback, span, "invalid color"); let healed = RgbaColor::new_healed(0, 0, 0, 255); - self.eat_span(Expr::Color(healed)) + self.with_span(Expr::Color(healed)) } } @@ -244,128 +329,54 @@ impl FuncParser<'_> { // a table in any case and coerce the table into a value if it is // coercable (length 1 and no trailing comma). Token::LeftParen => { - let (table, coercable) = self.parse_table(true); - Some(if coercable { - table.map(|v| { - v.into_values() - .next() - .expect("table is coercable").val.v - }) + self.start_group(Delimiter::Paren); + let (table, coercable) = self.parse_table_contents(); + let span = self.end_group(); + + let expr = if coercable { + table.into_values() + .next() + .expect("table is coercable").val.v } else { - table.map(|tab| Expr::Table(tab)) - }) + Expr::Table(table) + }; + + Spanned::new(expr, span) } + // This is a content expression. 
+ Token::LeftBrace => { + self.start_group(Delimiter::Brace); + self.tokens.push_mode(TokenMode::Body); + + let tree = self.parse_body_contents(); + + self.tokens.pop_mode(); + let span = self.end_group(); + Spanned::new(Expr::Tree(tree), span) + } + + // This is a bracketed function call. + Token::LeftBracket => { + let call = self.parse_bracket_call(); + let tree = vec![call.map(|c| SyntaxNode::Call(c))]; + Spanned::new(Expr::Tree(tree), span) + } + + _ => return None, + }) + } + + fn parse_ident(&mut self) -> Option> { + self.peek().and_then(|token| match token.v { + Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))), _ => None, - } - } - - fn parse_func_call(&mut self, name: Spanned) -> Spanned { - let args = self.parse_table(true).0; - let span = Span::merge(name.span, args.span); - Spanned::new(CallExpr { name, args: args.v }, span) - } - - /// Set `parens` to true, when this should expect an opening paren and stop - /// at the balanced closing paren (this is the case for normal tables and - /// round-paren function calls). Set it to false, when this is used to parse - /// the top-level function arguments. - /// - /// The returned boolean tells you whether the table can be coerced into an - /// expression (this is the case when it's length 1 and has no trailing - /// comma). - fn parse_table(&mut self, parens: bool) -> (Spanned, bool) { - let start = self.pos(); - if parens { - self.assert(Token::LeftParen); - } - - let mut table = TableExpr::new(); - let mut coercable = true; - - loop { - self.skip_white(); - if self.eof() || (parens && self.check(Token::RightParen)) { - break; - } - - let behind_arg; - - if let Some(ident) = self.parse_ident() { - // This could be a keyword argument, a function call or a simple - // identifier. 
- self.skip_white(); - - if self.check_eat(Token::Equals).is_some() { - self.skip_white(); - - let key = ident; - self.feedback.decorations - .push(Spanned::new(Decoration::TableKey, key.span)); - - let val = try_opt_or!(self.parse_expr(), { - self.expected("value"); - continue; - }); - - coercable = false; - behind_arg = val.span.end; - table.insert(key.v.0, SpannedEntry::new(key.span, val)); - - } else if self.check(Token::LeftParen) { - let call = self.parse_func_call(ident); - let expr = call.map(|call| Expr::Call(call)); - - behind_arg = expr.span.end; - table.push(SpannedEntry::val(expr)); - } else { - let expr = ident.map(|id| Expr::Ident(id)); - - behind_arg = expr.span.end; - table.push(SpannedEntry::val(expr)); - } - } else { - // It's a positional argument. - let expr = try_opt_or!(self.parse_expr(), { - self.expected("value"); - continue; - }); - behind_arg = expr.span.end; - table.push(SpannedEntry::val(expr)); - } - - self.skip_white(); - if self.eof() || (parens && self.check(Token::RightParen)) { - break; - } - - self.expect_at(Token::Comma, behind_arg); - coercable = false; - } - - if parens { - self.expect(Token::RightParen); - } - - coercable = coercable && !table.is_empty(); - - let end = self.pos(); - (Spanned::new(table, Span::new(start, end)), coercable) + }) } } -// Error handling -impl FuncParser<'_> { - fn expect(&mut self, token: Token<'_>) -> bool { - if self.check(token) { - self.eat(); - true - } else { - self.expected(token.name()); - false - } - } - +// Error handling. +impl Parser<'_> { fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool { if self.check(token) { self.eat(); @@ -400,40 +411,58 @@ impl FuncParser<'_> { } } -// Parsing primitives -impl<'s> FuncParser<'s> { - fn skip_white(&mut self) { - loop { - match self.peek().map(Spanned::value) { - Some(Token::Space(_)) - | Some(Token::LineComment(_)) - | Some(Token::BlockComment(_)) => { self.eat(); } - _ => break, +// Parsing primitives. 
+impl<'s> Parser<'s> { + fn start_group(&mut self, delimiter: Delimiter) { + let start = self.pos(); + self.assert(delimiter.start()); + self.delimiters.push((start, delimiter.end())); + } + + fn end_group(&mut self) -> Span { + assert_eq!(self.peek(), None, "unfinished group"); + let (start, end_token) = self.delimiters.pop() + .expect("group was not started"); + + match self.peeked.unwrap() { + Some(token) if token.v == end_token => { + self.peeked = None; + Span::new(start, token.span.end) + } + _ => { + let end = self.pos(); + error!( + @self.feedback, Span::at(end), + "expected {}", end_token.name(), + ); + Span::new(start, end) } } } - fn eat(&mut self) -> Option>> { - self.peeked.take().unwrap_or_else(|| self.tokens.next()) + fn skip_white(&mut self) { + while matches!( + self.peekv(), + Some(Token::Space(_)) | + Some(Token::LineComment(_)) | + Some(Token::BlockComment(_)) + ) { + self.eat(); + } } - fn eat_span(&mut self, v: T) -> Option> { - self.eat().map(|spanned| spanned.map(|_| v)) + fn eatv(&mut self) -> Option> { + self.eat().map(Spanned::value) } - fn peek(&mut self) -> Option>> { - let tokens = &mut self.tokens; - *self.peeked.get_or_insert_with(|| tokens.next()) + fn peekv(&mut self) -> Option> { + self.peek().map(Spanned::value) } fn assert(&mut self, token: Token<'_>) { assert!(self.check_eat(token).is_some()); } - fn check(&mut self, token: Token<'_>) -> bool { - self.peek().map(Spanned::value) == Some(token) - } - fn check_eat(&mut self, token: Token<'_>) -> Option>> { if self.check(token) { self.eat() @@ -442,10 +471,39 @@ impl<'s> FuncParser<'s> { } } + fn check(&mut self, token: Token<'_>) -> bool { + self.peekv() == Some(token) + } + + fn with_span(&mut self, v: T) -> Spanned { + let span = self.eat().expect("expected token").span; + Spanned::new(v, span) + } + fn eof(&mut self) -> bool { self.peek().is_none() } + fn eat(&mut self) -> Option>> { + let token = self.peek()?; + self.peeked = None; + Some(token) + } + + fn peek(&mut self) -> 
Option>> { + let tokens = &mut self.tokens; + let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?; + + // Check for unclosed groups. + if Delimiter::is_delimiter(token.v) { + if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) { + return None; + } + } + + Some(token) + } + fn pos(&self) -> Pos { self.peeked .flatten() @@ -454,6 +512,38 @@ impl<'s> FuncParser<'s> { } } +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum Delimiter { + Paren, + Bracket, + Brace, +} + +impl Delimiter { + fn is_delimiter(token: Token<'_>) -> bool { + matches!( + token, + Token::RightParen | Token::RightBracket | Token::RightBrace + ) + } + + fn start(self) -> Token<'static> { + match self { + Self::Paren => Token::LeftParen, + Self::Bracket => Token::LeftBracket, + Self::Brace => Token::LeftBrace, + } + } + + fn end(self) -> Token<'static> { + match self { + Self::Paren => Token::RightParen, + Self::Bracket => Token::RightBracket, + Self::Brace => Token::RightBrace, + } + } +} + fn unescape_string(string: &str) -> String { let mut iter = string.chars(); let mut out = String::with_capacity(string.len()); @@ -608,7 +698,7 @@ mod tests { macro_rules! test { (@spans=$spans:expr, $src:expr => $($tts:tt)*) => { let exp = Tree![@$($tts)*]; - let pass = parse($src, Pos::ZERO); + let pass = parse($src); check($src, exp, pass.output, $spans); }; } @@ -624,7 +714,7 @@ mod tests { macro_rules! e { ($src:expr => $($tts:tt)*) => { let exp = vec![$($tts)*]; - let pass = parse($src, Pos::ZERO); + let pass = parse($src); let found = pass.feedback.diagnostics.iter() .map(|s| s.as_ref().map(|e| e.message.as_str())) .collect::>(); @@ -636,7 +726,7 @@ mod tests { macro_rules! 
d { ($src:expr => $($tts:tt)*) => { let exp = vec![$($tts)*]; - let pass = parse($src, Pos::ZERO); + let pass = parse($src); check($src, exp, pass.feedback.decorations, true); }; } @@ -717,6 +807,15 @@ mod tests { e!("[val : 12, /* \n */ 14]" => ); } + #[test] + fn test_parse_groups() { + e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"), + s(0,2, 0,2, "expected closing bracket")); + + e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"), + s(0,5, 0,6, "unexpected closing brace")); + } + #[test] fn test_parse_function_names() { // No closing bracket. @@ -760,19 +859,29 @@ mod tests { t!("[val: 1][*Hi*]" => P![F!("val"; Num(1.0), Tree![P![B, T("Hi"), B]])]); e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment")); + // Raw in body. + t!("[val][`Hi]`" => P![F!("val"; Tree![P![R!["Hi]"]]])]); + e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket")); + + // Crazy. + t!("[v][[v][v][v]]" => P![F!("v"; Tree![P![ + F!("v"; Tree![P![T("v")]]), F!("v") + ]])]); + // Spanned. ts!(" [box][Oh my]" => s(0,0, 0,13, P![ s(0,0, 0,1, S), - s(0,1, 0,13, F!(s(0,1, 0,4, "box"); - s(0,6, 0,11, Tree![s(0,6, 0,11, P![ - s(0,6, 0,8, T("Oh")), s(0,8, 0,9, S), s(0,9, 0,11, T("my")) + s(0,1, 0,13, F!(s(0,2, 0,5, "box"); + s(0,6, 0,13, Tree![s(0,7, 0,12, P![ + s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my")) ])]) )) ])); } #[test] - fn test_parse_simple_values() { + fn test_parse_values() { + // Simple. v!("_" => Id("_")); v!("name" => Id("name")); v!("α" => Id("α")); @@ -787,6 +896,12 @@ mod tests { v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00))); v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string")); + // Content. + v!("{_hi_}" => Tree![P![I, T("hi"), I]]); + e!("[val: {_hi_}]" => ); + v!("[hi]" => Tree![F!["hi"]]); + e!("[val: [hi]]" => ); + // Healed colors. 
v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff))); e!("[val: #12345]" => s(0,6, 0,12, "invalid color")); @@ -925,7 +1040,7 @@ mod tests { v!("(\x07 abc,)" => Table![Id("abc")]); e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token")); e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma")); - e!("[val: [hi]]" => s(0,6, 0,10, "expected value, found function")); + e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren")); // Expected comma. v!("(true false)" => Table![Bool(true), Bool(false)]); diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index cafc7727e..2d371bf83 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -22,27 +22,10 @@ pub enum Token<'s> { /// can contain nested block comments. BlockComment(&'s str), - /// A function invocation. - Function { - /// The header string: - /// ```typst - /// [header: args][body] - /// ^^^^^^^^^^^^ - /// ``` - header: &'s str, - /// The spanned body string: - /// ```typst - /// [header][hello *world*] - /// ^^^^^^^^^^^^^ - /// ^-- The span is relative to right before this bracket - /// ``` - body: Option>, - /// Whether the last closing bracket was present. - /// - `[func]` or `[func][body]` => terminated - /// - `[func` or `[func][body` => not terminated - terminated: bool, - }, - + /// A left bracket starting a function invocation or body: `[`. + LeftBracket, + /// A right bracket ending a function invocation or body: `]`. + RightBracket, /// A left parenthesis in a function header: `(`. LeftParen, /// A right parenthesis in a function header: `)`. @@ -119,7 +102,8 @@ impl<'s> Token<'s> { Space(_) => "space", LineComment(_) => "line comment", BlockComment(_) => "block comment", - Function { .. 
} => "function", + LeftBracket => "opening bracket", + RightBracket => "closing bracket", LeftParen => "opening paren", RightParen => "closing paren", LeftBrace => "opening brace", @@ -141,7 +125,6 @@ impl<'s> Token<'s> { Backslash => "backslash", Raw { .. } => "raw text", Text(_) => "text", - Invalid("]") => "closing bracket", Invalid("*/") => "end of block comment", Invalid(_) => "invalid token", } @@ -152,8 +135,9 @@ impl<'s> Token<'s> { #[derive(Debug)] pub struct Tokens<'s> { src: &'s str, - mode: TokenMode, iter: Peekable>, + mode: TokenMode, + stack: Vec, pos: Pos, index: usize, } @@ -172,16 +156,29 @@ impl<'s> Tokens<'s> { /// /// The first token's span starts an the given `offset` position instead of /// the zero position. - pub fn new(src: &'s str, offset: Pos, mode: TokenMode) -> Self { + pub fn new(src: &'s str, mode: TokenMode) -> Self { Self { src, - mode, iter: src.chars().peekable(), - pos: offset, + mode, + stack: vec![], + pos: Pos::ZERO, index: 0, } } + /// Change the token mode and push the old one on a stack. + pub fn push_mode(&mut self, mode: TokenMode) { + self.stack.push(self.mode); + self.mode = mode; + } + + /// Pop the old token mode from the stack. This panics if there is no mode + /// on the stack. + pub fn pop_mode(&mut self) { + self.mode = self.stack.pop().expect("no pushed mode"); + } + /// The index in the string at which the last token ends and next token will /// start. pub fn index(&self) -> usize { @@ -212,15 +209,15 @@ impl<'s> Iterator for Tokens<'s> { // Whitespace. c if c.is_whitespace() => self.read_whitespace(start), - // Functions. - '[' => self.read_function(start), - ']' => Invalid("]"), + // Functions and blocks. + '[' => LeftBracket, + ']' => RightBracket, + '{' => LeftBrace, + '}' => RightBrace, // Syntactic elements in function headers. 
'(' if self.mode == Header => LeftParen, ')' if self.mode == Header => RightParen, - '{' if self.mode == Header => LeftBrace, - '}' if self.mode == Header => RightBrace, ':' if self.mode == Header => Colon, ',' if self.mode == Header => Comma, '=' if self.mode == Header => Equals, @@ -322,52 +319,6 @@ impl<'s> Tokens<'s> { Space(end.line - start.line) } - fn read_function(&mut self, start: Pos) -> Token<'s> { - let (header, terminated) = self.read_function_part(Header); - self.eat(); - - if self.peek() != Some('[') { - return Function { header, body: None, terminated }; - } - - self.eat(); - - let body_start = self.pos() - start; - let (body, terminated) = self.read_function_part(Body); - let body_end = self.pos() - start; - let span = Span::new(body_start, body_end); - - self.eat(); - - Function { header, body: Some(Spanned { v: body, span }), terminated } - } - - fn read_function_part(&mut self, mode: TokenMode) -> (&'s str, bool) { - let start = self.index(); - let mut terminated = false; - - while let Some(n) = self.peek() { - if n == ']' { - terminated = true; - break; - } - - self.eat(); - match n { - '[' => { self.read_function(Pos::ZERO); } - '/' if self.peek() == Some('/') => { self.read_line_comment(); } - '/' if self.peek() == Some('*') => { self.read_block_comment(); } - '"' if mode == Header => { self.read_string(); } - '`' if mode == Body => { self.read_raw(); } - '\\' => { self.eat(); } - _ => {} - } - } - - let end = self.index(); - (&self.src[start..end], terminated) - } - fn read_string(&mut self) -> Token<'s> { let (string, terminated) = self.read_until_unescaped('"'); Str { string, terminated } @@ -540,6 +491,7 @@ mod tests { use Token::{ Space as S, LineComment as LC, BlockComment as BC, + LeftBracket as L, RightBracket as R, LeftParen as LP, RightParen as RP, LeftBrace as LB, RightBrace as RB, Ident as Id, @@ -557,25 +509,12 @@ mod tests { fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } } fn Raw(raw: &str, 
terminated: bool) -> Token { Token::Raw { raw, terminated } } - macro_rules! F { - ($h:expr, None, $t:expr) => { - Token::Function { header: $h, body: None, terminated: $t } - }; - ($h:expr, $b:expr, $t:expr) => { - Token::Function { - header: $h, - body: Some(Into::>::into($b)), - terminated: $t, - } - }; - } - macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} } macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} } macro_rules! test { (@spans=$spans:expr, $mode:expr, $src:expr => $($token:expr),*) => { let exp = vec![$(Into::>::into($token)),*]; - let found = Tokens::new($src, Pos::ZERO, $mode).collect::>(); + let found = Tokens::new($src, $mode).collect::>(); check($src, exp, found, $spans); } } @@ -616,7 +555,7 @@ mod tests { fn tokenize_body_only_tokens() { t!(Body, "_*" => Underscore, Star); t!(Body, "***" => Star, Star, Star); - t!(Body, "[func]*bold*" => F!("func", None, true), Star, T("bold"), Star); + t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star); t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there")); t!(Body, "`raw`" => Raw("raw", true)); t!(Body, "`[func]`" => Raw("[func]", true)); @@ -674,50 +613,6 @@ mod tests { t!(Header, "\"🌎\"" => Str("🌎", true)); } - #[test] - fn tokenize_functions() { - t!(Body, "a[f]" => T("a"), F!("f", None, true)); - t!(Body, "[f]a" => F!("f", None, true), T("a")); - t!(Body, "\n\n[f][ ]" => S(2), F!("f", " ", true)); - t!(Body, "abc [f][ ]a" => T("abc"), S(0), F!("f", " ", true), T("a")); - t!(Body, "[f: [=][*]]" => F!("f: [=][*]", None, true)); - t!(Body, "[_][[,],]," => F!("_", "[,],", true), T(",")); - t!(Body, "[=][=][=]" => F!("=", "=", true), F!("=", None, true)); - t!(Body, "[=][[=][=][=]]" => F!("=", "[=][=][=]", true)); - t!(Header, "[" => F!("", None, false)); - t!(Header, "]" => Invalid("]")); - } - - #[test] - fn tokenize_correct_end_of_function() { - // End of function with strings and carets in headers - t!(Body, r#"[f: "]"# => 
F!(r#"f: "]"#, None, false)); - t!(Body, "[f: \"s\"]" => F!("f: \"s\"", None, true)); - t!(Body, r#"[f: \"\"\"]"# => F!(r#"f: \"\"\""#, None, true)); - t!(Body, "[f: `]" => F!("f: `", None, true)); - - // End of function with strings and carets in bodies - t!(Body, "[f][\"]" => F!("f", s(0,4, 0,5, "\""), true)); - t!(Body, r#"[f][\"]"# => F!("f", s(0,4, 0,6, r#"\""#), true)); - t!(Body, "[f][`]" => F!("f", s(0,4, 0,6, "`]"), false)); - t!(Body, "[f][\\`]" => F!("f", s(0,4, 0,6, "\\`"), true)); - t!(Body, "[f][`raw`]" => F!("f", s(0,4, 0,9, "`raw`"), true)); - t!(Body, "[f][`raw]" => F!("f", s(0,4, 0,9, "`raw]"), false)); - t!(Body, "[f][`raw]`]" => F!("f", s(0,4, 0,10, "`raw]`"), true)); - t!(Body, "[f][`\\`]" => F!("f", s(0,4, 0,8, "`\\`]"), false)); - t!(Body, "[f][`\\\\`]" => F!("f", s(0,4, 0,8, "`\\\\`"), true)); - - // End of function with comments - t!(Body, "[f][/*]" => F!("f", s(0,4, 0,7, "/*]"), false)); - t!(Body, "[f][/*`*/]" => F!("f", s(0,4, 0,9, "/*`*/"), true)); - t!(Body, "[f: //]\n]" => F!("f: //]\n", None, true)); - t!(Body, "[f: \"//]\n]" => F!("f: \"//]\n]", None, false)); - - // End of function with escaped brackets - t!(Body, "[f][\\]]" => F!("f", s(0,4, 0,6, "\\]"), true)); - t!(Body, "[f][\\[]" => F!("f", s(0,4, 0,6, "\\["), true)); - } - #[test] fn tokenize_escaped_symbols() { t!(Body, r"\\" => T(r"\")); @@ -746,7 +641,6 @@ mod tests { fn tokenize_with_spans() { ts!(Body, "hello" => s(0,0, 0,5, T("hello"))); ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c"))); - ts!(Body, "[x = \"(1)\"]*" => s(0,0, 0,11, F!("x = \"(1)\"", None, true)), s(0,11, 0,12, Star)); ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f"))); ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore)); ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0)));