diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 340e89eac..e7ab89f1c 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,11 +1,654 @@ //! Parsing and tokenization. -mod escaping; -mod parser; -mod tokenizer; +mod postprocess; +mod tokens; -pub use parser::*; -pub use tokenizer::*; +pub use tokens::*; + +use std::str::FromStr; + +use super::*; +use crate::color::RgbaColor; +use crate::compute::table::SpannedEntry; +use crate::syntax::*; +use crate::{Feedback, Pass}; + +/// Parse a string of source code. +pub fn parse(src: &str) -> Pass { + Parser::new(src).parse() +} + +struct Parser<'s> { + tokens: Tokens<'s>, + peeked: Option>>>, + delimiters: Vec<(Pos, Token<'static>)>, + at_block_or_line_start: bool, + feedback: Feedback, +} + +impl<'s> Parser<'s> { + fn new(src: &'s str) -> Self { + Self { + tokens: Tokens::new(src, TokenMode::Body), + peeked: None, + delimiters: vec![], + at_block_or_line_start: true, + feedback: Feedback::new(), + } + } + + fn parse(mut self) -> Pass { + let tree = self.parse_body_contents(); + Pass::new(tree, self.feedback) + } +} + +// Typesetting content. +impl Parser<'_> { + fn parse_body_contents(&mut self) -> SyntaxTree { + let mut tree = SyntaxTree::new(); + + self.at_block_or_line_start = true; + while !self.eof() { + if let Some(node) = self.parse_node() { + tree.push(node); + } + } + + tree + } + + fn parse_node(&mut self) -> Option> { + let token = self.peek()?; + let end = Span::at(token.span.end); + + // Set block or line start to false because most nodes have that effect, but + // remember the old value to actually check it for hashtags and because comments + // and spaces want to retain it. + let was_at_block_or_line_start = self.at_block_or_line_start; + self.at_block_or_line_start = false; + + Some(match token.v { + // Starting from two newlines counts as a paragraph break, a single + // newline does not. + Token::Space(n) => { + if n == 0 { + self.at_block_or_line_start = was_at_block_or_line_start; + } else if n >= 1 { + self.at_block_or_line_start = true; + } + + self.with_span(if n >= 2 { + SyntaxNode::Parbreak + } else { + SyntaxNode::Spacing + }) + } + + Token::LineComment(_) | Token::BlockComment(_) => { + self.at_block_or_line_start = was_at_block_or_line_start; + self.eat(); + return None; + } + + Token::LeftBracket => { + let call = self.parse_bracket_call(false); + self.at_block_or_line_start = false; + call.map(SyntaxNode::Call) + } + + Token::Star => self.with_span(SyntaxNode::ToggleBolder), + Token::Underscore => self.with_span(SyntaxNode::ToggleItalic), + Token::Backslash => self.with_span(SyntaxNode::Linebreak), + + Token::Hashtag if was_at_block_or_line_start => { + self.parse_heading().map(SyntaxNode::Heading) + } + + Token::Raw { raw, backticks, terminated } => { + if !terminated { + error!(@self.feedback, end, "expected backtick(s)"); + } + + let raw = if backticks > 1 { + postprocess::process_raw(raw) + } else { + Raw { + lang: None, + lines: postprocess::split_lines(raw), + inline: true, + } + }; + + self.with_span(SyntaxNode::Raw(raw)) + } + + Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())), + Token::Hashtag => self.with_span(SyntaxNode::Text("#".to_string())), + + Token::UnicodeEscape { sequence, terminated } => { + if !terminated { + error!(@self.feedback, end, "expected closing brace"); + } + + if let Some(c) = postprocess::hex_to_char(sequence) { + self.with_span(SyntaxNode::Text(c.to_string())) + } else { + error!(@self.feedback, token.span, "invalid unicode escape sequence"); + self.eat(); + return None; + } + } + + unexpected => { + error!(@self.feedback, token.span, "unexpected {}", unexpected.name()); + self.eat(); + return None; + } + }) + } + + fn parse_heading(&mut self) -> Spanned { + let start = self.pos(); + self.assert(Token::Hashtag); + + let mut level = 0; + while self.peekv() == Some(Token::Hashtag) { + level += 1; + self.eat(); + } + + let span = Span::new(start, self.pos()); + let level = level.span_with(span); + + if level.v > 5 { + warning!( + @self.feedback, level.span, + "section depth larger than 6 has no effect", + ); + } + + self.skip_ws(); + + let mut tree = SyntaxTree::new(); + while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) { + if let Some(node) = self.parse_node() { + tree.push(node); + } + } + + let span = Span::new(start, self.pos()); + Heading { level, tree }.span_with(span) + } +} + +// Function calls. +impl Parser<'_> { + fn parse_bracket_call(&mut self, chained: bool) -> Spanned { + let before_bracket = self.pos(); + if !chained { + self.start_group(Group::Bracket); + self.tokens.push_mode(TokenMode::Header); + } + + let before_name = self.pos(); + self.start_group(Group::Subheader); + self.skip_ws(); + let name = self.parse_ident().unwrap_or_else(|| { + self.expected_found_or_at("function name", before_name); + Ident(String::new()).span_with(Span::at(before_name)) + }); + + self.skip_ws(); + + let mut args = match self.eatv() { + Some(Token::Colon) => self.parse_table_contents().0, + Some(_) => { + self.expected_at("colon", name.span.end); + while self.eat().is_some() {} + TableExpr::new() + } + None => TableExpr::new(), + }; + + self.end_group(); + self.skip_ws(); + let (has_chained_child, end) = if self.peek().is_some() { + let item = self.parse_bracket_call(true); + let span = item.span; + let t = vec![item.map(SyntaxNode::Call)]; + args.push(SpannedEntry::val(Expr::Tree(t).span_with(span))); + (true, span.end) + } else { + self.tokens.pop_mode(); + (false, self.end_group().end) + }; + + let start = if chained { before_name } else { before_bracket }; + let mut span = Span::new(start, end); + + if self.check(Token::LeftBracket) && !has_chained_child { + self.start_group(Group::Bracket); + self.tokens.push_mode(TokenMode::Body); + + let body = self.parse_body_contents(); + + self.tokens.pop_mode(); + let body_span = self.end_group(); + + let expr = Expr::Tree(body); + args.push(SpannedEntry::val(expr.span_with(body_span))); + span.expand(body_span); + } + + CallExpr { name, args }.span_with(span) + } + + fn parse_paren_call(&mut self, name: Spanned) -> Spanned { + self.start_group(Group::Paren); + let args = self.parse_table_contents().0; + let args_span = self.end_group(); + let span = Span::merge(name.span, args_span); + CallExpr { name, args }.span_with(span) + } +} + +// Tables. +impl Parser<'_> { + fn parse_table_contents(&mut self) -> (TableExpr, bool) { + let mut table = TableExpr::new(); + let mut comma_and_keyless = true; + + while { + self.skip_ws(); + !self.eof() + } { + let (key, val) = if let Some(ident) = self.parse_ident() { + self.skip_ws(); + + match self.peekv() { + Some(Token::Equals) => { + self.eat(); + self.skip_ws(); + if let Some(value) = self.parse_expr() { + (Some(ident), value) + } else { + self.expected("value"); + continue; + } + } + + Some(Token::LeftParen) => { + let call = self.parse_paren_call(ident); + (None, call.map(Expr::Call)) + } + + _ => (None, ident.map(Expr::Ident)), + } + } else if let Some(value) = self.parse_expr() { + (None, value) + } else { + self.expected("value"); + continue; + }; + + let behind = val.span.end; + if let Some(key) = key { + comma_and_keyless = false; + table.insert(key.v.0, SpannedEntry::new(key.span, val)); + self.feedback + .decorations + .push(Decoration::TableKey.span_with(key.span)); + } else { + table.push(SpannedEntry::val(val)); + } + + if { + self.skip_ws(); + self.eof() + } { + break; + } + + self.expect_at(Token::Comma, behind); + comma_and_keyless = false; + } + + let coercable = comma_and_keyless && !table.is_empty(); + (table, coercable) + } +} + +type Binop = fn(Box>, Box>) -> Expr; + +// Expressions and values. +impl Parser<'_> { + fn parse_expr(&mut self) -> Option> { + self.parse_binops("summand", Self::parse_term, |token| match token { + Token::Plus => Some(Expr::Add), + Token::Hyphen => Some(Expr::Sub), + _ => None, + }) + } + + fn parse_term(&mut self) -> Option> { + self.parse_binops("factor", Self::parse_factor, |token| match token { + Token::Star => Some(Expr::Mul), + Token::Slash => Some(Expr::Div), + _ => None, + }) + } + + /// Parse expression of the form ` ( )*`. + fn parse_binops( + &mut self, + operand_name: &str, + mut parse_operand: impl FnMut(&mut Self) -> Option>, + mut parse_op: impl FnMut(Token) -> Option, + ) -> Option> { + let mut left = parse_operand(self)?; + + self.skip_ws(); + while let Some(token) = self.peek() { + if let Some(op) = parse_op(token.v) { + self.eat(); + self.skip_ws(); + + if let Some(right) = parse_operand(self) { + let span = Span::merge(left.span, right.span); + let v = op(Box::new(left), Box::new(right)); + left = v.span_with(span); + self.skip_ws(); + continue; + } + + error!( + @self.feedback, Span::merge(left.span, token.span), + "missing right {}", operand_name, + ); + } + break; + } + + Some(left) + } + + fn parse_factor(&mut self) -> Option> { + if let Some(hyph) = self.check_eat(Token::Hyphen) { + self.skip_ws(); + if let Some(factor) = self.parse_factor() { + let span = Span::merge(hyph.span, factor.span); + Some(Expr::Neg(Box::new(factor)).span_with(span)) + } else { + error!(@self.feedback, hyph.span, "dangling minus"); + None + } + } else { + self.parse_value() + } + } + + fn parse_value(&mut self) -> Option> { + let Spanned { v: token, span } = self.peek()?; + Some(match token { + // This could be a function call or an identifier. + Token::Ident(id) => { + let name = Ident(id.to_string()).span_with(span); + self.eat(); + self.skip_ws(); + if self.check(Token::LeftParen) { + self.parse_paren_call(name).map(Expr::Call) + } else { + name.map(Expr::Ident) + } + } + + Token::Str { string, terminated } => { + if !terminated { + self.expected_at("quote", span.end); + } + self.with_span(Expr::Str(postprocess::unescape_string(string))) + } + + Token::Bool(b) => self.with_span(Expr::Bool(b)), + Token::Number(n) => self.with_span(Expr::Number(n)), + Token::Length(s) => self.with_span(Expr::Length(s)), + Token::Hex(s) => { + if let Ok(color) = RgbaColor::from_str(s) { + self.with_span(Expr::Color(color)) + } else { + // Heal color by assuming black. + error!(@self.feedback, span, "invalid color"); + let healed = RgbaColor::new_healed(0, 0, 0, 255); + self.with_span(Expr::Color(healed)) + } + } + + // This could be a table or a parenthesized expression. We parse as + // a table in any case and coerce the table into a value if it is + // coercable (length 1 and no trailing comma). + Token::LeftParen => { + self.start_group(Group::Paren); + let (table, coercable) = self.parse_table_contents(); + let span = self.end_group(); + + let expr = if coercable { + table.into_values().next().expect("table is coercable").val.v + } else { + Expr::Table(table) + }; + + expr.span_with(span) + } + + // This is a content expression. + Token::LeftBrace => { + self.start_group(Group::Brace); + self.tokens.push_mode(TokenMode::Body); + + let tree = self.parse_body_contents(); + + self.tokens.pop_mode(); + let span = self.end_group(); + Expr::Tree(tree).span_with(span) + } + + // This is a bracketed function call. + Token::LeftBracket => { + let call = self.parse_bracket_call(false); + let tree = vec![call.map(SyntaxNode::Call)]; + Expr::Tree(tree).span_with(span) + } + + _ => return None, + }) + } + + fn parse_ident(&mut self) -> Option> { + self.peek().and_then(|token| match token.v { + Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))), + _ => None, + }) + } +} + +// Error handling. +impl Parser<'_> { + fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool { + if self.check(token) { + self.eat(); + true + } else { + self.expected_at(token.name(), pos); + false + } + } + + fn expected(&mut self, thing: &str) { + if let Some(found) = self.eat() { + error!( + @self.feedback, found.span, + "expected {}, found {}", thing, found.v.name(), + ); + } else { + error!(@self.feedback, Span::at(self.pos()), "expected {}", thing); + } + } + + fn expected_at(&mut self, thing: &str, pos: Pos) { + error!(@self.feedback, Span::at(pos), "expected {}", thing); + } + + fn expected_found_or_at(&mut self, thing: &str, pos: Pos) { + if self.eof() { + self.expected_at(thing, pos) + } else { + self.expected(thing); + } + } +} + +// Parsing primitives. +impl<'s> Parser<'s> { + fn start_group(&mut self, group: Group) { + let start = self.pos(); + if let Some(start_token) = group.start() { + self.assert(start_token); + } + self.delimiters.push((start, group.end())); + } + + fn end_group(&mut self) -> Span { + let peeked = self.peek(); + + let (start, end_token) = self.delimiters.pop().expect("group was not started"); + + if end_token != Token::Chain && peeked != None { + self.delimiters.push((start, end_token)); + assert_eq!(peeked, None, "unfinished group"); + } + + match self.peeked.unwrap() { + Some(token) if token.v == end_token => { + self.peeked = None; + Span::new(start, token.span.end) + } + _ => { + let end = self.pos(); + if end_token != Token::Chain { + error!( + @self.feedback, Span::at(end), + "expected {}", end_token.name(), + ); + } + Span::new(start, end) + } + } + } + + fn skip_ws(&mut self) { + while matches!( + self.peekv(), + Some(Token::Space(_)) | + Some(Token::LineComment(_)) | + Some(Token::BlockComment(_)) + ) { + self.eat(); + } + } + + fn eatv(&mut self) -> Option> { + self.eat().map(Spanned::value) + } + + fn peekv(&mut self) -> Option> { + self.peek().map(Spanned::value) + } + + fn assert(&mut self, token: Token<'_>) { + assert!(self.check_eat(token).is_some()); + } + + fn check_eat(&mut self, token: Token<'_>) -> Option>> { + if self.check(token) { self.eat() } else { None } + } + + /// Checks if the next token is of some kind + fn check(&mut self, token: Token<'_>) -> bool { + self.peekv() == Some(token) + } + + fn with_span(&mut self, v: T) -> Spanned { + let span = self.eat().expect("expected token").span; + v.span_with(span) + } + + fn eof(&mut self) -> bool { + self.peek().is_none() + } + + fn eat(&mut self) -> Option>> { + let token = self.peek()?; + self.peeked = None; + Some(token) + } + + fn peek(&mut self) -> Option>> { + let tokens = &mut self.tokens; + let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?; + + // Check for unclosed groups. + if Group::is_delimiter(token.v) { + if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) { + return None; + } + } + + Some(token) + } + + fn pos(&self) -> Pos { + self.peeked + .flatten() + .map(|s| s.span.start) + .unwrap_or_else(|| self.tokens.pos()) + } +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum Group { + Paren, + Bracket, + Brace, + Subheader, +} + +impl Group { + fn is_delimiter(token: Token<'_>) -> bool { + matches!( + token, + Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain + ) + } + + fn start(self) -> Option> { + match self { + Self::Paren => Some(Token::LeftParen), + Self::Bracket => Some(Token::LeftBracket), + Self::Brace => Some(Token::LeftBrace), + Self::Subheader => None, + } + } + + fn end(self) -> Token<'static> { + match self { + Self::Paren => Token::RightParen, + Self::Bracket => Token::RightBracket, + Self::Brace => Token::RightBrace, + Self::Subheader => Token::Chain, + } + } +} #[cfg(test)] mod tests; diff --git a/src/parse/parser.rs b/src/parse/parser.rs deleted file mode 100644 index 3446af838..000000000 --- a/src/parse/parser.rs +++ /dev/null @@ -1,645 +0,0 @@ -use std::str::FromStr; - -use super::escaping::*; -use super::*; -use crate::color::RgbaColor; -use crate::compute::table::SpannedEntry; -use crate::syntax::*; -use crate::{Feedback, Pass}; - -/// Parse a string of source code. -pub fn parse(src: &str) -> Pass { - Parser::new(src).parse() -} - -struct Parser<'s> { - tokens: Tokens<'s>, - peeked: Option>>>, - delimiters: Vec<(Pos, Token<'static>)>, - at_block_or_line_start: bool, - feedback: Feedback, -} - -impl<'s> Parser<'s> { - fn new(src: &'s str) -> Self { - Self { - tokens: Tokens::new(src, TokenMode::Body), - peeked: None, - delimiters: vec![], - at_block_or_line_start: true, - feedback: Feedback::new(), - } - } - - fn parse(mut self) -> Pass { - let tree = self.parse_body_contents(); - Pass::new(tree, self.feedback) - } -} - -// Typesetting content. -impl Parser<'_> { - fn parse_body_contents(&mut self) -> SyntaxTree { - let mut tree = SyntaxTree::new(); - - self.at_block_or_line_start = true; - while !self.eof() { - if let Some(node) = self.parse_node() { - tree.push(node); - } - } - - tree - } - - fn parse_node(&mut self) -> Option> { - let token = self.peek()?; - let end = Span::at(token.span.end); - - // Set block or line start to false because most nodes have that effect, but - // remember the old value to actually check it for hashtags and because comments - // and spaces want to retain it. - let was_at_block_or_line_start = self.at_block_or_line_start; - self.at_block_or_line_start = false; - - Some(match token.v { - // Starting from two newlines counts as a paragraph break, a single - // newline does not. - Token::Space(n) => { - if n == 0 { - self.at_block_or_line_start = was_at_block_or_line_start; - } else if n >= 1 { - self.at_block_or_line_start = true; - } - - self.with_span(if n >= 2 { - SyntaxNode::Parbreak - } else { - SyntaxNode::Spacing - }) - } - - Token::LineComment(_) | Token::BlockComment(_) => { - self.at_block_or_line_start = was_at_block_or_line_start; - self.eat(); - return None; - } - - Token::LeftBracket => { - let call = self.parse_bracket_call(false); - self.at_block_or_line_start = false; - call.map(SyntaxNode::Call) - } - - Token::Star => self.with_span(SyntaxNode::ToggleBolder), - Token::Underscore => self.with_span(SyntaxNode::ToggleItalic), - Token::Backslash => self.with_span(SyntaxNode::Linebreak), - - Token::Hashtag if was_at_block_or_line_start => { - self.parse_heading().map(SyntaxNode::Heading) - } - - Token::Raw { raw, backticks, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected backtick(s)"); - } - - let raw = if backticks > 1 { - process_raw(raw) - } else { - Raw { - lang: None, - lines: split_lines(raw), - inline: true, - } - }; - - self.with_span(SyntaxNode::Raw(raw)) - } - - Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())), - Token::Hashtag => self.with_span(SyntaxNode::Text("#".to_string())), - - Token::UnicodeEscape { sequence, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected closing brace"); - } - - if let Some(c) = hex_to_char(sequence) { - self.with_span(SyntaxNode::Text(c.to_string())) - } else { - error!(@self.feedback, token.span, "invalid unicode escape sequence"); - self.eat(); - return None; - } - } - - unexpected => { - error!(@self.feedback, token.span, "unexpected {}", unexpected.name()); - self.eat(); - return None; - } - }) - } - - fn parse_heading(&mut self) -> Spanned { - let start = self.pos(); - self.assert(Token::Hashtag); - - let mut level = 0; - while self.peekv() == Some(Token::Hashtag) { - level += 1; - self.eat(); - } - - let span = Span::new(start, self.pos()); - let level = level.span_with(span); - - if level.v > 5 { - warning!( - @self.feedback, level.span, - "section depth larger than 6 has no effect", - ); - } - - self.skip_ws(); - - let mut tree = SyntaxTree::new(); - while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) { - if let Some(node) = self.parse_node() { - tree.push(node); - } - } - - let span = Span::new(start, self.pos()); - Heading { level, tree }.span_with(span) - } -} - -// Function calls. -impl Parser<'_> { - fn parse_bracket_call(&mut self, chained: bool) -> Spanned { - let before_bracket = self.pos(); - if !chained { - self.start_group(Group::Bracket); - self.tokens.push_mode(TokenMode::Header); - } - - let before_name = self.pos(); - self.start_group(Group::Subheader); - self.skip_ws(); - let name = self.parse_ident().unwrap_or_else(|| { - self.expected_found_or_at("function name", before_name); - Ident(String::new()).span_with(Span::at(before_name)) - }); - - self.skip_ws(); - - let mut args = match self.eatv() { - Some(Token::Colon) => self.parse_table_contents().0, - Some(_) => { - self.expected_at("colon", name.span.end); - while self.eat().is_some() {} - TableExpr::new() - } - None => TableExpr::new(), - }; - - self.end_group(); - self.skip_ws(); - let (has_chained_child, end) = if self.peek().is_some() { - let item = self.parse_bracket_call(true); - let span = item.span; - let t = vec![item.map(SyntaxNode::Call)]; - args.push(SpannedEntry::val(Expr::Tree(t).span_with(span))); - (true, span.end) - } else { - self.tokens.pop_mode(); - (false, self.end_group().end) - }; - - let start = if chained { before_name } else { before_bracket }; - let mut span = Span::new(start, end); - - if self.check(Token::LeftBracket) && !has_chained_child { - self.start_group(Group::Bracket); - self.tokens.push_mode(TokenMode::Body); - - let body = self.parse_body_contents(); - - self.tokens.pop_mode(); - let body_span = self.end_group(); - - let expr = Expr::Tree(body); - args.push(SpannedEntry::val(expr.span_with(body_span))); - span.expand(body_span); - } - - CallExpr { name, args }.span_with(span) - } - - fn parse_paren_call(&mut self, name: Spanned) -> Spanned { - self.start_group(Group::Paren); - let args = self.parse_table_contents().0; - let args_span = self.end_group(); - let span = Span::merge(name.span, args_span); - CallExpr { name, args }.span_with(span) - } -} - -// Tables. -impl Parser<'_> { - fn parse_table_contents(&mut self) -> (TableExpr, bool) { - let mut table = TableExpr::new(); - let mut comma_and_keyless = true; - - while { - self.skip_ws(); - !self.eof() - } { - let (key, val) = if let Some(ident) = self.parse_ident() { - self.skip_ws(); - - match self.peekv() { - Some(Token::Equals) => { - self.eat(); - self.skip_ws(); - if let Some(value) = self.parse_expr() { - (Some(ident), value) - } else { - self.expected("value"); - continue; - } - } - - Some(Token::LeftParen) => { - let call = self.parse_paren_call(ident); - (None, call.map(Expr::Call)) - } - - _ => (None, ident.map(Expr::Ident)), - } - } else if let Some(value) = self.parse_expr() { - (None, value) - } else { - self.expected("value"); - continue; - }; - - let behind = val.span.end; - if let Some(key) = key { - comma_and_keyless = false; - table.insert(key.v.0, SpannedEntry::new(key.span, val)); - self.feedback - .decorations - .push(Decoration::TableKey.span_with(key.span)); - } else { - table.push(SpannedEntry::val(val)); - } - - if { - self.skip_ws(); - self.eof() - } { - break; - } - - self.expect_at(Token::Comma, behind); - comma_and_keyless = false; - } - - let coercable = comma_and_keyless && !table.is_empty(); - (table, coercable) - } -} - -type Binop = fn(Box>, Box>) -> Expr; - -// Expressions and values. -impl Parser<'_> { - fn parse_expr(&mut self) -> Option> { - self.parse_binops("summand", Self::parse_term, |token| match token { - Token::Plus => Some(Expr::Add), - Token::Hyphen => Some(Expr::Sub), - _ => None, - }) - } - - fn parse_term(&mut self) -> Option> { - self.parse_binops("factor", Self::parse_factor, |token| match token { - Token::Star => Some(Expr::Mul), - Token::Slash => Some(Expr::Div), - _ => None, - }) - } - - /// Parse expression of the form ` ( )*`. - fn parse_binops( - &mut self, - operand_name: &str, - mut parse_operand: impl FnMut(&mut Self) -> Option>, - mut parse_op: impl FnMut(Token) -> Option, - ) -> Option> { - let mut left = parse_operand(self)?; - - self.skip_ws(); - while let Some(token) = self.peek() { - if let Some(op) = parse_op(token.v) { - self.eat(); - self.skip_ws(); - - if let Some(right) = parse_operand(self) { - let span = Span::merge(left.span, right.span); - let v = op(Box::new(left), Box::new(right)); - left = v.span_with(span); - self.skip_ws(); - continue; - } - - error!( - @self.feedback, Span::merge(left.span, token.span), - "missing right {}", operand_name, - ); - } - break; - } - - Some(left) - } - - fn parse_factor(&mut self) -> Option> { - if let Some(hyph) = self.check_eat(Token::Hyphen) { - self.skip_ws(); - if let Some(factor) = self.parse_factor() { - let span = Span::merge(hyph.span, factor.span); - Some(Expr::Neg(Box::new(factor)).span_with(span)) - } else { - error!(@self.feedback, hyph.span, "dangling minus"); - None - } - } else { - self.parse_value() - } - } - - fn parse_value(&mut self) -> Option> { - let Spanned { v: token, span } = self.peek()?; - Some(match token { - // This could be a function call or an identifier. - Token::Ident(id) => { - let name = Ident(id.to_string()).span_with(span); - self.eat(); - self.skip_ws(); - if self.check(Token::LeftParen) { - self.parse_paren_call(name).map(Expr::Call) - } else { - name.map(Expr::Ident) - } - } - - Token::Str { string, terminated } => { - if !terminated { - self.expected_at("quote", span.end); - } - self.with_span(Expr::Str(unescape_string(string))) - } - - Token::Bool(b) => self.with_span(Expr::Bool(b)), - Token::Number(n) => self.with_span(Expr::Number(n)), - Token::Length(s) => self.with_span(Expr::Length(s)), - Token::Hex(s) => { - if let Ok(color) = RgbaColor::from_str(s) { - self.with_span(Expr::Color(color)) - } else { - // Heal color by assuming black. - error!(@self.feedback, span, "invalid color"); - let healed = RgbaColor::new_healed(0, 0, 0, 255); - self.with_span(Expr::Color(healed)) - } - } - - // This could be a table or a parenthesized expression. We parse as - // a table in any case and coerce the table into a value if it is - // coercable (length 1 and no trailing comma). - Token::LeftParen => { - self.start_group(Group::Paren); - let (table, coercable) = self.parse_table_contents(); - let span = self.end_group(); - - let expr = if coercable { - table.into_values().next().expect("table is coercable").val.v - } else { - Expr::Table(table) - }; - - expr.span_with(span) - } - - // This is a content expression. - Token::LeftBrace => { - self.start_group(Group::Brace); - self.tokens.push_mode(TokenMode::Body); - - let tree = self.parse_body_contents(); - - self.tokens.pop_mode(); - let span = self.end_group(); - Expr::Tree(tree).span_with(span) - } - - // This is a bracketed function call. - Token::LeftBracket => { - let call = self.parse_bracket_call(false); - let tree = vec![call.map(SyntaxNode::Call)]; - Expr::Tree(tree).span_with(span) - } - - _ => return None, - }) - } - - fn parse_ident(&mut self) -> Option> { - self.peek().and_then(|token| match token.v { - Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))), - _ => None, - }) - } -} - -// Error handling. -impl Parser<'_> { - fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool { - if self.check(token) { - self.eat(); - true - } else { - self.expected_at(token.name(), pos); - false - } - } - - fn expected(&mut self, thing: &str) { - if let Some(found) = self.eat() { - error!( - @self.feedback, found.span, - "expected {}, found {}", thing, found.v.name(), - ); - } else { - error!(@self.feedback, Span::at(self.pos()), "expected {}", thing); - } - } - - fn expected_at(&mut self, thing: &str, pos: Pos) { - error!(@self.feedback, Span::at(pos), "expected {}", thing); - } - - fn expected_found_or_at(&mut self, thing: &str, pos: Pos) { - if self.eof() { - self.expected_at(thing, pos) - } else { - self.expected(thing); - } - } -} - -// Parsing primitives. -impl<'s> Parser<'s> { - fn start_group(&mut self, group: Group) { - let start = self.pos(); - if let Some(start_token) = group.start() { - self.assert(start_token); - } - self.delimiters.push((start, group.end())); - } - - fn end_group(&mut self) -> Span { - let peeked = self.peek(); - - let (start, end_token) = self.delimiters.pop().expect("group was not started"); - - if end_token != Token::Chain && peeked != None { - self.delimiters.push((start, end_token)); - assert_eq!(peeked, None, "unfinished group"); - } - - match self.peeked.unwrap() { - Some(token) if token.v == end_token => { - self.peeked = None; - Span::new(start, token.span.end) - } - _ => { - let end = self.pos(); - if end_token != Token::Chain { - error!( - @self.feedback, Span::at(end), - "expected {}", end_token.name(), - ); - } - Span::new(start, end) - } - } - } - - fn skip_ws(&mut self) { - while matches!( - self.peekv(), - Some(Token::Space(_)) | - Some(Token::LineComment(_)) | - Some(Token::BlockComment(_)) - ) { - self.eat(); - } - } - - fn eatv(&mut self) -> Option> { - self.eat().map(Spanned::value) - } - - fn peekv(&mut self) -> Option> { - self.peek().map(Spanned::value) - } - - fn assert(&mut self, token: Token<'_>) { - assert!(self.check_eat(token).is_some()); - } - - fn check_eat(&mut self, token: Token<'_>) -> Option>> { - if self.check(token) { self.eat() } else { None } - } - - /// Checks if the next token is of some kind - fn check(&mut self, token: Token<'_>) -> bool { - self.peekv() == Some(token) - } - - fn with_span(&mut self, v: T) -> Spanned { - let span = self.eat().expect("expected token").span; - v.span_with(span) - } - - fn eof(&mut self) -> bool { - self.peek().is_none() - } - - fn eat(&mut self) -> Option>> { - let token = self.peek()?; - self.peeked = None; - Some(token) - } - - fn peek(&mut self) -> Option>> { - let tokens = &mut self.tokens; - let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?; - - // Check for unclosed groups. - if Group::is_delimiter(token.v) { - if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) { - return None; - } - } - - Some(token) - } - - fn pos(&self) -> Pos { - self.peeked - .flatten() - .map(|s| s.span.start) - .unwrap_or_else(|| self.tokens.pos()) - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum Group { - Paren, - Bracket, - Brace, - Subheader, -} - -impl Group { - fn is_delimiter(token: Token<'_>) -> bool { - matches!( - token, - Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain - ) - } - - fn start(self) -> Option> { - match self { - Self::Paren => Some(Token::LeftParen), - Self::Bracket => Some(Token::LeftBracket), - Self::Brace => Some(Token::LeftBrace), - Self::Subheader => None, - } - } - - fn end(self) -> Token<'static> { - match self { - Self::Paren => Token::RightParen, - Self::Bracket => Token::RightBracket, - Self::Brace => Token::RightBrace, - Self::Subheader => Token::Chain, - } - } -} diff --git a/src/parse/escaping.rs b/src/parse/postprocess.rs similarity index 99% rename from src/parse/escaping.rs rename to src/parse/postprocess.rs index 2e556d0c1..ad4a9057d 100644 --- a/src/parse/escaping.rs +++ b/src/parse/postprocess.rs @@ -1,3 +1,5 @@ +//! Post-processing of strings and raw blocks. + use super::is_newline_char; use crate::syntax::{Ident, Raw}; diff --git a/src/parse/tests.rs b/src/parse/tests.rs index 8ddf013d6..a753378ee 100644 --- a/src/parse/tests.rs +++ b/src/parse/tests.rs @@ -1,3 +1,5 @@ +//! Parser tests. + #![allow(non_snake_case)] use std::fmt::Debug; diff --git a/src/parse/tokenizer.rs b/src/parse/tokens.rs similarity index 100% rename from src/parse/tokenizer.rs rename to src/parse/tokens.rs