From 7f8f225cb3cb44367d731c544f7ce1eebdb97dd5 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Thu, 3 Sep 2020 19:16:19 +0200 Subject: [PATCH] =?UTF-8?q?Split=20up=20parser=20into=20multiple=20files?= =?UTF-8?q?=20=F0=9F=A7=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Splits up into: - escaping: resolving of escape sequences - parser: the parsing code - tests: all integrated parsing tests Also moves Ident from the root syntax module into the tree module. --- src/compute/value.rs | 3 +- src/syntax/mod.rs | 29 - src/syntax/parsing.rs | 1403 -------------------------------- src/syntax/parsing/escaping.rs | 243 ++++++ src/syntax/parsing/mod.rs | 9 + src/syntax/parsing/parser.rs | 660 +++++++++++++++ src/syntax/parsing/tests.rs | 509 ++++++++++++ src/syntax/tree.rs | 28 +- 8 files changed, 1449 insertions(+), 1435 deletions(-) delete mode 100644 src/syntax/parsing.rs create mode 100644 src/syntax/parsing/escaping.rs create mode 100644 src/syntax/parsing/mod.rs create mode 100644 src/syntax/parsing/parser.rs create mode 100644 src/syntax/parsing/tests.rs diff --git a/src/compute/value.rs b/src/compute/value.rs index 9f1174ec4..ce7e8d577 100644 --- a/src/compute/value.rs +++ b/src/compute/value.rs @@ -12,8 +12,7 @@ use crate::layout::{Command, Commands, Dir, LayoutContext, SpecAlign}; use crate::length::{Length, ScaleLength}; use crate::paper::Paper; use crate::syntax::span::{Span, Spanned}; -use crate::syntax::tree::{SyntaxNode, SyntaxTree}; -use crate::syntax::Ident; +use crate::syntax::tree::{Ident, SyntaxNode, SyntaxTree}; use crate::{DynFuture, Feedback, Pass}; /// A computational value. 
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index a9fe7c2e9..70935e796 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -6,35 +6,6 @@ pub mod span; pub mod tokens; pub mod tree; -use std::fmt::{self, Debug, Formatter}; -use tokens::is_identifier; - -/// An identifier as defined by unicode with a few extra permissible characters. -#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct Ident(pub String); - -impl Ident { - /// Create a new identifier from a string checking that it is a valid. - pub fn new(ident: impl AsRef + Into) -> Option { - if is_identifier(ident.as_ref()) { - Some(Self(ident.into())) - } else { - None - } - } - - /// Return a reference to the underlying string. - pub fn as_str(&self) -> &str { - self.0.as_str() - } -} - -impl Debug for Ident { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "`{}`", self.0) - } -} - #[cfg(test)] mod tests { use super::span; diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs deleted file mode 100644 index 3ec907a2f..000000000 --- a/src/syntax/parsing.rs +++ /dev/null @@ -1,1403 +0,0 @@ -//! Parsing of source code into syntax trees. - -use std::str::FromStr; - -use super::decoration::Decoration; -use super::span::{Pos, Span, Spanned}; -use super::tokens::{is_newline_char, Token, TokenMode, Tokens}; -use super::tree::{CallExpr, Code, Expr, Heading, SyntaxNode, SyntaxTree, TableExpr}; -use super::Ident; -use crate::color::RgbaColor; -use crate::compute::table::SpannedEntry; -use crate::{Feedback, Pass}; - -/// Parse a string of source code. 
-pub fn parse(src: &str) -> Pass { - Parser::new(src).parse() -} - -struct Parser<'s> { - tokens: Tokens<'s>, - peeked: Option>>>, - delimiters: Vec<(Pos, Token<'static>)>, - at_block_or_line_start: bool, - feedback: Feedback, -} - -impl<'s> Parser<'s> { - fn new(src: &'s str) -> Self { - Self { - tokens: Tokens::new(src, TokenMode::Body), - peeked: None, - delimiters: vec![], - at_block_or_line_start: true, - feedback: Feedback::new(), - } - } - - fn parse(mut self) -> Pass { - let tree = self.parse_body_contents(); - Pass::new(tree, self.feedback) - } -} - -// Typesetting content. -impl Parser<'_> { - fn parse_body_contents(&mut self) -> SyntaxTree { - let mut tree = SyntaxTree::new(); - - self.at_block_or_line_start = true; - while !self.eof() { - if let Some(node) = self.parse_node() { - tree.push(node); - } - } - - tree - } - - fn parse_node(&mut self) -> Option> { - let token = self.peek()?; - let end = Span::at(token.span.end); - - // Set block or line start to false because most nodes have that effect, but - // remember the old value to actually check it for hashtags and because comments - // and spaces want to retain it. - let was_at_block_or_line_start = self.at_block_or_line_start; - self.at_block_or_line_start = false; - - Some(match token.v { - // Starting from two newlines counts as a paragraph break, a single - // newline does not. 
- Token::Space(n) => { - if n == 0 { - self.at_block_or_line_start = was_at_block_or_line_start; - } else if n >= 1 { - self.at_block_or_line_start = true; - } - - self.with_span(if n >= 2 { - SyntaxNode::Parbreak - } else { - SyntaxNode::Spacing - }) - } - - Token::LineComment(_) | Token::BlockComment(_) => { - self.at_block_or_line_start = was_at_block_or_line_start; - self.eat(); - return None; - } - - Token::LeftBracket => { - let call = self.parse_bracket_call(false); - self.at_block_or_line_start = false; - call.map(SyntaxNode::Call) - } - - Token::Star => self.with_span(SyntaxNode::ToggleBolder), - Token::Underscore => self.with_span(SyntaxNode::ToggleItalic), - Token::Backslash => self.with_span(SyntaxNode::Linebreak), - - Token::Hashtag if was_at_block_or_line_start => { - self.parse_heading().map(SyntaxNode::Heading) - } - - Token::Raw { raw, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected backtick"); - } - self.with_span(SyntaxNode::Raw(unescape_raw(raw))) - } - - Token::Code { lang, raw, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected backticks"); - } - - let lang = lang.and_then(|lang| { - if let Some(ident) = Ident::new(lang.v) { - Some(Spanned::new(ident, lang.span)) - } else { - error!(@self.feedback, lang.span, "invalid identifier"); - None - } - }); - - let mut lines = unescape_code(raw); - let block = lines.len() > 1; - - if lines.last().map(|s| s.is_empty()).unwrap_or(false) { - lines.pop(); - } - - self.with_span(SyntaxNode::Code(Code { lang, lines, block })) - } - - Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())), - Token::Hashtag => self.with_span(SyntaxNode::Text("#".to_string())), - - Token::UnicodeEscape { sequence, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected closing brace"); - } - - if let Some(c) = unescape_char(sequence) { - self.with_span(SyntaxNode::Text(c.to_string())) - } else { - error!(@self.feedback, token.span, 
"invalid unicode escape sequence"); - self.eat(); - return None; - } - } - - unexpected => { - error!(@self.feedback, token.span, "unexpected {}", unexpected.name()); - self.eat(); - return None; - } - }) - } - - fn parse_heading(&mut self) -> Spanned { - let start = self.pos(); - self.assert(Token::Hashtag); - - let mut level = 0; - while self.peekv() == Some(Token::Hashtag) { - level += 1; - self.eat(); - } - - let span = Span::new(start, self.pos()); - let level = Spanned::new(level, span); - - if level.v > 5 { - warning!( - @self.feedback, level.span, - "section depth larger than 6 has no effect", - ); - } - - self.skip_ws(); - - let mut tree = SyntaxTree::new(); - while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) { - if let Some(node) = self.parse_node() { - tree.push(node); - } - } - - let span = Span::new(start, self.pos()); - Spanned::new(Heading { level, tree }, span) - } -} - -// Function calls. -impl Parser<'_> { - fn parse_bracket_call(&mut self, chained: bool) -> Spanned { - let before_bracket = self.pos(); - if !chained { - self.start_group(Group::Bracket); - self.tokens.push_mode(TokenMode::Header); - } - - let before_name = self.pos(); - self.start_group(Group::Subheader); - self.skip_ws(); - let name = self.parse_ident().unwrap_or_else(|| { - self.expected_found_or_at("function name", before_name); - Spanned::new(Ident(String::new()), Span::at(before_name)) - }); - - self.skip_ws(); - - let mut args = match self.eatv() { - Some(Token::Colon) => self.parse_table_contents().0, - Some(_) => { - self.expected_at("colon", name.span.end); - while self.eat().is_some() {} - TableExpr::new() - } - None => TableExpr::new(), - }; - - self.end_group(); - self.skip_ws(); - let (has_chained_child, end) = if self.peek().is_some() { - let item = self.parse_bracket_call(true); - let span = item.span; - let t = vec![item.map(SyntaxNode::Call)]; - args.push(SpannedEntry::val(Spanned::new(Expr::Tree(t), span))); - (true, span.end) - } else 
{ - self.tokens.pop_mode(); - (false, self.end_group().end) - }; - - let start = if chained { before_name } else { before_bracket }; - let mut span = Span::new(start, end); - - if self.check(Token::LeftBracket) && !has_chained_child { - self.start_group(Group::Bracket); - self.tokens.push_mode(TokenMode::Body); - - let body = self.parse_body_contents(); - - self.tokens.pop_mode(); - let body_span = self.end_group(); - - let expr = Expr::Tree(body); - args.push(SpannedEntry::val(Spanned::new(expr, body_span))); - span.expand(body_span); - } - - Spanned::new(CallExpr { name, args }, span) - } - - fn parse_paren_call(&mut self, name: Spanned) -> Spanned { - self.start_group(Group::Paren); - let args = self.parse_table_contents().0; - let args_span = self.end_group(); - let span = Span::merge(name.span, args_span); - Spanned::new(CallExpr { name, args }, span) - } -} - -// Tables. -impl Parser<'_> { - fn parse_table_contents(&mut self) -> (TableExpr, bool) { - let mut table = TableExpr::new(); - let mut comma_and_keyless = true; - - while { - self.skip_ws(); - !self.eof() - } { - let (key, val) = if let Some(ident) = self.parse_ident() { - self.skip_ws(); - - match self.peekv() { - Some(Token::Equals) => { - self.eat(); - self.skip_ws(); - if let Some(value) = self.parse_expr() { - (Some(ident), value) - } else { - self.expected("value"); - continue; - } - } - - Some(Token::LeftParen) => { - let call = self.parse_paren_call(ident); - (None, call.map(Expr::Call)) - } - - _ => (None, ident.map(Expr::Ident)), - } - } else if let Some(value) = self.parse_expr() { - (None, value) - } else { - self.expected("value"); - continue; - }; - - let behind = val.span.end; - if let Some(key) = key { - comma_and_keyless = false; - table.insert(key.v.0, SpannedEntry::new(key.span, val)); - self.feedback - .decorations - .push(Spanned::new(Decoration::TableKey, key.span)); - } else { - table.push(SpannedEntry::val(val)); - } - - if { - self.skip_ws(); - self.eof() - } { - break; - } - - 
self.expect_at(Token::Comma, behind); - comma_and_keyless = false; - } - - let coercable = comma_and_keyless && !table.is_empty(); - (table, coercable) - } -} - -type Binop = fn(Box>, Box>) -> Expr; - -// Expressions and values. -impl Parser<'_> { - fn parse_expr(&mut self) -> Option> { - self.parse_binops("summand", Self::parse_term, |token| match token { - Token::Plus => Some(Expr::Add), - Token::Hyphen => Some(Expr::Sub), - _ => None, - }) - } - - fn parse_term(&mut self) -> Option> { - self.parse_binops("factor", Self::parse_factor, |token| match token { - Token::Star => Some(Expr::Mul), - Token::Slash => Some(Expr::Div), - _ => None, - }) - } - - /// Parse expression of the form ` ( )*`. - fn parse_binops( - &mut self, - operand_name: &str, - mut parse_operand: impl FnMut(&mut Self) -> Option>, - mut parse_op: impl FnMut(Token) -> Option, - ) -> Option> { - let mut left = parse_operand(self)?; - - self.skip_ws(); - while let Some(token) = self.peek() { - if let Some(op) = parse_op(token.v) { - self.eat(); - self.skip_ws(); - - if let Some(right) = parse_operand(self) { - let span = Span::merge(left.span, right.span); - let v = op(Box::new(left), Box::new(right)); - left = Spanned::new(v, span); - self.skip_ws(); - continue; - } - - error!( - @self.feedback, Span::merge(left.span, token.span), - "missing right {}", operand_name, - ); - } - break; - } - - Some(left) - } - - fn parse_factor(&mut self) -> Option> { - if let Some(hyph) = self.check_eat(Token::Hyphen) { - self.skip_ws(); - if let Some(factor) = self.parse_factor() { - let span = Span::merge(hyph.span, factor.span); - Some(Spanned::new(Expr::Neg(Box::new(factor)), span)) - } else { - error!(@self.feedback, hyph.span, "dangling minus"); - None - } - } else { - self.parse_value() - } - } - - fn parse_value(&mut self) -> Option> { - let Spanned { v: token, span } = self.peek()?; - Some(match token { - // This could be a function call or an identifier. 
- Token::Ident(id) => { - let name = Spanned::new(Ident(id.to_string()), span); - self.eat(); - self.skip_ws(); - if self.check(Token::LeftParen) { - self.parse_paren_call(name).map(Expr::Call) - } else { - name.map(Expr::Ident) - } - } - - Token::Str { string, terminated } => { - if !terminated { - self.expected_at("quote", span.end); - } - self.with_span(Expr::Str(unescape_string(string))) - } - - Token::Bool(b) => self.with_span(Expr::Bool(b)), - Token::Number(n) => self.with_span(Expr::Number(n)), - Token::Length(s) => self.with_span(Expr::Length(s)), - Token::Hex(s) => { - if let Ok(color) = RgbaColor::from_str(s) { - self.with_span(Expr::Color(color)) - } else { - // Heal color by assuming black. - error!(@self.feedback, span, "invalid color"); - let healed = RgbaColor::new_healed(0, 0, 0, 255); - self.with_span(Expr::Color(healed)) - } - } - - // This could be a table or a parenthesized expression. We parse as - // a table in any case and coerce the table into a value if it is - // coercable (length 1 and no trailing comma). - Token::LeftParen => { - self.start_group(Group::Paren); - let (table, coercable) = self.parse_table_contents(); - let span = self.end_group(); - - let expr = if coercable { - table.into_values().next().expect("table is coercable").val.v - } else { - Expr::Table(table) - }; - - Spanned::new(expr, span) - } - - // This is a content expression. - Token::LeftBrace => { - self.start_group(Group::Brace); - self.tokens.push_mode(TokenMode::Body); - - let tree = self.parse_body_contents(); - - self.tokens.pop_mode(); - let span = self.end_group(); - Spanned::new(Expr::Tree(tree), span) - } - - // This is a bracketed function call. 
- Token::LeftBracket => { - let call = self.parse_bracket_call(false); - let tree = vec![call.map(SyntaxNode::Call)]; - Spanned::new(Expr::Tree(tree), span) - } - - _ => return None, - }) - } - - fn parse_ident(&mut self) -> Option> { - self.peek().and_then(|token| match token.v { - Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))), - _ => None, - }) - } -} - -// Error handling. -impl Parser<'_> { - fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool { - if self.check(token) { - self.eat(); - true - } else { - self.expected_at(token.name(), pos); - false - } - } - - fn expected(&mut self, thing: &str) { - if let Some(found) = self.eat() { - error!( - @self.feedback, found.span, - "expected {}, found {}", thing, found.v.name(), - ); - } else { - error!(@self.feedback, Span::at(self.pos()), "expected {}", thing); - } - } - - fn expected_at(&mut self, thing: &str, pos: Pos) { - error!(@self.feedback, Span::at(pos), "expected {}", thing); - } - - fn expected_found_or_at(&mut self, thing: &str, pos: Pos) { - if self.eof() { - self.expected_at(thing, pos) - } else { - self.expected(thing); - } - } -} - -// Parsing primitives. 
-impl<'s> Parser<'s> { - fn start_group(&mut self, group: Group) { - let start = self.pos(); - if let Some(start_token) = group.start() { - self.assert(start_token); - } - self.delimiters.push((start, group.end())); - } - - fn end_group(&mut self) -> Span { - let peeked = self.peek(); - - let (start, end_token) = self.delimiters.pop().expect("group was not started"); - - if end_token != Token::Chain && peeked != None { - self.delimiters.push((start, end_token)); - assert_eq!(peeked, None, "unfinished group"); - } - - match self.peeked.unwrap() { - Some(token) if token.v == end_token => { - self.peeked = None; - Span::new(start, token.span.end) - } - _ => { - let end = self.pos(); - if end_token != Token::Chain { - error!( - @self.feedback, Span::at(end), - "expected {}", end_token.name(), - ); - } - Span::new(start, end) - } - } - } - - fn skip_ws(&mut self) { - while matches!( - self.peekv(), - Some(Token::Space(_)) | - Some(Token::LineComment(_)) | - Some(Token::BlockComment(_)) - ) { - self.eat(); - } - } - - fn eatv(&mut self) -> Option> { - self.eat().map(Spanned::value) - } - - fn peekv(&mut self) -> Option> { - self.peek().map(Spanned::value) - } - - fn assert(&mut self, token: Token<'_>) { - assert!(self.check_eat(token).is_some()); - } - - fn check_eat(&mut self, token: Token<'_>) -> Option>> { - if self.check(token) { self.eat() } else { None } - } - - /// Checks if the next token is of some kind - fn check(&mut self, token: Token<'_>) -> bool { - self.peekv() == Some(token) - } - - fn with_span(&mut self, v: T) -> Spanned { - let span = self.eat().expect("expected token").span; - Spanned::new(v, span) - } - - fn eof(&mut self) -> bool { - self.peek().is_none() - } - - fn eat(&mut self) -> Option>> { - let token = self.peek()?; - self.peeked = None; - Some(token) - } - - fn peek(&mut self) -> Option>> { - let tokens = &mut self.tokens; - let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?; - - // Check for unclosed groups. 
- if Group::is_delimiter(token.v) { - if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) { - return None; - } - } - - Some(token) - } - - fn pos(&self) -> Pos { - self.peeked - .flatten() - .map(|s| s.span.start) - .unwrap_or_else(|| self.tokens.pos()) - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum Group { - Paren, - Bracket, - Brace, - Subheader, -} - -impl Group { - fn is_delimiter(token: Token<'_>) -> bool { - matches!( - token, - Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain - ) - } - - fn start(self) -> Option> { - match self { - Self::Paren => Some(Token::LeftParen), - Self::Bracket => Some(Token::LeftBracket), - Self::Brace => Some(Token::LeftBrace), - Self::Subheader => None, - } - } - - fn end(self) -> Token<'static> { - match self { - Self::Paren => Token::RightParen, - Self::Bracket => Token::RightBracket, - Self::Brace => Token::RightBrace, - Self::Subheader => Token::Chain, - } - } -} - -fn unescape_string(string: &str) -> String { - let mut iter = string.chars().peekable(); - let mut out = String::with_capacity(string.len()); - - while let Some(c) = iter.next() { - if c == '\\' { - match iter.next() { - Some('\\') => out.push('\\'), - Some('"') => out.push('"'), - Some('u') if iter.peek() == Some(&'{') => { - iter.next(); - - let mut sequence = String::new(); - let terminated = loop { - match iter.peek() { - // TODO: Feedback that closing brace is missing. - Some('}') => { - iter.next(); - break true; - } - Some(&c) if c.is_ascii_hexdigit() => { - iter.next(); - sequence.push(c); - } - _ => break false, - } - }; - - // TODO: Feedback that escape sequence is wrong. 
- if let Some(c) = unescape_char(&sequence) { - out.push(c); - } else { - out.push_str("\\u{"); - out.push_str(&sequence); - if terminated { - out.push('}'); - } - } - } - Some('n') => out.push('\n'), - Some('t') => out.push('\t'), - Some(c) => { - out.push('\\'); - out.push(c); - } - None => out.push('\\'), - } - } else { - out.push(c); - } - } - - out -} - -/// Unescape raw markup and split it into into lines. -fn unescape_raw(raw: &str) -> Vec { - let mut iter = raw.chars(); - let mut text = String::new(); - - while let Some(c) = iter.next() { - if c == '\\' { - if let Some(c) = iter.next() { - if c != '\\' && c != '`' { - text.push('\\'); - } - - text.push(c); - } else { - text.push('\\'); - } - } else { - text.push(c); - } - } - - split_lines(&text) -} - -/// Unescape raw markup and split it into into lines. -fn unescape_code(raw: &str) -> Vec { - let mut iter = raw.chars().peekable(); - let mut text = String::new(); - let mut backticks = 0u32; - let mut update_backtick_count; - - while let Some(c) = iter.next() { - update_backtick_count = true; - - if c == '\\' && backticks > 0 { - let mut tail = String::new(); - let mut escape_success = false; - let mut backticks_after_slash = 0u32; - - while let Some(&s) = iter.peek() { - match s { - '\\' => { - if backticks_after_slash == 0 { - tail.push('\\'); - } else { - // Pattern like `\`\` should fail - // escape and just be printed verbantim. 
- break; - } - } - '`' => { - tail.push(s); - backticks_after_slash += 1; - if backticks_after_slash == 2 { - escape_success = true; - iter.next(); - break; - } - } - _ => break, - } - - iter.next(); - } - - if !escape_success { - text.push(c); - backticks = backticks_after_slash; - update_backtick_count = false; - } else { - backticks = 0; - } - - text.push_str(&tail); - } else { - text.push(c); - } - - if update_backtick_count { - if c == '`' { - backticks += 1; - } else { - backticks = 0; - } - } - } - - split_lines(&text) -} - -/// Converts a hexademical sequence (without braces or "\u") into a character. -fn unescape_char(sequence: &str) -> Option { - u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) -} - -fn split_lines(text: &str) -> Vec { - let mut iter = text.chars().peekable(); - let mut line = String::new(); - let mut lines = Vec::new(); - - while let Some(c) = iter.next() { - if is_newline_char(c) { - if c == '\r' && iter.peek() == Some(&'\n') { - iter.next(); - } - - lines.push(std::mem::take(&mut line)); - } else { - line.push(c); - } - } - - lines.push(line); - lines -} - -#[cfg(test)] -#[allow(non_snake_case)] -mod tests { - use super::*; - use crate::length::Length; - use crate::syntax::tests::*; - use Decoration::*; - - // ----------------------- Construct Syntax Nodes ----------------------- // - - use SyntaxNode::{ - Linebreak as L, Parbreak as P, Spacing as S, ToggleBolder as B, ToggleItalic as I, - }; - - fn T(text: &str) -> SyntaxNode { - SyntaxNode::Text(text.to_string()) - } - - macro_rules! H { - ($level:expr, $($tts:tt)*) => { - SyntaxNode::Heading(Heading { - level: Spanned::zero($level), - tree: Tree![@$($tts)*], - }) - }; - } - - macro_rules! R { - ($($line:expr),* $(,)?) => { - SyntaxNode::Raw(vec![$($line.to_string()),*]) - }; - } - - macro_rules! C { - ($lang:expr, $($line:expr),* $(,)?) 
=> {{ - let lines = vec![$($line.to_string()) ,*]; - SyntaxNode::Code(Code { - lang: $lang, - block: lines.len() > 1, - lines, - }) - }}; - } - - fn Lang<'a, T: Into>>(lang: T) -> Option> { - Some(Into::>::into(lang).map(|s| Ident(s.to_string()))) - } - - macro_rules! F { - ($($tts:tt)*) => { SyntaxNode::Call(Call!(@$($tts)*)) } - } - - // ------------------------ Construct Expressions ----------------------- // - - use Expr::{Bool, Color, Length as Len, Number as Num}; - - fn Id(ident: &str) -> Expr { - Expr::Ident(Ident(ident.to_string())) - } - fn Str(string: &str) -> Expr { - Expr::Str(string.to_string()) - } - - macro_rules! Table { - (@table=$table:expr,) => {}; - (@table=$table:expr, $key:expr => $value:expr $(, $($tts:tt)*)?) => {{ - let key = Into::>::into($key); - let val = Into::>::into($value); - $table.insert(key.v, SpannedEntry::new(key.span, val)); - Table![@table=$table, $($($tts)*)?]; - }}; - (@table=$table:expr, $value:expr $(, $($tts:tt)*)?) => { - let val = Into::>::into($value); - $table.push(SpannedEntry::val(val)); - Table![@table=$table, $($($tts)*)?]; - }; - (@$($tts:tt)*) => {{ - #[allow(unused_mut)] - let mut table = TableExpr::new(); - Table![@table=table, $($tts)*]; - table - }}; - ($($tts:tt)*) => { Expr::Table(Table![@$($tts)*]) }; - } - - macro_rules! Tree { - (@$($node:expr),* $(,)?) => { - vec![$(Into::>::into($node)),*] - }; - ($($tts:tt)*) => { Expr::Tree(Tree![@$($tts)*]) }; - } - - macro_rules! Call { - (@$name:expr $(; $($tts:tt)*)?) 
=> {{ - let name = Into::>::into($name); - CallExpr { - name: name.map(|n| Ident(n.to_string())), - args: Table![@$($($tts)*)?], - } - }}; - ($($tts:tt)*) => { Expr::Call(Call![@$($tts)*]) }; - } - - fn Neg>>(e1: T) -> Expr { - Expr::Neg(Box::new(e1.into())) - } - fn Add>>(e1: T, e2: T) -> Expr { - Expr::Add(Box::new(e1.into()), Box::new(e2.into())) - } - fn Sub>>(e1: T, e2: T) -> Expr { - Expr::Sub(Box::new(e1.into()), Box::new(e2.into())) - } - fn Mul>>(e1: T, e2: T) -> Expr { - Expr::Mul(Box::new(e1.into()), Box::new(e2.into())) - } - fn Div>>(e1: T, e2: T) -> Expr { - Expr::Div(Box::new(e1.into()), Box::new(e2.into())) - } - - // ----------------------------- Test Macros ---------------------------- // - - // Test syntax trees with or without spans. - macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} } - macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} } - macro_rules! test { - (@spans=$spans:expr, $src:expr => $($tts:tt)*) => { - let exp = Tree![@$($tts)*]; - let pass = parse($src); - check($src, exp, pass.output, $spans); - }; - } - - // Test expressions. - macro_rules! v { - ($src:expr => $($tts:tt)*) => { - t!(concat!("[val: ", $src, "]") => F!("val"; $($tts)*)); - } - } - - // Test error messages. - macro_rules! e { - ($src:expr => $($tts:tt)*) => { - let exp = vec![$($tts)*]; - let pass = parse($src); - let found = pass.feedback.diagnostics.iter() - .map(|s| s.as_ref().map(|e| e.message.as_str())) - .collect::>(); - check($src, exp, found, true); - }; - } - - // Test decorations. - macro_rules! 
d { - ($src:expr => $($tts:tt)*) => { - let exp = vec![$($tts)*]; - let pass = parse($src); - check($src, exp, pass.feedback.decorations, true); - }; - } - - // -------------------------------- Tests ------------------------------- // - - #[test] - #[rustfmt::skip] - fn test_unescape_strings() { - fn test(string: &str, expected: &str) { - assert_eq!(unescape_string(string), expected.to_string()); - } - - test(r#"hello world"#, "hello world"); - test(r#"hello\nworld"#, "hello\nworld"); - test(r#"a\"bc"#, "a\"bc"); - test(r#"a\u{2603}bc"#, "aβ˜ƒbc"); - test(r#"a\u{26c3bg"#, "aπ¦°»g"); - test(r#"av\u{6797"#, "avζž—"); - test(r#"a\\"#, "a\\"); - test(r#"a\\\nbc"#, "a\\\nbc"); - test(r#"a\tbc"#, "a\tbc"); - test(r"🌎", "🌎"); - test(r"🌎\", r"🌎\"); - test(r"\🌎", r"\🌎"); - } - - #[test] - #[rustfmt::skip] - fn test_unescape_raws() { - fn test(raw: &str, expected: Vec<&str>) { - assert_eq!(unescape_raw(raw), expected); - } - - test("raw\\`", vec!["raw`"]); - test("raw\\\\`", vec!["raw\\`"]); - test("raw\ntext", vec!["raw", "text"]); - test("a\r\nb", vec!["a", "b"]); - test("a\n\nb", vec!["a", "", "b"]); - test("a\r\x0Bb", vec!["a", "", "b"]); - test("a\r\n\r\nb", vec!["a", "", "b"]); - test("raw\\a", vec!["raw\\a"]); - test("raw\\", vec!["raw\\"]); - } - - #[test] - #[rustfmt::skip] - fn test_unescape_code() { - fn test(raw: &str, expected: Vec<&str>) { - assert_eq!(unescape_code(raw), expected); - } - - test("code\\`", vec!["code\\`"]); - test("code`\\``", vec!["code```"]); - test("code`\\`a", vec!["code`\\`a"]); - test("code``hi`\\``", vec!["code``hi```"]); - test("code`\\\\``", vec!["code`\\``"]); - test("code`\\`\\`go", vec!["code`\\`\\`go"]); - test("code`\\`\\``", vec!["code`\\```"]); - test("code\ntext", vec!["code", "text"]); - test("a\r\nb", vec!["a", "b"]); - test("a\n\nb", vec!["a", "", "b"]); - test("a\r\x0Bb", vec!["a", "", "b"]); - test("a\r\n\r\nb", vec!["a", "", "b"]); - test("code\\a", vec!["code\\a"]); - test("code\\", vec!["code\\"]); - } - - #[test] - fn 
test_parse_groups() { - e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"), - s(0,2, 0,2, "expected closing bracket")); - - e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"), - s(0,5, 0,6, "unexpected closing brace")); - } - - #[test] - fn test_parse_simple_nodes() { - t!("" => ); - t!("hi" => T("hi")); - t!("*hi" => B, T("hi")); - t!("hi_" => T("hi"), I); - t!("hi you" => T("hi"), S, T("you")); - t!("special~name" => T("special"), T("\u{00A0}"), T("name")); - t!("special\\~name" => T("special"), T("~"), T("name")); - t!("\\u{1f303}" => T("πŸŒƒ")); - t!("\n\n\nhello" => P, T("hello")); - t!(r"a\ b" => T("a"), L, S, T("b")); - t!("`py`" => R!["py"]); - t!("`hi\nyou" => R!["hi", "you"]); - e!("`hi\nyou" => s(1,3, 1,3, "expected backtick")); - t!("`hi\\`du`" => R!["hi`du"]); - - ts!("```java out```" => s(0,0, 0,14, C![Lang(s(0,3, 0,7, "java")), "out"])); - t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]); - t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![ - Lang("typst"), " Typst uses ``` to indicate code blocks" - ]); - - e!("``` hi\nyou" => s(1,3, 1,3, "expected backticks")); - e!("```🌍 hi\nyou```" => s(0,3, 0,4, "invalid identifier")); - e!("\\u{d421c809}" => s(0,0, 0,12, "invalid unicode escape sequence")); - e!("\\u{abc" => s(0,6, 0,6, "expected closing brace")); - t!("πŸ’œ\n\n 🌍" => T("πŸ’œ"), P, T("🌍")); - - ts!("hi" => s(0,0, 0,2, T("hi"))); - ts!("*Hi*" => s(0,0, 0,1, B), s(0,1, 0,3, T("Hi")), s(0,3, 0,4, B)); - ts!("πŸ’œ\n\n 🌍" => s(0,0, 0,1, T("πŸ’œ")), s(0,1, 2,1, P), s(2,1, 2,2, T("🌍"))); - } - - #[test] - fn test_parse_comments() { - // In body. - t!("hi// you\nw" => T("hi"), S, T("w")); - t!("first//\n//\nsecond" => T("first"), S, S, T("second")); - t!("first//\n \nsecond" => T("first"), P, T("second")); - t!("first/*\n \n*/second" => T("first"), T("second")); - e!("🌎\n*/n" => s(1,0, 1,2, "unexpected end of block comment")); - - // In header. 
- t!("[val:/*12pt*/]" => F!("val")); - t!("[val \n /* \n */:]" => F!("val")); - e!("[val \n /* \n */:]" => ); - e!("[val : 12, /* \n */ 14]" => ); - } - - #[test] - fn test_parse_headings() { - t!("## Hello world!" => H![1, T("Hello"), S, T("world!")]); - - // Handle various whitespace usages. - t!("####Simple" => H![3, T("Simple")]); - t!(" # Whitespace!" => S, H![0, T("Whitespace!")]); - t!(" /* TODO: Improve */ ## Analysis" => S, S, H!(1, T("Analysis"))); - - // Complex heading contents. - t!("Some text [box][### Valuable facts]" => T("Some"), S, T("text"), S, - F!("box"; Tree![H!(2, T("Valuable"), S, T("facts"))]) - ); - t!("### Grandiose stuff [box][Get it \n\n straight]" => H![2, - T("Grandiose"), S, T("stuff"), S, - F!("box"; Tree![T("Get"), S, T("it"), P, T("straight")]) - ]); - t!("###### Multiline \\ headings" => H![5, T("Multiline"), S, L, S, T("headings")]); - - // Things that should not become headings. - t!("\\## Text" => T("#"), T("#"), S, T("Text")); - t!(" ###### # Text" => S, H!(5, T("#"), S, T("Text"))); - t!("I am #1" => T("I"), S, T("am"), S, T("#"), T("1")); - t!("[box][\n] # hi" => F!("box"; Tree![S]), S, T("#"), S, T("hi")); - - // Depth warnings. - e!("########" => s(0,0, 0,8, "section depth larger than 6 has no effect")); - } - - #[test] - fn test_parse_function_names() { - // No closing bracket. - t!("[" => F!("")); - e!("[" => s(0,1, 0,1, "expected function name"), - s(0,1, 0,1, "expected closing bracket")); - - // No name. - e!("[]" => s(0,1, 0,1, "expected function name")); - e!("[\"]" => s(0,1, 0,3, "expected function name, found string"), - s(0,3, 0,3, "expected closing bracket")); - - // A valid name. - t!("[hi]" => F!("hi")); - t!("[ f]" => F!("f")); - - // An invalid name. 
- e!("[12]" => s(0,1, 0,3, "expected function name, found number")); - e!("[ 🌎]" => s(0,3, 0,4, "expected function name, found invalid token")); - } - - #[test] - fn test_parse_chaining() { - // Things the parser has to make sense of - t!("[hi: (5.0, 2.1 >> you]" => F!("hi"; Table![Num(5.0), Num(2.1)], Tree![F!("you")])); - t!("[box >>][Hi]" => F!("box"; Tree![T("Hi")])); - t!("[box >> pad: 1pt][Hi]" => F!("box"; Tree![ - F!("pad"; Len(Length::pt(1.0)), Tree!(T("Hi"))) - ])); - t!("[bold: 400, >> emph >> sub: 1cm]" => F!("bold"; Num(400.0), Tree![ - F!("emph"; Tree!(F!("sub"; Len(Length::cm(1.0))))) - ])); - - // Errors for unclosed / empty predecessor groups - e!("[hi: (5.0, 2.1 >> you]" => s(0, 15, 0, 15, "expected closing paren")); - e!("[>> abc]" => s(0, 1, 0, 1, "expected function name")); - } - - #[test] - fn test_parse_colon_starting_func_args() { - // Just colon without args. - e!("[val:]" => ); - - // Wrong token. - t!("[val=]" => F!("val")); - e!("[val=]" => s(0,4, 0,4, "expected colon")); - e!("[val/🌎:$]" => s(0,4, 0,4, "expected colon")); - - // String in invalid header without colon still parsed as string - // Note: No "expected quote" error because not even the string was - // expected. - e!("[val/\"]" => s(0,4, 0,4, "expected colon"), - s(0,7, 0,7, "expected closing bracket")); - } - - #[test] - fn test_parse_function_bodies() { - t!("[val: 1][*Hi*]" => F!("val"; Num(1.0), Tree![B, T("Hi"), B])); - e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment")); - - // Raw in body. - t!("[val][`Hi]`" => F!("val"; Tree![R!["Hi]"]])); - e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket")); - - // Crazy. - t!("[v][[v][v][v]]" => F!("v"; Tree![F!("v"; Tree![T("v")]), F!("v")])); - - // Spanned. - ts!(" [box][Oh my]" => - s(0,0, 0,1, S), - s(0,1, 0,13, F!(s(0,2, 0,5, "box"); - s(0,6, 0,13, Tree![ - s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my")) - ]) - )) - ); - } - - #[test] - fn test_parse_values() { - // Simple. 
- v!("_" => Id("_")); - v!("name" => Id("name")); - v!("Ξ±" => Id("Ξ±")); - v!("\"hi\"" => Str("hi")); - v!("true" => Bool(true)); - v!("false" => Bool(false)); - v!("1.0e-4" => Num(1e-4)); - v!("3.14" => Num(3.14)); - v!("50%" => Num(0.5)); - v!("4.5cm" => Len(Length::cm(4.5))); - v!("12e1pt" => Len(Length::pt(12e1))); - v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00))); - v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string")); - - // Content. - v!("{_hi_}" => Tree![I, T("hi"), I]); - e!("[val: {_hi_}]" => ); - v!("[hi]" => Tree![F!("hi")]); - e!("[val: [hi]]" => ); - - // Healed colors. - v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff))); - e!("[val: #12345]" => s(0,6, 0,12, "invalid color")); - e!("[val: #a5]" => s(0,6, 0,9, "invalid color")); - e!("[val: #14b2ah]" => s(0,6, 0,13, "invalid color")); - e!("[val: #f075ff011]" => s(0,6, 0,16, "invalid color")); - - // Unclosed string. - v!("\"hello" => Str("hello]")); - e!("[val: \"hello]" => s(0,13, 0,13, "expected quote"), - s(0,13, 0,13, "expected closing bracket")); - - // Spanned. - ts!("[val: 1.4]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.4))))); - } - - #[test] - fn test_parse_expressions() { - // Coerced table. - v!("(hi)" => Id("hi")); - - // Operations. - v!("-1" => Neg(Num(1.0))); - v!("-- 1" => Neg(Neg(Num(1.0)))); - v!("3.2in + 6pt" => Add(Len(Length::inches(3.2)), Len(Length::pt(6.0)))); - v!("5 - 0.01" => Sub(Num(5.0), Num(0.01))); - v!("(3mm * 2)" => Mul(Len(Length::mm(3.0)), Num(2.0))); - v!("12e-3cm/1pt" => Div(Len(Length::cm(12e-3)), Len(Length::pt(1.0)))); - - // More complex. - v!("(3.2in + 6pt)*(5/2-1)" => Mul( - Add(Len(Length::inches(3.2)), Len(Length::pt(6.0))), - Sub(Div(Num(5.0), Num(2.0)), Num(1.0)) - )); - v!("(6.3E+2+4* - 3.2pt)/2" => Div( - Add(Num(6.3e2), Mul(Num(4.0), Neg(Len(Length::pt(3.2))))), - Num(2.0) - )); - - // Associativity of multiplication and division. - v!("3/4*5" => Mul(Div(Num(3.0), Num(4.0)), Num(5.0))); - - // Spanned. 
- ts!("[val: 1 + 3]" => s(0,0, 0,12, F!( - s(0,1, 0,4, "val"); s(0,6, 0,11, Add( - s(0,6, 0,7, Num(1.0)), - s(0,10, 0,11, Num(3.0)), - )) - ))); - - // Span of parenthesized expression contains parens. - ts!("[val: (1)]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.0))))); - - // Invalid expressions. - v!("4pt--" => Len(Length::pt(4.0))); - e!("[val: 4pt--]" => s(0,10, 0,11, "dangling minus"), - s(0,6, 0,10, "missing right summand")); - - v!("3mm+4pt*" => Add(Len(Length::mm(3.0)), Len(Length::pt(4.0)))); - e!("[val: 3mm+4pt*]" => s(0,10, 0,14, "missing right factor")); - } - - #[test] - fn test_parse_tables() { - // Okay. - v!("()" => Table![]); - v!("(false)" => Bool(false)); - v!("(true,)" => Table![Bool(true)]); - v!("(key=val)" => Table!["key" => Id("val")]); - v!("(1, 2)" => Table![Num(1.0), Num(2.0)]); - v!("(1, key=\"value\")" => Table![Num(1.0), "key" => Str("value")]); - - // Decorations. - d!("[val: key=hi]" => s(0,6, 0,9, TableKey)); - d!("[val: (key=hi)]" => s(0,7, 0,10, TableKey)); - d!("[val: f(key=hi)]" => s(0,8, 0,11, TableKey)); - - // Spanned with spacing around keyword arguments. - ts!("[val: \n hi \n = /* //\n */ \"s\n\"]" => s(0,0, 4,2, F!( - s(0,1, 0,4, "val"); s(1,1, 1,3, "hi") => s(3,4, 4,1, Str("s\n")) - ))); - e!("[val: \n hi \n = /* //\n */ \"s\n\"]" => ); - } - - #[test] - fn test_parse_tables_compute_func_calls() { - v!("empty()" => Call!("empty")); - v!("add ( 1 , 2 )" => Call!("add"; Num(1.0), Num(2.0))); - v!("items(\"fire\", #f93a6d)" => Call!("items"; - Str("fire"), Color(RgbaColor::new(0xf9, 0x3a, 0x6d, 0xff)) - )); - - // More complex. - v!("css(1pt, rgb(90, 102, 254), \"solid\")" => Call!( - "css"; - Len(Length::pt(1.0)), - Call!("rgb"; Num(90.0), Num(102.0), Num(254.0)), - Str("solid"), - )); - - // Unclosed. - v!("lang(δΈ­ζ–‡]" => Call!("lang"; Id("δΈ­ζ–‡"))); - e!("[val: lang(δΈ­ζ–‡]" => s(0,13, 0,13, "expected closing paren")); - - // Invalid name. 
- v!("πŸ‘ (\"abc\", 13e-5)" => Table!(Str("abc"), Num(13.0e-5))); - e!("[val: πŸ‘ (\"abc\", 13e-5)]" => s(0,6, 0,7, "expected value, found invalid token")); - } - - #[test] - fn test_parse_tables_nested() { - v!("(1, ( ab=(), d = (3, 14pt) )), false" => - Table![ - Num(1.0), - Table!( - "ab" => Table![], - "d" => Table!(Num(3.0), Len(Length::pt(14.0))), - ), - ], - Bool(false), - ); - } - - #[test] - fn test_parse_tables_errors() { - // Expected value. - e!("[val: (=)]" => s(0,7, 0,8, "expected value, found equals sign")); - e!("[val: (,)]" => s(0,7, 0,8, "expected value, found comma")); - v!("(\x07 abc,)" => Table![Id("abc")]); - e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token")); - e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma")); - e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren")); - - // Expected comma. - v!("(true false)" => Table![Bool(true), Bool(false)]); - e!("[val: (true false)]" => s(0,11, 0,11, "expected comma")); - - // Expected closing paren. - e!("[val: (#000]" => s(0,11, 0,11, "expected closing paren")); - e!("[val: (key]" => s(0,10, 0,10, "expected closing paren")); - e!("[val: (key=]" => s(0,11, 0,11, "expected value"), - s(0,11, 0,11, "expected closing paren")); - - // Bad key. - v!("true=you" => Bool(true), Id("you")); - e!("[val: true=you]" => - s(0,10, 0,10, "expected comma"), - s(0,10, 0,11, "expected value, found equals sign")); - - // Unexpected equals sign. - v!("z=y=4" => Num(4.0), "z" => Id("y")); - e!("[val: z=y=4]" => - s(0,9, 0,9, "expected comma"), - s(0,9, 0,10, "expected value, found equals sign")); - } -} diff --git a/src/syntax/parsing/escaping.rs b/src/syntax/parsing/escaping.rs new file mode 100644 index 000000000..5f06388eb --- /dev/null +++ b/src/syntax/parsing/escaping.rs @@ -0,0 +1,243 @@ +use crate::syntax::tokens::is_newline_char; + +/// Resolves all escape sequences in a string. 
+pub fn unescape_string(string: &str) -> String { + let mut iter = string.chars().peekable(); + let mut out = String::with_capacity(string.len()); + + while let Some(c) = iter.next() { + if c == '\\' { + match iter.next() { + Some('\\') => out.push('\\'), + Some('"') => out.push('"'), + Some('u') if iter.peek() == Some(&'{') => { + iter.next(); + + let mut sequence = String::new(); + let terminated = loop { + match iter.peek() { + // TODO: Feedback that closing brace is missing. + Some('}') => { + iter.next(); + break true; + } + Some(&c) if c.is_ascii_hexdigit() => { + iter.next(); + sequence.push(c); + } + _ => break false, + } + }; + + // TODO: Feedback that escape sequence is wrong. + if let Some(c) = hex_to_char(&sequence) { + out.push(c); + } else { + out.push_str("\\u{"); + out.push_str(&sequence); + if terminated { + out.push('}'); + } + } + } + Some('n') => out.push('\n'), + Some('t') => out.push('\t'), + Some(c) => { + out.push('\\'); + out.push(c); + } + None => out.push('\\'), + } + } else { + out.push(c); + } + } + + out +} + +/// Resolves all escape sequences in raw markup (between backticks) and splits it into +/// lines. +pub fn unescape_raw(raw: &str) -> Vec<String> { + let mut iter = raw.chars(); + let mut text = String::new(); + + while let Some(c) = iter.next() { + if c == '\\' { + if let Some(c) = iter.next() { + if c != '\\' && c != '`' { + text.push('\\'); + } + + text.push(c); + } else { + text.push('\\'); + } + } else { + text.push(c); + } + } + + split_lines(&text) +} + +/// Resolves all escape sequences in code markup (between triple backticks) and splits it +/// into lines.
+pub fn unescape_code(raw: &str) -> Vec<String> { + let mut iter = raw.chars().peekable(); + let mut text = String::new(); + let mut backticks = 0u32; + let mut update_backtick_count; + + while let Some(c) = iter.next() { + update_backtick_count = true; + + if c == '\\' && backticks > 0 { + let mut tail = String::new(); + let mut escape_success = false; + let mut backticks_after_slash = 0u32; + + while let Some(&s) = iter.peek() { + match s { + '\\' => { + if backticks_after_slash == 0 { + tail.push('\\'); + } else { + // Pattern like `\`\` should fail + // escape and just be printed verbatim. + break; + } + } + '`' => { + tail.push(s); + backticks_after_slash += 1; + if backticks_after_slash == 2 { + escape_success = true; + iter.next(); + break; + } + } + _ => break, + } + + iter.next(); + } + + if !escape_success { + text.push(c); + backticks = backticks_after_slash; + update_backtick_count = false; + } else { + backticks = 0; + } + + text.push_str(&tail); + } else { + text.push(c); + } + + if update_backtick_count { + if c == '`' { + backticks += 1; + } else { + backticks = 0; + } + } + } + + split_lines(&text) +} + +/// Converts a hexadecimal sequence (without braces or "\u") into a character. +pub fn hex_to_char(sequence: &str) -> Option<char> { + u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) +} + +/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks).
+pub fn split_lines(text: &str) -> Vec { + let mut iter = text.chars().peekable(); + let mut line = String::new(); + let mut lines = Vec::new(); + + while let Some(c) = iter.next() { + if is_newline_char(c) { + if c == '\r' && iter.peek() == Some(&'\n') { + iter.next(); + } + + lines.push(std::mem::take(&mut line)); + } else { + line.push(c); + } + } + + lines.push(line); + lines +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + #[rustfmt::skip] + fn test_unescape_strings() { + fn test(string: &str, expected: &str) { + assert_eq!(unescape_string(string), expected.to_string()); + } + + test(r#"hello world"#, "hello world"); + test(r#"hello\nworld"#, "hello\nworld"); + test(r#"a\"bc"#, "a\"bc"); + test(r#"a\u{2603}bc"#, "aβ˜ƒbc"); + test(r#"a\u{26c3bg"#, "aπ¦°»g"); + test(r#"av\u{6797"#, "avζž—"); + test(r#"a\\"#, "a\\"); + test(r#"a\\\nbc"#, "a\\\nbc"); + test(r#"a\tbc"#, "a\tbc"); + test(r"🌎", "🌎"); + test(r"🌎\", r"🌎\"); + test(r"\🌎", r"\🌎"); + } + + #[test] + #[rustfmt::skip] + fn test_unescape_raws() { + fn test(raw: &str, expected: Vec<&str>) { + assert_eq!(unescape_raw(raw), expected); + } + + test("raw\\`", vec!["raw`"]); + test("raw\\\\`", vec!["raw\\`"]); + test("raw\ntext", vec!["raw", "text"]); + test("a\r\nb", vec!["a", "b"]); + test("a\n\nb", vec!["a", "", "b"]); + test("a\r\x0Bb", vec!["a", "", "b"]); + test("a\r\n\r\nb", vec!["a", "", "b"]); + test("raw\\a", vec!["raw\\a"]); + test("raw\\", vec!["raw\\"]); + } + + #[test] + #[rustfmt::skip] + fn test_unescape_code() { + fn test(raw: &str, expected: Vec<&str>) { + assert_eq!(unescape_code(raw), expected); + } + + test("code\\`", vec!["code\\`"]); + test("code`\\``", vec!["code```"]); + test("code`\\`a", vec!["code`\\`a"]); + test("code``hi`\\``", vec!["code``hi```"]); + test("code`\\\\``", vec!["code`\\``"]); + test("code`\\`\\`go", vec!["code`\\`\\`go"]); + test("code`\\`\\``", vec!["code`\\```"]); + test("code\ntext", vec!["code", "text"]); + test("a\r\nb", vec!["a", "b"]); + 
test("a\n\nb", vec!["a", "", "b"]); + test("a\r\x0Bb", vec!["a", "", "b"]); + test("a\r\n\r\nb", vec!["a", "", "b"]); + test("code\\a", vec!["code\\a"]); + test("code\\", vec!["code\\"]); + } +} diff --git a/src/syntax/parsing/mod.rs b/src/syntax/parsing/mod.rs new file mode 100644 index 000000000..bf34340f7 --- /dev/null +++ b/src/syntax/parsing/mod.rs @@ -0,0 +1,9 @@ +//! Parsing of source code into syntax trees. + +mod escaping; +mod parser; + +pub use parser::parse; + +#[cfg(test)] +mod tests; diff --git a/src/syntax/parsing/parser.rs b/src/syntax/parsing/parser.rs new file mode 100644 index 000000000..ca41bf131 --- /dev/null +++ b/src/syntax/parsing/parser.rs @@ -0,0 +1,660 @@ +use std::str::FromStr; + +use super::escaping::*; +use crate::color::RgbaColor; +use crate::compute::table::SpannedEntry; +use crate::syntax::decoration::Decoration; +use crate::syntax::span::{Pos, Span, Spanned}; +use crate::syntax::tokens::{Token, TokenMode, Tokens}; +use crate::syntax::tree::*; +use crate::{Feedback, Pass}; + +/// Parse a string of source code. +pub fn parse(src: &str) -> Pass { + Parser::new(src).parse() +} + +struct Parser<'s> { + tokens: Tokens<'s>, + peeked: Option>>>, + delimiters: Vec<(Pos, Token<'static>)>, + at_block_or_line_start: bool, + feedback: Feedback, +} + +impl<'s> Parser<'s> { + fn new(src: &'s str) -> Self { + Self { + tokens: Tokens::new(src, TokenMode::Body), + peeked: None, + delimiters: vec![], + at_block_or_line_start: true, + feedback: Feedback::new(), + } + } + + fn parse(mut self) -> Pass { + let tree = self.parse_body_contents(); + Pass::new(tree, self.feedback) + } +} + +// Typesetting content. 
+impl Parser<'_> { + fn parse_body_contents(&mut self) -> SyntaxTree { + let mut tree = SyntaxTree::new(); + + self.at_block_or_line_start = true; + while !self.eof() { + if let Some(node) = self.parse_node() { + tree.push(node); + } + } + + tree + } + + fn parse_node(&mut self) -> Option> { + let token = self.peek()?; + let end = Span::at(token.span.end); + + // Set block or line start to false because most nodes have that effect, but + // remember the old value to actually check it for hashtags and because comments + // and spaces want to retain it. + let was_at_block_or_line_start = self.at_block_or_line_start; + self.at_block_or_line_start = false; + + Some(match token.v { + // Starting from two newlines counts as a paragraph break, a single + // newline does not. + Token::Space(n) => { + if n == 0 { + self.at_block_or_line_start = was_at_block_or_line_start; + } else if n >= 1 { + self.at_block_or_line_start = true; + } + + self.with_span(if n >= 2 { + SyntaxNode::Parbreak + } else { + SyntaxNode::Spacing + }) + } + + Token::LineComment(_) | Token::BlockComment(_) => { + self.at_block_or_line_start = was_at_block_or_line_start; + self.eat(); + return None; + } + + Token::LeftBracket => { + let call = self.parse_bracket_call(false); + self.at_block_or_line_start = false; + call.map(SyntaxNode::Call) + } + + Token::Star => self.with_span(SyntaxNode::ToggleBolder), + Token::Underscore => self.with_span(SyntaxNode::ToggleItalic), + Token::Backslash => self.with_span(SyntaxNode::Linebreak), + + Token::Hashtag if was_at_block_or_line_start => { + self.parse_heading().map(SyntaxNode::Heading) + } + + Token::Raw { raw, terminated } => { + if !terminated { + error!(@self.feedback, end, "expected backtick"); + } + self.with_span(SyntaxNode::Raw(unescape_raw(raw))) + } + + Token::Code { lang, raw, terminated } => { + if !terminated { + error!(@self.feedback, end, "expected backticks"); + } + + let lang = lang.and_then(|lang| { + if let Some(ident) = Ident::new(lang.v) { 
+ Some(Spanned::new(ident, lang.span)) + } else { + error!(@self.feedback, lang.span, "invalid identifier"); + None + } + }); + + let mut lines = unescape_code(raw); + let block = lines.len() > 1; + + if lines.last().map(|s| s.is_empty()).unwrap_or(false) { + lines.pop(); + } + + self.with_span(SyntaxNode::Code(Code { lang, lines, block })) + } + + Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())), + Token::Hashtag => self.with_span(SyntaxNode::Text("#".to_string())), + + Token::UnicodeEscape { sequence, terminated } => { + if !terminated { + error!(@self.feedback, end, "expected closing brace"); + } + + if let Some(c) = hex_to_char(sequence) { + self.with_span(SyntaxNode::Text(c.to_string())) + } else { + error!(@self.feedback, token.span, "invalid unicode escape sequence"); + self.eat(); + return None; + } + } + + unexpected => { + error!(@self.feedback, token.span, "unexpected {}", unexpected.name()); + self.eat(); + return None; + } + }) + } + + fn parse_heading(&mut self) -> Spanned { + let start = self.pos(); + self.assert(Token::Hashtag); + + let mut level = 0; + while self.peekv() == Some(Token::Hashtag) { + level += 1; + self.eat(); + } + + let span = Span::new(start, self.pos()); + let level = Spanned::new(level, span); + + if level.v > 5 { + warning!( + @self.feedback, level.span, + "section depth larger than 6 has no effect", + ); + } + + self.skip_ws(); + + let mut tree = SyntaxTree::new(); + while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) { + if let Some(node) = self.parse_node() { + tree.push(node); + } + } + + let span = Span::new(start, self.pos()); + Spanned::new(Heading { level, tree }, span) + } +} + +// Function calls. 
+impl Parser<'_> { + fn parse_bracket_call(&mut self, chained: bool) -> Spanned { + let before_bracket = self.pos(); + if !chained { + self.start_group(Group::Bracket); + self.tokens.push_mode(TokenMode::Header); + } + + let before_name = self.pos(); + self.start_group(Group::Subheader); + self.skip_ws(); + let name = self.parse_ident().unwrap_or_else(|| { + self.expected_found_or_at("function name", before_name); + Spanned::new(Ident(String::new()), Span::at(before_name)) + }); + + self.skip_ws(); + + let mut args = match self.eatv() { + Some(Token::Colon) => self.parse_table_contents().0, + Some(_) => { + self.expected_at("colon", name.span.end); + while self.eat().is_some() {} + TableExpr::new() + } + None => TableExpr::new(), + }; + + self.end_group(); + self.skip_ws(); + let (has_chained_child, end) = if self.peek().is_some() { + let item = self.parse_bracket_call(true); + let span = item.span; + let t = vec![item.map(SyntaxNode::Call)]; + args.push(SpannedEntry::val(Spanned::new(Expr::Tree(t), span))); + (true, span.end) + } else { + self.tokens.pop_mode(); + (false, self.end_group().end) + }; + + let start = if chained { before_name } else { before_bracket }; + let mut span = Span::new(start, end); + + if self.check(Token::LeftBracket) && !has_chained_child { + self.start_group(Group::Bracket); + self.tokens.push_mode(TokenMode::Body); + + let body = self.parse_body_contents(); + + self.tokens.pop_mode(); + let body_span = self.end_group(); + + let expr = Expr::Tree(body); + args.push(SpannedEntry::val(Spanned::new(expr, body_span))); + span.expand(body_span); + } + + Spanned::new(CallExpr { name, args }, span) + } + + fn parse_paren_call(&mut self, name: Spanned) -> Spanned { + self.start_group(Group::Paren); + let args = self.parse_table_contents().0; + let args_span = self.end_group(); + let span = Span::merge(name.span, args_span); + Spanned::new(CallExpr { name, args }, span) + } +} + +// Tables. 
+impl Parser<'_> { + fn parse_table_contents(&mut self) -> (TableExpr, bool) { + let mut table = TableExpr::new(); + let mut comma_and_keyless = true; + + while { + self.skip_ws(); + !self.eof() + } { + let (key, val) = if let Some(ident) = self.parse_ident() { + self.skip_ws(); + + match self.peekv() { + Some(Token::Equals) => { + self.eat(); + self.skip_ws(); + if let Some(value) = self.parse_expr() { + (Some(ident), value) + } else { + self.expected("value"); + continue; + } + } + + Some(Token::LeftParen) => { + let call = self.parse_paren_call(ident); + (None, call.map(Expr::Call)) + } + + _ => (None, ident.map(Expr::Ident)), + } + } else if let Some(value) = self.parse_expr() { + (None, value) + } else { + self.expected("value"); + continue; + }; + + let behind = val.span.end; + if let Some(key) = key { + comma_and_keyless = false; + table.insert(key.v.0, SpannedEntry::new(key.span, val)); + self.feedback + .decorations + .push(Spanned::new(Decoration::TableKey, key.span)); + } else { + table.push(SpannedEntry::val(val)); + } + + if { + self.skip_ws(); + self.eof() + } { + break; + } + + self.expect_at(Token::Comma, behind); + comma_and_keyless = false; + } + + let coercable = comma_and_keyless && !table.is_empty(); + (table, coercable) + } +} + +type Binop = fn(Box>, Box>) -> Expr; + +// Expressions and values. +impl Parser<'_> { + fn parse_expr(&mut self) -> Option> { + self.parse_binops("summand", Self::parse_term, |token| match token { + Token::Plus => Some(Expr::Add), + Token::Hyphen => Some(Expr::Sub), + _ => None, + }) + } + + fn parse_term(&mut self) -> Option> { + self.parse_binops("factor", Self::parse_factor, |token| match token { + Token::Star => Some(Expr::Mul), + Token::Slash => Some(Expr::Div), + _ => None, + }) + } + + /// Parse expression of the form ` ( )*`. 
+ fn parse_binops( + &mut self, + operand_name: &str, + mut parse_operand: impl FnMut(&mut Self) -> Option>, + mut parse_op: impl FnMut(Token) -> Option, + ) -> Option> { + let mut left = parse_operand(self)?; + + self.skip_ws(); + while let Some(token) = self.peek() { + if let Some(op) = parse_op(token.v) { + self.eat(); + self.skip_ws(); + + if let Some(right) = parse_operand(self) { + let span = Span::merge(left.span, right.span); + let v = op(Box::new(left), Box::new(right)); + left = Spanned::new(v, span); + self.skip_ws(); + continue; + } + + error!( + @self.feedback, Span::merge(left.span, token.span), + "missing right {}", operand_name, + ); + } + break; + } + + Some(left) + } + + fn parse_factor(&mut self) -> Option> { + if let Some(hyph) = self.check_eat(Token::Hyphen) { + self.skip_ws(); + if let Some(factor) = self.parse_factor() { + let span = Span::merge(hyph.span, factor.span); + Some(Spanned::new(Expr::Neg(Box::new(factor)), span)) + } else { + error!(@self.feedback, hyph.span, "dangling minus"); + None + } + } else { + self.parse_value() + } + } + + fn parse_value(&mut self) -> Option> { + let Spanned { v: token, span } = self.peek()?; + Some(match token { + // This could be a function call or an identifier. + Token::Ident(id) => { + let name = Spanned::new(Ident(id.to_string()), span); + self.eat(); + self.skip_ws(); + if self.check(Token::LeftParen) { + self.parse_paren_call(name).map(Expr::Call) + } else { + name.map(Expr::Ident) + } + } + + Token::Str { string, terminated } => { + if !terminated { + self.expected_at("quote", span.end); + } + self.with_span(Expr::Str(unescape_string(string))) + } + + Token::Bool(b) => self.with_span(Expr::Bool(b)), + Token::Number(n) => self.with_span(Expr::Number(n)), + Token::Length(s) => self.with_span(Expr::Length(s)), + Token::Hex(s) => { + if let Ok(color) = RgbaColor::from_str(s) { + self.with_span(Expr::Color(color)) + } else { + // Heal color by assuming black. 
+ error!(@self.feedback, span, "invalid color"); + let healed = RgbaColor::new_healed(0, 0, 0, 255); + self.with_span(Expr::Color(healed)) + } + } + + // This could be a table or a parenthesized expression. We parse as + // a table in any case and coerce the table into a value if it is + // coercable (length 1 and no trailing comma). + Token::LeftParen => { + self.start_group(Group::Paren); + let (table, coercable) = self.parse_table_contents(); + let span = self.end_group(); + + let expr = if coercable { + table.into_values().next().expect("table is coercable").val.v + } else { + Expr::Table(table) + }; + + Spanned::new(expr, span) + } + + // This is a content expression. + Token::LeftBrace => { + self.start_group(Group::Brace); + self.tokens.push_mode(TokenMode::Body); + + let tree = self.parse_body_contents(); + + self.tokens.pop_mode(); + let span = self.end_group(); + Spanned::new(Expr::Tree(tree), span) + } + + // This is a bracketed function call. + Token::LeftBracket => { + let call = self.parse_bracket_call(false); + let tree = vec![call.map(SyntaxNode::Call)]; + Spanned::new(Expr::Tree(tree), span) + } + + _ => return None, + }) + } + + fn parse_ident(&mut self) -> Option> { + self.peek().and_then(|token| match token.v { + Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))), + _ => None, + }) + } +} + +// Error handling. 
+impl Parser<'_> { + fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool { + if self.check(token) { + self.eat(); + true + } else { + self.expected_at(token.name(), pos); + false + } + } + + fn expected(&mut self, thing: &str) { + if let Some(found) = self.eat() { + error!( + @self.feedback, found.span, + "expected {}, found {}", thing, found.v.name(), + ); + } else { + error!(@self.feedback, Span::at(self.pos()), "expected {}", thing); + } + } + + fn expected_at(&mut self, thing: &str, pos: Pos) { + error!(@self.feedback, Span::at(pos), "expected {}", thing); + } + + fn expected_found_or_at(&mut self, thing: &str, pos: Pos) { + if self.eof() { + self.expected_at(thing, pos) + } else { + self.expected(thing); + } + } +} + +// Parsing primitives. +impl<'s> Parser<'s> { + fn start_group(&mut self, group: Group) { + let start = self.pos(); + if let Some(start_token) = group.start() { + self.assert(start_token); + } + self.delimiters.push((start, group.end())); + } + + fn end_group(&mut self) -> Span { + let peeked = self.peek(); + + let (start, end_token) = self.delimiters.pop().expect("group was not started"); + + if end_token != Token::Chain && peeked != None { + self.delimiters.push((start, end_token)); + assert_eq!(peeked, None, "unfinished group"); + } + + match self.peeked.unwrap() { + Some(token) if token.v == end_token => { + self.peeked = None; + Span::new(start, token.span.end) + } + _ => { + let end = self.pos(); + if end_token != Token::Chain { + error!( + @self.feedback, Span::at(end), + "expected {}", end_token.name(), + ); + } + Span::new(start, end) + } + } + } + + fn skip_ws(&mut self) { + while matches!( + self.peekv(), + Some(Token::Space(_)) | + Some(Token::LineComment(_)) | + Some(Token::BlockComment(_)) + ) { + self.eat(); + } + } + + fn eatv(&mut self) -> Option> { + self.eat().map(Spanned::value) + } + + fn peekv(&mut self) -> Option> { + self.peek().map(Spanned::value) + } + + fn assert(&mut self, token: Token<'_>) { + 
assert!(self.check_eat(token).is_some()); + } + + fn check_eat(&mut self, token: Token<'_>) -> Option>> { + if self.check(token) { self.eat() } else { None } + } + + /// Checks if the next token is of some kind + fn check(&mut self, token: Token<'_>) -> bool { + self.peekv() == Some(token) + } + + fn with_span(&mut self, v: T) -> Spanned { + let span = self.eat().expect("expected token").span; + Spanned::new(v, span) + } + + fn eof(&mut self) -> bool { + self.peek().is_none() + } + + fn eat(&mut self) -> Option>> { + let token = self.peek()?; + self.peeked = None; + Some(token) + } + + fn peek(&mut self) -> Option>> { + let tokens = &mut self.tokens; + let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?; + + // Check for unclosed groups. + if Group::is_delimiter(token.v) { + if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) { + return None; + } + } + + Some(token) + } + + fn pos(&self) -> Pos { + self.peeked + .flatten() + .map(|s| s.span.start) + .unwrap_or_else(|| self.tokens.pos()) + } +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum Group { + Paren, + Bracket, + Brace, + Subheader, +} + +impl Group { + fn is_delimiter(token: Token<'_>) -> bool { + matches!( + token, + Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain + ) + } + + fn start(self) -> Option> { + match self { + Self::Paren => Some(Token::LeftParen), + Self::Bracket => Some(Token::LeftBracket), + Self::Brace => Some(Token::LeftBrace), + Self::Subheader => None, + } + } + + fn end(self) -> Token<'static> { + match self { + Self::Paren => Token::RightParen, + Self::Bracket => Token::RightBracket, + Self::Brace => Token::RightBrace, + Self::Subheader => Token::Chain, + } + } +} diff --git a/src/syntax/parsing/tests.rs b/src/syntax/parsing/tests.rs new file mode 100644 index 000000000..7fdf02ca3 --- /dev/null +++ b/src/syntax/parsing/tests.rs @@ -0,0 +1,509 @@ +#![allow(non_snake_case)] + +use super::parse; +use crate::color::RgbaColor; +use 
crate::compute::table::SpannedEntry; +use crate::length::Length; +use crate::syntax::decoration::Decoration::*; +use crate::syntax::span::Spanned; +use crate::syntax::tests::*; +use crate::syntax::tree::*; + +// ------------------------------ Construct Syntax Nodes ------------------------------ // + +use SyntaxNode::{ + Linebreak as L, Parbreak as P, Spacing as S, ToggleBolder as B, ToggleItalic as I, +}; + +fn T(text: &str) -> SyntaxNode { + SyntaxNode::Text(text.to_string()) +} + +macro_rules! H { + ($level:expr, $($tts:tt)*) => { + SyntaxNode::Heading(Heading { + level: Spanned::zero($level), + tree: Tree![@$($tts)*], + }) + }; +} + +macro_rules! R { + ($($line:expr),* $(,)?) => { + SyntaxNode::Raw(vec![$($line.to_string()),*]) + }; +} + +macro_rules! C { + ($lang:expr, $($line:expr),* $(,)?) => {{ + let lines = vec![$($line.to_string()) ,*]; + SyntaxNode::Code(Code { + lang: $lang, + block: lines.len() > 1, + lines, + }) + }}; +} + +fn Lang<'a, T: Into>>(lang: T) -> Option> { + Some(Into::>::into(lang).map(|s| Ident(s.to_string()))) +} + +macro_rules! F { + ($($tts:tt)*) => { SyntaxNode::Call(Call!(@$($tts)*)) } +} + +// ------------------------------- Construct Expressions ------------------------------ // + +use Expr::{Bool, Color, Length as Len, Number as Num}; + +fn Id(ident: &str) -> Expr { + Expr::Ident(Ident(ident.to_string())) +} +fn Str(string: &str) -> Expr { + Expr::Str(string.to_string()) +} + +macro_rules! Table { + (@table=$table:expr,) => {}; + (@table=$table:expr, $key:expr => $value:expr $(, $($tts:tt)*)?) => {{ + let key = Into::>::into($key); + let val = Into::>::into($value); + $table.insert(key.v, SpannedEntry::new(key.span, val)); + Table![@table=$table, $($($tts)*)?]; + }}; + (@table=$table:expr, $value:expr $(, $($tts:tt)*)?) 
=> { + let val = Into::>::into($value); + $table.push(SpannedEntry::val(val)); + Table![@table=$table, $($($tts)*)?]; + }; + (@$($tts:tt)*) => {{ + #[allow(unused_mut)] + let mut table = TableExpr::new(); + Table![@table=table, $($tts)*]; + table + }}; + ($($tts:tt)*) => { Expr::Table(Table![@$($tts)*]) }; +} + +macro_rules! Tree { + (@$($node:expr),* $(,)?) => { + vec![$(Into::>::into($node)),*] + }; + ($($tts:tt)*) => { Expr::Tree(Tree![@$($tts)*]) }; +} + +macro_rules! Call { + (@$name:expr $(; $($tts:tt)*)?) => {{ + let name = Into::>::into($name); + CallExpr { + name: name.map(|n| Ident(n.to_string())), + args: Table![@$($($tts)*)?], + } + }}; + ($($tts:tt)*) => { Expr::Call(Call![@$($tts)*]) }; +} + +fn Neg>>(e1: T) -> Expr { + Expr::Neg(Box::new(e1.into())) +} +fn Add>>(e1: T, e2: T) -> Expr { + Expr::Add(Box::new(e1.into()), Box::new(e2.into())) +} +fn Sub>>(e1: T, e2: T) -> Expr { + Expr::Sub(Box::new(e1.into()), Box::new(e2.into())) +} +fn Mul>>(e1: T, e2: T) -> Expr { + Expr::Mul(Box::new(e1.into()), Box::new(e2.into())) +} +fn Div>>(e1: T, e2: T) -> Expr { + Expr::Div(Box::new(e1.into()), Box::new(e2.into())) +} + +// ------------------------------------ Test Macros ----------------------------------- // + +// Test syntax trees with or without spans. +macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} } +macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} } +macro_rules! test { + (@spans=$spans:expr, $src:expr => $($tts:tt)*) => { + let exp = Tree![@$($tts)*]; + let pass = parse($src); + check($src, exp, pass.output, $spans); + }; +} + +// Test expressions. +macro_rules! v { + ($src:expr => $($tts:tt)*) => { + t!(concat!("[val: ", $src, "]") => F!("val"; $($tts)*)); + } +} + +// Test error messages. +macro_rules! 
e { + ($src:expr => $($tts:tt)*) => { + let exp = vec![$($tts)*]; + let pass = parse($src); + let found = pass.feedback.diagnostics.iter() + .map(|s| s.as_ref().map(|e| e.message.as_str())) + .collect::>(); + check($src, exp, found, true); + }; +} + +// Test decorations. +macro_rules! d { + ($src:expr => $($tts:tt)*) => { + let exp = vec![$($tts)*]; + let pass = parse($src); + check($src, exp, pass.feedback.decorations, true); + }; +} + +// --------------------------------------- Tests -------------------------------------- // + +#[test] +fn test_parse_groups() { + e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"), + s(0,2, 0,2, "expected closing bracket")); + + e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"), + s(0,5, 0,6, "unexpected closing brace")); +} + +#[test] +fn test_parse_simple_nodes() { + t!("" => ); + t!("hi" => T("hi")); + t!("*hi" => B, T("hi")); + t!("hi_" => T("hi"), I); + t!("hi you" => T("hi"), S, T("you")); + t!("special~name" => T("special"), T("\u{00A0}"), T("name")); + t!("special\\~name" => T("special"), T("~"), T("name")); + t!("\\u{1f303}" => T("πŸŒƒ")); + t!("\n\n\nhello" => P, T("hello")); + t!(r"a\ b" => T("a"), L, S, T("b")); + t!("`py`" => R!["py"]); + t!("`hi\nyou" => R!["hi", "you"]); + e!("`hi\nyou" => s(1,3, 1,3, "expected backtick")); + t!("`hi\\`du`" => R!["hi`du"]); + + ts!("```java out```" => s(0,0, 0,14, C![Lang(s(0,3, 0,7, "java")), "out"])); + t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]); + t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![ + Lang("typst"), " Typst uses ``` to indicate code blocks" + ]); + + e!("``` hi\nyou" => s(1,3, 1,3, "expected backticks")); + e!("```🌍 hi\nyou```" => s(0,3, 0,4, "invalid identifier")); + e!("\\u{d421c809}" => s(0,0, 0,12, "invalid unicode escape sequence")); + e!("\\u{abc" => s(0,6, 0,6, "expected closing brace")); + t!("πŸ’œ\n\n 🌍" => T("πŸ’œ"), P, T("🌍")); + + ts!("hi" => s(0,0, 0,2, T("hi"))); + 
ts!("*Hi*" => s(0,0, 0,1, B), s(0,1, 0,3, T("Hi")), s(0,3, 0,4, B)); + ts!("πŸ’œ\n\n 🌍" => s(0,0, 0,1, T("πŸ’œ")), s(0,1, 2,1, P), s(2,1, 2,2, T("🌍"))); +} + +#[test] +fn test_parse_comments() { + // In body. + t!("hi// you\nw" => T("hi"), S, T("w")); + t!("first//\n//\nsecond" => T("first"), S, S, T("second")); + t!("first//\n \nsecond" => T("first"), P, T("second")); + t!("first/*\n \n*/second" => T("first"), T("second")); + e!("🌎\n*/n" => s(1,0, 1,2, "unexpected end of block comment")); + + // In header. + t!("[val:/*12pt*/]" => F!("val")); + t!("[val \n /* \n */:]" => F!("val")); + e!("[val \n /* \n */:]" => ); + e!("[val : 12, /* \n */ 14]" => ); +} + +#[test] +fn test_parse_headings() { + t!("## Hello world!" => H![1, T("Hello"), S, T("world!")]); + + // Handle various whitespace usages. + t!("####Simple" => H![3, T("Simple")]); + t!(" # Whitespace!" => S, H![0, T("Whitespace!")]); + t!(" /* TODO: Improve */ ## Analysis" => S, S, H!(1, T("Analysis"))); + + // Complex heading contents. + t!("Some text [box][### Valuable facts]" => T("Some"), S, T("text"), S, + F!("box"; Tree![H!(2, T("Valuable"), S, T("facts"))]) + ); + t!("### Grandiose stuff [box][Get it \n\n straight]" => H![2, + T("Grandiose"), S, T("stuff"), S, + F!("box"; Tree![T("Get"), S, T("it"), P, T("straight")]) + ]); + t!("###### Multiline \\ headings" => H![5, T("Multiline"), S, L, S, T("headings")]); + + // Things that should not become headings. + t!("\\## Text" => T("#"), T("#"), S, T("Text")); + t!(" ###### # Text" => S, H!(5, T("#"), S, T("Text"))); + t!("I am #1" => T("I"), S, T("am"), S, T("#"), T("1")); + t!("[box][\n] # hi" => F!("box"; Tree![S]), S, T("#"), S, T("hi")); + + // Depth warnings. + e!("########" => s(0,0, 0,8, "section depth larger than 6 has no effect")); +} + +#[test] +fn test_parse_function_names() { + // No closing bracket. + t!("[" => F!("")); + e!("[" => s(0,1, 0,1, "expected function name"), + s(0,1, 0,1, "expected closing bracket")); + + // No name. 
+ e!("[]" => s(0,1, 0,1, "expected function name")); + e!("[\"]" => s(0,1, 0,3, "expected function name, found string"), + s(0,3, 0,3, "expected closing bracket")); + + // A valid name. + t!("[hi]" => F!("hi")); + t!("[ f]" => F!("f")); + + // An invalid name. + e!("[12]" => s(0,1, 0,3, "expected function name, found number")); + e!("[ 🌎]" => s(0,3, 0,4, "expected function name, found invalid token")); +} + +#[test] +fn test_parse_chaining() { + // Things the parser has to make sense of + t!("[hi: (5.0, 2.1 >> you]" => F!("hi"; Table![Num(5.0), Num(2.1)], Tree![F!("you")])); + t!("[box >>][Hi]" => F!("box"; Tree![T("Hi")])); + t!("[box >> pad: 1pt][Hi]" => F!("box"; Tree![ + F!("pad"; Len(Length::pt(1.0)), Tree!(T("Hi"))) + ])); + t!("[bold: 400, >> emph >> sub: 1cm]" => F!("bold"; Num(400.0), Tree![ + F!("emph"; Tree!(F!("sub"; Len(Length::cm(1.0))))) + ])); + + // Errors for unclosed / empty predecessor groups + e!("[hi: (5.0, 2.1 >> you]" => s(0, 15, 0, 15, "expected closing paren")); + e!("[>> abc]" => s(0, 1, 0, 1, "expected function name")); +} + +#[test] +fn test_parse_colon_starting_func_args() { + // Just colon without args. + e!("[val:]" => ); + + // Wrong token. + t!("[val=]" => F!("val")); + e!("[val=]" => s(0,4, 0,4, "expected colon")); + e!("[val/🌎:$]" => s(0,4, 0,4, "expected colon")); + + // String in invalid header without colon still parsed as string + // Note: No "expected quote" error because not even the string was + // expected. + e!("[val/\"]" => s(0,4, 0,4, "expected colon"), + s(0,7, 0,7, "expected closing bracket")); +} + +#[test] +fn test_parse_function_bodies() { + t!("[val: 1][*Hi*]" => F!("val"; Num(1.0), Tree![B, T("Hi"), B])); + e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment")); + + // Raw in body. + t!("[val][`Hi]`" => F!("val"; Tree![R!["Hi]"]])); + e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket")); + + // Crazy. 
+ t!("[v][[v][v][v]]" => F!("v"; Tree![F!("v"; Tree![T("v")]), F!("v")])); + + // Spanned. + ts!(" [box][Oh my]" => + s(0,0, 0,1, S), + s(0,1, 0,13, F!(s(0,2, 0,5, "box"); + s(0,6, 0,13, Tree![ + s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my")) + ]) + )) + ); +} + +#[test] +fn test_parse_values() { + // Simple. + v!("_" => Id("_")); + v!("name" => Id("name")); + v!("Ξ±" => Id("Ξ±")); + v!("\"hi\"" => Str("hi")); + v!("true" => Bool(true)); + v!("false" => Bool(false)); + v!("1.0e-4" => Num(1e-4)); + v!("3.14" => Num(3.14)); + v!("50%" => Num(0.5)); + v!("4.5cm" => Len(Length::cm(4.5))); + v!("12e1pt" => Len(Length::pt(12e1))); + v!("#f7a20500" => Color(RgbaColor::new(0xf7, 0xa2, 0x05, 0x00))); + v!("\"a\n[]\\\"string\"" => Str("a\n[]\"string")); + + // Content. + v!("{_hi_}" => Tree![I, T("hi"), I]); + e!("[val: {_hi_}]" => ); + v!("[hi]" => Tree![F!("hi")]); + e!("[val: [hi]]" => ); + + // Healed colors. + v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff))); + e!("[val: #12345]" => s(0,6, 0,12, "invalid color")); + e!("[val: #a5]" => s(0,6, 0,9, "invalid color")); + e!("[val: #14b2ah]" => s(0,6, 0,13, "invalid color")); + e!("[val: #f075ff011]" => s(0,6, 0,16, "invalid color")); + + // Unclosed string. + v!("\"hello" => Str("hello]")); + e!("[val: \"hello]" => s(0,13, 0,13, "expected quote"), + s(0,13, 0,13, "expected closing bracket")); + + // Spanned. + ts!("[val: 1.4]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.4))))); +} + +#[test] +fn test_parse_expressions() { + // Coerced table. + v!("(hi)" => Id("hi")); + + // Operations. + v!("-1" => Neg(Num(1.0))); + v!("-- 1" => Neg(Neg(Num(1.0)))); + v!("3.2in + 6pt" => Add(Len(Length::inches(3.2)), Len(Length::pt(6.0)))); + v!("5 - 0.01" => Sub(Num(5.0), Num(0.01))); + v!("(3mm * 2)" => Mul(Len(Length::mm(3.0)), Num(2.0))); + v!("12e-3cm/1pt" => Div(Len(Length::cm(12e-3)), Len(Length::pt(1.0)))); + + // More complex. 
+ v!("(3.2in + 6pt)*(5/2-1)" => Mul( + Add(Len(Length::inches(3.2)), Len(Length::pt(6.0))), + Sub(Div(Num(5.0), Num(2.0)), Num(1.0)) + )); + v!("(6.3E+2+4* - 3.2pt)/2" => Div( + Add(Num(6.3e2), Mul(Num(4.0), Neg(Len(Length::pt(3.2))))), + Num(2.0) + )); + + // Associativity of multiplication and division. + v!("3/4*5" => Mul(Div(Num(3.0), Num(4.0)), Num(5.0))); + + // Spanned. + ts!("[val: 1 + 3]" => s(0,0, 0,12, F!( + s(0,1, 0,4, "val"); s(0,6, 0,11, Add( + s(0,6, 0,7, Num(1.0)), + s(0,10, 0,11, Num(3.0)), + )) + ))); + + // Span of parenthesized expression contains parens. + ts!("[val: (1)]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.0))))); + + // Invalid expressions. + v!("4pt--" => Len(Length::pt(4.0))); + e!("[val: 4pt--]" => s(0,10, 0,11, "dangling minus"), + s(0,6, 0,10, "missing right summand")); + + v!("3mm+4pt*" => Add(Len(Length::mm(3.0)), Len(Length::pt(4.0)))); + e!("[val: 3mm+4pt*]" => s(0,10, 0,14, "missing right factor")); +} + +#[test] +fn test_parse_tables() { + // Okay. + v!("()" => Table![]); + v!("(false)" => Bool(false)); + v!("(true,)" => Table![Bool(true)]); + v!("(key=val)" => Table!["key" => Id("val")]); + v!("(1, 2)" => Table![Num(1.0), Num(2.0)]); + v!("(1, key=\"value\")" => Table![Num(1.0), "key" => Str("value")]); + + // Decorations. + d!("[val: key=hi]" => s(0,6, 0,9, TableKey)); + d!("[val: (key=hi)]" => s(0,7, 0,10, TableKey)); + d!("[val: f(key=hi)]" => s(0,8, 0,11, TableKey)); + + // Spanned with spacing around keyword arguments. + ts!("[val: \n hi \n = /* //\n */ \"s\n\"]" => s(0,0, 4,2, F!( + s(0,1, 0,4, "val"); s(1,1, 1,3, "hi") => s(3,4, 4,1, Str("s\n")) + ))); + e!("[val: \n hi \n = /* //\n */ \"s\n\"]" => ); +} + +#[test] +fn test_parse_tables_compute_func_calls() { + v!("empty()" => Call!("empty")); + v!("add ( 1 , 2 )" => Call!("add"; Num(1.0), Num(2.0))); + v!("items(\"fire\", #f93a6d)" => Call!("items"; + Str("fire"), Color(RgbaColor::new(0xf9, 0x3a, 0x6d, 0xff)) + )); + + // More complex. 
+ v!("css(1pt, rgb(90, 102, 254), \"solid\")" => Call!( + "css"; + Len(Length::pt(1.0)), + Call!("rgb"; Num(90.0), Num(102.0), Num(254.0)), + Str("solid"), + )); + + // Unclosed. + v!("lang(δΈ­ζ–‡]" => Call!("lang"; Id("δΈ­ζ–‡"))); + e!("[val: lang(δΈ­ζ–‡]" => s(0,13, 0,13, "expected closing paren")); + + // Invalid name. + v!("πŸ‘ (\"abc\", 13e-5)" => Table!(Str("abc"), Num(13.0e-5))); + e!("[val: πŸ‘ (\"abc\", 13e-5)]" => s(0,6, 0,7, "expected value, found invalid token")); +} + +#[test] +fn test_parse_tables_nested() { + v!("(1, ( ab=(), d = (3, 14pt) )), false" => + Table![ + Num(1.0), + Table!( + "ab" => Table![], + "d" => Table!(Num(3.0), Len(Length::pt(14.0))), + ), + ], + Bool(false), + ); +} + +#[test] +fn test_parse_tables_errors() { + // Expected value. + e!("[val: (=)]" => s(0,7, 0,8, "expected value, found equals sign")); + e!("[val: (,)]" => s(0,7, 0,8, "expected value, found comma")); + v!("(\x07 abc,)" => Table![Id("abc")]); + e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token")); + e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma")); + e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren")); + + // Expected comma. + v!("(true false)" => Table![Bool(true), Bool(false)]); + e!("[val: (true false)]" => s(0,11, 0,11, "expected comma")); + + // Expected closing paren. + e!("[val: (#000]" => s(0,11, 0,11, "expected closing paren")); + e!("[val: (key]" => s(0,10, 0,10, "expected closing paren")); + e!("[val: (key=]" => s(0,11, 0,11, "expected value"), + s(0,11, 0,11, "expected closing paren")); + + // Bad key. + v!("true=you" => Bool(true), Id("you")); + e!("[val: true=you]" => + s(0,10, 0,10, "expected comma"), + s(0,10, 0,11, "expected value, found equals sign")); + + // Unexpected equals sign. 
+ v!("z=y=4" => Num(4.0), "z" => Id("y")); + e!("[val: z=y=4]" => + s(0,9, 0,9, "expected comma"), + s(0,9, 0,10, "expected value, found equals sign")); +} diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs index 94dfc1243..715db1099 100644 --- a/src/syntax/tree.rs +++ b/src/syntax/tree.rs @@ -4,7 +4,7 @@ use std::fmt::{self, Debug, Formatter}; use super::decoration::Decoration; use super::span::{SpanVec, Spanned}; -use super::Ident; +use super::tokens::is_identifier; use crate::color::RgbaColor; use crate::compute::table::{SpannedEntry, Table}; use crate::compute::value::{TableValue, Value}; @@ -157,6 +157,32 @@ impl Debug for Expr { } } +/// An identifier as defined by unicode with a few extra permissible characters. +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Ident(pub String); + +impl Ident { + /// Create a new identifier from a string checking that it is a valid. + pub fn new(ident: impl AsRef + Into) -> Option { + if is_identifier(ident.as_ref()) { + Some(Self(ident.into())) + } else { + None + } + } + + /// Return a reference to the underlying string. + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl Debug for Ident { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "`{}`", self.0) + } +} + /// A table of expressions. /// /// # Example