diff --git a/Cargo.toml b/Cargo.toml index 31cbecc44..15eea562c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,9 @@ version = "0.1.0" authors = ["The Typst Project Developers"] edition = "2018" +[lib] +bench = false + [workspace] members = ["main"] diff --git a/src/eval/value.rs b/src/eval/value.rs index 56af4322d..6a63a66f7 100644 --- a/src/eval/value.rs +++ b/src/eval/value.rs @@ -15,12 +15,10 @@ use crate::syntax::{Ident, Span, SpanWith, Spanned, SynNode, SynTree}; use crate::{DynFuture, Feedback, Pass}; /// A computational value. -#[derive(Clone)] +#[derive(Clone, PartialEq)] pub enum Value { /// An identifier: `ident`. Ident(Ident), - /// A string: `"string"`. - Str(String), /// A boolean: `true, false`. Bool(bool), /// A number: `1.2, 200%`. @@ -29,6 +27,8 @@ pub enum Value { Length(Length), /// A color value with alpha channel: `#f79143ff`. Color(RgbaColor), + /// A string: `"string"`. + Str(String), /// A dictionary value: `(false, 12cm, greeting="hi")`. Dict(DictValue), /// A syntax tree containing typesetting content. @@ -45,11 +45,11 @@ impl Value { pub fn name(&self) -> &'static str { match self { Self::Ident(_) => "identifier", - Self::Str(_) => "string", Self::Bool(_) => "bool", Self::Number(_) => "number", Self::Length(_) => "length", Self::Color(_) => "color", + Self::Str(_) => "string", Self::Dict(_) => "dict", Self::Tree(_) => "syntax tree", Self::Func(_) => "function", @@ -65,9 +65,6 @@ impl Spanned { /// the value is represented as layoutable content in a reasonable way. pub fn into_commands(self) -> Commands { match self.v { - Value::Commands(commands) => commands, - Value::Tree(tree) => vec![Command::LayoutSyntaxTree(tree)], - // Forward to each entry, separated with spaces. Value::Dict(dict) => { let mut commands = vec![]; @@ -75,7 +72,7 @@ impl Spanned { for entry in dict.into_values() { if let Some(last_end) = end { let span = Span::new(last_end, entry.key.start); - let tree = vec![SynNode::Spacing.span_with(span)]; + let tree = vec![SynNode::Space.span_with(span)]; commands.push(Command::LayoutSyntaxTree(tree)); } @@ -85,6 +82,9 @@ impl Spanned { commands } + Value::Tree(tree) => vec![Command::LayoutSyntaxTree(tree)], + Value::Commands(commands) => commands, + // Format with debug. val => { let fmt = format!("{:?}", val); @@ -99,37 +99,19 @@ impl Debug for Value { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Self::Ident(i) => i.fmt(f), - Self::Str(s) => s.fmt(f), Self::Bool(b) => b.fmt(f), Self::Number(n) => n.fmt(f), Self::Length(s) => s.fmt(f), Self::Color(c) => c.fmt(f), + Self::Str(s) => s.fmt(f), Self::Dict(t) => t.fmt(f), Self::Tree(t) => t.fmt(f), - Self::Func(_) => f.pad(""), + Self::Func(c) => c.fmt(f), Self::Commands(c) => c.fmt(f), } } } -impl PartialEq for Value { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Self::Ident(a), Self::Ident(b)) => a == b, - (Self::Str(a), Self::Str(b)) => a == b, - (Self::Bool(a), Self::Bool(b)) => a == b, - (Self::Number(a), Self::Number(b)) => a == b, - (Self::Length(a), Self::Length(b)) => a == b, - (Self::Color(a), Self::Color(b)) => a == b, - (Self::Dict(a), Self::Dict(b)) => a == b, - (Self::Tree(a), Self::Tree(b)) => a == b, - (Self::Func(a), Self::Func(b)) => Rc::ptr_eq(a, b), - (Self::Commands(a), Self::Commands(b)) => a == b, - _ => false, - } - } -} - /// An executable function value. /// /// The first argument is a dictionary containing the arguments passed to the @@ -140,8 +122,45 @@ impl PartialEq for Value { /// layouting engine to do what the function pleases. 
/// /// The dynamic function object is wrapped in an `Rc` to keep `Value` clonable. -pub type FuncValue = - Rc) -> DynFuture>>; +#[derive(Clone)] +pub struct FuncValue(pub Rc); + +/// The dynamic function type backtick [`FuncValue`]. +/// +/// [`FuncValue`]: struct.FuncValue.html +pub type FuncType = dyn Fn(Span, DictValue, LayoutContext<'_>) -> DynFuture>; + +impl FuncValue { + /// Create a new function value from a rust function or closure. + pub fn new(f: F) -> Self + where + F: Fn(Span, DictValue, LayoutContext<'_>) -> DynFuture>, + { + Self(Rc::new(f)) + } +} + +impl Eq for FuncValue {} + +impl PartialEq for FuncValue { + fn eq(&self, other: &Self) -> bool { + Rc::ptr_eq(&self.0, &other.0) + } +} + +impl Deref for FuncValue { + type Target = FuncType; + + fn deref(&self) -> &Self::Target { + self.0.as_ref() + } +} + +impl Debug for FuncValue { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.pad("") + } +} /// A dictionary of values. /// @@ -262,8 +281,7 @@ impl DictValue { /// Generated `"unexpected argument"` errors for all remaining entries. pub fn unexpected(&self, f: &mut Feedback) { for entry in self.values() { - let span = Span::merge(entry.key, entry.val.span); - error!(@f, span, "unexpected argument"); + error!(@f, entry.key.join(entry.val.span), "unexpected argument"); } } } diff --git a/src/layout/tree.rs b/src/layout/tree.rs index f8e4160ca..56313383c 100644 --- a/src/layout/tree.rs +++ b/src/layout/tree.rs @@ -58,19 +58,7 @@ impl<'a> TreeLayouter<'a> { }; match &node.v { - SynNode::Spacing => self.layout_space(), - SynNode::Linebreak => self.layouter.finish_line(), - SynNode::Parbreak => self.layout_parbreak(), - - SynNode::ToggleItalic => { - self.style.text.italic = !self.style.text.italic; - decorate(self, Decoration::Italic); - } - SynNode::ToggleBolder => { - self.style.text.bolder = !self.style.text.bolder; - decorate(self, Decoration::Bold); - } - + SynNode::Space => self.layout_space(), SynNode::Text(text) => { if self.style.text.italic { decorate(self, Decoration::Italic); @@ -81,8 +69,19 @@ impl<'a> TreeLayouter<'a> { self.layout_text(text).await; } - SynNode::Raw(raw) => self.layout_raw(raw).await, + SynNode::Linebreak => self.layouter.finish_line(), + SynNode::Parbreak => self.layout_parbreak(), + SynNode::ToggleItalic => { + self.style.text.italic = !self.style.text.italic; + decorate(self, Decoration::Italic); + } + SynNode::ToggleBolder => { + self.style.text.bolder = !self.style.text.bolder; + decorate(self, Decoration::Bold); + } + SynNode::Heading(heading) => self.layout_heading(heading).await, + SynNode::Raw(raw) => self.layout_raw(raw).await, SynNode::Expr(expr) => { self.layout_expr(expr.span_with(node.span)).await; @@ -116,7 +115,7 @@ impl<'a> TreeLayouter<'a> { async fn layout_heading(&mut self, heading: &NodeHeading) { let style = self.style.text.clone(); - self.style.text.font_scale *= 1.5 - 0.1 * heading.level.v.min(5) as f64; + self.style.text.font_scale *= 1.5 - 0.1 * heading.level.v as f64; self.style.text.bolder = true; self.layout_parbreak(); diff --git a/src/library/boxed.rs b/src/library/boxed.rs index e02b8c0c4..850252642 100644 --- a/src/library/boxed.rs +++ b/src/library/boxed.rs @@ -13,7 +13,7 @@ pub async fn boxed( ) -> Pass { let mut f = Feedback::new(); - let content = args.take::().unwrap_or(SynTree::new()); + let content = args.take::().unwrap_or_default(); ctx.base = ctx.spaces[0].size; ctx.spaces.truncate(1); diff --git a/src/library/mod.rs b/src/library/mod.rs index a5fdfc4c1..43f743187 100644 --- a/src/library/mod.rs 
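The new `FuncValue` wrapper is what makes function values clonable and comparable by identity rather than by behavior. A minimal usage sketch (illustrative only, not part of the patch; `my_func` stands for any hypothetical async function matching the `FuncType` signature, mirroring the `wrap!` macro below):

    let a = FuncValue::new(|name, args, ctx| Box::pin(my_func(name, args, ctx)));
    let b = a.clone();
    assert!(a == b);  // clones share the same Rc, so Rc::ptr_eq reports equality
    let c = FuncValue::new(|name, args, ctx| Box::pin(my_func(name, args, ctx)));
    assert!(a != c);  // a separate allocation never compares equal, even for the same function

This pointer-based equality is what lets `Value` simply `#[derive(PartialEq)]` above instead of carrying the hand-written impl that this patch removes.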
+++ b/src/library/mod.rs @@ -14,9 +14,7 @@ pub use font::*; pub use page::*; pub use spacing::*; -use std::rc::Rc; - -use crate::eval::Scope; +use crate::eval::{FuncValue, Scope}; use crate::prelude::*; macro_rules! std { @@ -32,7 +30,7 @@ macro_rules! std { macro_rules! wrap { ($func:expr) => { - Rc::new(|name, args, ctx| Box::pin($func(name, args, ctx))) + FuncValue::new(|name, args, ctx| Box::pin($func(name, args, ctx))) }; } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index cc0b63781..2f34357c7 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,11 +1,13 @@ //! Parsing and tokenization. mod lines; +mod parser; mod resolve; mod scanner; mod tokens; pub use lines::*; +pub use parser::*; pub use resolve::*; pub use scanner::*; pub use tokens::*; @@ -15,634 +17,469 @@ use std::str::FromStr; use crate::color::RgbaColor; use crate::eval::DictKey; use crate::syntax::*; -use crate::{Feedback, Pass}; +use crate::Pass; /// Parse a string of source code. pub fn parse(src: &str) -> Pass { - Parser::new(src).parse() + let mut p = Parser::new(src); + Pass::new(tree(&mut p), p.finish()) } -struct Parser<'s> { - tokens: Tokens<'s>, - peeked: Option>>>, - delimiters: Vec<(Pos, Token<'static>)>, - at_block_or_line_start: bool, - feedback: Feedback, +/// Parse a syntax tree. +fn tree(p: &mut Parser) -> SynTree { + // We keep track of whether we are at the start of a block or paragraph + // to know whether headings are allowed. + let mut at_start = true; + let mut tree = vec![]; + while !p.eof() { + if let Some(node) = node(p, at_start) { + if node.v == SynNode::Parbreak { + at_start = true; + } else if node.v != SynNode::Space { + at_start = false; + } + tree.push(node); + } + } + tree } -impl<'s> Parser<'s> { - fn new(src: &'s str) -> Self { - Self { - tokens: Tokens::new(src, TokenMode::Body), - peeked: None, - delimiters: vec![], - at_block_or_line_start: true, - feedback: Feedback::new(), - } - } - - fn parse(mut self) -> Pass { - let tree = self.parse_body_contents(); - Pass::new(tree, self.feedback) - } -} - -// Typesetting content. -impl Parser<'_> { - fn parse_body_contents(&mut self) -> SynTree { - let mut tree = SynTree::new(); - - self.at_block_or_line_start = true; - while !self.eof() { - if let Some(node) = self.parse_node() { - tree.push(node); - } - } - - tree - } - - fn parse_node(&mut self) -> Option> { - let token = self.peek()?; - let end = Span::at(token.span.end); - - // Set block or line start to false because most nodes have that effect, but - // remember the old value to actually check it for hashtags and because comments - // and spaces want to retain it. - let was_at_block_or_line_start = self.at_block_or_line_start; - self.at_block_or_line_start = false; - - Some(match token.v { - // Starting from two newlines counts as a paragraph break, a single - // newline does not. 
- Token::Space(n) => { - if n == 0 { - self.at_block_or_line_start = was_at_block_or_line_start; - } else if n >= 1 { - self.at_block_or_line_start = true; - } - - self.with_span(if n >= 2 { SynNode::Parbreak } else { SynNode::Spacing }) - } - - Token::LineComment(_) | Token::BlockComment(_) => { - self.at_block_or_line_start = was_at_block_or_line_start; - self.eat(); - return None; - } - - Token::LeftBracket => { - let call = self.parse_bracket_call(false); - self.at_block_or_line_start = false; - call.map(|c| SynNode::Expr(Expr::Call(c))) - } - - Token::Star => self.with_span(SynNode::ToggleBolder), - Token::Underscore => self.with_span(SynNode::ToggleItalic), - Token::Backslash => self.with_span(SynNode::Linebreak), - - Token::Hashtag if was_at_block_or_line_start => { - self.parse_heading().map(SynNode::Heading) - } - - Token::Raw { raw, backticks, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected backtick(s)"); - } - - let raw = resolve::resolve_raw(raw, backticks); - self.with_span(SynNode::Raw(raw)) - } - - Token::Text(text) => self.with_span(SynNode::Text(text.to_string())), - Token::Hashtag => self.with_span(SynNode::Text("#".to_string())), - - Token::UnicodeEscape { sequence, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected closing brace"); - } - - if let Some(c) = resolve::resolve_hex(sequence) { - self.with_span(SynNode::Text(c.to_string())) - } else { - error!(@self.feedback, token.span, "invalid unicode escape sequence"); - // TODO: Decide whether to render the escape sequence. - self.eat(); - return None; - } - } - - unexpected => { - error!(@self.feedback, token.span, "unexpected {}", unexpected.name()); - self.eat(); - return None; - } - }) - } - - fn parse_heading(&mut self) -> Spanned { - let start = self.pos(); - self.assert(Token::Hashtag); - - let mut level = 0; - while self.peekv() == Some(Token::Hashtag) { - level += 1; - self.eat(); - } - - let span = Span::new(start, self.pos()); - let level = level.span_with(span); - - if level.v > 5 { - warning!( - @self.feedback, level.span, - "section depth larger than 6 has no effect", - ); - } - - self.skip_ws(); - - let mut tree = SynTree::new(); - while !self.eof() && !matches!(self.peekv(), Some(Token::Space(n)) if n >= 1) { - if let Some(node) = self.parse_node() { - tree.push(node); - } - } - - let span = Span::new(start, self.pos()); - NodeHeading { level, contents: tree }.span_with(span) - } -} - -// Function calls. 
-impl Parser<'_> { - fn parse_bracket_call(&mut self, chained: bool) -> Spanned { - let before_bracket = self.pos(); - if !chained { - self.start_group(Group::Bracket); - self.tokens.push_mode(TokenMode::Header); - } - - let before_name = self.pos(); - self.start_group(Group::Subheader); - self.skip_ws(); - let name = self.parse_ident().unwrap_or_else(|| { - self.expected_found_or_at("function name", before_name); - Ident(String::new()).span_with(Span::at(before_name)) - }); - - self.skip_ws(); - - let mut args = match self.eatv() { - Some(Token::Colon) => self.parse_dict_contents().0, - Some(_) => { - self.expected_at("colon", name.span.end); - while self.eat().is_some() {} - LitDict::default() - } - None => LitDict::default(), - }; - - self.end_group(); - self.skip_ws(); - let (has_chained_child, end) = if self.peek().is_some() { - let item = self.parse_bracket_call(true); - let span = item.span; - let tree = vec![item.map(|c| SynNode::Expr(Expr::Call(c)))]; - let expr = Expr::Lit(Lit::Content(tree)); - args.0.push(LitDictEntry { key: None, value: expr.span_with(span) }); - (true, span.end) - } else { - self.tokens.pop_mode(); - (false, self.end_group().end) - }; - - let start = if chained { before_name } else { before_bracket }; - let mut span = Span::new(start, end); - - if self.check(Token::LeftBracket) && !has_chained_child { - self.start_group(Group::Bracket); - self.tokens.push_mode(TokenMode::Body); - let body = self.parse_body_contents(); - self.tokens.pop_mode(); - let body_span = self.end_group(); - - let expr = Expr::Lit(Lit::Content(body)); - args.0.push(LitDictEntry { - key: None, - value: expr.span_with(body_span), - }); - span.expand(body_span); - } - - ExprCall { name, args }.span_with(span) - } - - fn parse_paren_call(&mut self, name: Spanned) -> Spanned { - self.start_group(Group::Paren); - let args = self.parse_dict_contents().0; - let args_span = self.end_group(); - let span = Span::merge(name.span, args_span); - ExprCall { name, args }.span_with(span) - } -} - -// Dicts. -impl Parser<'_> { - fn parse_dict_contents(&mut self) -> (LitDict, bool) { - let mut dict = LitDict::default(); - let mut comma_and_keyless = true; - - while { - self.skip_ws(); - !self.eof() - } { - let (key, value) = if let Some(ident) = self.parse_ident() { - self.skip_ws(); - - match self.peekv() { - Some(Token::Equals) => { - self.eat(); - self.skip_ws(); - if let Some(value) = self.parse_expr() { - (Some(ident.map(|id| DictKey::Str(id.0))), value) - } else { - self.expected("value"); - continue; - } - } - - Some(Token::LeftParen) => { - let call = self.parse_paren_call(ident); - (None, call.map(Expr::Call)) - } - - _ => (None, ident.map(|id| Expr::Lit(Lit::Ident(id)))), - } - } else if let Some(value) = self.parse_expr() { - (None, value) +/// Parse a syntax node. +fn node(p: &mut Parser, at_start: bool) -> Option> { + let token = p.eat()?; + let span = token.span; + Some(match token.v { + // Spaces. + Token::Space(newlines) => { + if newlines < 2 { + SynNode::Space.span_with(span) } else { - self.expected("value"); - continue; - }; - - if let Some(key) = &key { - comma_and_keyless = false; - self.feedback - .decorations - .push(Decoration::DictKey.span_with(key.span)); + SynNode::Parbreak.span_with(span) } + } + Token::Text(text) => SynNode::Text(text.into()).span_with(span), - let behind = value.span.end; - dict.0.push(LitDictEntry { key, value }); + // Comments. + Token::LineComment(_) | Token::BlockComment(_) => return None, - if { - self.skip_ws(); - self.eof() - } { + // Markup. 
+ Token::Star => SynNode::ToggleBolder.span_with(span), + Token::Underscore => SynNode::ToggleItalic.span_with(span), + Token::Backslash => SynNode::Linebreak.span_with(span), + Token::Hashtag => { + if at_start { + heading(p, span.start).map(SynNode::Heading) + } else { + SynNode::Text(p.get(span).into()).span_with(span) + } + } + Token::Raw(token) => raw(p, token, span).map(SynNode::Raw), + Token::UnicodeEscape(token) => unicode_escape(p, token, span).map(SynNode::Text), + + // Functions. + Token::LeftBracket => { + p.jump(span.start); + bracket_call(p).map(Expr::Call).map(SynNode::Expr) + } + + // Bad tokens. + _ => { + p.diag_unexpected(token); + return None; + } + }) +} + +/// Parse a heading. +fn heading(p: &mut Parser, start: Pos) -> Spanned { + // Parse the section depth. + let count = p.eat_while(|c| c == Token::Hashtag); + let span = (start, p.pos()); + let level = (count.min(5) as u8).span_with(span); + if count > 5 { + p.diag(warning!(span, "section depth larger than 6 has no effect")); + } + + // Parse the heading contents. + p.skip_white(); + let mut contents = vec![]; + while p.check(|t| !matches!(t, Token::Space(n) if n >= 1)) { + if let Some(node) = node(p, false) { + contents.push(node); + } + } + + NodeHeading { level, contents }.span_with((start, p.pos())) +} + +/// Parse a raw block. +fn raw(p: &mut Parser, token: TokenRaw, span: Span) -> Spanned { + let raw = resolve::resolve_raw(token.text, token.backticks); + + if !token.terminated { + p.diag(error!(span.end, "expected backtick(s)")); + } + + raw.span_with(span) +} + +/// Parse a unicode escape sequence. +fn unicode_escape( + p: &mut Parser, + token: TokenUnicodeEscape, + span: Span, +) -> Spanned { + let text = if let Some(c) = resolve::resolve_hex(token.sequence) { + c.to_string() + } else { + // Print out the escape sequence verbatim if it is + // invalid. + p.diag(error!(span, "invalid unicode escape sequence")); + p.get(span).into() + }; + + if !token.terminated { + p.diag(error!(span.end, "expected closing brace")); + } + + text.span_with(span) +} + +/// Parse a bracketed function call. +fn bracket_call(p: &mut Parser) -> Spanned { + let before_bracket = p.pos(); + p.start_group(Group::Bracket); + p.push_mode(TokenMode::Header); + + // One header is guaranteed, but there may be more (through chaining). + let mut outer = vec![]; + let mut inner = bracket_subheader(p); + + while p.eat_if(Token::Chain).is_some() { + outer.push(inner); + inner = bracket_subheader(p); + } + + p.pop_mode(); + p.end_group(); + + if p.peek() == Some(Token::LeftBracket) { + let expr = bracket_body(p).map(Lit::Content).map(Expr::Lit); + inner.span.expand(expr.span); + inner.v.args.0.push(LitDictEntry { key: None, expr }); + } + + while let Some(mut top) = outer.pop() { + let span = inner.span; + let node = inner.map(Expr::Call).map(SynNode::Expr); + let expr = Expr::Lit(Lit::Content(vec![node])).span_with(span); + top.v.args.0.push(LitDictEntry { key: None, expr }); + inner = top; + } + + inner.v.span_with((before_bracket, p.pos())) +} + +/// Parse one subheader of a bracketed function call. 
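As a worked illustration of the clamping above (assuming the parser and layouter behave exactly as written here): `# Intro` yields a zero-based level of 0 and the layouter scales the font by 1.5, `### Sub` yields level 2 and a scale of 1.5 − 0.1 × 2 = 1.3, and only a seventh consecutive hashtag (count > 5) triggers the "section depth larger than 6" warning while the stored level stays clamped at 5.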
+fn bracket_subheader(p: &mut Parser) -> Spanned { + p.start_group(Group::Subheader); + let before_name = p.pos(); + + p.skip_white(); + let name = ident(p).unwrap_or_else(|| { + if p.eof() { + p.diag_expected_at("function name", before_name); + } else { + p.diag_expected("function name"); + } + Ident(String::new()).span_with(before_name) + }); + + p.skip_white(); + let args = if p.eat_if(Token::Colon).is_some() { + dict_contents(p).0 + } else { + // Ignore the rest if there's no colon. + if !p.eof() { + p.diag_expected_at("colon", p.pos()); + } + p.eat_while(|_| true); + LitDict::new() + }; + + ExprCall { name, args }.span_with(p.end_group()) +} + +/// Parse the body of a bracketed function call. +fn bracket_body(p: &mut Parser) -> Spanned { + p.start_group(Group::Bracket); + p.push_mode(TokenMode::Body); + let tree = tree(p); + p.pop_mode(); + tree.span_with(p.end_group()) +} + +/// Parse an expression: `term (+ term)*`. +fn expr(p: &mut Parser) -> Option> { + binops(p, "summand", term, |token| match token { + Token::Plus => Some(BinOp::Add), + Token::Hyphen => Some(BinOp::Sub), + _ => None, + }) +} + +/// Parse a term: `factor (* factor)*`. +fn term(p: &mut Parser) -> Option> { + binops(p, "factor", factor, |token| match token { + Token::Star => Some(BinOp::Mul), + Token::Slash => Some(BinOp::Div), + _ => None, + }) +} + +/// Parse binary operations of the from `a ( b)*`. +fn binops( + p: &mut Parser, + operand_name: &str, + operand: fn(&mut Parser) -> Option>, + op: fn(Token) -> Option, +) -> Option> { + let mut lhs = operand(p)?; + + loop { + p.skip_white(); + if let Some(op) = p.eat_map(op) { + p.skip_white(); + + if let Some(rhs) = operand(p) { + let span = lhs.span.join(rhs.span); + let expr = Expr::Binary(ExprBinary { + lhs: lhs.map(Box::new), + op, + rhs: rhs.map(Box::new), + }); + lhs = expr.span_with(span); + p.skip_white(); + } else { + let span = lhs.span.join(op.span); + p.diag(error!(span, "missing right {}", operand_name)); break; } - - self.expect_at(Token::Comma, behind); - comma_and_keyless = false; + } else { + break; } + } - let coercable = comma_and_keyless && !dict.0.is_empty(); - (dict, coercable) + Some(lhs) +} + +/// Parse a factor of the form `-?value`. +fn factor(p: &mut Parser) -> Option> { + if let Some(op) = p.eat_map(|token| match token { + Token::Hyphen => Some(UnOp::Neg), + _ => None, + }) { + p.skip_white(); + if let Some(expr) = factor(p) { + let span = op.span.join(expr.span); + let expr = Expr::Unary(ExprUnary { op, expr: expr.map(Box::new) }); + Some(expr.span_with(span)) + } else { + p.diag(error!(op.span, "missing factor")); + None + } + } else { + value(p) } } -// Expressions and values. -impl Parser<'_> { - fn parse_expr(&mut self) -> Option> { - self.parse_binops("summand", Self::parse_term, |token| match token { - Token::Plus => Some(BinOp::Add), - Token::Hyphen => Some(BinOp::Sub), - _ => None, - }) - } +/// Parse a value. +fn value(p: &mut Parser) -> Option> { + let Spanned { v: token, span } = p.eat()?; + Some(match token { + // Bracketed function call. + Token::LeftBracket => { + p.jump(span.start); + let call = bracket_call(p); + let span = call.span; + let node = call.map(Expr::Call).map(SynNode::Expr); + Expr::Lit(Lit::Content(vec![node])).span_with(span) + } - fn parse_term(&mut self) -> Option> { - self.parse_binops("factor", Self::parse_factor, |token| match token { - Token::Star => Some(BinOp::Mul), - Token::Slash => Some(BinOp::Div), - _ => None, - }) - } + // Content expression. 
+ Token::LeftBrace => { + p.jump(span.start); + content(p).map(Lit::Content).map(Expr::Lit) + } - /// Parse expression of the form ` ( )*`. - fn parse_binops( - &mut self, - operand_name: &str, - mut parse_operand: impl FnMut(&mut Self) -> Option>, - mut parse_op: impl FnMut(Token) -> Option, - ) -> Option> { - let mut left = parse_operand(self)?; + // Dictionary or just a parenthesized expression. + Token::LeftParen => { + p.jump(span.start); + parenthesized(p) + } - self.skip_ws(); - while let Some(token) = self.peek() { - if let Some(op) = parse_op(token.v) { - self.eat(); - self.skip_ws(); + // Function or just ident. + Token::Ident(id) => { + let ident = Ident(id.into()).span_with(span); - if let Some(right) = parse_operand(self) { - let span = Span::merge(left.span, right.span); - let expr = Expr::Binary(ExprBinary { - lhs: left.map(Box::new), - op: op.span_with(token.span), - rhs: right.map(Box::new), - }); - left = expr.span_with(span); - self.skip_ws(); - continue; - } - - error!( - @self.feedback, Span::merge(left.span, token.span), - "missing right {}", operand_name, - ); + p.skip_white(); + if p.peek() == Some(Token::LeftParen) { + paren_call(p, ident).map(Expr::Call) + } else { + ident.map(Lit::Ident).map(Expr::Lit) } + } + + // Atomic values. + Token::Bool(b) => Expr::Lit(Lit::Bool(b)).span_with(span), + Token::Number(f) => Expr::Lit(Lit::Float(f)).span_with(span), + Token::Length(l) => Expr::Lit(Lit::Length(l)).span_with(span), + Token::Hex(hex) => color(p, hex, span).map(Lit::Color).map(Expr::Lit), + Token::Str(token) => string(p, token, span).map(Lit::Str).map(Expr::Lit), + + // No value. + _ => { + p.jump(span.start); + return None; + } + }) +} + +// Parse a content expression: `{...}`. +fn content(p: &mut Parser) -> Spanned { + p.start_group(Group::Brace); + p.push_mode(TokenMode::Body); + let tree = tree(p); + p.pop_mode(); + tree.span_with(p.end_group()) +} + +/// Parse a parenthesized expression: `(a + b)`, `(1, key="value"). +fn parenthesized(p: &mut Parser) -> Spanned { + p.start_group(Group::Paren); + let (dict, coercable) = dict_contents(p); + let expr = if coercable { + dict.0.into_iter().next().expect("dict is coercable").expr.v + } else { + Expr::Lit(Lit::Dict(dict)) + }; + expr.span_with(p.end_group()) +} + +/// Parse a parenthesized function call. +fn paren_call(p: &mut Parser, name: Spanned) -> Spanned { + p.start_group(Group::Paren); + let args = dict_contents(p).0; + let span = name.span.join(p.end_group()); + ExprCall { name, args }.span_with(span) +} + +/// Parse the contents of a dictionary. 
+fn dict_contents(p: &mut Parser) -> (LitDict, bool) { + let mut dict = LitDict::new(); + let mut comma_and_keyless = true; + + loop { + p.skip_white(); + if p.eof() { break; } - Some(left) - } - - fn parse_factor(&mut self) -> Option> { - if let Some(hyph) = self.check_eat(Token::Hyphen) { - self.skip_ws(); - if let Some(factor) = self.parse_factor() { - let span = Span::merge(hyph.span, factor.span); - let expr = Expr::Unary(ExprUnary { - op: UnOp::Neg.span_with(hyph.span), - expr: factor.map(Box::new), - }); - Some(expr.span_with(span)) - } else { - error!(@self.feedback, hyph.span, "dangling minus"); - None - } + let entry = if let Some(entry) = dict_entry(p) { + entry } else { - self.parse_value() + p.diag_expected("value"); + continue; + }; + + if let Some(key) = &entry.key { + comma_and_keyless = false; + p.deco(Decoration::DictKey.span_with(key.span)); } + + let behind = entry.expr.span.end; + dict.0.push(entry); + + p.skip_white(); + if p.eof() { + break; + } + + if p.eat_if(Token::Comma).is_none() { + p.diag_expected_at("comma", behind); + } + + comma_and_keyless = false; } - fn parse_value(&mut self) -> Option> { - let Spanned { v: token, span } = self.peek()?; - Some(match token { - // This could be a function call or an identifier. - Token::Ident(id) => { - let name = Ident(id.to_string()).span_with(span); - self.eat(); - self.skip_ws(); - if self.check(Token::LeftParen) { - self.parse_paren_call(name).map(Expr::Call) + let coercable = comma_and_keyless && !dict.0.is_empty(); + (dict, coercable) +} + +/// Parse a single entry in a dictionary. +fn dict_entry(p: &mut Parser) -> Option { + if let Some(ident) = ident(p) { + p.skip_white(); + match p.peek() { + // Key-value pair. + Some(Token::Equals) => { + p.eat_assert(Token::Equals); + p.skip_white(); + if let Some(expr) = expr(p) { + Some(LitDictEntry { + key: Some(ident.map(|id| DictKey::Str(id.0))), + expr, + }) } else { - name.map(|n| Expr::Lit(Lit::Ident(n))) + None } } - Token::Str { string, terminated } => { - if !terminated { - self.expected_at("quote", span.end); - } - self.with_span(Expr::Lit(Lit::Str(resolve::resolve_string(string)))) - } + // Function call. + Some(Token::LeftParen) => Some(LitDictEntry { + key: None, + expr: paren_call(p, ident).map(Expr::Call), + }), - Token::Bool(b) => self.with_span(Expr::Lit(Lit::Bool(b))), - Token::Number(n) => self.with_span(Expr::Lit(Lit::Float(n))), - Token::Length(s) => self.with_span(Expr::Lit(Lit::Length(s))), - Token::Hex(s) => { - let color = RgbaColor::from_str(s).unwrap_or_else(|_| { - // Heal color by assuming black. - error!(@self.feedback, span, "invalid color"); - RgbaColor::new_healed(0, 0, 0, 255) - }); - self.with_span(Expr::Lit(Lit::Color(color))) - } + // Just an identifier. + _ => Some(LitDictEntry { + key: None, + expr: ident.map(|id| Expr::Lit(Lit::Ident(id))), + }), + } + } else if let Some(expr) = expr(p) { + Some(LitDictEntry { key: None, expr }) + } else { + None + } +} - // This could be a dictionary or a parenthesized expression. We - // parse as a dictionary in any case and coerce into a value if - // that's coercable (length 1 and no trailing comma). - Token::LeftParen => { - self.start_group(Group::Paren); - let (dict, coercable) = self.parse_dict_contents(); - let span = self.end_group(); +/// Parse an identifier. 
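To make the coercion rule above concrete, here is a hypothetical set of inputs (not taken from the patch's tests); the behavior follows the `coercable` flag returned by `dict_contents`:

    [val: (3pt)]   // one keyless entry, no trailing comma: coerced to a plain length expression
    [val: (3pt,)]  // trailing comma: stays a one-entry dictionary literal
    [val: (a=3pt)] // keyed entry: always a dictionary literal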
+fn ident(p: &mut Parser) -> Option> { + p.eat_map(|token| match token { + Token::Ident(id) => Some(Ident(id.into())), + _ => None, + }) +} - let expr = if coercable { - dict.0.into_iter().next().expect("dict is coercable").value.v - } else { - Expr::Lit(Lit::Dict(dict)) - }; - - expr.span_with(span) - } - - // This is a content expression. - Token::LeftBrace => { - self.start_group(Group::Brace); - self.tokens.push_mode(TokenMode::Body); - let tree = self.parse_body_contents(); - self.tokens.pop_mode(); - let span = self.end_group(); - Expr::Lit(Lit::Content(tree)).span_with(span) - } - - // This is a bracketed function call. - Token::LeftBracket => { - let call = self.parse_bracket_call(false); - let tree = vec![call.map(|c| SynNode::Expr(Expr::Call(c)))]; - Expr::Lit(Lit::Content(tree)).span_with(span) - } - - _ => return None, +/// Parse a color. +fn color(p: &mut Parser, hex: &str, span: Span) -> Spanned { + RgbaColor::from_str(hex) + .unwrap_or_else(|_| { + // Heal color by assuming black. + p.diag(error!(span, "invalid color")); + RgbaColor::new_healed(0, 0, 0, 255) }) - } - - fn parse_ident(&mut self) -> Option> { - self.peek().and_then(|token| match token.v { - Token::Ident(id) => Some(self.with_span(Ident(id.to_string()))), - _ => None, - }) - } + .span_with(span) } -// Error handling. -impl Parser<'_> { - fn expect_at(&mut self, token: Token<'_>, pos: Pos) -> bool { - if self.check(token) { - self.eat(); - true - } else { - self.expected_at(token.name(), pos); - false - } +/// Parse a string. +fn string(p: &mut Parser, token: TokenStr, span: Span) -> Spanned { + if !token.terminated { + p.diag_expected_at("quote", span.end); } - fn expected(&mut self, thing: &str) { - if let Some(found) = self.eat() { - error!( - @self.feedback, found.span, - "expected {}, found {}", thing, found.v.name(), - ); - } else { - error!(@self.feedback, Span::at(self.pos()), "expected {}", thing); - } - } - - fn expected_at(&mut self, thing: &str, pos: Pos) { - error!(@self.feedback, Span::at(pos), "expected {}", thing); - } - - fn expected_found_or_at(&mut self, thing: &str, pos: Pos) { - if self.eof() { - self.expected_at(thing, pos) - } else { - self.expected(thing); - } - } -} - -// Parsing primitives. 
-impl<'s> Parser<'s> { - fn start_group(&mut self, group: Group) { - let start = self.pos(); - if let Some(start_token) = group.start() { - self.assert(start_token); - } - self.delimiters.push((start, group.end())); - } - - fn end_group(&mut self) -> Span { - let peeked = self.peek(); - - let (start, end_token) = self.delimiters.pop().expect("group was not started"); - - if end_token != Token::Chain && peeked != None { - self.delimiters.push((start, end_token)); - assert_eq!(peeked, None, "unfinished group"); - } - - match self.peeked.unwrap() { - Some(token) if token.v == end_token => { - self.peeked = None; - Span::new(start, token.span.end) - } - _ => { - let end = self.pos(); - if end_token != Token::Chain { - error!( - @self.feedback, Span::at(end), - "expected {}", end_token.name(), - ); - } - Span::new(start, end) - } - } - } - - fn skip_ws(&mut self) { - while matches!( - self.peekv(), - Some(Token::Space(_)) | - Some(Token::LineComment(_)) | - Some(Token::BlockComment(_)) - ) { - self.eat(); - } - } - - fn eatv(&mut self) -> Option> { - self.eat().map(Spanned::value) - } - - fn peekv(&mut self) -> Option> { - self.peek().map(Spanned::value) - } - - fn assert(&mut self, token: Token<'_>) { - assert!(self.check_eat(token).is_some()); - } - - fn check_eat(&mut self, token: Token<'_>) -> Option>> { - if self.check(token) { self.eat() } else { None } - } - - /// Checks if the next token is of some kind - fn check(&mut self, token: Token<'_>) -> bool { - self.peekv() == Some(token) - } - - fn with_span(&mut self, v: T) -> Spanned { - let span = self.eat().expect("expected token").span; - v.span_with(span) - } - - fn eof(&mut self) -> bool { - self.peek().is_none() - } - - fn eat(&mut self) -> Option>> { - let token = self.peek()?; - self.peeked = None; - Some(token) - } - - fn peek(&mut self) -> Option>> { - let tokens = &mut self.tokens; - let token = (*self.peeked.get_or_insert_with(|| tokens.next()))?; - - // Check for unclosed groups. - if Group::is_delimiter(token.v) { - if self.delimiters.iter().rev().any(|&(_, end)| token.v == end) { - return None; - } - } - - Some(token) - } - - fn pos(&self) -> Pos { - self.peeked - .flatten() - .map(|s| s.span.start) - .unwrap_or_else(|| self.tokens.pos()) - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum Group { - Paren, - Bracket, - Brace, - Subheader, -} - -impl Group { - fn is_delimiter(token: Token<'_>) -> bool { - matches!( - token, - Token::RightParen | Token::RightBracket | Token::RightBrace | Token::Chain - ) - } - - fn start(self) -> Option> { - match self { - Self::Paren => Some(Token::LeftParen), - Self::Bracket => Some(Token::LeftBracket), - Self::Brace => Some(Token::LeftBrace), - Self::Subheader => None, - } - } - - fn end(self) -> Token<'static> { - match self { - Self::Paren => Token::RightParen, - Self::Bracket => Token::RightBracket, - Self::Brace => Token::RightBrace, - Self::Subheader => Token::Chain, - } - } + resolve::resolve_string(token.string).span_with(span) } #[cfg(test)] diff --git a/src/parse/parser.rs b/src/parse/parser.rs new file mode 100644 index 000000000..d0735931a --- /dev/null +++ b/src/parse/parser.rs @@ -0,0 +1,292 @@ +use std::fmt::{self, Debug, Formatter}; + +use super::{Scanner, TokenMode, Tokens}; +use crate::diagnostic::Diagnostic; +use crate::syntax::{Decoration, Pos, Span, SpanWith, Spanned, Token}; +use crate::Feedback; + +/// A convenient token-based parser. 
+pub struct Parser<'s> { + tokens: Tokens<'s>, + modes: Vec, + groups: Vec<(Pos, Group)>, + f: Feedback, +} + +impl<'s> Parser<'s> { + /// Create a new parser for the source string. + pub fn new(src: &'s str) -> Self { + Self { + tokens: Tokens::new(src, TokenMode::Body), + modes: vec![], + groups: vec![], + f: Feedback::new(), + } + } + + /// Finish parsing and return the accumulated feedback. + pub fn finish(self) -> Feedback { + self.f + } + + /// Add a diagnostic to the feedback. + pub fn diag(&mut self, diag: Spanned) { + self.f.diagnostics.push(diag); + } + + /// Eat the next token and add a diagnostic that it was not expected thing. + pub fn diag_expected(&mut self, thing: &str) { + if let Some(found) = self.eat() { + self.diag(error!( + found.span, + "expected {}, found {}", + thing, + found.v.name(), + )); + } else { + self.diag_expected_at(thing, self.pos()); + } + } + + /// Add a diagnostic that the thing was expected at the given position. + pub fn diag_expected_at(&mut self, thing: &str, pos: Pos) { + self.diag(error!(pos, "expected {}", thing)); + } + + /// Add a diagnostic that the given token was unexpected. + pub fn diag_unexpected(&mut self, token: Spanned) { + self.diag(error!(token.span, "unexpected {}", token.v.name())); + } + + /// Add a decoration to the feedback. + pub fn deco(&mut self, deco: Spanned) { + self.f.decorations.push(deco); + } + + /// Update the token mode and push the previous mode onto a stack. + pub fn push_mode(&mut self, mode: TokenMode) { + self.modes.push(self.tokens.mode()); + self.tokens.set_mode(mode); + } + + /// Pop the topmost token mode from the stack. + /// + /// # Panics + /// This panics if there is no mode on the stack. + pub fn pop_mode(&mut self) { + self.tokens.set_mode(self.modes.pop().expect("no pushed mode")); + } + + /// Continues parsing in a group. + /// + /// When the end delimiter of the group is reached, all subsequent calls to + /// `eat()` and `peek()` return `None`. Parsing can only continue with + /// a matching call to `end_group`. + /// + /// # Panics + /// This panics if the next token does not start the given group. + pub fn start_group(&mut self, group: Group) { + let start = self.pos(); + match group { + Group::Paren => self.eat_assert(Token::LeftParen), + Group::Bracket => self.eat_assert(Token::LeftBracket), + Group::Brace => self.eat_assert(Token::LeftBrace), + Group::Subheader => {} + } + self.groups.push((start, group)); + } + + /// Ends the parsing of a group and returns the span of the whole group. + /// + /// # Panics + /// This panics if no group was started. + pub fn end_group(&mut self) -> Span { + debug_assert_eq!(self.peek(), None, "unfinished group"); + + let (start, group) = self.groups.pop().expect("unstarted group"); + let end = match group { + Group::Paren => Some(Token::RightParen), + Group::Bracket => Some(Token::RightBracket), + Group::Brace => Some(Token::RightBrace), + Group::Subheader => None, + }; + + if let Some(token) = end { + let next = self.tokens.clone().next().map(|s| s.v); + if next == Some(token) { + self.tokens.next(); + } else { + self.diag(error!(self.pos(), "expected {}", token.name())); + } + } + + Span::new(start, self.pos()) + } + + /// Consume the next token. + pub fn eat(&mut self) -> Option>> { + next_group_aware(&mut self.tokens, &self.groups) + } + + /// Consume the next token if it is the given one. + pub fn eat_if(&mut self, t: Token) -> Option>> { + // Don't call eat() twice if it suceeds. + // + // TODO: Benchmark this vs. the naive version. 
+ let before = self.pos(); + let token = self.eat()?; + if token.v == t { + Some(token) + } else { + self.jump(before); + None + } + } + + /// Consume the next token if the closure maps to `Some`. + pub fn eat_map( + &mut self, + mut f: impl FnMut(Token<'s>) -> Option, + ) -> Option> { + let before = self.pos(); + let token = self.eat()?; + if let Some(t) = f(token.v) { + Some(t.span_with(token.span)) + } else { + self.jump(before); + None + } + } + + /// Consume the next token, debug-asserting that it is the given one. + pub fn eat_assert(&mut self, t: Token) { + let next = self.eat(); + debug_assert_eq!(next.map(|s| s.v), Some(t)); + } + + /// Consume tokens while the condition is true. + /// + /// Returns how many tokens were eaten. + pub fn eat_while(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize { + self.eat_until(|t| !f(t)) + } + + /// Consume tokens until the condition is true. + /// + /// Returns how many tokens were eaten. + pub fn eat_until(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize { + let mut count = 0; + let mut before = self.pos(); + while let Some(t) = self.eat() { + if f(t.v) { + // Undo the last eat by jumping. This prevents + // double-tokenization by not peeking all the time. + // + // TODO: Benchmark this vs. the naive peeking version. + self.jump(before); + break; + } + before = self.pos(); + count += 1; + } + count + } + + /// Peek at the next token without consuming it. + pub fn peek(&self) -> Option> { + next_group_aware(&mut self.tokens.clone(), &self.groups).map(|s| s.v) + } + + /// Checks whether the next token fulfills a condition. + /// + /// Returns `false` if there is no next token. + pub fn check(&self, f: impl FnMut(Token<'s>) -> bool) -> bool { + self.peek().map(f).unwrap_or(false) + } + + /// Whether the there is no next token. + pub fn eof(&self) -> bool { + self.peek().is_none() + } + + /// Skip whitespace tokens. + pub fn skip_white(&mut self) { + self.eat_while(|t| { + matches!(t, + Token::Space(_) | + Token::LineComment(_) | + Token::BlockComment(_)) + }); + } + + /// The position in the string at which the last token ends and next token + /// will start. + pub fn pos(&self) -> Pos { + self.tokens.pos() + } + + /// Jump to a position in the source string. + pub fn jump(&mut self, pos: Pos) { + self.tokens.jump(pos); + } + + /// The full source string. + pub fn src(&self) -> &'s str { + self.scanner().src() + } + + /// The part of the source string that is spanned by the given span. + pub fn get(&self, span: Span) -> &'s str { + self.scanner().get(span.start.to_usize() .. span.end.to_usize()) + } + + /// The underlying scanner. + pub fn scanner(&self) -> &Scanner<'s> { + self.tokens.scanner() + } +} + +/// Wraps `tokens.next()`, but is group-aware. +fn next_group_aware<'s>( + tokens: &mut Tokens<'s>, + groups: &[(Pos, Group)], +) -> Option>> { + let pos = tokens.pos(); + let token = tokens.next(); + + let group = match token?.v { + Token::RightParen => Group::Paren, + Token::RightBracket => Group::Bracket, + Token::RightBrace => Group::Brace, + Token::Chain => Group::Subheader, + _ => return token, + }; + + if groups.iter().rev().any(|&(_, g)| g == group) { + tokens.jump(pos); + None + } else { + token + } +} + +impl Debug for Parser<'_> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let s = self.scanner(); + write!(f, "Parser({}|{})", s.eaten(), s.rest()) + } +} + +/// A group, confined by optional start and end delimiters. 
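A rough sketch of how the group mechanism is intended to be driven (illustrative only, not part of the patch; assumes Header mode so the parentheses tokenize as delimiters):

    let mut p = Parser::new("(a, b)");
    p.push_mode(TokenMode::Header);
    p.start_group(Group::Paren);     // consumes `(` and registers the group
    while !p.eof() { p.eat(); }      // `a`, `,`, `b` are eaten; at `)` eof() reports true
    let span = p.end_group();        // consumes `)` and returns the span of `(a, b)`
    p.pop_mode();
    let feedback = p.finish();       // collected diagnostics and decorations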
+#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Group { + /// A parenthesized group: `(...)`. + Paren, + /// A bracketed group: `[...]`. + Bracket, + /// A curly-braced group: `{...}`. + Brace, + /// A group ended by a chained subheader or a closing bracket: + /// `... >>`, `...]`. + Subheader, +} diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index f99193739..6036a74e7 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -3,7 +3,7 @@ use super::{is_newline, Scanner}; use crate::syntax::{Ident, NodeRaw}; -/// Resolves all escape sequences in a string. +/// Resolve all escape sequences in a string. pub fn resolve_string(string: &str) -> String { let mut out = String::with_capacity(string.len()); let mut s = Scanner::new(string); @@ -48,10 +48,10 @@ pub fn resolve_hex(sequence: &str) -> Option { u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) } -/// Resolves the language tag and trims the raw text. -pub fn resolve_raw(raw: &str, backticks: usize) -> NodeRaw { +/// Resolve the language tag and trims the raw text. +pub fn resolve_raw(text: &str, backticks: usize) -> NodeRaw { if backticks > 1 { - let (tag, inner) = split_at_lang_tag(raw); + let (tag, inner) = split_at_lang_tag(text); let (lines, had_newline) = trim_and_split_raw(inner); NodeRaw { lang: Ident::new(tag), @@ -61,7 +61,7 @@ pub fn resolve_raw(raw: &str, backticks: usize) -> NodeRaw { } else { NodeRaw { lang: None, - lines: split_lines(raw), + lines: split_lines(text), inline: true, } } @@ -76,7 +76,7 @@ fn split_at_lang_tag(raw: &str) -> (&str, &str) { ) } -/// Trims raw text and splits it into lines. +/// Trim raw text and splits it into lines. /// /// Returns whether at least one newline was contained in `raw`. fn trim_and_split_raw(raw: &str) -> (Vec, bool) { @@ -101,7 +101,7 @@ fn trim_and_split_raw(raw: &str) -> (Vec, bool) { (lines, had_newline) } -/// Splits a string into a vector of lines +/// Split a string into a vector of lines /// (respecting Unicode, Unix, Mac and Windows line breaks). pub fn split_lines(text: &str) -> Vec { let mut s = Scanner::new(text); @@ -147,8 +147,8 @@ mod tests { #[test] fn test_split_at_lang_tag() { - fn test(raw: &str, lang: &str, inner: &str) { - assert_eq!(split_at_lang_tag(raw), (lang, inner)); + fn test(text: &str, lang: &str, inner: &str) { + assert_eq!(split_at_lang_tag(text), (lang, inner)); } test("typst it!", "typst", " it!"); @@ -161,8 +161,8 @@ mod tests { #[test] fn test_trim_raw() { - fn test(raw: &str, expected: Vec<&str>) { - assert_eq!(trim_and_split_raw(raw).0, expected); + fn test(text: &str, expected: Vec<&str>) { + assert_eq!(trim_and_split_raw(text).0, expected); } test(" hi", vec!["hi"]); @@ -178,8 +178,8 @@ mod tests { #[test] fn test_split_lines() { - fn test(raw: &str, expected: Vec<&str>) { - assert_eq!(split_lines(raw), expected); + fn test(text: &str, expected: Vec<&str>) { + assert_eq!(split_lines(text), expected); } test("raw\ntext", vec!["raw", "text"]); diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 9447222d9..6ff8c801e 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -4,7 +4,8 @@ use std::fmt::{self, Debug, Formatter}; use std::slice::SliceIndex; use std::str::Chars; -/// A low-level featureful char scanner. +/// A low-level featureful char-based scanner. +#[derive(Clone)] pub struct Scanner<'s> { src: &'s str, iter: Chars<'s>, @@ -98,24 +99,22 @@ impl<'s> Scanner<'s> { /// Checks whether the next character fulfills a condition. 
/// - /// Returns `false` is there is no next character. + /// Returns `false` if there is no next character. pub fn check(&self, f: impl FnMut(char) -> bool) -> bool { self.peek().map(f).unwrap_or(false) } - /// Go back to the where the index says. - fn reset(&mut self) { - self.iter = self.src[self.index ..].chars(); + /// Whether the end of the source string is reached. + pub fn eof(&self) -> bool { + self.iter.as_str().is_empty() } -} -impl<'s> Scanner<'s> { - /// The current index in the string. + /// The current index in the source string. pub fn index(&self) -> usize { self.index } - /// The previous index in the string. + /// The previous index in the source string. pub fn prev_index(&self) -> usize { self.src[.. self.index] .chars() @@ -124,6 +123,17 @@ impl<'s> Scanner<'s> { .unwrap_or(0) } + /// Jump to an index in the source string. + pub fn jump(&mut self, index: usize) { + self.index = index; + self.reset(); + } + + /// The full source string. + pub fn src(&self) -> &'s str { + self.src + } + /// Slice a part out of the source string. pub fn get(&self, index: I) -> &'s str where @@ -132,11 +142,6 @@ impl<'s> Scanner<'s> { &self.src[index] } - /// The full source string. - pub fn src(&self) -> &'s str { - self.src - } - /// The full source string up to the current index. pub fn eaten(&self) -> &'s str { &self.src[.. self.index] @@ -151,6 +156,11 @@ impl<'s> Scanner<'s> { pub fn rest(&self) -> &'s str { &self.src[self.index ..] } + + /// Go back to the where the index says. + fn reset(&mut self) { + self.iter = self.src[self.index ..].chars(); + } } impl Debug for Scanner<'_> { diff --git a/src/parse/tests.rs b/src/parse/tests.rs index 9d6b673ff..a1b1fb13c 100644 --- a/src/parse/tests.rs +++ b/src/parse/tests.rs @@ -14,7 +14,7 @@ use crate::syntax::*; use Decoration::*; use SynNode::{ - Linebreak as L, Parbreak as P, Spacing as S, ToggleBolder as B, ToggleItalic as I, + Linebreak as L, Parbreak as P, Space as S, ToggleBolder as B, ToggleItalic as I, }; fn T(text: &str) -> SynNode { @@ -80,21 +80,21 @@ fn Str(string: &str) -> Expr { macro_rules! Dict { (@dict=$dict:expr,) => {}; - (@dict=$dict:expr, $key:expr => $value:expr $(, $($tts:tt)*)?) => {{ + (@dict=$dict:expr, $key:expr => $expr:expr $(, $($tts:tt)*)?) => {{ let key = Into::>::into($key); let key = key.map(Into::::into); - let value = Into::>::into($value); - $dict.0.push(LitDictEntry { key: Some(key), value }); + let expr = Into::>::into($expr); + $dict.0.push(LitDictEntry { key: Some(key), expr }); Dict![@dict=$dict, $($($tts)*)?]; }}; - (@dict=$dict:expr, $value:expr $(, $($tts:tt)*)?) => { - let value = Into::>::into($value); - $dict.0.push(LitDictEntry { key: None, value }); + (@dict=$dict:expr, $expr:expr $(, $($tts:tt)*)?) 
=> { + let expr = Into::>::into($expr); + $dict.0.push(LitDictEntry { key: None, expr }); Dict![@dict=$dict, $($($tts)*)?]; }; (@$($tts:tt)*) => {{ #[allow(unused_mut)] - let mut dict = LitDict::default(); + let mut dict = LitDict::new(); Dict![@dict=dict, $($tts)*]; dict }}; @@ -344,7 +344,6 @@ fn test_parse_function_names() { fn test_parse_chaining() { // Things the parser has to make sense of t!("[hi: (5.0, 2.1 >> you]" => F!("hi"; Dict![Float(5.0), Float(2.1)], Tree![F!("you")])); - t!("[box >>][Hi]" => F!("box"; Tree![T("Hi")])); t!("[box >> pad: 1pt][Hi]" => F!("box"; Tree![ F!("pad"; Len(Length::pt(1.0)), Tree!(T("Hi"))) ])); @@ -354,7 +353,8 @@ fn test_parse_chaining() { // Errors for unclosed / empty predecessor groups e!("[hi: (5.0, 2.1 >> you]" => s(15, 15, "expected closing paren")); - e!("[>> abc]" => s(1, 1, "expected function name")); + e!("[>> abc]" => s(1, 1, "expected function name")); + e!("[box >>][Hi]" => s(7, 7, "expected function name")); } #[test] @@ -482,7 +482,7 @@ fn test_parse_expressions() { // Invalid expressions. v!("4pt--" => Len(Length::pt(4.0))); - e!("[val: 4pt--]" => s(10, 11, "dangling minus"), + e!("[val: 4pt--]" => s(10, 11, "missing factor"), s(6, 10, "missing right summand")); v!("3mm+4pt*" => Binary(Add, Len(Length::mm(3.0)), Len(Length::pt(4.0)))); diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 9f30f5876..72d7b2d96 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,17 +1,19 @@ //! Tokenization. +use std::fmt::{self, Debug, Formatter}; + use super::{is_newline, Scanner}; use crate::length::Length; -use crate::syntax::{is_ident, Pos, Span, SpanWith, Spanned, Token}; +use crate::syntax::token::*; +use crate::syntax::{is_ident, Pos, Span, SpanWith, Spanned}; use TokenMode::*; /// An iterator over the tokens of a string of source code. -#[derive(Debug)] +#[derive(Clone)] pub struct Tokens<'s> { s: Scanner<'s>, mode: TokenMode, - stack: Vec, } /// Whether to tokenize in header mode which yields expression, comma and @@ -26,30 +28,34 @@ pub enum TokenMode { impl<'s> Tokens<'s> { /// Create a new token iterator with the given mode. pub fn new(src: &'s str, mode: TokenMode) -> Self { - Self { - s: Scanner::new(src), - mode, - stack: vec![], - } + Self { s: Scanner::new(src), mode } } - /// Change the token mode and push the old one on a stack. - pub fn push_mode(&mut self, mode: TokenMode) { - self.stack.push(self.mode); + /// Get the current token mode. + pub fn mode(&self) -> TokenMode { + self.mode + } + + /// Change the token mode. + pub fn set_mode(&mut self, mode: TokenMode) { self.mode = mode; } - /// Pop the old token mode from the stack. This panics if there is no mode - /// on the stack. - pub fn pop_mode(&mut self) { - self.mode = self.stack.pop().expect("no pushed mode"); - } - /// The position in the string at which the last token ends and next token /// will start. pub fn pos(&self) -> Pos { self.s.index().into() } + + /// Jump to a position in the source string. + pub fn jump(&mut self, pos: Pos) { + self.s.jump(pos.to_usize()); + } + + /// The underlying scanner. + pub fn scanner(&self) -> &Scanner<'s> { + &self.s + } } impl<'s> Iterator for Tokens<'s> { @@ -59,8 +65,12 @@ impl<'s> Iterator for Tokens<'s> { fn next(&mut self) -> Option { let start = self.s.index(); let token = match self.s.eat()? { - // Whitespace. - c if c.is_whitespace() => self.read_whitespace(c), + // Whitespace with fast path for just a single space. 
+ ' ' if !self.s.check(|c| c.is_whitespace()) => Token::Space(0), + c if c.is_whitespace() => { + self.s.jump(start); + self.read_whitespace() + } // Comments. '/' if self.s.eat_if('/') => self.read_line_comment(), @@ -76,8 +86,8 @@ impl<'s> Iterator for Tokens<'s> { // Syntactic elements in body text. '*' if self.mode == Body => Token::Star, '_' if self.mode == Body => Token::Underscore, - '`' if self.mode == Body => self.read_raw(), '#' if self.mode == Body => Token::Hashtag, + '`' if self.mode == Body => self.read_raw(), '~' if self.mode == Body => Token::Text("\u{00A0}"), '\\' if self.mode == Body => self.read_escaped(), @@ -88,12 +98,12 @@ impl<'s> Iterator for Tokens<'s> { ',' if self.mode == Header => Token::Comma, '=' if self.mode == Header => Token::Equals, '>' if self.mode == Header && self.s.eat_if('>') => Token::Chain, - - // Expressions in headers. '+' if self.mode == Header => Token::Plus, '-' if self.mode == Header => Token::Hyphen, '*' if self.mode == Header => Token::Star, '/' if self.mode == Header => Token::Slash, + + // Expressions in headers. '#' if self.mode == Header => self.read_hex(), '"' if self.mode == Header => self.read_string(), @@ -107,18 +117,7 @@ impl<'s> Iterator for Tokens<'s> { } impl<'s> Tokens<'s> { - fn read_whitespace(&mut self, first: char) -> Token<'s> { - // Shortcut for common case of exactly one space. - if first == ' ' && !self.s.check(|c| c.is_whitespace()) { - return Token::Space(0); - } - - // Uneat the first char if it's a newline, so that it's counted in the - // loop. - if is_newline(first) { - self.s.uneat(); - } - + fn read_whitespace(&mut self) -> Token<'s> { // Count the number of newlines. let mut newlines = 0; while let Some(c) = self.s.eat_merging_crlf() { @@ -169,27 +168,6 @@ impl<'s> Tokens<'s> { Token::BlockComment(self.s.get(start .. end)) } - fn read_hex(&mut self) -> Token<'s> { - // This parses more than the permissable 0-9, a-f, A-F character ranges - // to provide nicer error messages later. - Token::Hex(self.s.eat_while(|c| c.is_ascii_alphanumeric())) - } - - fn read_string(&mut self) -> Token<'s> { - let mut escaped = false; - Token::Str { - string: self.s.eat_until(|c| { - if c == '"' && !escaped { - true - } else { - escaped = c == '\\' && !escaped; - false - } - }), - terminated: self.s.eat_if('"'), - } - } - fn read_raw(&mut self) -> Token<'s> { let mut backticks = 1; while self.s.eat_if('`') { @@ -210,11 +188,11 @@ impl<'s> Tokens<'s> { let terminated = found == backticks; let end = self.s.index() - if terminated { found } else { 0 }; - Token::Raw { - raw: self.s.get(start .. end), + Token::Raw(TokenRaw { + text: self.s.get(start .. end), backticks, terminated, - } + }) } fn read_escaped(&mut self) -> Token<'s> { @@ -228,10 +206,10 @@ impl<'s> Tokens<'s> { 'u' if self.s.peek_nth(1) == Some('{') => { self.s.eat_assert('u'); self.s.eat_assert('{'); - Token::UnicodeEscape { + Token::UnicodeEscape(TokenUnicodeEscape { sequence: self.s.eat_while(|c| c.is_ascii_hexdigit()), terminated: self.s.eat_if('}'), - } + }) } c if c.is_whitespace() => Token::Backslash, _ => Token::Text("\\"), @@ -241,6 +219,27 @@ impl<'s> Tokens<'s> { } } + fn read_hex(&mut self) -> Token<'s> { + // This parses more than the permissable 0-9, a-f, A-F character ranges + // to provide nicer error messages later. 
+ Token::Hex(self.s.eat_while(|c| c.is_ascii_alphanumeric())) + } + + fn read_string(&mut self) -> Token<'s> { + let mut escaped = false; + Token::Str(TokenStr { + string: self.s.eat_until(|c| { + if c == '"' && !escaped { + true + } else { + escaped = c == '\\' && !escaped; + false + } + }), + terminated: self.s.eat_if('"'), + }) + } + fn read_text_or_expr(&mut self, start: usize) -> Token<'s> { let body = self.mode == Body; let header = self.mode == Header; @@ -268,6 +267,12 @@ impl<'s> Tokens<'s> { } } +impl Debug for Tokens<'_> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "Tokens({}|{})", self.s.eaten(), self.s.rest()) + } +} + fn parse_expr(text: &str) -> Token<'_> { if let Ok(b) = text.parse::() { Token::Bool(b) @@ -303,13 +308,13 @@ mod tests { }; fn Str(string: &str, terminated: bool) -> Token { - Token::Str { string, terminated } + Token::Str(TokenStr { string, terminated }) } - fn Raw(raw: &str, backticks: usize, terminated: bool) -> Token { - Token::Raw { raw, backticks, terminated } + fn Raw(text: &str, backticks: usize, terminated: bool) -> Token { + Token::Raw(TokenRaw { text, backticks, terminated }) } fn UE(sequence: &str, terminated: bool) -> Token { - Token::UnicodeEscape { sequence, terminated } + Token::UnicodeEscape(TokenUnicodeEscape { sequence, terminated }) } macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} } @@ -388,64 +393,6 @@ mod tests { t!(Body, "````\n```js\nalert()\n```\n````" => Raw("\n```js\nalert()\n```\n", 4, true)); } - #[test] - fn tokenize_header_tokens() { - t!(Header, "__main__" => Id("__main__")); - t!(Header, "_func_box" => Id("_func_box")); - t!(Header, ">main" => Invalid(">main")); - t!(Header, "🌓, 🌍," => Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma); - t!(Header, "{abc}" => LB, Id("abc"), RB); - t!(Header, "(1,2)" => LP, Num(1.0), Comma, Num(2.0), RP); - t!(Header, "12_pt, 12pt" => Invalid("12_pt"), Comma, S(0), Len(Length::pt(12.0))); - t!(Header, "f: arg >> g" => Id("f"), Colon, S(0), Id("arg"), S(0), Chain, S(0), Id("g")); - t!(Header, "=3.14" => Equals, Num(3.14)); - t!(Header, "arg, _b, _1" => Id("arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1")); - t!(Header, "a:b" => Id("a"), Colon, Id("b")); - t!(Header, "(){}:=," => LP, RP, LB, RB, Colon, Equals, Comma); - t!(Body, "c=d, " => T("c=d,"), S(0)); - t!(Body, "a: b" => T("a:"), S(0), T("b")); - t!(Header, "a: true, x=1" => Id("a"), Colon, S(0), Bool(true), Comma, S(0), - Id("x"), Equals, Num(1.0)); - } - - #[test] - fn tokenize_numeric_values() { - t!(Header, "12.3e5" => Num(12.3e5)); - t!(Header, "120%" => Num(1.2)); - t!(Header, "12e4%" => Num(1200.0)); - t!(Header, "1e5in" => Len(Length::inches(100000.0))); - t!(Header, "2.3cm" => Len(Length::cm(2.3))); - t!(Header, "02.4mm" => Len(Length::mm(2.4))); - t!(Header, "2.4.cm" => Invalid("2.4.cm")); - t!(Header, "#6ae6dd" => Hex("6ae6dd")); - t!(Header, "#8A083c" => Hex("8A083c")); - } - - #[test] - fn tokenize_strings() { - t!(Body, "a \"hi\" string" => T("a"), S(0), T("\"hi\""), S(0), T("string")); - t!(Header, "\"hello" => Str("hello", false)); - t!(Header, "\"hello world\"" => Str("hello world", true)); - t!(Header, "\"hello\nworld\"" => Str("hello\nworld", true)); - t!(Header, r#"1"hello\nworld"false"# => Num(1.0), Str("hello\\nworld", true), Bool(false)); - t!(Header, r#""a\"bc""# => Str(r#"a\"bc"#, true)); - t!(Header, r#""a\\"bc""# => Str(r#"a\\"#, true), Id("bc"), Str("", false)); - t!(Header, r#""a\tbc"# => Str("a\\tbc", false)); - t!(Header, "\"🌎\"" => Str("🌎", true)); - } - - #[test] - fn 
tokenize_math() { - t!(Header, "12e-3in" => Len(Length::inches(12e-3))); - t!(Header, "-1" => Min, Num(1.0)); - t!(Header, "--1" => Min, Min, Num(1.0)); - t!(Header, "- 1" => Min, S(0), Num(1.0)); - t!(Header, "6.1cm + 4pt,a=1*2" => Len(Length::cm(6.1)), S(0), Plus, S(0), Len(Length::pt(4.0)), - Comma, Id("a"), Equals, Num(1.0), Star, Num(2.0)); - t!(Header, "(5 - 1) / 2.1" => LP, Num(5.0), S(0), Min, S(0), Num(1.0), RP, - S(0), Slash, S(0), Num(2.1)); - } - #[test] fn tokenize_escaped_symbols() { t!(Body, r"\\" => T(r"\")); @@ -475,6 +422,64 @@ mod tests { t!(Header, r"\," => Invalid(r"\"), Comma); } + #[test] + fn tokenize_header_tokens() { + t!(Header, "__main__" => Id("__main__")); + t!(Header, "_func_box" => Id("_func_box")); + t!(Header, ">main" => Invalid(">main")); + t!(Header, "🌓, 🌍," => Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma); + t!(Header, "{abc}" => LB, Id("abc"), RB); + t!(Header, "(1,2)" => LP, Num(1.0), Comma, Num(2.0), RP); + t!(Header, "12_pt, 12pt" => Invalid("12_pt"), Comma, S(0), Len(Length::pt(12.0))); + t!(Header, "f: arg >> g" => Id("f"), Colon, S(0), Id("arg"), S(0), Chain, S(0), Id("g")); + t!(Header, "=3.14" => Equals, Num(3.14)); + t!(Header, "arg, _b, _1" => Id("arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1")); + t!(Header, "a:b" => Id("a"), Colon, Id("b")); + t!(Header, "(){}:=," => LP, RP, LB, RB, Colon, Equals, Comma); + t!(Body, "c=d, " => T("c=d,"), S(0)); + t!(Body, "a: b" => T("a:"), S(0), T("b")); + t!(Header, "a: true, x=1" => Id("a"), Colon, S(0), Bool(true), Comma, S(0), + Id("x"), Equals, Num(1.0)); + } + + #[test] + fn tokenize_numeric_values() { + t!(Header, "12.3e5" => Num(12.3e5)); + t!(Header, "120%" => Num(1.2)); + t!(Header, "12e4%" => Num(1200.0)); + t!(Header, "1e5in" => Len(Length::inches(100000.0))); + t!(Header, "2.3cm" => Len(Length::cm(2.3))); + t!(Header, "02.4mm" => Len(Length::mm(2.4))); + t!(Header, "2.4.cm" => Invalid("2.4.cm")); + t!(Header, "#6ae6dd" => Hex("6ae6dd")); + t!(Header, "#8A083c" => Hex("8A083c")); + } + + #[test] + fn tokenize_strings() { + t!(Body, "a \"hi\" string" => T("a"), S(0), T("\"hi\""), S(0), T("string")); + t!(Header, "\"hello" => Str("hello", false)); + t!(Header, "\"hello world\"" => Str("hello world", true)); + t!(Header, "\"hello\nworld\"" => Str("hello\nworld", true)); + t!(Header, r#"1"hello\nworld"false"# => Num(1.0), Str("hello\\nworld", true), Bool(false)); + t!(Header, r#""a\"bc""# => Str(r#"a\"bc"#, true)); + t!(Header, r#""a\\"bc""# => Str(r#"a\\"#, true), Id("bc"), Str("", false)); + t!(Header, r#""a\tbc"# => Str("a\\tbc", false)); + t!(Header, "\"🌎\"" => Str("🌎", true)); + } + + #[test] + fn tokenize_math() { + t!(Header, "12e-3in" => Len(Length::inches(12e-3))); + t!(Header, "-1" => Min, Num(1.0)); + t!(Header, "--1" => Min, Min, Num(1.0)); + t!(Header, "- 1" => Min, S(0), Num(1.0)); + t!(Header, "6.1cm + 4pt,a=1*2" => Len(Length::cm(6.1)), S(0), Plus, S(0), Len(Length::pt(4.0)), + Comma, Id("a"), Equals, Num(1.0), Star, Num(2.0)); + t!(Header, "(5 - 1) / 2.1" => LP, Num(5.0), S(0), Min, S(0), Num(1.0), RP, + S(0), Slash, S(0), Num(2.1)); + } + #[test] fn tokenize_with_spans() { ts!(Body, "hello" => s(0, 5, T("hello"))); diff --git a/src/syntax/expr.rs b/src/syntax/ast/expr.rs similarity index 95% rename from src/syntax/expr.rs rename to src/syntax/ast/expr.rs index 7f4d03d50..c07c6216c 100644 --- a/src/syntax/expr.rs +++ b/src/syntax/ast/expr.rs @@ -1,9 +1,8 @@ //! Expressions. 
-use super::span::{SpanWith, Spanned};
-use super::{Decoration, Ident, Lit, LitDict};
 use crate::eval::Value;
 use crate::layout::LayoutContext;
+use crate::syntax::{Decoration, Ident, Lit, LitDict, SpanWith, Spanned};
 use crate::Feedback;

 /// An expression.
@@ -50,7 +49,7 @@ impl ExprUnary {
 }

 /// A unary operator.
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum UnOp {
 /// The negation operator: `-`.
 Neg,
@@ -80,7 +79,7 @@ impl ExprBinary {
 }

 /// A binary operator.
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum BinOp {
 /// The addition operator: `+`.
 Add,
diff --git a/src/syntax/lit.rs b/src/syntax/ast/lit.rs
similarity index 91%
rename from src/syntax/lit.rs
rename to src/syntax/ast/lit.rs
index 3cd945839..bbdd0c81a 100644
--- a/src/syntax/lit.rs
+++ b/src/syntax/ast/lit.rs
@@ -1,10 +1,10 @@
 //! Literals.

-use super::{Expr, Ident, SpanWith, Spanned, SynTree};
 use crate::color::RgbaColor;
 use crate::eval::{DictKey, DictValue, SpannedEntry, Value};
 use crate::layout::LayoutContext;
 use crate::length::Length;
+use crate::syntax::{Expr, Ident, SpanWith, Spanned, SynTree};
 use crate::{DynFuture, Feedback};

 /// A literal.
@@ -55,7 +55,7 @@ impl Lit {
 }

 /// A dictionary literal: `(false, 12cm, greeting = "hi")`.
-#[derive(Debug, Default, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq)]
 pub struct LitDict(pub Vec<LitDictEntry>);

 impl LitDict {
@@ -74,8 +74,8 @@ impl LitDict {
 let mut dict = DictValue::new();

 for entry in &self.0 {
- let val = entry.value.v.eval(ctx, f).await;
- let spanned = val.span_with(entry.value.span);
+ let val = entry.expr.v.eval(ctx, f).await;
+ let spanned = val.span_with(entry.expr.span);
 if let Some(key) = &entry.key {
 dict.insert(&key.v, SpannedEntry::new(key.span, spanned));
 } else {
@@ -94,5 +94,5 @@ pub struct LitDictEntry {
 /// The key of the entry if there was one: `greeting`.
 pub key: Option<Spanned<Ident>>,
 /// The value of the entry: `"hi"`.
- pub value: Spanned<Expr>,
+ pub expr: Spanned<Expr>,
 }
diff --git a/src/syntax/ast/mod.rs b/src/syntax/ast/mod.rs
new file mode 100644
index 000000000..56ae41342
--- /dev/null
+++ b/src/syntax/ast/mod.rs
@@ -0,0 +1,9 @@
+//! Abstract syntax tree definition.
+
+mod expr;
+mod lit;
+mod tree;
+
+pub use expr::*;
+pub use lit::*;
+pub use tree::*;
diff --git a/src/syntax/tree.rs b/src/syntax/ast/tree.rs
similarity index 98%
rename from src/syntax/tree.rs
rename to src/syntax/ast/tree.rs
index 80bca3990..03aa34396 100644
--- a/src/syntax/tree.rs
+++ b/src/syntax/ast/tree.rs
@@ -1,7 +1,6 @@
 //! The syntax tree.

-use super::span::{SpanVec, Spanned};
-use super::{Expr, Ident};
+use crate::syntax::{Expr, Ident, SpanVec, Spanned};

 /// A collection of nodes which form a tree together with the nodes' children.
 pub type SynTree = SpanVec<SynNode>;
@@ -11,7 +10,10 @@ pub type SynTree = SpanVec<SynNode>;
 #[derive(Debug, Clone, PartialEq)]
 pub enum SynNode {
 /// Whitespace containing less than two newlines.
- Spacing,
+ Space,
+ /// Plain text.
+ Text(String),
+
 /// A forced line break.
 Linebreak,
 /// A paragraph break.
@@ -20,16 +22,25 @@ pub enum SynNode {
 ToggleItalic,
 /// Bolder was enabled / disabled.
 ToggleBolder,
- /// Plain text.
- Text(String),
- /// An optionally syntax-highlighted raw block.
- Raw(NodeRaw),
+
 /// A section heading.
 Heading(NodeHeading),
+ /// An optionally syntax-highlighted raw block.
+ Raw(NodeRaw),
+
 /// An expression.
 Expr(Expr),
 }

+/// A section heading.
+#[derive(Debug, Clone, PartialEq)]
+pub struct NodeHeading {
+ /// The section depth (how many hashtags minus 1).
+ pub level: Spanned<u8>,
+ /// The contents of the heading.
+ pub contents: SynTree,
+}
+
 /// A raw block, rendered in monospace with optional syntax highlighting.
 ///
 /// Raw blocks start with an arbitrary number of backticks and end with the same
@@ -108,12 +119,3 @@ pub struct NodeRaw {
 /// are inline-level when they contain no newlines.
 pub inline: bool,
 }
-
-/// A section heading.
-#[derive(Debug, Clone, PartialEq)]
-pub struct NodeHeading {
- /// The section depth (how many hashtags minus 1).
- pub level: Spanned<u8>,
- /// The contents of the heading.
- pub contents: SynTree,
-}
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index f4472df55..98e1b4d74 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -1,19 +1,10 @@
 //! Syntax types.

-mod expr;
-mod ident;
-mod lit;
-mod span;
-mod token;
-mod tree;
+pub mod ast;
+pub mod token;

-/// Abstract syntax tree definition.
-pub mod ast {
- use super::*;
- pub use expr::*;
- pub use lit::*;
- pub use tree::*;
-}
+mod ident;
+mod span;

 pub use ast::*;
 pub use ident::*;
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index 629297063..179c46de8 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -13,7 +13,7 @@ thread_local! {
 /// Annotate a value with a span.
 pub trait SpanWith: Sized {
 /// Wraps `self` in a `Spanned` with the given span.
- fn span_with(self, span: Span) -> Spanned<Self> {
+ fn span_with(self, span: impl Into<Span>) -> Spanned<Self> {
 Spanned::new(self, span)
 }
 }
@@ -50,8 +50,8 @@ pub struct Spanned<T> {
 impl<T> Spanned<T> {
 /// Create a new instance from a value and its span.
- pub fn new(v: T, span: Span) -> Self {
- Self { v, span }
+ pub fn new(v: T, span: impl Into<Span>) -> Self {
+ Self { v, span: span.into() }
 }

 /// Create a new instance from a value with the zero span.
@@ -123,16 +123,16 @@ impl Span {
 }

 /// Create a new span with the earlier start and later end position.
- pub fn merge(a: Self, b: Self) -> Self {
+ pub fn join(self, other: Self) -> Self {
 Self {
- start: a.start.min(b.start),
- end: a.end.max(b.end),
+ start: self.start.min(other.start),
+ end: self.end.max(other.end),
 }
 }

 /// Expand a span by merging it with another span.
 pub fn expand(&mut self, other: Self) {
- *self = Self::merge(*self, other)
+ *self = self.join(other)
 }

 /// When set to `false` comparisons with `PartialEq` ignore spans.
@@ -164,6 +164,24 @@ impl PartialEq for Span {
 }
 }

+impl<T> From<T> for Span
+where
+ T: Into<Pos> + Copy,
+{
+ fn from(pos: T) -> Self {
+ Self::at(pos)
+ }
+}
+
+impl<T> From<(T, T)> for Span
+where
+ T: Into<Pos>,
+{
+ fn from((start, end): (T, T)) -> Self {
+ Self::new(start, end)
+ }
+}
+
 impl Debug for Span {
 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
 write!(f, "<{:?}-{:?}>", self.start, self.end)
@@ -185,6 +203,12 @@ impl Pos {
 }
 }

+impl Offset for Pos {
+ fn offset(self, by: Self) -> Self {
+ Pos(self.0 + by.0)
+ }
+}
+
 impl From<u32> for Pos {
 fn from(index: u32) -> Self {
 Self(index)
@@ -197,12 +221,6 @@
 }
 }

-impl Offset for Pos {
- fn offset(self, by: Self) -> Self {
- Pos(self.0 + by.0)
- }
-}
-
 impl Debug for Pos {
 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
 Debug::fmt(&self.0, f)
diff --git a/src/syntax/token.rs b/src/syntax/token.rs
index 4cb8501f9..5c159bbd8 100644
--- a/src/syntax/token.rs
+++ b/src/syntax/token.rs
@@ -1,4 +1,4 @@
-//! Tokenization.
+//! Token definition.

 use crate::length::Length;

@@ -8,6 +8,8 @@ pub enum Token<'s> {
 /// One or more whitespace characters. The contained `usize` denotes the
 /// number of newlines that were contained in the whitespace.
Space(usize), + /// A consecutive non-markup string. + Text(&'s str), /// A line comment with inner string contents `//\n`. LineComment(&'s str), @@ -15,6 +17,20 @@ pub enum Token<'s> { /// can contain nested block comments. BlockComment(&'s str), + /// A star. It can appear in a function header where it signifies the + /// multiplication of expressions or the body where it modifies the styling. + Star, + /// An underscore in body-text. + Underscore, + /// A backslash followed by whitespace in text. + Backslash, + /// A hashtag indicating a section heading. + Hashtag, + /// A raw block. + Raw(TokenRaw<'s>), + /// A unicode escape sequence. + UnicodeEscape(TokenUnicodeEscape<'s>), + /// A left bracket starting a function invocation or body: `[`. LeftBracket, /// A right bracket ending a function invocation or body: `]`. @@ -28,37 +44,14 @@ pub enum Token<'s> { /// A right parenthesis in a function header: `)`. RightParen, - /// A double forward chevron in a function header: `>>`. - Chain, - /// A colon in a function header: `:`. Colon, /// A comma in a function header: `,`. Comma, /// An equals sign in a function header: `=`. Equals, - - /// An identifier in a function header: `center`. - Ident(&'s str), - /// A quoted string in a function header: `"..."`. - Str { - /// The string inside the quotes. - /// - /// _Note_: If the string contains escape sequences these are not yet - /// applied to be able to just store a string slice here instead of - /// a String. The escaping is done later in the parser. - string: &'s str, - /// Whether the closing quote was present. - terminated: bool, - }, - /// A boolean in a function header: `true | false`. - Bool(bool), - /// A number in a function header: `3.14`. - Number(f64), - /// A length in a function header: `12pt`. - Length(Length), - /// A hex value in a function header: `#20d82a`. - Hex(&'s str), + /// A double forward chevron in a function header: `>>`. + Chain, /// A plus in a function header, signifying the addition of expressions. Plus, /// A hyphen in a function header, signifying the subtraction of @@ -67,75 +60,95 @@ pub enum Token<'s> { /// A slash in a function header, signifying the division of expressions. Slash, - /// A star. It can appear in a function header where it signifies the - /// multiplication of expressions or the body where it modifies the styling. - Star, - /// An underscore in body-text. - Underscore, - /// A backslash followed by whitespace in text. - Backslash, - - /// A hashtag token in the body can indicate compute mode or headings. - Hashtag, - - /// A unicode escape sequence. - UnicodeEscape { - /// The escape sequence between two braces. - sequence: &'s str, - /// Whether the closing brace was present. - terminated: bool, - }, - - /// Raw block. - Raw { - /// The raw text between the backticks. - raw: &'s str, - /// The number of opening backticks. - backticks: usize, - /// Whether all closing backticks were present. - terminated: bool, - }, - - /// Any other consecutive string. - Text(&'s str), + /// An identifier in a function header: `center`. + Ident(&'s str), + /// A boolean in a function header: `true | false`. + Bool(bool), + /// A number in a function header: `3.14`. + Number(f64), + /// A length in a function header: `12pt`. + Length(Length), + /// A hex value in a function header: `#20d82a`. + Hex(&'s str), + /// A quoted string in a function header: `"..."`. + Str(TokenStr<'s>), /// Things that are not valid in the context they appeared in. 
 Invalid(&'s str),
 }

+/// A quoted string in a function header: `"..."`.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct TokenStr<'s> {
+ /// The string inside the quotes.
+ ///
+ /// _Note_: If the string contains escape sequences, they are not yet
+ /// applied here, so that a plain string slice can be stored instead of
+ /// an owned `String`. The escapes are resolved later, in the parser.
+ pub string: &'s str,
+ /// Whether the closing quote was present.
+ pub terminated: bool,
+}
+
+/// A unicode escape sequence.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct TokenUnicodeEscape<'s> {
+ /// The escape sequence between two braces.
+ pub sequence: &'s str,
+ /// Whether the closing brace was present.
+ pub terminated: bool,
+}
+
+/// A raw block.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct TokenRaw<'s> {
+ /// The raw text between the backticks.
+ pub text: &'s str,
+ /// The number of opening backticks.
+ pub backticks: usize,
+ /// Whether all closing backticks were present.
+ pub terminated: bool,
+}
+
 impl<'s> Token<'s> {
 /// The natural-language name for this token for use in error messages.
 pub fn name(self) -> &'static str {
 match self {
 Self::Space(_) => "space",
+ Self::Text(_) => "text",
+
 Self::LineComment(_) => "line comment",
 Self::BlockComment(_) => "block comment",
- Self::LeftBracket => "opening bracket",
- Self::RightBracket => "closing bracket",
- Self::LeftParen => "opening paren",
- Self::RightParen => "closing paren",
- Self::LeftBrace => "opening brace",
- Self::RightBrace => "closing brace",
- Self::Chain => "function chain operator",
- Self::Colon => "colon",
- Self::Comma => "comma",
- Self::Equals => "equals sign",
- Self::Ident(_) => "identifier",
- Self::Str { .. } => "string",
- Self::Bool(_) => "bool",
- Self::Number(_) => "number",
- Self::Length(_) => "length",
- Self::Hex(_) => "hex value",
- Self::Plus => "plus",
- Self::Hyphen => "minus",
- Self::Slash => "slash",
+ Self::Star => "star",
 Self::Underscore => "underscore",
 Self::Backslash => "backslash",
 Self::Hashtag => "hashtag",
- Self::UnicodeEscape { .. } => "unicode escape sequence",
 Self::Raw { .. } => "raw block",
- Self::Text(_) => "text",
+ Self::UnicodeEscape { .. } => "unicode escape sequence",
+
+ Self::LeftBracket => "opening bracket",
+ Self::RightBracket => "closing bracket",
+ Self::LeftBrace => "opening brace",
+ Self::RightBrace => "closing brace",
+ Self::LeftParen => "opening paren",
+ Self::RightParen => "closing paren",
+
+ Self::Colon => "colon",
+ Self::Comma => "comma",
+ Self::Equals => "equals sign",
+ Self::Chain => "function chaining operator",
+ Self::Plus => "plus sign",
+ Self::Hyphen => "minus sign",
+ Self::Slash => "slash",
+
+ Self::Ident(_) => "identifier",
+ Self::Bool(_) => "bool",
+ Self::Number(_) => "number",
+ Self::Length(_) => "length",
+ Self::Hex(_) => "hex value",
+ Self::Str { .. } => "string",
+
 Self::Invalid("*/") => "end of block comment",
 Self::Invalid(_) => "invalid token",
 }
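
A minimal usage sketch for the new data-carrying token variants above (illustrative only, not part of the patch; the `describe` helper and the exact import path are assumptions, while `Token`, `TokenStr`, `TokenRaw`, and `Token::name` come from `src/syntax/token.rs` as changed in this diff):

use crate::syntax::token::{Token, TokenRaw, TokenStr};

// Hypothetical helper: renders a token for debugging or error output.
fn describe(token: Token<'_>) -> String {
    match token {
        // Escape sequences in `string` are still unresolved at this point;
        // per the note on `TokenStr`, the parser applies them later.
        Token::Str(TokenStr { string, terminated }) => {
            format!("string {:?} (terminated: {})", string, terminated)
        }
        Token::Raw(TokenRaw { text, backticks, terminated }) => {
            format!("raw block with {} backticks (terminated: {}): {:?}", backticks, terminated, text)
        }
        // Everything else falls back to the natural-language name used in error messages.
        other => other.name().to_string(),
    }
}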