diff --git a/Cargo.toml b/Cargo.toml index 2082d9163..b2c385d41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,9 +9,9 @@ build = "build.rs" toddle = { path = "../toddle", default-features = false } tide = { path = "../tide" } byteorder = "1" -smallvec = "0.6.10" -unicode-xid = "0.1.0" -async-trait = "0.1.22" +smallvec = "1" +unicode-xid = "0.2" +async-trait = "0.1" futures-executor = { version = "0.3", optional = true } [features] diff --git a/src/func/macros.rs b/src/func/macros.rs index 9e931ea2c..1083e53c6 100644 --- a/src/func/macros.rs +++ b/src/func/macros.rs @@ -75,6 +75,8 @@ macro_rules! function { parse($args:ident, $body:pat, $ctx:pat, $metadata:pat) $code:block $($rest:tt)* ) => { + use $crate::func::prelude::*; + impl $crate::func::ParseFunc for $type { type Meta = $meta; @@ -88,7 +90,8 @@ macro_rules! function { let mut $args = args; let val = $code; if !$args.is_empty() { - error!(unexpected_argument); + return Err($crate::TypesetError + ::with_message("unexpected arguments")); } Ok(val) } @@ -109,6 +112,8 @@ macro_rules! function { // (2-arg) Parse a layout-definition with all arguments. (@layout $type:ident | layout($this:ident, $ctx:pat) $code:block) => { + use $crate::func::prelude::*; + impl LayoutFunc for $type { fn layout<'a, 'life0, 'life1, 'async_trait>( &'a $this, @@ -138,13 +143,13 @@ macro_rules! function { macro_rules! parse { (forbidden: $body:expr) => { if $body.is_some() { - error!("unexpected body"); + return Err($crate::TypesetError::with_message("unexpected body")); } }; (optional: $body:expr, $ctx:expr) => ( if let Some(body) = $body { - Some($crate::syntax::parse(body, $ctx)) + Some($crate::syntax::parse(body, $ctx).0) } else { None } @@ -152,9 +157,9 @@ macro_rules! parse { (expected: $body:expr, $ctx:expr) => ( if let Some(body) = $body { - $crate::syntax::parse(body, $ctx)? + $crate::syntax::parse(body, $ctx).0 } else { - error!("expected body"); + Err($crate::TypesetError::with_message("unexpected body")) } ) } diff --git a/src/func/mod.rs b/src/func/mod.rs index 5f4918d92..90b2a31d3 100644 --- a/src/func/mod.rs +++ b/src/func/mod.rs @@ -119,6 +119,7 @@ pub enum Command<'a> { /// A map from identifiers to function parsers. pub struct Scope { parsers: HashMap>, + debug: Option> } /// A function which parses the source of a function into a function type which @@ -129,11 +130,30 @@ type Parser = dyn Fn( ParseContext ) -> ParseResult>; +fn make_parser(metadata: ::Meta) -> Box +where F: ParseFunc + LayoutFunc + 'static { + Box::new(move |a, b, c| { + F::parse(a, b, c, metadata.clone()) + .map(|f| Box::new(f) as Box) + }) +} + impl Scope { /// Create a new empty scope. pub fn new() -> Scope { Scope { parsers: HashMap::new(), + debug: None, + } + } + + /// Create a new scope with a debug parser that is invoked if not other + /// match is found. + pub fn with_debug() -> Scope + where F: ParseFunc + LayoutFunc + 'static { + Scope { + parsers: HashMap::new(), + debug: Some(make_parser::(())), } } @@ -154,16 +174,14 @@ impl Scope { where F: ParseFunc + LayoutFunc + 'static { self.parsers.insert( name.to_owned(), - Box::new(move |a, b, c| { - F::parse(a, b, c, metadata.clone()) - .map(|f| Box::new(f) as Box) - }) + make_parser::(metadata), ); } /// Return the parser with the given name if there is one. pub(crate) fn get_parser(&self, name: &str) -> Option<&Parser> { self.parsers.get(name).map(|x| &**x) + .or(self.debug.as_ref().map(|x| &**x)) } } diff --git a/src/lib.rs b/src/lib.rs index 5756cc21d..17188145e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,7 +28,7 @@ use toddle::Error as FontError; use crate::func::Scope; use crate::layout::{MultiLayout, LayoutResult}; -use crate::syntax::{parse, SyntaxTree, ParseContext, Span, ParseResult}; +use crate::syntax::{parse, SyntaxTree, Colorization, ErrorMap, ParseContext, Span}; use crate::style::{LayoutStyle, PageStyle, TextStyle}; #[macro_use] @@ -84,7 +84,7 @@ impl<'p> Typesetter<'p> { } /// Parse source code into a syntax tree. - pub fn parse(&self, src: &str) -> SyntaxTree { + pub fn parse(&self, src: &str) -> (SyntaxTree, Colorization, ErrorMap) { let scope = Scope::with_std(); parse(src, ParseContext { scope: &scope }) } @@ -115,7 +115,7 @@ impl<'p> Typesetter<'p> { /// Process source code directly into a layout. pub async fn typeset(&self, src: &str) -> TypesetResult { - let tree = self.parse(src); + let tree = self.parse(src).0; let layout = self.layout(&tree).await?; Ok(layout) } @@ -132,8 +132,8 @@ pub struct TypesetError { impl TypesetError { /// Create a new typesetting error. - pub fn with_message(message: String) -> TypesetError { - TypesetError { message, span: None } + pub fn with_message(message: impl Into) -> TypesetError { + TypesetError { message: message.into(), span: None } } } diff --git a/src/syntax/color.rs b/src/syntax/color.rs index 7f34fad71..65525480c 100644 --- a/src/syntax/color.rs +++ b/src/syntax/color.rs @@ -1,28 +1,3 @@ -/// Entities which can be colored by syntax highlighting. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum ColorToken { - Comment, +use super::*; - Bracket, - FuncName, - Colon, - Key, - Equals, - Comma, - - Paren, - Brace, - - ExprIdent, - ExprStr, - ExprNumber, - ExprSize, - ExprBool, - - Bold, - Italic, - Monospace, - - Invalid, -} diff --git a/src/syntax/expr.rs b/src/syntax/expr.rs index e2df3c4e6..b06b29c80 100644 --- a/src/syntax/expr.rs +++ b/src/syntax/expr.rs @@ -91,12 +91,6 @@ pub struct Object { pub pairs: Vec, } -#[derive(Clone, PartialEq)] -pub struct Pair { - pub key: Spanned, - pub value: Spanned, -} - impl Object { pub fn new() -> Object { Object { pairs: vec![] } @@ -120,7 +114,7 @@ impl Display for Object { if !first { write!(f, ", ")?; } - write!(f, "{}: {}", pair.key.v, pair.value.v)?; + write!(f, "{}", pair)?; first = false; } @@ -128,10 +122,23 @@ impl Display for Object { } } +#[derive(Clone, PartialEq)] +pub struct Pair { + pub key: Spanned, + pub value: Spanned, +} + +impl Display for Pair { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{}: {}", self.key.v, self.value.v) + } +} + debug_display!(Ident); debug_display!(Expression); debug_display!(Tuple); debug_display!(Object); +debug_display!(Pair); /// Kinds of expressions. diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 11b35a067..1c72de4de 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -14,3 +14,247 @@ pub_use_mod!(expr); pub_use_mod!(tokens); pub_use_mod!(parsing); pub_use_mod!(span); + + +/// A minimal semantic entity of source code. +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum Token<'s> { + /// One or more whitespace characters. The contained `usize` denotes the + /// number of newlines that were contained in the whitespace. + Whitespace(usize), + + /// A line comment with inner string contents `//<&'s str>\n`. + LineComment(&'s str), + /// A block comment with inner string contents `/*<&'s str>*/`. The comment + /// can contain nested block comments. + BlockComment(&'s str), + /// An erroneous `*/` without an opening block comment. + StarSlash, + + /// A left bracket: `[`. + LeftBracket, + /// A right bracket: `]`. + RightBracket, + + /// A left parenthesis in a function header: `(`. + LeftParen, + /// A right parenthesis in a function header: `)`. + RightParen, + /// A left brace in a function header: `{`. + LeftBrace, + /// A right brace in a function header: `}`. + RightBrace, + + /// A colon in a function header: `:`. + Colon, + /// A comma in a function header: `:`. + Comma, + /// An equals sign in a function header: `=`. + Equals, + + /// An identifier in a function header: `center`. + ExprIdent(&'s str), + /// A quoted string in a function header: `"..."`. + ExprStr(&'s str), + /// A number in a function header: `3.14`. + ExprNumber(f64), + /// A size in a function header: `12pt`. + ExprSize(Size), + /// A boolean in a function header: `true | false`. + ExprBool(bool), + + /// A star in body-text. + Star, + /// An underscore in body-text. + Underscore, + /// A backtick in body-text. + Backtick, + + /// Any other consecutive string. + Text(&'s str), +} + +/// A tree representation of source code. +#[derive(Debug, PartialEq)] +pub struct SyntaxTree { + pub nodes: Vec>, +} + +impl SyntaxTree { + /// Create an empty syntax tree. + pub fn new() -> SyntaxTree { + SyntaxTree { nodes: vec![] } + } + + /// Add a node to the tree. + pub fn add(&mut self, node: Spanned) { + self.nodes.push(node); + } +} + +/// A node in the syntax tree. +#[derive(Debug, PartialEq)] +pub enum Node { + /// A number of whitespace characters containing less than two newlines. + Space, + /// Whitespace characters with more than two newlines. + Newline, + /// Plain text. + Text(String), + /// Italics enabled / disabled. + ToggleItalic, + /// Bolder enabled / disabled. + ToggleBolder, + /// Monospace enabled / disabled. + ToggleMonospace, + /// A function invocation. + Func(FuncCall), +} + +/// An invocation of a function. +#[derive(Debug)] +pub struct FuncCall(pub Box); + +impl PartialEq for FuncCall { + fn eq(&self, other: &FuncCall) -> bool { + &self.0 == &other.0 + } +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct Colorization { + pub colors: Vec>, +} + +/// Entities which can be colored by syntax highlighting. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum ColorToken { + Comment, + + Bracket, + FuncName, + Colon, + + Key, + Equals, + Comma, + + Paren, + Brace, + + ExprIdent, + ExprStr, + ExprNumber, + ExprSize, + ExprBool, + + Bold, + Italic, + Monospace, + + Invalid, +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct ErrorMap { + pub errors: Vec>, +} + +#[derive(Debug)] +pub struct FuncHeader { + pub name: Spanned, + pub args: FuncArgs, +} + +#[derive(Debug)] +pub struct FuncArgs { + positional: Tuple, + keyword: Object, +} + +impl FuncArgs { + fn new() -> FuncArgs { + FuncArgs { + positional: Tuple::new(), + keyword: Object::new(), + } + } + + /// Add a positional argument. + pub fn add_pos(&mut self, item: Spanned) { + self.positional.add(item); + } + + /// Force-extract the first positional argument. + pub fn get_pos(&mut self) -> ParseResult { + expect(self.get_pos_opt()) + } + + /// Extract the first positional argument. + pub fn get_pos_opt(&mut self) -> ParseResult> { + Ok(if !self.positional.items.is_empty() { + let spanned = self.positional.items.remove(0); + Some(E::from_expr(spanned)?) + } else { + None + }) + } + + /// Add a keyword argument. + pub fn add_key(&mut self, key: Spanned, value: Spanned) { + self.keyword.add(key, value); + } + + /// Add a keyword argument from an existing pair. + pub fn add_key_pair(&mut self, pair: Pair) { + self.keyword.add_pair(pair); + } + + /// Force-extract a keyword argument. + pub fn get_key(&mut self, name: &str) -> ParseResult { + expect(self.get_key_opt(name)) + } + + /// Extract a keyword argument. + pub fn get_key_opt(&mut self, name: &str) -> ParseResult> { + self.keyword.pairs.iter() + .position(|p| p.key.v.0 == name) + .map(|index| { + let value = self.keyword.pairs.swap_remove(index).value; + E::from_expr(value) + }) + .transpose() + } + + /// Iterator over positional arguments. + pub fn iter_pos(&mut self) -> std::vec::IntoIter> { + let tuple = std::mem::replace(&mut self.positional, Tuple::new()); + tuple.items.into_iter() + } + + /// Iterator over all keyword arguments. + pub fn iter_keys(&mut self) -> std::vec::IntoIter { + let object = std::mem::replace(&mut self.keyword, Object::new()); + object.pairs.into_iter() + } + + /// Clear the argument lists. + pub fn clear(&mut self) { + self.positional.items.clear(); + self.keyword.pairs.clear(); + } + + /// Whether both the positional and keyword argument lists are empty. + pub fn is_empty(&self) -> bool { + self.positional.items.is_empty() && self.keyword.pairs.is_empty() + } +} + +/// Extract the option expression kind from the option or return an error. +fn expect(opt: ParseResult>) -> ParseResult { + match opt { + Ok(Some(spanned)) => Ok(spanned), + Ok(None) => error!("expected {}", E::NAME), + Err(e) => Err(e), + } +} diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs index 47322485c..bf3bea893 100644 --- a/src/syntax/parsing.rs +++ b/src/syntax/parsing.rs @@ -1,147 +1,10 @@ -use std::iter::Peekable; - use crate::func::Scope; use super::*; use Token::*; -/// A tree representation of source code. -#[derive(Debug, PartialEq)] -pub struct SyntaxTree { - pub nodes: Vec>, -} - -impl SyntaxTree { - /// Create an empty syntax tree. - pub fn new() -> SyntaxTree { - SyntaxTree { nodes: vec![] } - } -} - -/// A node in the syntax tree. -#[derive(Debug, PartialEq)] -pub enum Node { - /// A number of whitespace characters containing less than two newlines. - Space, - /// Whitespace characters with more than two newlines. - Newline, - /// Plain text. - Text(String), - /// Italics enabled / disabled. - ToggleItalic, - /// Bolder enabled / disabled. - ToggleBolder, - /// Monospace enabled / disabled. - ToggleMonospace, - /// A function invocation. - Func(FuncCall), -} - -/// An invocation of a function. -#[derive(Debug)] -pub struct FuncCall(pub Box); - -impl PartialEq for FuncCall { - fn eq(&self, other: &FuncCall) -> bool { - &self.0 == &other.0 - } -} - -#[derive(Debug)] -pub struct FuncArgs { - positional: Tuple, - keyword: Object, -} - -impl FuncArgs { - fn new() -> FuncArgs { - FuncArgs { - positional: Tuple::new(), - keyword: Object::new(), - } - } - - /// Add a positional argument. - pub fn add_pos(&mut self, item: Spanned) { - self.positional.add(item); - } - - /// Force-extract the first positional argument. - pub fn get_pos(&mut self) -> ParseResult { - expect(self.get_pos_opt()) - } - - /// Extract the first positional argument. - pub fn get_pos_opt(&mut self) -> ParseResult> { - Ok(if !self.positional.items.is_empty() { - let spanned = self.positional.items.remove(0); - Some(E::from_expr(spanned)?) - } else { - None - }) - } - - /// Add a keyword argument. - pub fn add_key(&mut self, key: Spanned, value: Spanned) { - self.keyword.add(key, value); - } - - /// Add a keyword argument from an existing pair. - pub fn add_key_pair(&mut self, pair: Pair) { - self.keyword.add_pair(pair); - } - - /// Force-extract a keyword argument. - pub fn get_key(&mut self, name: &str) -> ParseResult { - expect(self.get_key_opt(name)) - } - - /// Extract a keyword argument. - pub fn get_key_opt(&mut self, name: &str) -> ParseResult> { - self.keyword.pairs.iter() - .position(|p| p.key.v.0 == name) - .map(|index| { - let value = self.keyword.pairs.swap_remove(index).value; - E::from_expr(value) - }) - .transpose() - } - - /// Iterator over positional arguments. - pub fn iter_pos(&mut self) -> std::vec::IntoIter> { - let tuple = std::mem::replace(&mut self.positional, Tuple::new()); - tuple.items.into_iter() - } - - /// Iterator over all keyword arguments. - pub fn iter_keys(&mut self) -> std::vec::IntoIter { - let object = std::mem::replace(&mut self.keyword, Object::new()); - object.pairs.into_iter() - } - - /// Clear the argument lists. - pub fn clear(&mut self) { - self.positional.items.clear(); - self.keyword.pairs.clear(); - } - - /// Whether both the positional and keyword argument lists are empty. - pub fn is_empty(&self) -> bool { - self.positional.items.is_empty() && self.keyword.pairs.is_empty() - } -} - -/// Extract the option expression kind from the option or return an error. -fn expect(opt: ParseResult>) -> ParseResult { - match opt { - Ok(Some(spanned)) => Ok(spanned), - Ok(None) => error!("expected {}", E::NAME), - Err(e) => Err(e), - } -} - /// Parses source code into a syntax tree given a context. -pub fn parse(src: &str, ctx: ParseContext) -> SyntaxTree { +pub fn parse(src: &str, ctx: ParseContext) -> (SyntaxTree, Colorization, ErrorMap) { Parser::new(src, ctx).parse() } @@ -155,16 +18,13 @@ pub struct ParseContext<'a> { struct Parser<'s> { src: &'s str, ctx: ParseContext<'s>, - tokens: Peekable>, - errors: Vec>, - colored: Vec>, - span: Span, -} + colorization: Colorization, + error_map: ErrorMap, -macro_rules! defer { - ($($tts:tt)*) => ( - unimplemented!() - ); + tokens: Tokens<'s>, + peeked: Option>>>, + position: Position, + last_position: Position, } impl<'s> Parser<'s> { @@ -172,81 +32,128 @@ impl<'s> Parser<'s> { Parser { src, ctx, - tokens: Tokens::new(src).peekable(), - errors: vec![], - colored: vec![], - span: Span::ZERO, + error_map: ErrorMap { errors: vec![] }, + colorization: Colorization { colors: vec![] }, + + tokens: Tokens::new(src), + peeked: None, + position: Position::ZERO, + last_position: Position::ZERO, } } - fn parse(mut self) -> SyntaxTree { + fn parse(mut self) -> (SyntaxTree, Colorization, ErrorMap) { let mut tree = SyntaxTree::new(); loop { - self.skip_whitespace(); + if let Some(spanned) = self.eat() { + match spanned.v { + LineComment(_) | BlockComment(_) => {} - let start = self.position(); + Whitespace(newlines) => { + tree.add(spanned.map_v(if newlines >= 2 { + Node::Newline + } else { + Node::Space + })); + } - let node = match self.next() { - Some(LeftBracket) => self.parse_func().map(|f| Node::Func(f)), - Some(Star) => Some(Node::ToggleBolder), - Some(Underscore) => Some(Node::ToggleItalic), - Some(Backtick) => Some(Node::ToggleMonospace), - Some(Text(text)) => Some(Node::Text(text.to_owned())), - Some(other) => { self.unexpected(other); None }, - None => break, - }; + LeftBracket => { + if let Some(func) = self.parse_func() { + tree.add(func); + } + } - if let Some(node) = node { - let end = self.position(); - let span = Span { start, end }; + Star => tree.add(spanned.map_v(Node::ToggleBolder)), + Underscore => tree.add(spanned.map_v(Node::ToggleItalic)), + Backtick => tree.add(spanned.map_v(Node::ToggleMonospace)), + Text(text) => tree.add(spanned.map_v(Node::Text(text.to_owned()))), - tree.nodes.push(Spanned { v: node, span }); + _ => self.unexpected(spanned), + } + } else { + break; } } - tree + (tree, self.colorization, self.error_map) } - fn parse_func(&mut self) -> Option { - let (name, args) = self.parse_func_header()?; - self.parse_func_call(name, args) + fn parse_func(&mut self) -> Option> { + let start = self.last_pos(); + + let header = self.parse_func_header(); + let call = self.parse_func_call(header)?; + + let end = self.pos(); + let span = Span { start, end }; + + Some(Spanned { v: Node::Func(call), span }) } - fn parse_func_header(&mut self) -> Option<(Spanned, FuncArgs)> { - defer! { self.eat_until(|t| t == RightBracket, true); } - + fn parse_func_header(&mut self) -> Option { self.skip_whitespace(); - let name = self.parse_func_name()?; + let name = self.parse_func_name().or_else(|| { + self.eat_until(|t| t == RightBracket, true); + None + })?; self.skip_whitespace(); - - let args = match self.next() { - Some(Colon) => self.parse_func_args(), - Some(RightBracket) => FuncArgs::new(), + let args = match self.eat() { + Some(Spanned { v: Colon, .. }) => self.parse_func_args(), + Some(Spanned { v: RightBracket, .. }) => FuncArgs::new(), other => { self.expected("colon or closing bracket", other); + self.eat_until(|t| t == RightBracket, true); FuncArgs::new() } }; - Some((name, args)) + Some(FuncHeader { name, args }) } - fn parse_func_call( - &mut self, - name: Spanned, - args: FuncArgs, - ) -> Option { - unimplemented!() + fn parse_func_call(&mut self, header: Option) -> Option { + println!("peek: {:?}", self.peek()); + + let body = if self.peek() == Some(LeftBracket) { + self.eat(); + + let start = self.tokens.index(); + let found = self.tokens.move_to_closing_bracket(); + let end = self.tokens.index(); + + self.last_position = self.position; + self.position = self.tokens.pos(); + + let body = &self.src[start .. end]; + + if found { + assert_eq!(self.eat().map(Spanned::value), Some(RightBracket)); + } else { + self.error_here("expected closing bracket"); + } + + Some(body) + } else { + None + }; + + let header = header?; + let name = header.name; + let parser = self.ctx.scope.get_parser(name.v.as_str()).or_else(|| { + self.error(format!("unknown function: `{}`", name.v), name.span); + None + })?; + + Some(FuncCall(parser(header.args, body, self.ctx).unwrap())) } fn parse_func_name(&mut self) -> Option> { - match self.next() { - Some(ExprIdent(ident)) => { - self.color_span(ColorToken::FuncName, self.span(), true); - Some(Spanned { v: Ident(ident.to_string()), span: self.span() }) + match self.eat() { + Some(Spanned { v: ExprIdent(ident), span }) => { + self.color(Spanned { v: ColorToken::FuncName, span }, true); + Some(Spanned { v: Ident(ident.to_string()), span }) } other => { self.expected("identifier", other); @@ -256,119 +163,16 @@ impl<'s> Parser<'s> { } fn parse_func_args(&mut self) -> FuncArgs { - enum State { - Start, - Identifier(Spanned), - Assignment(Spanned), - Value, - } - - impl State { - fn expected(&self) -> &'static str { - match self { - State::Start => "value or key", - State::Identifier(_) => "comma or assignment", - State::Assignment(_) => "value", - State::Value => "comma", - } - } - } - - let mut args = FuncArgs::new(); - let mut state = State::Start; - - loop { - self.skip_whitespace(); - - /* - let token = self.next(); - match token { - Some(ExprIdent(ident)) => match state { - State::Start => { - state = State::Identifier(Spanned { - v: Ident(ident.to_string()), - span: self.span(), - }); - } - State::Identifier(prev) => { - self.expected(state.expected(), token); - args.add_pos(prev.map(|id| Expression::Ident(id))); - state = State::Identifier(Spanned { - v: Ident(ident.to_string()), - span: self.span(), - }); - } - State::Assignment(key) => { - let span = Span::merge(key.span, self.span()); - args.add_key(Spanned::new(KeyArg { - key, - value: Spanned { - v: Expression::Ident(Ident(ident.to_string())), - span: self.span(), - }, - }, span)); - state = State::Value; - } - State::Value => { - self.expected(state.expected(), token); - state = State::Identifier(Spanned { - v: Ident(ident.to_string()), - span: self.span(), - }); - } - } - - // Handle expressions. - Some(Expr(_)) | Some(LeftParen) | Some(LeftBrace) => { - let expr = match token.unwrap() { - Expr(e) => e, - LeftParen => self.parse_tuple(), - LeftBrace => self.parse_object(), - _ => unreachable!(), - } - } - - // Handle commas after values. - Some(Comma) => match state { - State::Identifier(ident) => { - args.add_pos(ident.map(|id| Expression::Ident(id))); - state = State::Start; - } - State::Value => state = State::Start, - _ => self.expected(state.expected(), token), - } - - // Handle the end of the function header. - Some(RightBracket) => { - match state { - State::Identifier(ident) => { - args.add_pos(ident.map(|id| Expression::Ident(id))); - } - State::Assignment(_) => { - self.expected(state.expected(), token); - } - _ => {} - } - - break; - } - } - */ - } - - args + // unimplemented!() + FuncArgs::new() } - fn handle_expr(&mut self, expr: Spanned) { - + fn parse_tuple(&mut self) -> Spanned { + unimplemented!("parse_tuple") } - fn parse_tuple(&mut self) -> Spanned { - unimplemented!() - } - - fn parse_object(&mut self) -> Spanned { - unimplemented!() + fn parse_object(&mut self) -> Spanned { + unimplemented!("parse_object") } fn skip_whitespace(&mut self) { @@ -378,68 +182,52 @@ impl<'s> Parser<'s> { }, false) } - fn eat_until(&mut self, mut f: F, eat_match: bool) - where F: FnMut(Token<'s>) -> bool { - while let Some(token) = self.tokens.peek() { - if f(token.v) { - if eat_match { - self.next(); - } - break; - } - - self.next(); + fn expected(&mut self, thing: &str, found: Option>) { + if let Some(Spanned { v: found, span }) = found { + self.error( + format!("expected {}, found {}", thing, name(found)), + span + ); + } else { + self.error_here(format!("expected {}", thing)); } } - fn next(&mut self) -> Option> { - self.tokens.next().map(|spanned| { - self.color_token(&spanned.v, spanned.span); - self.span = spanned.span; - spanned.v - }) + fn unexpected(&mut self, found: Spanned) { + self.error_map.errors.push(found.map(|t| format!("unexpected {}", name(t)))); } - fn span(&self) -> Span { - self.span + fn error(&mut self, message: impl Into, span: Span) { + self.error_map.errors.push(Spanned { v: message.into(), span }); } - fn position(&self) -> Position { - self.span.end + fn error_here(&mut self, message: impl Into) { + self.error(message, Span::at(self.pos())); } - fn unexpected(&mut self, found: Token) { - self.errors.push(Spanned { - v: format!("unexpected {}", name(found)), - span: self.span(), - }); + fn color(&mut self, token: Spanned, replace_last: bool) { + if replace_last { + if let Some(last) = self.colorization.colors.last_mut() { + *last = token; + return; + } + } + + self.colorization.colors.push(token); } - fn expected(&mut self, thing: &str, found: Option) { - let message = if let Some(found) = found { - format!("expected {}, found {}", thing, name(found)) - } else { - format!("expected {}", thing) - }; - - self.errors.push(Spanned { - v: message, - span: self.span(), - }); - } - - fn color_token(&mut self, token: &Token<'s>, span: Span) { - let colored = match token { + fn color_token(&mut self, token: Spanned>) { + let colored = match token.v { LineComment(_) | BlockComment(_) => Some(ColorToken::Comment), - StarSlash => Some(ColorToken::Invalid), + StarSlash => Some(ColorToken::Invalid), LeftBracket | RightBracket => Some(ColorToken::Bracket), LeftParen | RightParen => Some(ColorToken::Paren), LeftBrace | RightBrace => Some(ColorToken::Brace), - Colon => Some(ColorToken::Colon), - Comma => Some(ColorToken::Comma), - Equals => Some(ColorToken::Equals), - ExprIdent(_) => Some(ColorToken::ExprIdent), - ExprStr(_) => Some(ColorToken::ExprStr), + Colon => Some(ColorToken::Colon), + Comma => Some(ColorToken::Comma), + Equals => Some(ColorToken::Equals), + ExprIdent(_) => Some(ColorToken::ExprIdent), + ExprStr(_) => Some(ColorToken::ExprStr), ExprNumber(_) => Some(ColorToken::ExprNumber), ExprSize(_) => Some(ColorToken::ExprSize), ExprBool(_) => Some(ColorToken::ExprBool), @@ -447,21 +235,49 @@ impl<'s> Parser<'s> { }; if let Some(color) = colored { - self.colored.push(Spanned { v: color, span }); + self.colorization.colors.push(Spanned { v: color, span: token.span }); } } - fn color_span(&mut self, color: ColorToken, span: Span, replace_last: bool) { - let token = Spanned { v: color, span }; - - if replace_last { - if let Some(last) = self.colored.last_mut() { - *last = token; - return; + fn eat_until(&mut self, mut f: F, eat_match: bool) + where F: FnMut(Token<'s>) -> bool { + while let Some(token) = self.peek() { + if f(token) { + if eat_match { + self.eat(); + } + break; } + + self.eat(); + } + } + + fn eat(&mut self) -> Option>> { + let token = self.peeked.take().unwrap_or_else(|| self.tokens.next()); + + self.last_position = self.position; + if let Some(spanned) = token { + self.color_token(spanned); + self.position = spanned.span.end; } - self.colored.push(token); + token + } + + fn peek(&mut self) -> Option> { + let iter = &mut self.tokens; + self.peeked + .get_or_insert_with(|| iter.next()) + .map(Spanned::value) + } + + fn pos(&self) -> Position { + self.position + } + + fn last_pos(&self) -> Position { + self.last_position } } diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 295a4382f..69d799654 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -6,64 +6,6 @@ use Token::*; use State::*; -/// A minimal semantic entity of source code. -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum Token<'s> { - /// One or more whitespace characters. The contained `usize` denotes the - /// number of newlines that were contained in the whitespace. - Whitespace(usize), - - /// A line comment with inner string contents `//<&'s str>\n`. - LineComment(&'s str), - /// A block comment with inner string contents `/*<&'s str>*/`. The comment - /// can contain nested block comments. - BlockComment(&'s str), - /// An erroneous `*/` without an opening block comment. - StarSlash, - - /// A left bracket: `[`. - LeftBracket, - /// A right bracket: `]`. - RightBracket, - - /// A left parenthesis in a function header: `(`. - LeftParen, - /// A right parenthesis in a function header: `)`. - RightParen, - /// A left brace in a function header: `{`. - LeftBrace, - /// A right brace in a function header: `}`. - RightBrace, - - /// A colon in a function header: `:`. - Colon, - /// A comma in a function header: `:`. - Comma, - /// An equals sign in a function header: `=`. - Equals, - - /// An identifier in a function header: `center`. - ExprIdent(&'s str), - /// A quoted string in a function header: `"..."`. - ExprStr(&'s str), - /// A number in a function header: `3.14`. - ExprNumber(f64), - /// A size in a function header: `12pt`. - ExprSize(Size), - /// A boolean in a function header: `true | false`. - ExprBool(bool), - - /// A star in body-text. - Star, - /// An underscore in body-text. - Underscore, - /// A backtick in body-text. - Backtick, - - /// Any other consecutive string. - Text(&'s str), -} - /// Decomposes text into a sequence of semantic tokens. pub fn tokenize(src: &str) -> Tokens { Tokens::new(src) @@ -97,6 +39,47 @@ impl<'s> Tokens<'s> { index: 0, } } + + /// The index in the string at which the last token ends and next token will + /// start. + pub fn index(&self) -> usize { + self.index + } + + /// The line-colunn position in the source at which the last token ends and + /// next token will start. + pub fn pos(&self) -> Position { + self.position + } + + /// Move through the string until an unbalanced closing bracket is found + /// without tokenizing the contents. + /// + /// Returns whether a closing bracket was found or the end of the string was + /// reached. + pub fn move_to_closing_bracket(&mut self) -> bool { + let mut escaped = false; + let mut depth = 0; + + self.read_string_until(|n| { + match n { + '[' if !escaped => depth += 1, + ']' if !escaped => { + if depth == 0 { + return true; + } else { + depth -= 1; + } + } + '\\' => escaped = !escaped, + _ => escaped = false, + } + + false + }, false, 0, 0); + + self.peek() == Some(']') + } } impl<'s> Iterator for Tokens<'s> { @@ -118,8 +101,13 @@ impl<'s> Iterator for Tokens<'s> { // Functions. '[' => { - self.stack.push(self.state); - self.state = Header; + if self.state == Header || self.state == Body { + self.stack.push(self.state); + self.state = Header; + } else { + self.state = Body; + } + LeftBracket } ']' => { @@ -221,12 +209,10 @@ impl<'s> Tokens<'s> { fn parse_string(&mut self) -> Token<'s> { let mut escaped = false; ExprStr(self.read_string_until(|n| { - if n == '"' && !escaped { - return true; - } else if n == '\\' { - escaped = !escaped; - } else { - escaped = false; + match n { + '"' if !escaped => return true, + '\\' => escaped = !escaped, + _ => escaped = false, } false @@ -316,14 +302,6 @@ impl<'s> Tokens<'s> { fn peek(&mut self) -> Option { self.iter.peek().copied() } - - fn index(&self) -> usize { - self.index - } - - fn pos(&self) -> Position { - self.position - } } fn parse_percentage(text: &str) -> Option { diff --git a/tests/parse.rs b/tests/parse.rs index 3e46dd4a1..616f4d70b 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -1,47 +1,159 @@ #![allow(unused_imports)] +#![allow(dead_code)] #![allow(non_snake_case)] +use typstc::func::Scope; use typstc::size::Size; use typstc::syntax::*; -use Token::{ - Whitespace as W, - LineComment as LC, BlockComment as BC, StarSlash as SS, - LeftBracket as LB, RightBracket as RB, - LeftParen as LP, RightParen as RP, - LeftBrace as LBR, RightBrace as RBR, - Colon as CL, Comma as CM, Equals as EQ, - ExprIdent as ID, ExprStr as STR, ExprSize as SIZE, - ExprNumber as NUM, ExprBool as BOOL, - Star as ST, Underscore as U, Backtick as B, Text as T, -}; +use typstc::{function, parse}; + + +mod token_shorthands { + pub use super::Token::{ + Whitespace as W, + LineComment as LC, BlockComment as BC, StarSlash as SS, + LeftBracket as LB, RightBracket as RB, + LeftParen as LP, RightParen as RP, + LeftBrace as LBR, RightBrace as RBR, + Colon as CL, Comma as CM, Equals as EQ, + ExprIdent as ID, ExprStr as STR, ExprSize as SIZE, + ExprNumber as NUM, ExprBool as BOOL, + Star as ST, Underscore as U, Backtick as B, Text as T, + }; +} + +mod node_shorthands { + use super::Node; + pub use Node::{ + Space as S, Newline as N, Text, + ToggleItalic as I, ToggleBolder as B, ToggleMonospace as M, + Func, + }; + pub fn T(text: &str) -> Node { Node::Text(text.to_string()) } +} + +macro_rules! F { + (@body None) => (None); + (@body Some([$($tts:tt)*])) => ({ + let nodes = vec![$($tts)*].into_iter() + .map(|v| Spanned { v, span: Span::ZERO }) + .collect(); + + Some(SyntaxTree { nodes }) + }); + + ($($body:tt)*) => ({ + Func(FuncCall(Box::new(DebugFn { + pos: vec![], + key: vec![], + body: F!(@body $($body)*), + }))) + }); +} + +function! { + #[derive(Debug, PartialEq)] + pub struct DebugFn { + pos: Vec>, + key: Vec, + body: Option, + } + + parse(args, body, ctx) { + DebugFn { + pos: args.iter_pos().collect(), + key: args.iter_keys().collect(), + body: parse!(optional: body, ctx), + } + } + + layout() { vec![] } +} + +impl DebugFn { + fn compare(&self, other: &DebugFn) -> bool { + self.pos.iter().zip(&other.pos).all(|(a, b)| a.v == b.v) + && self.key.iter().zip(&other.key) + .all(|(a, b)| a.key.v == b.key.v && a.value.v == b.value.v) + && match (&self.body, &other.body) { + (Some(a), Some(b)) => compare(a, b), + (None, None) => true, + _ => false, + } + } +} + +fn downcast(func: &FuncCall) -> &DebugFn { + func.0.downcast::().expect("not a debug fn") +} + +fn compare(a: &SyntaxTree, b: &SyntaxTree) -> bool { + for (x, y) in a.nodes.iter().zip(&b.nodes) { + use node_shorthands::*; + let same = match (&x.v, &y.v) { + (S, S) | (N, N) | (I, I) | (B, B) | (M, M) => true, + (Text(t1), Text(t2)) => t1 == t2, + (Func(f1), Func(f2)) => { + downcast(f1).compare(downcast(f2)) + } + _ => false, + }; + + if !same { return false; } + } + true +} /// Parses the test syntax. macro_rules! tokens { - ($($task:ident $src:expr =>($line:expr)=> [$($target:tt)*])*) => ({ + ($($task:ident $src:expr =>($line:expr)=> [$($tts:tt)*])*) => ({ #[allow(unused_mut)] let mut cases = Vec::new(); - $(cases.push(($line, $src, tokens!(@$task [$($target)*])));)* + $(cases.push(($line, $src, tokens!(@$task [$($tts)*])));)* cases }); - (@t $tokens:expr) => ({ - Target::Tokenized($tokens.to_vec()) + (@t [$($tts:tt)*]) => ({ + use token_shorthands::*; + Target::Tokenize(vec![$($tts)*]) }); - (@ts [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $t:expr)),* $(,)?]) => ({ - Target::TokenizedSpanned(vec![ - $(Spanned { v: $t, span: Span { + (@ts [$($tts:tt)*]) => ({ + use token_shorthands::*; + Target::TokenizeSpanned(tokens!(@__spans [$($tts)*])) + }); + + (@p [$($tts:tt)*]) => ({ + use node_shorthands::*; + + let nodes = vec![$($tts)*].into_iter() + .map(|v| Spanned { v, span: Span::ZERO }) + .collect(); + + Target::Parse(SyntaxTree { nodes }) + }); + + (@ps [$($tts:tt)*]) => ({ + use node_shorthands::*; + Target::ParseSpanned(tokens!(@__spans [$($tts)*])) + }); + + (@__spans [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $v:expr)),* $(,)?]) => ({ + vec![ + $(Spanned { v: $v, span: Span { start: Position { line: $sl, column: $sc }, end: Position { line: $el, column: $ec }, }}),* - ]) + ] }); } #[derive(Debug)] enum Target { - Tokenized(Vec>), - TokenizedSpanned(Vec>>), + Tokenize(Vec>), + TokenizeSpanned(Vec>>), + Parse(SyntaxTree), + ParseSpanned(SyntaxTree), } fn main() { @@ -75,6 +187,7 @@ fn main() { println!(" - Source: {:?}", src); println!(" - Expected: {:?}", expected); println!(" - Found: {:?}", found); + println!(); failed += 1; errors = true; @@ -98,14 +211,26 @@ fn main() { fn test_case(src: &str, target: Target) -> (bool, String, String) { match target { - Target::Tokenized(tokens) => { + Target::Tokenize(tokens) => { let found: Vec<_> = tokenize(src).map(Spanned::value).collect(); (found == tokens, format!("{:?}", tokens), format!("{:?}", found)) } - Target::TokenizedSpanned(tokens) => { + Target::TokenizeSpanned(tokens) => { let found: Vec<_> = tokenize(src).collect(); (found == tokens, format!("{:?}", tokens), format!("{:?}", found)) } + + Target::Parse(tree) => { + let scope = Scope::with_debug::(); + let (found, _, errs) = parse(src, ParseContext { scope: &scope }); + (compare(&tree, &found), format!("{:?}", tree), format!("{:?}", found)) + } + + Target::ParseSpanned(tree) => { + let scope = Scope::with_debug::(); + let (found, _, _) = parse(src, ParseContext { scope: &scope }); + (tree == found, format!("{:?}", tree), format!("{:?}", found)) + } } } diff --git a/tests/parsing/tokens.rs b/tests/parsing/tokens.rs index 78d891f97..14f4e521e 100644 --- a/tests/parsing/tokens.rs +++ b/tests/parsing/tokens.rs @@ -46,6 +46,12 @@ t "[func]*bold*" => [LB, ID("func"), RB, ST, T("bold"), ST] t "[_*`]" => [LB, T("_"), T("*"), T("`"), RB] t "hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")] +// Nested functions. +t "[f: [=][*]]" => [LB, ID("f"), CL, W(0), LB, EQ, RB, LB, ST, RB, RB] +t "[_][[,],]," => [LB, T("_"), RB, LB, LB, CM, RB, T(","), RB, T(",")] +t "[=][=][=]" => [LB, EQ, RB, LB, T("="), RB, LB, EQ, RB] +t "[=][[=][=][=]]" => [LB, EQ, RB, LB, LB, EQ, RB, LB, T("="), RB, LB, EQ, RB, RB] + // Escapes. t r"\[" => [T("[")] t r"\]" => [T("]")] @@ -68,7 +74,7 @@ ts "ab\r\nc" => [(0:0, 0:2, T("ab")), (0:2, 1:0, W(1)), (1:0, 1:1, T("c" ts "[a=10]" => [(0:0, 0:1, LB), (0:1, 0:2, ID("a")), (0:2, 0:3, EQ), (0:3, 0:5, NUM(10.0)), (0:5, 0:6, RB)] ts r#"[x = "(1)"]*"# => [(0:0, 0:1, LB), (0:1, 0:2, ID("x")), (0:2, 0:3, W(0)), - (0:3, 0:4, EQ), (0:4, 0:5, W(0)), (0:5, 0:10, STR("(1)")), - (0:10, 0:11, RB), (0:11, 0:12, ST)] + (0:3, 0:4, EQ), (0:4, 0:5, W(0)), (0:5, 0:10, STR("(1)")), + (0:10, 0:11, RB), (0:11, 0:12, ST)] ts "// ab\r\n\nf" => [(0:0, 0:5, LC(" ab")), (0:5, 2:0, W(2)), (2:0, 2:1, T("f"))] ts "/*b*/_" => [(0:0, 0:5, BC("b")), (0:5, 0:6, U)] diff --git a/tests/parsing/trees.rs b/tests/parsing/trees.rs new file mode 100644 index 000000000..78b168286 --- /dev/null +++ b/tests/parsing/trees.rs @@ -0,0 +1,20 @@ +p "" => [] +p "hi" => [T("hi")] +p "hi you" => [T("hi"), S, T("you")] +p "❤\n\n 🌍" => [T("❤"), N, T("🌍")] +p "[func]" => [F!(None)] +p "[tree][hi *you*]" => [F!(Some([T("hi"), S, B, T("you"), B]))] +// p "from [align: left] to" => [ +// T("from"), S, +// F!("align", pos=[ID("left")], None), +// S, T("to"), +// ] +// p "[box: x=1.2pt, false][a b c] bye" => [ +// F!( +// "box", +// pos=[BOOL(false)], +// key=["x": SIZE(Size::pt(1.2))], +// Some([T("a"), S, T("b"), S, T("c")]), +// ), +// S, T("bye"), +// ]