From d514a05af1e7249412b3ecd257cd4673db3cd14b Mon Sep 17 00:00:00 2001 From: Laurenz Date: Mon, 29 Apr 2019 12:43:58 +0200 Subject: [PATCH] =?UTF-8?q?Make=20parse=20tokens=20more=20static=20and=20e?= =?UTF-8?q?fficient=20=F0=9F=97=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/func.rs | 25 +++++---- src/lib.rs | 5 +- src/parsing.rs | 141 +++++++++++++++++++++++++++++++------------------ 3 files changed, 109 insertions(+), 62 deletions(-) diff --git a/src/func.rs b/src/func.rs index d85d7a18d..d90cd85ea 100644 --- a/src/func.rs +++ b/src/func.rs @@ -4,16 +4,12 @@ use std::any::Any; use std::collections::HashMap; use std::fmt::Debug; -use crate::syntax::{Token, FuncHeader, Expression}; -use crate::parsing::ParseResult; +use crate::syntax::{FuncHeader, Expression}; +use crate::parsing::{ParseTokens, ParseResult}; -/// An optional iterator over the tokens of a function body. -pub type BodyTokens<'a> = Option> + 'a>>; - /// Parser functions. -pub type ParseFunc = dyn Fn(&FuncHeader, BodyTokens<'_>, &Scope) - -> ParseResult>; +pub type ParseFunc = dyn Fn(ParseContext) -> ParseResult>; /// Types that act as functions. /// @@ -24,8 +20,7 @@ pub type ParseFunc = dyn Fn(&FuncHeader, BodyTokens<'_>, &Scope) /// used as functions, that is they fulfill the bounds `Debug + PartialEq + 'static`. pub trait Function: FunctionBounds { /// Parse the function. - fn parse(header: &FuncHeader, tokens: BodyTokens<'_>, scope: &Scope) - -> ParseResult where Self: Sized; + fn parse(context: ParseContext) -> ParseResult where Self: Sized; /// Execute the function and optionally yield a return value. fn typeset(&self, header: &FuncHeader) -> Option; @@ -46,7 +41,7 @@ impl Scope { pub fn add(&mut self, name: &str) { self.parsers.insert( name.to_owned(), - Box::new(|header, tokens, scope| match F::parse(header, tokens, scope) { + Box::new(|context| match F::parse(context) { Ok(func) => Ok(Box::new(func)), Err(err) => Err(err), }) @@ -59,6 +54,16 @@ impl Scope { } } +/// The context for parsing a function. +pub struct ParseContext<'s, 't> { + /// The header of the function to be parsed. + pub header: &'s FuncHeader, + /// Tokens if the function has a body, otherwise nothing. + pub tokens: Option<&'s mut ParseTokens<'t>>, + /// The current scope containing function definitions. + pub scope: &'s Scope, +} + /// A helper trait that describes requirements for types that can implement [`Function`]. /// /// Automatically implemented for all types which fulfill to the bounds diff --git a/src/lib.rs b/src/lib.rs index 423aeb8de..22440b26b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,7 +44,7 @@ //! ``` use crate::syntax::SyntaxTree; -use crate::parsing::{Tokens, Parser, ParseError}; +use crate::parsing::{Parser, ParseTokens, ParseError}; use crate::doc::Document; use crate::font::FontProvider; use crate::engine::{Engine, Style, TypesetError}; @@ -107,7 +107,8 @@ impl<'p> Compiler<'p> { /// Parse source code into a syntax tree. #[inline] pub fn parse(&self, src: &str) -> Result { - Parser::new(Tokens::new(src)).parse() + let mut tokens = ParseTokens::new(src); + Parser::new(&mut tokens).parse() } /// Compile a portable typesetted document from source code. diff --git a/src/parsing.rs b/src/parsing.rs index a2a39ee6d..718d895bf 100644 --- a/src/parsing.rs +++ b/src/parsing.rs @@ -7,7 +7,7 @@ use std::mem::swap; use std::ops::Deref; use crate::syntax::*; -use crate::func::Scope; +use crate::func::{ParseContext, Scope}; use crate::utility::{Splinor, Spline, Splined, StrExt}; use unicode_segmentation::{UnicodeSegmentation, UWordBounds}; @@ -209,8 +209,8 @@ impl<'s> Tokens<'s> { } /// Transforms token streams to syntax trees. -pub struct Parser<'s, T> where T: Iterator> { - tokens: Peekable, +pub struct Parser<'s, 't> { + tokens: &'s mut ParseTokens<'t>, scope: ParserScope<'s>, state: ParserState, tree: SyntaxTree, @@ -227,21 +227,21 @@ enum ParserState { WroteNewline, } -impl<'s, T> Parser<'s, T> where T: Iterator> { +impl<'s, 't> Parser<'s, 't> { /// Create a new parser from a type that emits results of tokens. - pub fn new(tokens: T) -> Parser<'s, T> { + pub fn new(tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> { Parser::new_internal(ParserScope::Owned(Scope::new()), tokens) } /// Create a new parser with a scope containing function definitions. - pub fn with_scope(scope: &'s Scope, tokens: T) -> Parser<'s, T> { + pub fn with_scope(scope: &'s Scope, tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> { Parser::new_internal(ParserScope::Shared(scope), tokens) } /// Internal helper for construction. - fn new_internal(scope: ParserScope<'s>, tokens: T) -> Parser<'s, T> { + fn new_internal(scope: ParserScope<'s>, tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> { Parser { - tokens: tokens.peekable(), + tokens, scope, state: ParserState::Body, tree: SyntaxTree::new(), @@ -341,19 +341,33 @@ impl<'s, T> Parser<'s, T> where T: Iterator> { let parser = self.scope.get_parser(&header.name) .ok_or_else(|| ParseError::new(format!("unknown function: '{}'", &header.name)))?; - // Do the parsing dependend on whether the function has a body. + // Do the parsing dependent on whether the function has a body. let body = if has_body { - let mut func_tokens = FuncTokens::new(&mut self.tokens); - let borrowed = Box::new(&mut func_tokens) as Box>>; + self.tokens.start(); - let body = parser(&header, Some(borrowed), &self.scope)?; - if func_tokens.unexpected_end { + println!("starting with: {:?}", self.tokens); + + let body = parser(ParseContext { + header: &header, + tokens: Some(&mut self.tokens), + scope: &self.scope, + })?; + + self.tokens.finish(); + println!("finished with: {:?}", self.tokens); + + // Now the body should be closed. + if self.tokens.next() != Some(Token::RightBracket) { return Err(ParseError::new("expected closing bracket")); } body } else { - parser(&header, None, &self.scope)? + parser(ParseContext { + header: &header, + tokens: None, + scope: &self.scope, + })? }; // Finally this function is parsed to the end. @@ -433,45 +447,71 @@ impl Deref for ParserScope<'_> { } } -/// A token iterator that that stops after the first unbalanced right paren. -pub struct FuncTokens<'s, T> where T: Iterator> { - tokens: T, - parens: u32, - unexpected_end: bool, +/// A token iterator that iterates over exactly one body. +#[derive(Debug)] +pub struct ParseTokens<'s> { + tokens: Peekable>, + parens: Vec, } -impl<'s, T> FuncTokens<'s, T> where T: Iterator> { +impl<'s> ParseTokens<'s> { + /// Create a new iterator over text. + #[inline] + pub fn new(source: &'s str) -> ParseTokens<'s> { + ParseTokens::from_tokens(Tokens::new(source)) + } + /// Create a new iterator operating over an existing one. - pub fn new(tokens: T) -> FuncTokens<'s, T> { - FuncTokens { - tokens, - parens: 0, - unexpected_end: false, + #[inline] + pub fn from_tokens(tokens: Tokens<'s>) -> ParseTokens<'s> { + ParseTokens { + tokens: tokens.peekable(), + parens: vec![], } } + + /// Peek at the next token. + #[inline] + pub fn peek(&mut self) -> Option<&Token<'s>> { + let token = self.tokens.peek(); + if token == Some(&Token::RightBracket) && self.parens.last() == Some(&0) { + return None; + } + token + } + + /// Start a new substream of tokens. + fn start(&mut self) { + self.parens.push(0); + } + + /// Finish a substream of tokens. + fn finish(&mut self) { + self.parens.pop().unwrap(); + } } -impl<'s, T> Iterator for FuncTokens<'s, T> where T: Iterator> { +impl<'s> Iterator for ParseTokens<'s> { type Item = Token<'s>; fn next(&mut self) -> Option> { let token = self.tokens.next(); match token { - Some(Token::RightBracket) if self.parens == 0 => None, Some(Token::RightBracket) => { - self.parens -= 1; - token + match self.parens.last_mut() { + Some(&mut 0) => return None, + Some(top) => *top -= 1, + None => {} + } }, Some(Token::LeftBracket) => { - self.parens += 1; - token + if let Some(top) = self.parens.last_mut() { + *top += 1; + } } - None => { - self.unexpected_end = true; - None - } - token => token, - } + _ => {} + }; + token } } @@ -610,7 +650,7 @@ mod token_tests { #[cfg(test)] mod parse_tests { use super::*; - use crate::func::{Function, Scope, BodyTokens}; + use crate::func::{Function, Scope}; use Node::{Space as S, Newline as N, Func as F}; #[allow(non_snake_case)] @@ -621,10 +661,9 @@ mod parse_tests { struct TreeFn(SyntaxTree); impl Function for TreeFn { - fn parse(_: &FuncHeader, tokens: BodyTokens<'_>, scope: &Scope) - -> ParseResult where Self: Sized { - if let Some(tokens) = tokens { - Parser::with_scope(scope, tokens).parse().map(|tree| TreeFn(tree)) + fn parse(context: ParseContext) -> ParseResult where Self: Sized { + if let Some(tokens) = context.tokens { + Parser::with_scope(context.scope, tokens).parse().map(|tree| TreeFn(tree)) } else { Err(ParseError::new("expected body for tree fn")) } @@ -637,9 +676,8 @@ mod parse_tests { struct BodylessFn; impl Function for BodylessFn { - fn parse(_: &FuncHeader, tokens: BodyTokens<'_>, _: &Scope) - -> ParseResult where Self: Sized { - if tokens.is_none() { + fn parse(context: ParseContext) -> ParseResult where Self: Sized { + if context.tokens.is_none() { Ok(BodylessFn) } else { Err(ParseError::new("unexpected body for bodyless fn")) @@ -679,22 +717,26 @@ mod parse_tests { /// Test if the source code parses into the syntax tree. fn test(src: &str, tree: SyntaxTree) { - assert_eq!(Parser::new(Tokens::new(src)).parse().unwrap(), tree); + let mut tokens = ParseTokens::new(src); + assert_eq!(Parser::new(&mut tokens).parse().unwrap(), tree); } /// Test with a scope containing function definitions. fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) { - assert_eq!(Parser::with_scope(scope, Tokens::new(src)).parse().unwrap(), tree); + let mut tokens = ParseTokens::new(src); + assert_eq!(Parser::with_scope(scope, &mut tokens).parse().unwrap(), tree); } /// Test if the source parses into the error. fn test_err(src: &str, err: &str) { - assert_eq!(Parser::new(Tokens::new(src)).parse().unwrap_err().message, err); + let mut tokens = ParseTokens::new(src); + assert_eq!(Parser::new(&mut tokens).parse().unwrap_err().message, err); } /// Test with a scope if the source parses into the error. fn test_err_scoped(scope: &Scope, src: &str, err: &str) { - assert_eq!(Parser::with_scope(scope, Tokens::new(src)).parse().unwrap_err().message, err); + let mut tokens = ParseTokens::new(src); + assert_eq!(Parser::with_scope(scope, &mut tokens).parse().unwrap_err().message, err); } /// Parse the basic cases. @@ -771,7 +813,6 @@ mod parse_tests { let mut scope = Scope::new(); scope.add::("func"); scope.add::("bold"); - test_scoped(&scope, "[func] ⺐.", tree! [ F(func! { name => "func",