Make parse tokens more static and efficient 🗜

This commit is contained in:
Laurenz 2019-04-29 12:43:58 +02:00
parent 383d8365cf
commit d514a05af1
3 changed files with 109 additions and 62 deletions

View File

@ -4,16 +4,12 @@ use std::any::Any;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::Debug; use std::fmt::Debug;
use crate::syntax::{Token, FuncHeader, Expression}; use crate::syntax::{FuncHeader, Expression};
use crate::parsing::ParseResult; use crate::parsing::{ParseTokens, ParseResult};
/// An optional iterator over the tokens of a function body.
pub type BodyTokens<'a> = Option<Box<dyn Iterator<Item=Token<'a>> + 'a>>;
/// Parser functions. /// Parser functions.
pub type ParseFunc = dyn Fn(&FuncHeader, BodyTokens<'_>, &Scope) pub type ParseFunc = dyn Fn(ParseContext) -> ParseResult<Box<dyn Function>>;
-> ParseResult<Box<dyn Function>>;
/// Types that act as functions. /// Types that act as functions.
/// ///
@ -24,8 +20,7 @@ pub type ParseFunc = dyn Fn(&FuncHeader, BodyTokens<'_>, &Scope)
/// used as functions, that is they fulfill the bounds `Debug + PartialEq + 'static`. /// used as functions, that is they fulfill the bounds `Debug + PartialEq + 'static`.
pub trait Function: FunctionBounds { pub trait Function: FunctionBounds {
/// Parse the function. /// Parse the function.
fn parse(header: &FuncHeader, tokens: BodyTokens<'_>, scope: &Scope) fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized;
-> ParseResult<Self> where Self: Sized;
/// Execute the function and optionally yield a return value. /// Execute the function and optionally yield a return value.
fn typeset(&self, header: &FuncHeader) -> Option<Expression>; fn typeset(&self, header: &FuncHeader) -> Option<Expression>;
@ -46,7 +41,7 @@ impl Scope {
pub fn add<F: Function + 'static>(&mut self, name: &str) { pub fn add<F: Function + 'static>(&mut self, name: &str) {
self.parsers.insert( self.parsers.insert(
name.to_owned(), name.to_owned(),
Box::new(|header, tokens, scope| match F::parse(header, tokens, scope) { Box::new(|context| match F::parse(context) {
Ok(func) => Ok(Box::new(func)), Ok(func) => Ok(Box::new(func)),
Err(err) => Err(err), Err(err) => Err(err),
}) })
@ -59,6 +54,16 @@ impl Scope {
} }
} }
/// The context for parsing a function.
///
/// Bundles everything that is handed to a function parser into a single
/// value. `'s` is the lifetime of the borrows held by the context and `'t`
/// is the lifetime parameter of the borrowed [`ParseTokens`].
pub struct ParseContext<'s, 't> {
/// The header of the function to be parsed.
pub header: &'s FuncHeader,
/// Tokens if the function has a body, otherwise nothing.
///
/// This is a mutable borrow of the token stream, not a copy of it, so
/// tokens read through it are consumed from the borrowed stream.
pub tokens: Option<&'s mut ParseTokens<'t>>,
/// The current scope containing function definitions.
pub scope: &'s Scope,
}
/// A helper trait that describes requirements for types that can implement [`Function`]. /// A helper trait that describes requirements for types that can implement [`Function`].
/// ///
/// Automatically implemented for all types which fulfill the bounds /// Automatically implemented for all types which fulfill the bounds

View File

@ -44,7 +44,7 @@
//! ``` //! ```
use crate::syntax::SyntaxTree; use crate::syntax::SyntaxTree;
use crate::parsing::{Tokens, Parser, ParseError}; use crate::parsing::{Parser, ParseTokens, ParseError};
use crate::doc::Document; use crate::doc::Document;
use crate::font::FontProvider; use crate::font::FontProvider;
use crate::engine::{Engine, Style, TypesetError}; use crate::engine::{Engine, Style, TypesetError};
@ -107,7 +107,8 @@ impl<'p> Compiler<'p> {
/// Parse source code into a syntax tree. /// Parse source code into a syntax tree.
#[inline] #[inline]
pub fn parse(&self, src: &str) -> Result<SyntaxTree, ParseError> { pub fn parse(&self, src: &str) -> Result<SyntaxTree, ParseError> {
Parser::new(Tokens::new(src)).parse() let mut tokens = ParseTokens::new(src);
Parser::new(&mut tokens).parse()
} }
/// Compile a portable typesetted document from source code. /// Compile a portable typesetted document from source code.

View File

@ -7,7 +7,7 @@ use std::mem::swap;
use std::ops::Deref; use std::ops::Deref;
use crate::syntax::*; use crate::syntax::*;
use crate::func::Scope; use crate::func::{ParseContext, Scope};
use crate::utility::{Splinor, Spline, Splined, StrExt}; use crate::utility::{Splinor, Spline, Splined, StrExt};
use unicode_segmentation::{UnicodeSegmentation, UWordBounds}; use unicode_segmentation::{UnicodeSegmentation, UWordBounds};
@ -209,8 +209,8 @@ impl<'s> Tokens<'s> {
} }
/// Transforms token streams to syntax trees. /// Transforms token streams to syntax trees.
pub struct Parser<'s, T> where T: Iterator<Item=Token<'s>> { pub struct Parser<'s, 't> {
tokens: Peekable<T>, tokens: &'s mut ParseTokens<'t>,
scope: ParserScope<'s>, scope: ParserScope<'s>,
state: ParserState, state: ParserState,
tree: SyntaxTree, tree: SyntaxTree,
@ -227,21 +227,21 @@ enum ParserState {
WroteNewline, WroteNewline,
} }
impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> { impl<'s, 't> Parser<'s, 't> {
/// Create a new parser from a type that emits results of tokens. /// Create a new parser from a type that emits results of tokens.
pub fn new(tokens: T) -> Parser<'s, T> { pub fn new(tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> {
Parser::new_internal(ParserScope::Owned(Scope::new()), tokens) Parser::new_internal(ParserScope::Owned(Scope::new()), tokens)
} }
/// Create a new parser with a scope containing function definitions. /// Create a new parser with a scope containing function definitions.
pub fn with_scope(scope: &'s Scope, tokens: T) -> Parser<'s, T> { pub fn with_scope(scope: &'s Scope, tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> {
Parser::new_internal(ParserScope::Shared(scope), tokens) Parser::new_internal(ParserScope::Shared(scope), tokens)
} }
/// Internal helper for construction. /// Internal helper for construction.
fn new_internal(scope: ParserScope<'s>, tokens: T) -> Parser<'s, T> { fn new_internal(scope: ParserScope<'s>, tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> {
Parser { Parser {
tokens: tokens.peekable(), tokens,
scope, scope,
state: ParserState::Body, state: ParserState::Body,
tree: SyntaxTree::new(), tree: SyntaxTree::new(),
@ -341,19 +341,33 @@ impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> {
let parser = self.scope.get_parser(&header.name) let parser = self.scope.get_parser(&header.name)
.ok_or_else(|| ParseError::new(format!("unknown function: '{}'", &header.name)))?; .ok_or_else(|| ParseError::new(format!("unknown function: '{}'", &header.name)))?;
// Do the parsing dependend on whether the function has a body. // Do the parsing dependent on whether the function has a body.
let body = if has_body { let body = if has_body {
let mut func_tokens = FuncTokens::new(&mut self.tokens); self.tokens.start();
let borrowed = Box::new(&mut func_tokens) as Box<dyn Iterator<Item=Token<'_>>>;
let body = parser(&header, Some(borrowed), &self.scope)?; println!("starting with: {:?}", self.tokens);
if func_tokens.unexpected_end {
let body = parser(ParseContext {
header: &header,
tokens: Some(&mut self.tokens),
scope: &self.scope,
})?;
self.tokens.finish();
println!("finished with: {:?}", self.tokens);
// Now the body should be closed.
if self.tokens.next() != Some(Token::RightBracket) {
return Err(ParseError::new("expected closing bracket")); return Err(ParseError::new("expected closing bracket"));
} }
body body
} else { } else {
parser(&header, None, &self.scope)? parser(ParseContext {
header: &header,
tokens: None,
scope: &self.scope,
})?
}; };
// Finally this function is parsed to the end. // Finally this function is parsed to the end.
@ -433,45 +447,71 @@ impl Deref for ParserScope<'_> {
} }
} }
/// A token iterator that that stops after the first unbalanced right paren. /// A token iterator that iterates over exactly one body.
pub struct FuncTokens<'s, T> where T: Iterator<Item=Token<'s>> { #[derive(Debug)]
tokens: T, pub struct ParseTokens<'s> {
parens: u32, tokens: Peekable<Tokens<'s>>,
unexpected_end: bool, parens: Vec<u32>,
} }
impl<'s, T> FuncTokens<'s, T> where T: Iterator<Item=Token<'s>> { impl<'s> ParseTokens<'s> {
/// Create a new iterator over text.
#[inline]
pub fn new(source: &'s str) -> ParseTokens<'s> {
ParseTokens::from_tokens(Tokens::new(source))
}
/// Create a new iterator operating over an existing one. /// Create a new iterator operating over an existing one.
pub fn new(tokens: T) -> FuncTokens<'s, T> { #[inline]
FuncTokens { pub fn from_tokens(tokens: Tokens<'s>) -> ParseTokens<'s> {
tokens, ParseTokens {
parens: 0, tokens: tokens.peekable(),
unexpected_end: false, parens: vec![],
} }
} }
/// Look at the upcoming token without consuming it.
///
/// Yields `None` when the upcoming token is a right bracket while the
/// innermost bracket counter in `parens` is zero, so that such a bracket is
/// reported as the end of the current substream. In every other case the
/// result of peeking the underlying token stream is passed through.
#[inline]
pub fn peek(&mut self) -> Option<&Token<'s>> {
    // Read the counter up front; it is a plain `bool` afterwards and does
    // not overlap with the borrow handed out by the underlying `peek`.
    let innermost_balanced = self.parens.last() == Some(&0);
    match self.tokens.peek() {
        Some(&Token::RightBracket) if innermost_balanced => None,
        upcoming => upcoming,
    }
}
/// Open a new substream of tokens.
///
/// Pushes a fresh bracket counter, starting at zero, onto `parens`; the
/// counter on top of that stack is the one consulted by `peek`.
fn start(&mut self) {
    let fresh_counter = 0;
    self.parens.push(fresh_counter);
}
/// Finish the innermost substream of tokens by discarding its bracket counter.
///
/// # Panics
///
/// Panics if no substream is open, i.e. this call was not preceded by a
/// matching call to `start`.
fn finish(&mut self) {
    // An empty stack here is a bug in the caller, so state the broken
    // invariant in the panic message instead of a bare `unwrap`.
    self.parens
        .pop()
        .expect("finish() called without a matching start()");
}
} }
impl<'s, T> Iterator for FuncTokens<'s, T> where T: Iterator<Item=Token<'s>> { impl<'s> Iterator for ParseTokens<'s> {
type Item = Token<'s>; type Item = Token<'s>;
fn next(&mut self) -> Option<Token<'s>> { fn next(&mut self) -> Option<Token<'s>> {
let token = self.tokens.next(); let token = self.tokens.next();
match token { match token {
Some(Token::RightBracket) if self.parens == 0 => None,
Some(Token::RightBracket) => { Some(Token::RightBracket) => {
self.parens -= 1; match self.parens.last_mut() {
token Some(&mut 0) => return None,
Some(top) => *top -= 1,
None => {}
}
}, },
Some(Token::LeftBracket) => { Some(Token::LeftBracket) => {
self.parens += 1; if let Some(top) = self.parens.last_mut() {
token *top += 1;
}
} }
None => { _ => {}
self.unexpected_end = true; };
None token
}
token => token,
}
} }
} }
@ -610,7 +650,7 @@ mod token_tests {
#[cfg(test)] #[cfg(test)]
mod parse_tests { mod parse_tests {
use super::*; use super::*;
use crate::func::{Function, Scope, BodyTokens}; use crate::func::{Function, Scope};
use Node::{Space as S, Newline as N, Func as F}; use Node::{Space as S, Newline as N, Func as F};
#[allow(non_snake_case)] #[allow(non_snake_case)]
@ -621,10 +661,9 @@ mod parse_tests {
struct TreeFn(SyntaxTree); struct TreeFn(SyntaxTree);
impl Function for TreeFn { impl Function for TreeFn {
fn parse(_: &FuncHeader, tokens: BodyTokens<'_>, scope: &Scope) fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
-> ParseResult<Self> where Self: Sized { if let Some(tokens) = context.tokens {
if let Some(tokens) = tokens { Parser::with_scope(context.scope, tokens).parse().map(|tree| TreeFn(tree))
Parser::with_scope(scope, tokens).parse().map(|tree| TreeFn(tree))
} else { } else {
Err(ParseError::new("expected body for tree fn")) Err(ParseError::new("expected body for tree fn"))
} }
@ -637,9 +676,8 @@ mod parse_tests {
struct BodylessFn; struct BodylessFn;
impl Function for BodylessFn { impl Function for BodylessFn {
fn parse(_: &FuncHeader, tokens: BodyTokens<'_>, _: &Scope) fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
-> ParseResult<Self> where Self: Sized { if context.tokens.is_none() {
if tokens.is_none() {
Ok(BodylessFn) Ok(BodylessFn)
} else { } else {
Err(ParseError::new("unexpected body for bodyless fn")) Err(ParseError::new("unexpected body for bodyless fn"))
@ -679,22 +717,26 @@ mod parse_tests {
/// Test if the source code parses into the syntax tree. /// Test if the source code parses into the syntax tree.
fn test(src: &str, tree: SyntaxTree) { fn test(src: &str, tree: SyntaxTree) {
assert_eq!(Parser::new(Tokens::new(src)).parse().unwrap(), tree); let mut tokens = ParseTokens::new(src);
assert_eq!(Parser::new(&mut tokens).parse().unwrap(), tree);
} }
/// Test with a scope containing function definitions. /// Test with a scope containing function definitions.
fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) { fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) {
assert_eq!(Parser::with_scope(scope, Tokens::new(src)).parse().unwrap(), tree); let mut tokens = ParseTokens::new(src);
assert_eq!(Parser::with_scope(scope, &mut tokens).parse().unwrap(), tree);
} }
/// Test if the source parses into the error. /// Test if the source parses into the error.
fn test_err(src: &str, err: &str) { fn test_err(src: &str, err: &str) {
assert_eq!(Parser::new(Tokens::new(src)).parse().unwrap_err().message, err); let mut tokens = ParseTokens::new(src);
assert_eq!(Parser::new(&mut tokens).parse().unwrap_err().message, err);
} }
/// Test with a scope if the source parses into the error. /// Test with a scope if the source parses into the error.
fn test_err_scoped(scope: &Scope, src: &str, err: &str) { fn test_err_scoped(scope: &Scope, src: &str, err: &str) {
assert_eq!(Parser::with_scope(scope, Tokens::new(src)).parse().unwrap_err().message, err); let mut tokens = ParseTokens::new(src);
assert_eq!(Parser::with_scope(scope, &mut tokens).parse().unwrap_err().message, err);
} }
/// Parse the basic cases. /// Parse the basic cases.
@ -771,7 +813,6 @@ mod parse_tests {
let mut scope = Scope::new(); let mut scope = Scope::new();
scope.add::<BodylessFn>("func"); scope.add::<BodylessFn>("func");
scope.add::<TreeFn>("bold"); scope.add::<TreeFn>("bold");
test_scoped(&scope, "[func] ⺐.", tree! [ test_scoped(&scope, "[func] ⺐.", tree! [
F(func! { F(func! {
name => "func", name => "func",