mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Make parse tokens more static and efficient 🗜
This commit is contained in:
parent
383d8365cf
commit
d514a05af1
25
src/func.rs
25
src/func.rs
@ -4,16 +4,12 @@ use std::any::Any;
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
|
|
||||||
use crate::syntax::{Token, FuncHeader, Expression};
|
use crate::syntax::{FuncHeader, Expression};
|
||||||
use crate::parsing::ParseResult;
|
use crate::parsing::{ParseTokens, ParseResult};
|
||||||
|
|
||||||
|
|
||||||
/// An optional iterator over the tokens of a function body.
|
|
||||||
pub type BodyTokens<'a> = Option<Box<dyn Iterator<Item=Token<'a>> + 'a>>;
|
|
||||||
|
|
||||||
/// Parser functions.
|
/// Parser functions.
|
||||||
pub type ParseFunc = dyn Fn(&FuncHeader, BodyTokens<'_>, &Scope)
|
pub type ParseFunc = dyn Fn(ParseContext) -> ParseResult<Box<dyn Function>>;
|
||||||
-> ParseResult<Box<dyn Function>>;
|
|
||||||
|
|
||||||
/// Types that act as functions.
|
/// Types that act as functions.
|
||||||
///
|
///
|
||||||
@ -24,8 +20,7 @@ pub type ParseFunc = dyn Fn(&FuncHeader, BodyTokens<'_>, &Scope)
|
|||||||
/// used as functions, that is they fulfill the bounds `Debug + PartialEq + 'static`.
|
/// used as functions, that is they fulfill the bounds `Debug + PartialEq + 'static`.
|
||||||
pub trait Function: FunctionBounds {
|
pub trait Function: FunctionBounds {
|
||||||
/// Parse the function.
|
/// Parse the function.
|
||||||
fn parse(header: &FuncHeader, tokens: BodyTokens<'_>, scope: &Scope)
|
fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized;
|
||||||
-> ParseResult<Self> where Self: Sized;
|
|
||||||
|
|
||||||
/// Execute the function and optionally yield a return value.
|
/// Execute the function and optionally yield a return value.
|
||||||
fn typeset(&self, header: &FuncHeader) -> Option<Expression>;
|
fn typeset(&self, header: &FuncHeader) -> Option<Expression>;
|
||||||
@ -46,7 +41,7 @@ impl Scope {
|
|||||||
pub fn add<F: Function + 'static>(&mut self, name: &str) {
|
pub fn add<F: Function + 'static>(&mut self, name: &str) {
|
||||||
self.parsers.insert(
|
self.parsers.insert(
|
||||||
name.to_owned(),
|
name.to_owned(),
|
||||||
Box::new(|header, tokens, scope| match F::parse(header, tokens, scope) {
|
Box::new(|context| match F::parse(context) {
|
||||||
Ok(func) => Ok(Box::new(func)),
|
Ok(func) => Ok(Box::new(func)),
|
||||||
Err(err) => Err(err),
|
Err(err) => Err(err),
|
||||||
})
|
})
|
||||||
@ -59,6 +54,16 @@ impl Scope {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The context for parsing a function.
|
||||||
|
pub struct ParseContext<'s, 't> {
|
||||||
|
/// The header of the function to be parsed.
|
||||||
|
pub header: &'s FuncHeader,
|
||||||
|
/// Tokens if the function has a body, otherwise nothing.
|
||||||
|
pub tokens: Option<&'s mut ParseTokens<'t>>,
|
||||||
|
/// The current scope containing function definitions.
|
||||||
|
pub scope: &'s Scope,
|
||||||
|
}
|
||||||
|
|
||||||
/// A helper trait that describes requirements for types that can implement [`Function`].
|
/// A helper trait that describes requirements for types that can implement [`Function`].
|
||||||
///
|
///
|
||||||
/// Automatically implemented for all types which fulfill the bounds
|
/// Automatically implemented for all types which fulfill the bounds
|
||||||
|
@ -44,7 +44,7 @@
|
|||||||
//! ```
|
//! ```
|
||||||
|
|
||||||
use crate::syntax::SyntaxTree;
|
use crate::syntax::SyntaxTree;
|
||||||
use crate::parsing::{Tokens, Parser, ParseError};
|
use crate::parsing::{Parser, ParseTokens, ParseError};
|
||||||
use crate::doc::Document;
|
use crate::doc::Document;
|
||||||
use crate::font::FontProvider;
|
use crate::font::FontProvider;
|
||||||
use crate::engine::{Engine, Style, TypesetError};
|
use crate::engine::{Engine, Style, TypesetError};
|
||||||
@ -107,7 +107,8 @@ impl<'p> Compiler<'p> {
|
|||||||
/// Parse source code into a syntax tree.
|
/// Parse source code into a syntax tree.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn parse(&self, src: &str) -> Result<SyntaxTree, ParseError> {
|
pub fn parse(&self, src: &str) -> Result<SyntaxTree, ParseError> {
|
||||||
Parser::new(Tokens::new(src)).parse()
|
let mut tokens = ParseTokens::new(src);
|
||||||
|
Parser::new(&mut tokens).parse()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compile a portable typeset document from source code.
|
/// Compile a portable typeset document from source code.
|
||||||
|
141
src/parsing.rs
141
src/parsing.rs
@ -7,7 +7,7 @@ use std::mem::swap;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use crate::syntax::*;
|
use crate::syntax::*;
|
||||||
use crate::func::Scope;
|
use crate::func::{ParseContext, Scope};
|
||||||
use crate::utility::{Splinor, Spline, Splined, StrExt};
|
use crate::utility::{Splinor, Spline, Splined, StrExt};
|
||||||
|
|
||||||
use unicode_segmentation::{UnicodeSegmentation, UWordBounds};
|
use unicode_segmentation::{UnicodeSegmentation, UWordBounds};
|
||||||
@ -209,8 +209,8 @@ impl<'s> Tokens<'s> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Transforms token streams to syntax trees.
|
/// Transforms token streams to syntax trees.
|
||||||
pub struct Parser<'s, T> where T: Iterator<Item=Token<'s>> {
|
pub struct Parser<'s, 't> {
|
||||||
tokens: Peekable<T>,
|
tokens: &'s mut ParseTokens<'t>,
|
||||||
scope: ParserScope<'s>,
|
scope: ParserScope<'s>,
|
||||||
state: ParserState,
|
state: ParserState,
|
||||||
tree: SyntaxTree,
|
tree: SyntaxTree,
|
||||||
@ -227,21 +227,21 @@ enum ParserState {
|
|||||||
WroteNewline,
|
WroteNewline,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> {
|
impl<'s, 't> Parser<'s, 't> {
|
||||||
/// Create a new parser from a type that emits results of tokens.
|
/// Create a new parser from a type that emits results of tokens.
|
||||||
pub fn new(tokens: T) -> Parser<'s, T> {
|
pub fn new(tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> {
|
||||||
Parser::new_internal(ParserScope::Owned(Scope::new()), tokens)
|
Parser::new_internal(ParserScope::Owned(Scope::new()), tokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a new parser with a scope containing function definitions.
|
/// Create a new parser with a scope containing function definitions.
|
||||||
pub fn with_scope(scope: &'s Scope, tokens: T) -> Parser<'s, T> {
|
pub fn with_scope(scope: &'s Scope, tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> {
|
||||||
Parser::new_internal(ParserScope::Shared(scope), tokens)
|
Parser::new_internal(ParserScope::Shared(scope), tokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Internal helper for construction.
|
/// Internal helper for construction.
|
||||||
fn new_internal(scope: ParserScope<'s>, tokens: T) -> Parser<'s, T> {
|
fn new_internal(scope: ParserScope<'s>, tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> {
|
||||||
Parser {
|
Parser {
|
||||||
tokens: tokens.peekable(),
|
tokens,
|
||||||
scope,
|
scope,
|
||||||
state: ParserState::Body,
|
state: ParserState::Body,
|
||||||
tree: SyntaxTree::new(),
|
tree: SyntaxTree::new(),
|
||||||
@ -341,19 +341,33 @@ impl<'s, T> Parser<'s, T> where T: Iterator<Item=Token<'s>> {
|
|||||||
let parser = self.scope.get_parser(&header.name)
|
let parser = self.scope.get_parser(&header.name)
|
||||||
.ok_or_else(|| ParseError::new(format!("unknown function: '{}'", &header.name)))?;
|
.ok_or_else(|| ParseError::new(format!("unknown function: '{}'", &header.name)))?;
|
||||||
|
|
||||||
// Do the parsing dependend on whether the function has a body.
|
// Do the parsing dependent on whether the function has a body.
|
||||||
let body = if has_body {
|
let body = if has_body {
|
||||||
let mut func_tokens = FuncTokens::new(&mut self.tokens);
|
self.tokens.start();
|
||||||
let borrowed = Box::new(&mut func_tokens) as Box<dyn Iterator<Item=Token<'_>>>;
|
|
||||||
|
|
||||||
let body = parser(&header, Some(borrowed), &self.scope)?;
|
println!("starting with: {:?}", self.tokens);
|
||||||
if func_tokens.unexpected_end {
|
|
||||||
|
let body = parser(ParseContext {
|
||||||
|
header: &header,
|
||||||
|
tokens: Some(&mut self.tokens),
|
||||||
|
scope: &self.scope,
|
||||||
|
})?;
|
||||||
|
|
||||||
|
self.tokens.finish();
|
||||||
|
println!("finished with: {:?}", self.tokens);
|
||||||
|
|
||||||
|
// Now the body should be closed.
|
||||||
|
if self.tokens.next() != Some(Token::RightBracket) {
|
||||||
return Err(ParseError::new("expected closing bracket"));
|
return Err(ParseError::new("expected closing bracket"));
|
||||||
}
|
}
|
||||||
|
|
||||||
body
|
body
|
||||||
} else {
|
} else {
|
||||||
parser(&header, None, &self.scope)?
|
parser(ParseContext {
|
||||||
|
header: &header,
|
||||||
|
tokens: None,
|
||||||
|
scope: &self.scope,
|
||||||
|
})?
|
||||||
};
|
};
|
||||||
|
|
||||||
// Finally this function is parsed to the end.
|
// Finally this function is parsed to the end.
|
||||||
@ -433,45 +447,71 @@ impl Deref for ParserScope<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A token iterator that stops after the first unbalanced right paren.
|
/// A token iterator that iterates over exactly one body.
|
||||||
pub struct FuncTokens<'s, T> where T: Iterator<Item=Token<'s>> {
|
#[derive(Debug)]
|
||||||
tokens: T,
|
pub struct ParseTokens<'s> {
|
||||||
parens: u32,
|
tokens: Peekable<Tokens<'s>>,
|
||||||
unexpected_end: bool,
|
parens: Vec<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s, T> FuncTokens<'s, T> where T: Iterator<Item=Token<'s>> {
|
impl<'s> ParseTokens<'s> {
|
||||||
|
/// Create a new iterator over text.
|
||||||
|
#[inline]
|
||||||
|
pub fn new(source: &'s str) -> ParseTokens<'s> {
|
||||||
|
ParseTokens::from_tokens(Tokens::new(source))
|
||||||
|
}
|
||||||
|
|
||||||
/// Create a new iterator operating over an existing one.
|
/// Create a new iterator operating over an existing one.
|
||||||
pub fn new(tokens: T) -> FuncTokens<'s, T> {
|
#[inline]
|
||||||
FuncTokens {
|
pub fn from_tokens(tokens: Tokens<'s>) -> ParseTokens<'s> {
|
||||||
tokens,
|
ParseTokens {
|
||||||
parens: 0,
|
tokens: tokens.peekable(),
|
||||||
unexpected_end: false,
|
parens: vec![],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Peek at the next token.
|
||||||
|
#[inline]
|
||||||
|
pub fn peek(&mut self) -> Option<&Token<'s>> {
|
||||||
|
let token = self.tokens.peek();
|
||||||
|
if token == Some(&Token::RightBracket) && self.parens.last() == Some(&0) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
token
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Start a new substream of tokens.
|
||||||
|
fn start(&mut self) {
|
||||||
|
self.parens.push(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finish a substream of tokens.
|
||||||
|
fn finish(&mut self) {
|
||||||
|
self.parens.pop().unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s, T> Iterator for FuncTokens<'s, T> where T: Iterator<Item=Token<'s>> {
|
impl<'s> Iterator for ParseTokens<'s> {
|
||||||
type Item = Token<'s>;
|
type Item = Token<'s>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Token<'s>> {
|
fn next(&mut self) -> Option<Token<'s>> {
|
||||||
let token = self.tokens.next();
|
let token = self.tokens.next();
|
||||||
match token {
|
match token {
|
||||||
Some(Token::RightBracket) if self.parens == 0 => None,
|
|
||||||
Some(Token::RightBracket) => {
|
Some(Token::RightBracket) => {
|
||||||
self.parens -= 1;
|
match self.parens.last_mut() {
|
||||||
token
|
Some(&mut 0) => return None,
|
||||||
|
Some(top) => *top -= 1,
|
||||||
|
None => {}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
Some(Token::LeftBracket) => {
|
Some(Token::LeftBracket) => {
|
||||||
self.parens += 1;
|
if let Some(top) = self.parens.last_mut() {
|
||||||
token
|
*top += 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
None => {
|
_ => {}
|
||||||
self.unexpected_end = true;
|
};
|
||||||
None
|
token
|
||||||
}
|
|
||||||
token => token,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -610,7 +650,7 @@ mod token_tests {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod parse_tests {
|
mod parse_tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::func::{Function, Scope, BodyTokens};
|
use crate::func::{Function, Scope};
|
||||||
use Node::{Space as S, Newline as N, Func as F};
|
use Node::{Space as S, Newline as N, Func as F};
|
||||||
|
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
@ -621,10 +661,9 @@ mod parse_tests {
|
|||||||
struct TreeFn(SyntaxTree);
|
struct TreeFn(SyntaxTree);
|
||||||
|
|
||||||
impl Function for TreeFn {
|
impl Function for TreeFn {
|
||||||
fn parse(_: &FuncHeader, tokens: BodyTokens<'_>, scope: &Scope)
|
fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
|
||||||
-> ParseResult<Self> where Self: Sized {
|
if let Some(tokens) = context.tokens {
|
||||||
if let Some(tokens) = tokens {
|
Parser::with_scope(context.scope, tokens).parse().map(|tree| TreeFn(tree))
|
||||||
Parser::with_scope(scope, tokens).parse().map(|tree| TreeFn(tree))
|
|
||||||
} else {
|
} else {
|
||||||
Err(ParseError::new("expected body for tree fn"))
|
Err(ParseError::new("expected body for tree fn"))
|
||||||
}
|
}
|
||||||
@ -637,9 +676,8 @@ mod parse_tests {
|
|||||||
struct BodylessFn;
|
struct BodylessFn;
|
||||||
|
|
||||||
impl Function for BodylessFn {
|
impl Function for BodylessFn {
|
||||||
fn parse(_: &FuncHeader, tokens: BodyTokens<'_>, _: &Scope)
|
fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
|
||||||
-> ParseResult<Self> where Self: Sized {
|
if context.tokens.is_none() {
|
||||||
if tokens.is_none() {
|
|
||||||
Ok(BodylessFn)
|
Ok(BodylessFn)
|
||||||
} else {
|
} else {
|
||||||
Err(ParseError::new("unexpected body for bodyless fn"))
|
Err(ParseError::new("unexpected body for bodyless fn"))
|
||||||
@ -679,22 +717,26 @@ mod parse_tests {
|
|||||||
|
|
||||||
/// Test if the source code parses into the syntax tree.
|
/// Test if the source code parses into the syntax tree.
|
||||||
fn test(src: &str, tree: SyntaxTree) {
|
fn test(src: &str, tree: SyntaxTree) {
|
||||||
assert_eq!(Parser::new(Tokens::new(src)).parse().unwrap(), tree);
|
let mut tokens = ParseTokens::new(src);
|
||||||
|
assert_eq!(Parser::new(&mut tokens).parse().unwrap(), tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test with a scope containing function definitions.
|
/// Test with a scope containing function definitions.
|
||||||
fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) {
|
fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) {
|
||||||
assert_eq!(Parser::with_scope(scope, Tokens::new(src)).parse().unwrap(), tree);
|
let mut tokens = ParseTokens::new(src);
|
||||||
|
assert_eq!(Parser::with_scope(scope, &mut tokens).parse().unwrap(), tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test if the source parses into the error.
|
/// Test if the source parses into the error.
|
||||||
fn test_err(src: &str, err: &str) {
|
fn test_err(src: &str, err: &str) {
|
||||||
assert_eq!(Parser::new(Tokens::new(src)).parse().unwrap_err().message, err);
|
let mut tokens = ParseTokens::new(src);
|
||||||
|
assert_eq!(Parser::new(&mut tokens).parse().unwrap_err().message, err);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test with a scope if the source parses into the error.
|
/// Test with a scope if the source parses into the error.
|
||||||
fn test_err_scoped(scope: &Scope, src: &str, err: &str) {
|
fn test_err_scoped(scope: &Scope, src: &str, err: &str) {
|
||||||
assert_eq!(Parser::with_scope(scope, Tokens::new(src)).parse().unwrap_err().message, err);
|
let mut tokens = ParseTokens::new(src);
|
||||||
|
assert_eq!(Parser::with_scope(scope, &mut tokens).parse().unwrap_err().message, err);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse the basic cases.
|
/// Parse the basic cases.
|
||||||
@ -771,7 +813,6 @@ mod parse_tests {
|
|||||||
let mut scope = Scope::new();
|
let mut scope = Scope::new();
|
||||||
scope.add::<BodylessFn>("func");
|
scope.add::<BodylessFn>("func");
|
||||||
scope.add::<TreeFn>("bold");
|
scope.add::<TreeFn>("bold");
|
||||||
|
|
||||||
test_scoped(&scope, "[func] ⺐.", tree! [
|
test_scoped(&scope, "[func] ⺐.", tree! [
|
||||||
F(func! {
|
F(func! {
|
||||||
name => "func",
|
name => "func",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user