mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Simplify the parsing model 🔋
This commit is contained in:
parent
27947e212c
commit
37c336063b
85
src/func.rs
85
src/func.rs
@ -5,12 +5,9 @@ use std::collections::HashMap;
|
|||||||
use std::fmt::{self, Debug, Formatter};
|
use std::fmt::{self, Debug, Formatter};
|
||||||
|
|
||||||
use crate::syntax::{FuncHeader, Expression};
|
use crate::syntax::{FuncHeader, Expression};
|
||||||
use crate::parsing::{BodyTokens, ParseResult};
|
use crate::parsing::{FuncContext, ParseResult};
|
||||||
|
|
||||||
|
|
||||||
/// A function which transforms a parsing context into a boxed function.
|
|
||||||
type ParseFunc = dyn Fn(ParseContext) -> ParseResult<Box<dyn Function>>;
|
|
||||||
|
|
||||||
/// Types that act as functions.
|
/// Types that act as functions.
|
||||||
///
|
///
|
||||||
/// These types have to be able to parse tokens into themselves and store the
|
/// These types have to be able to parse tokens into themselves and store the
|
||||||
@ -20,17 +17,52 @@ type ParseFunc = dyn Fn(ParseContext) -> ParseResult<Box<dyn Function>>;
|
|||||||
/// used as functions, that is they fulfill the bounds `Debug + PartialEq + 'static`.
|
/// used as functions, that is they fulfill the bounds `Debug + PartialEq + 'static`.
|
||||||
pub trait Function: FunctionBounds {
|
pub trait Function: FunctionBounds {
|
||||||
/// Parse the tokens of the context with the given header and scope into self.
|
/// Parse the body source of the context with the given header and scope into self.
|
||||||
fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized;
|
fn parse(context: FuncContext) -> ParseResult<Self> where Self: Sized;
|
||||||
|
|
||||||
/// Execute the function and optionally yield a return value.
|
/// Execute the function and optionally yield a return value.
|
||||||
fn typeset(&self, header: &FuncHeader) -> Option<Expression>;
|
fn typeset(&self, header: &FuncHeader) -> Option<Expression>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Equality between dynamically typed functions.
///
/// Delegates to `help_eq` (from `FunctionBounds`) of the left-hand side.
impl PartialEq for dyn Function {
|
||||||
|
fn eq(&self, other: &dyn Function) -> bool {
|
||||||
|
self.help_eq(other)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A helper trait that describes requirements for types that can implement [`Function`].
|
||||||
|
///
|
||||||
|
/// Automatically implemented for all types which fulfill the bounds
|
||||||
|
/// `Debug + PartialEq + 'static`. There should be no need to implement this manually.
|
||||||
|
pub trait FunctionBounds: Debug {
|
||||||
|
/// Cast self into `Any`, so that the value can be downcast to a concrete type.
|
||||||
|
fn help_cast_as_any(&self) -> &dyn Any;
|
||||||
|
|
||||||
|
/// Compare self with another function.
|
||||||
|
fn help_eq(&self, other: &dyn Function) -> bool;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Blanket implementation: every type that is `Debug + PartialEq + 'static`
/// receives `FunctionBounds` automatically.
impl<T> FunctionBounds for T where T: Debug + PartialEq + 'static {
|
||||||
|
/// Expose `self` as `&dyn Any` so that it can be downcast later.
fn help_cast_as_any(&self) -> &dyn Any {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compare with an arbitrary function: the result is `true` only if `other`
/// has the same concrete type as `self` and both compare equal with `==`.
fn help_eq(&self, other: &dyn Function) -> bool {
|
||||||
|
// Try to view `other` as the same concrete type as `self`.
if let Some(other) = other.help_cast_as_any().downcast_ref::<Self>() {
|
||||||
|
self == other
|
||||||
|
} else {
|
||||||
|
// The downcast failed, so `other` is of a different concrete type.
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A map from identifiers to functions.
|
/// A map from identifiers to functions.
|
||||||
pub struct Scope {
|
pub struct Scope {
|
||||||
parsers: HashMap<String, Box<ParseFunc>>,
|
parsers: HashMap<String, Box<ParseFunc>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A function which transforms a parsing context into a boxed function.
|
||||||
|
type ParseFunc = dyn Fn(FuncContext) -> ParseResult<Box<dyn Function>>;
|
||||||
|
|
||||||
impl Scope {
|
impl Scope {
|
||||||
/// Create a new empty scope.
|
/// Create a new empty scope.
|
||||||
pub fn new() -> Scope {
|
pub fn new() -> Scope {
|
||||||
@ -59,46 +91,3 @@ impl Debug for Scope {
|
|||||||
write!(f, "{:?}", self.parsers.keys())
|
write!(f, "{:?}", self.parsers.keys())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The context for parsing a function.
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct ParseContext<'s, 't> {
|
|
||||||
/// The header of the function to be parsed.
|
|
||||||
pub header: &'s FuncHeader,
|
|
||||||
/// Tokens if the function has a body, otherwise nothing.
|
|
||||||
pub tokens: Option<&'s mut BodyTokens<'t>>,
|
|
||||||
/// The current scope containing function definitions.
|
|
||||||
pub scope: &'s Scope,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A helper trait that describes requirements for types that can implement [`Function`].
|
|
||||||
///
|
|
||||||
/// Automatically implemented for all types which fulfill the bounds
|
|
||||||
/// `Debug + PartialEq + 'static`. There should be no need to implement this manually.
|
|
||||||
pub trait FunctionBounds: Debug {
|
|
||||||
/// Cast self into `Any`.
|
|
||||||
fn help_cast_as_any(&self) -> &dyn Any;
|
|
||||||
|
|
||||||
/// Compare self with another function.
|
|
||||||
fn help_eq(&self, other: &dyn Function) -> bool;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T> FunctionBounds for T where T: Debug + PartialEq + 'static {
|
|
||||||
fn help_cast_as_any(&self) -> &dyn Any {
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
fn help_eq(&self, other: &dyn Function) -> bool {
|
|
||||||
if let Some(other) = other.help_cast_as_any().downcast_ref::<Self>() {
|
|
||||||
self == other
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PartialEq for dyn Function {
|
|
||||||
fn eq(&self, other: &dyn Function) -> bool {
|
|
||||||
self.help_eq(other)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -49,7 +49,7 @@ use crate::doc::Document;
|
|||||||
use crate::engine::{Engine, Style, TypesetError};
|
use crate::engine::{Engine, Style, TypesetError};
|
||||||
use crate::func::Scope;
|
use crate::func::Scope;
|
||||||
use crate::font::FontProvider;
|
use crate::font::FontProvider;
|
||||||
use crate::parsing::{Parser, BodyTokens, ParseResult, ParseError};
|
use crate::parsing::{parse, ParseResult, ParseError};
|
||||||
use crate::syntax::SyntaxTree;
|
use crate::syntax::SyntaxTree;
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
@ -99,8 +99,7 @@ impl<'p> Compiler<'p> {
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub fn parse(&self, src: &str) -> ParseResult<SyntaxTree> {
|
pub fn parse(&self, src: &str) -> ParseResult<SyntaxTree> {
|
||||||
let scope = Scope::new();
|
let scope = Scope::new();
|
||||||
let mut tokens = BodyTokens::new(src);
|
parse(src, &scope)
|
||||||
Parser::new(&mut tokens, &scope).parse()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compile a portable typeset document from source code.
|
/// Compile a portable typeset document from source code.
|
||||||
|
295
src/parsing.rs
295
src/parsing.rs
@ -1,23 +1,29 @@
|
|||||||
//! Tokenization and parsing of source code into syntax trees.
|
//! Tokenization and parsing of source code into syntax trees.
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::iter::Peekable;
|
|
||||||
use std::mem::swap;
|
use std::mem::swap;
|
||||||
use std::str::CharIndices;
|
use std::str::CharIndices;
|
||||||
|
|
||||||
|
use smallvec::SmallVec;
|
||||||
use unicode_xid::UnicodeXID;
|
use unicode_xid::UnicodeXID;
|
||||||
|
|
||||||
use crate::syntax::*;
|
use crate::syntax::*;
|
||||||
use crate::func::{ParseContext, Scope};
|
use crate::func::Scope;
|
||||||
|
|
||||||
|
|
||||||
|
/// Builds an iterator over the tokens of the source code.
///
/// This is a thin wrapper around `Tokens::new`; the returned iterator borrows `src`.
|
||||||
|
#[inline]
|
||||||
|
pub fn tokenize(src: &str) -> Tokens {
|
||||||
|
Tokens::new(src)
|
||||||
|
}
|
||||||
|
|
||||||
/// An iterator over the tokens of source code.
|
/// An iterator over the tokens of source code.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Tokens<'s> {
|
pub struct Tokens<'s> {
|
||||||
source: &'s str,
|
src: &'s str,
|
||||||
chars: PeekableChars<'s>,
|
chars: PeekableChars<'s>,
|
||||||
state: TokensState,
|
state: TokensState,
|
||||||
stack: Vec<TokensState>,
|
stack: SmallVec<[TokensState; 1]>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The state the tokenizer is in.
|
/// The state the tokenizer is in.
|
||||||
@ -33,14 +39,13 @@ enum TokensState {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Tokens<'s> {
|
impl<'s> Tokens<'s> {
|
||||||
/// Create a new token stream from text.
|
/// Create a new token stream from source code.
|
||||||
#[inline]
|
fn new(src: &'s str) -> Tokens<'s> {
|
||||||
pub fn new(source: &'s str) -> Tokens<'s> {
|
|
||||||
Tokens {
|
Tokens {
|
||||||
source,
|
src,
|
||||||
chars: PeekableChars::new(source),
|
chars: PeekableChars::new(src),
|
||||||
state: TokensState::Body,
|
state: TokensState::Body,
|
||||||
stack: vec![],
|
stack: SmallVec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,7 +73,7 @@ impl<'s> Tokens<'s> {
|
|||||||
|
|
||||||
/// Returns a word containing the string bounded by the given indices.
|
/// Returns a word containing the string bounded by the given indices.
|
||||||
fn text(&self, start: usize, end: usize) -> Token<'s> {
|
fn text(&self, start: usize, end: usize) -> Token<'s> {
|
||||||
Token::Text(&self.source[start .. end])
|
Token::Text(&self.src[start .. end])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,6 +107,8 @@ impl<'s> Iterator for Tokens<'s> {
|
|||||||
']' => {
|
']' => {
|
||||||
if self.state == TS::Function {
|
if self.state == TS::Function {
|
||||||
self.state = TS::MaybeBody;
|
self.state = TS::MaybeBody;
|
||||||
|
} else {
|
||||||
|
self.unswitch();
|
||||||
}
|
}
|
||||||
Token::RightBracket
|
Token::RightBracket
|
||||||
},
|
},
|
||||||
@ -197,6 +204,8 @@ fn is_newline_char(character: char) -> bool {
|
|||||||
/// An index + char iterator with double lookahead.
|
/// An index + char iterator with double lookahead.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct PeekableChars<'s> {
|
struct PeekableChars<'s> {
|
||||||
|
offset: usize,
|
||||||
|
string: &'s str,
|
||||||
chars: CharIndices<'s>,
|
chars: CharIndices<'s>,
|
||||||
peek1: Option<Option<(usize, char)>>,
|
peek1: Option<Option<(usize, char)>>,
|
||||||
peek2: Option<Option<(usize, char)>>,
|
peek2: Option<Option<(usize, char)>>,
|
||||||
@ -206,6 +215,8 @@ impl<'s> PeekableChars<'s> {
|
|||||||
/// Create a new iterator from a string.
|
/// Create a new iterator from a string.
|
||||||
fn new(string: &'s str) -> PeekableChars<'s> {
|
fn new(string: &'s str) -> PeekableChars<'s> {
|
||||||
PeekableChars {
|
PeekableChars {
|
||||||
|
offset: 0,
|
||||||
|
string,
|
||||||
chars: string.char_indices(),
|
chars: string.char_indices(),
|
||||||
peek1: None,
|
peek1: None,
|
||||||
peek2: None,
|
peek2: None,
|
||||||
@ -214,8 +225,14 @@ impl<'s> PeekableChars<'s> {
|
|||||||
|
|
||||||
/// Peek at the next element.
|
/// Peek at the next element.
|
||||||
fn peek(&mut self) -> Option<(usize, char)> {
|
fn peek(&mut self) -> Option<(usize, char)> {
|
||||||
let iter = &mut self.chars;
|
match self.peek1 {
|
||||||
*self.peek1.get_or_insert_with(|| iter.next())
|
Some(peeked) => peeked,
|
||||||
|
None => {
|
||||||
|
let next = self.next_inner();
|
||||||
|
self.peek1 = Some(next);
|
||||||
|
next
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Peek at the element after the next element.
|
/// Peek at the element after the next element.
|
||||||
@ -224,12 +241,30 @@ impl<'s> PeekableChars<'s> {
|
|||||||
Some(peeked) => peeked,
|
Some(peeked) => peeked,
|
||||||
None => {
|
None => {
|
||||||
self.peek();
|
self.peek();
|
||||||
let next = self.chars.next();
|
let next = self.next_inner();
|
||||||
self.peek2 = Some(next);
|
self.peek2 = Some(next);
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the next value of the inner iterator mapped with the offset.
///
/// After `goto` the inner iterator runs over a suffix of `string`, so the
/// indices it yields are relative to that suffix; adding `offset` turns them
/// back into indices into the whole string.
|
||||||
|
fn next_inner(&mut self) -> Option<(usize, char)> {
|
||||||
|
self.chars.next().map(|(i, c)| (i + self.offset, c))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The index in the source string of the next character that will be yielded,
/// or `None` if there are no more characters.
///
/// The character is only peeked at, not consumed.
|
||||||
|
fn current_index(&mut self) -> Option<usize> {
|
||||||
|
self.peek().map(|p| p.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Go to a new position in the underlying string.
///
/// Iteration restarts at byte `index` of the full string. `index` is stored as
/// the offset that is added to every yielded index, and both lookahead slots
/// are discarded because they belong to the old position.
///
/// # Panics
///
/// Slicing `self.string[index..]` panics if `index` is past the end of the
/// string or does not lie on a `char` boundary.
|
||||||
|
fn goto(&mut self, index: usize) {
|
||||||
|
self.offset = index;
|
||||||
|
self.chars = self.string[index..].char_indices();
|
||||||
|
self.peek1 = None;
|
||||||
|
self.peek2 = None;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for PeekableChars<'_> {
|
impl Iterator for PeekableChars<'_> {
|
||||||
@ -241,14 +276,21 @@ impl Iterator for PeekableChars<'_> {
|
|||||||
self.peek1 = self.peek2.take();
|
self.peek1 = self.peek2.take();
|
||||||
value
|
value
|
||||||
},
|
},
|
||||||
None => self.chars.next(),
|
None => self.next_inner(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses source code into a syntax tree using function definitions from a scope.
///
/// Builds a `Parser` over `src` and `scope` and runs it to completion,
/// returning whatever `Parser::parse` returns.
|
||||||
|
#[inline]
|
||||||
|
pub fn parse(src: &str, scope: &Scope) -> ParseResult<SyntaxTree> {
|
||||||
|
Parser::new(src, scope).parse()
|
||||||
|
}
|
||||||
|
|
||||||
/// Transforms token streams to syntax trees.
|
/// Transforms token streams to syntax trees.
|
||||||
pub struct Parser<'s, 't> {
|
struct Parser<'s> {
|
||||||
tokens: &'s mut BodyTokens<'t>,
|
src: &'s str,
|
||||||
|
tokens: PeekableTokens<'s>,
|
||||||
scope: &'s Scope,
|
scope: &'s Scope,
|
||||||
state: ParserState,
|
state: ParserState,
|
||||||
tree: SyntaxTree,
|
tree: SyntaxTree,
|
||||||
@ -265,12 +307,12 @@ enum ParserState {
|
|||||||
WroteNewline,
|
WroteNewline,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s, 't> Parser<'s, 't> {
|
impl<'s> Parser<'s> {
|
||||||
/// Create a new parser from a stream of tokens and a scope of functions.
|
/// Create a new parser from source code and a scope of functions.
|
||||||
#[inline]
|
fn new(src: &'s str, scope: &'s Scope) -> Parser<'s> {
|
||||||
pub fn new(tokens: &'s mut BodyTokens<'t>, scope: &'s Scope) -> Parser<'s, 't> {
|
|
||||||
Parser {
|
Parser {
|
||||||
tokens,
|
src,
|
||||||
|
tokens: PeekableTokens::new(tokenize(src)),
|
||||||
scope,
|
scope,
|
||||||
state: ParserState::Body,
|
state: ParserState::Body,
|
||||||
tree: SyntaxTree::new(),
|
tree: SyntaxTree::new(),
|
||||||
@ -278,13 +320,13 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Parse the source into an abstract syntax tree.
|
/// Parse the source into an abstract syntax tree.
|
||||||
pub fn parse(mut self) -> ParseResult<SyntaxTree> {
|
fn parse(mut self) -> ParseResult<SyntaxTree> {
|
||||||
use ParserState as PS;
|
use ParserState as PS;
|
||||||
|
|
||||||
while let Some(&token) = self.tokens.peek() {
|
while let Some(token) = self.tokens.peek() {
|
||||||
// Skip over comments.
|
// Skip over comments.
|
||||||
if token == Token::Hashtag {
|
if token == Token::Hashtag {
|
||||||
self.skip_while(|&t| t != Token::Newline);
|
self.skip_while(|t| t != Token::Newline);
|
||||||
self.advance();
|
self.advance();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -341,7 +383,7 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
/// Parse a function from the current position.
|
/// Parse a function from the current position.
|
||||||
fn parse_function(&mut self) -> ParseResult<()> {
|
fn parse_function(&mut self) -> ParseResult<()> {
|
||||||
// This should only be called if a left bracket was seen.
|
// This should only be called if a left bracket was seen.
|
||||||
debug_assert!(self.tokens.next() == Some(Token::LeftBracket));
|
assert!(self.tokens.next() == Some(Token::LeftBracket));
|
||||||
|
|
||||||
// The next token should be the name of the function.
|
// The next token should be the name of the function.
|
||||||
let name = match self.tokens.next() {
|
let name = match self.tokens.next() {
|
||||||
@ -368,7 +410,7 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Whether the function has a body.
|
// Whether the function has a body.
|
||||||
let has_body = self.tokens.peek() == Some(&Token::LeftBracket);
|
let has_body = self.tokens.peek() == Some(Token::LeftBracket);
|
||||||
if has_body {
|
if has_body {
|
||||||
self.advance();
|
self.advance();
|
||||||
}
|
}
|
||||||
@ -379,26 +421,31 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
|
|
||||||
// Do the parsing dependent on whether the function has a body.
|
// Do the parsing dependent on whether the function has a body.
|
||||||
let body = if has_body {
|
let body = if has_body {
|
||||||
self.tokens.start();
|
// Find out the string which makes the body of this function.
|
||||||
|
let (start, end) = self.tokens.current_index().and_then(|index| {
|
||||||
|
find_closing_bracket(&self.src[index..])
|
||||||
|
.map(|end| (index, index + end))
|
||||||
|
}).ok_or_else(|| ParseError::new("expected closing bracket"))?;
|
||||||
|
|
||||||
let body = parser(ParseContext {
|
// Parse the body.
|
||||||
|
let body_string = &self.src[start .. end];
|
||||||
|
let body = parser(FuncContext {
|
||||||
header: &header,
|
header: &header,
|
||||||
tokens: Some(&mut self.tokens),
|
body: Some(body_string),
|
||||||
scope: &self.scope,
|
scope: &self.scope,
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
self.tokens.finish();
|
// Skip to the end of the function in the token stream.
|
||||||
|
self.tokens.goto(end);
|
||||||
|
|
||||||
// Now the body should be closed.
|
// Now the body should be closed.
|
||||||
if self.tokens.next() != Some(Token::RightBracket) {
|
assert!(self.tokens.next() == Some(Token::RightBracket));
|
||||||
return Err(ParseError::new("expected closing bracket"));
|
|
||||||
}
|
|
||||||
|
|
||||||
body
|
body
|
||||||
} else {
|
} else {
|
||||||
parser(ParseContext {
|
parser(FuncContext {
|
||||||
header: &header,
|
header: &header,
|
||||||
tokens: None,
|
body: None,
|
||||||
scope: &self.scope,
|
scope: &self.scope,
|
||||||
})?
|
})?
|
||||||
};
|
};
|
||||||
@ -447,7 +494,7 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Skip tokens until the condition is met.
|
/// Skip tokens until the condition is met.
|
||||||
fn skip_while<F>(&mut self, f: F) where F: Fn(&Token) -> bool {
|
fn skip_while<F>(&mut self, f: F) where F: Fn(Token) -> bool {
|
||||||
while let Some(token) = self.tokens.peek() {
|
while let Some(token) = self.tokens.peek() {
|
||||||
if !f(token) {
|
if !f(token) {
|
||||||
break;
|
break;
|
||||||
@ -457,6 +504,77 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Find the index of the first unbalanced closing bracket.
///
/// Scans `src` while tracking bracket nesting: every `[` opens a level and a
/// `]` met while a level is open closes it again. The first `]` met while no
/// level is open is the unbalanced one; its byte offset into `src` (as yielded
/// by `char_indices`) is returned. Returns `None` if there is no such bracket.
|
||||||
|
fn find_closing_bracket(src: &str) -> Option<usize> {
|
||||||
|
// Number of `[` that are currently open and still waiting for their `]`.
let mut parens = 0;
|
||||||
|
for (index, c) in src.char_indices() {
|
||||||
|
match c {
|
||||||
|
// A closing bracket with nothing open: this is the one we look for.
']' if parens == 0 => return Some(index),
|
||||||
|
'[' => parens += 1,
|
||||||
|
// Only reached with `parens > 0` because of the guarded arm above.
']' => parens -= 1,
|
||||||
|
_ => {},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A peekable iterator for tokens which allows access to the original iterator
|
||||||
|
/// inside this module (which is needed by the parser).
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct PeekableTokens<'s> {
|
||||||
|
// The wrapped token iterator.
tokens: Tokens<'s>,
|
||||||
|
// One-token lookahead: the outer `None` means nothing has been peeked yet,
// `Some(x)` caches the result `x` of a `next()` call on `tokens` (which is
// itself `None` once the token stream has ended).
peeked: Option<Option<Token<'s>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> PeekableTokens<'s> {
|
||||||
|
/// Create a new peekable iterator wrapping a token iterator, with an empty lookahead.
|
||||||
|
fn new(tokens: Tokens<'s>) -> PeekableTokens<'s> {
|
||||||
|
PeekableTokens {
|
||||||
|
tokens,
|
||||||
|
peeked: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Peek at the next element.
///
/// The first call pulls a token from the wrapped iterator and caches it (the
/// end of the stream is cached as `None` as well); further calls return the
/// cached value until `next` or `goto` clears it.
|
||||||
|
fn peek(&mut self) -> Option<Token<'s>> {
|
||||||
|
let iter = &mut self.tokens;
|
||||||
|
*self.peeked.get_or_insert_with(|| iter.next())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The index of the first character of the next token in the source string.
///
/// Delegates to the character iterator of the wrapped tokenizer.
/// NOTE(review): `peeked` is not taken into account here; if a token has
/// already been peeked, the reported index presumably lies behind that
/// token. Confirm that callers only ask with an empty lookahead.
|
||||||
|
fn current_index(&mut self) -> Option<usize> {
|
||||||
|
self.tokens.chars.current_index()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Go to a new position in the underlying string.
///
/// Repositions the character iterator of the wrapped tokenizer at `index`
/// and drops any peeked token.
/// NOTE(review): only the character position and the lookahead are reset;
/// the tokenizer's `state` and `stack` are left as they are. Confirm that
/// this is what callers expect.
|
||||||
|
fn goto(&mut self, index: usize) {
|
||||||
|
self.tokens.chars.goto(index);
|
||||||
|
self.peeked = None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Yields the tokens of the wrapped iterator, handing out a previously peeked
/// token first.
impl<'s> Iterator for PeekableTokens<'s> {
|
||||||
|
type Item = Token<'s>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Token<'s>> {
|
||||||
|
// `take` empties the lookahead slot, so the cached token is returned once.
match self.peeked.take() {
|
||||||
|
Some(value) => value,
|
||||||
|
None => self.tokens.next(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The context for parsing a function.
///
/// This is the argument handed to `Function::parse` and to the parse
/// functions stored in a `Scope`.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct FuncContext<'s> {
|
||||||
|
/// The header of the function to be parsed.
|
||||||
|
pub header: &'s FuncHeader,
|
||||||
|
/// The body source if the function has a body, otherwise nothing.
///
/// The parser passes the raw source text of the body, up to but not
/// including the closing bracket.
|
||||||
|
pub body: Option<&'s str>,
|
||||||
|
/// The current scope containing function definitions.
|
||||||
|
pub scope: &'s Scope,
|
||||||
|
}
|
||||||
|
|
||||||
/// Whether this word is a valid unicode identifier.
|
/// Whether this word is a valid unicode identifier.
|
||||||
fn is_identifier(string: &str) -> bool {
|
fn is_identifier(string: &str) -> bool {
|
||||||
let mut chars = string.chars();
|
let mut chars = string.chars();
|
||||||
@ -476,92 +594,6 @@ fn is_identifier(string: &str) -> bool {
|
|||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A token iterator that iterates over exactly one body.
|
|
||||||
///
|
|
||||||
/// This iterator wraps [`Tokens`] and yields exactly the tokens of one
|
|
||||||
/// function body or the complete top-level body and stops then.
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct BodyTokens<'s> {
|
|
||||||
tokens: Peekable<Tokens<'s>>,
|
|
||||||
parens: Vec<u32>,
|
|
||||||
blocked: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> BodyTokens<'s> {
|
|
||||||
/// Create a new iterator over text.
|
|
||||||
#[inline]
|
|
||||||
pub fn new(source: &'s str) -> BodyTokens<'s> {
|
|
||||||
BodyTokens::from_tokens(Tokens::new(source))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a new iterator operating over an existing one.
|
|
||||||
#[inline]
|
|
||||||
pub fn from_tokens(tokens: Tokens<'s>) -> BodyTokens<'s> {
|
|
||||||
BodyTokens {
|
|
||||||
tokens: tokens.peekable(),
|
|
||||||
parens: vec![],
|
|
||||||
blocked: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Peek at the next token.
|
|
||||||
#[inline]
|
|
||||||
pub fn peek(&mut self) -> Option<&Token<'s>> {
|
|
||||||
if self.blocked {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let token = self.tokens.peek();
|
|
||||||
if token == Some(&Token::RightBracket) && self.parens.last() == Some(&0) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
token
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Start a new substream of tokens.
|
|
||||||
fn start(&mut self) {
|
|
||||||
self.parens.push(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Finish a substream of tokens.
|
|
||||||
fn finish(&mut self) {
|
|
||||||
self.blocked = false;
|
|
||||||
self.parens.pop().unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> Iterator for BodyTokens<'s> {
|
|
||||||
type Item = Token<'s>;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Token<'s>> {
|
|
||||||
if self.blocked {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let token = self.tokens.peek();
|
|
||||||
match token {
|
|
||||||
Some(Token::RightBracket) => {
|
|
||||||
match self.parens.last_mut() {
|
|
||||||
Some(&mut 0) => {
|
|
||||||
self.blocked = true;
|
|
||||||
return None
|
|
||||||
},
|
|
||||||
Some(top) => *top -= 1,
|
|
||||||
None => {}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Some(Token::LeftBracket) => {
|
|
||||||
if let Some(top) = self.parens.last_mut() {
|
|
||||||
*top += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
};
|
|
||||||
self.tokens.next()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The error type for parsing.
|
/// The error type for parsing.
|
||||||
pub struct ParseError(String);
|
pub struct ParseError(String);
|
||||||
|
|
||||||
@ -666,6 +698,9 @@ mod token_tests {
|
|||||||
test("[[n: k=v]:x][:[=]]:=",
|
test("[[n: k=v]:x][:[=]]:=",
|
||||||
vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
|
vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
|
||||||
L, T(":"), L, E, R, R, T(":=")]);
|
L, T(":"), L, E, R, R, T(":=")]);
|
||||||
|
test("[hi: k=[func][body] v=1][hello]",
|
||||||
|
vec![L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
|
||||||
|
T("v"), E, T("1"), R, L, T("hello"), R]);
|
||||||
test("[func: __key__=value]",
|
test("[func: __key__=value]",
|
||||||
vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
|
vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
|
||||||
}
|
}
|
||||||
@ -707,9 +742,9 @@ mod parse_tests {
|
|||||||
pub struct TreeFn(pub SyntaxTree);
|
pub struct TreeFn(pub SyntaxTree);
|
||||||
|
|
||||||
impl Function for TreeFn {
|
impl Function for TreeFn {
|
||||||
fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
|
fn parse(context: FuncContext) -> ParseResult<Self> where Self: Sized {
|
||||||
if let Some(tokens) = context.tokens {
|
if let Some(src) = context.body {
|
||||||
Parser::new(tokens, context.scope).parse().map(|tree| TreeFn(tree))
|
parse(src, context.scope).map(|tree| TreeFn(tree))
|
||||||
} else {
|
} else {
|
||||||
Err(ParseError::new("expected body for tree fn"))
|
Err(ParseError::new("expected body for tree fn"))
|
||||||
}
|
}
|
||||||
@ -722,8 +757,8 @@ mod parse_tests {
|
|||||||
pub struct BodylessFn;
|
pub struct BodylessFn;
|
||||||
|
|
||||||
impl Function for BodylessFn {
|
impl Function for BodylessFn {
|
||||||
fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
|
fn parse(context: FuncContext) -> ParseResult<Self> where Self: Sized {
|
||||||
if context.tokens.is_none() {
|
if context.body.is_none() {
|
||||||
Ok(BodylessFn)
|
Ok(BodylessFn)
|
||||||
} else {
|
} else {
|
||||||
Err(ParseError::new("unexpected body for bodyless fn"))
|
Err(ParseError::new("unexpected body for bodyless fn"))
|
||||||
@ -753,12 +788,6 @@ mod parse_tests {
|
|||||||
assert_eq!(parse(src, &scope).unwrap_err().to_string(), err);
|
assert_eq!(parse(src, &scope).unwrap_err().to_string(), err);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse the source code with the given scope.
|
|
||||||
fn parse(src: &str, scope: &Scope) -> ParseResult<SyntaxTree> {
|
|
||||||
let mut tokens = BodyTokens::new(src);
|
|
||||||
Parser::new(&mut tokens, scope).parse()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a text node.
|
/// Create a text node.
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
fn T(s: &str) -> Node { Node::Text(s.to_owned()) }
|
fn T(s: &str) -> Node { Node::Text(s.to_owned()) }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user