From 27947e212cf217005059c4d31fa4301e92cfd3cc Mon Sep 17 00:00:00 2001
From: Laurenz
Date: Wed, 1 May 2019 17:15:59 +0200
Subject: [PATCH] Require scope for parser ♻
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/func.rs    |   4 +-
 src/lib.rs     |   8 +-
 src/parsing.rs | 242 +++++++++++++++++++++----------------------
 3 files changed, 112 insertions(+), 142 deletions(-)

diff --git a/src/func.rs b/src/func.rs
index e92122784..7558a124d 100644
--- a/src/func.rs
+++ b/src/func.rs
@@ -5,7 +5,7 @@ use std::collections::HashMap;
 use std::fmt::{self, Debug, Formatter};
 
 use crate::syntax::{FuncHeader, Expression};
-use crate::parsing::{ParseTokens, ParseResult};
+use crate::parsing::{BodyTokens, ParseResult};
 
 
 /// A function which transforms a parsing context into a boxed function.
@@ -66,7 +66,7 @@ pub struct ParseContext<'s, 't> {
     /// The header of the function to be parsed.
     pub header: &'s FuncHeader,
     /// Tokens if the function has a body, otherwise nothing.
-    pub tokens: Option<&'s mut ParseTokens<'t>>,
+    pub tokens: Option<&'s mut BodyTokens<'t>>,
     /// The current scope containing function definitions.
     pub scope: &'s Scope,
 }
diff --git a/src/lib.rs b/src/lib.rs
index 0a17fca2b..7fd10f5fc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -47,8 +47,9 @@ use std::fmt::{self, Debug, Formatter};
 
 use crate::doc::Document;
 use crate::engine::{Engine, Style, TypesetError};
+use crate::func::Scope;
 use crate::font::FontProvider;
-use crate::parsing::{Parser, ParseTokens, ParseResult, ParseError};
+use crate::parsing::{Parser, BodyTokens, ParseResult, ParseError};
 use crate::syntax::SyntaxTree;
 
 #[macro_use]
@@ -97,8 +98,9 @@ impl<'p> Compiler<'p> {
     /// Parse source code into a syntax tree.
     #[inline]
     pub fn parse(&self, src: &str) -> ParseResult<SyntaxTree> {
-        let mut tokens = ParseTokens::new(src);
-        Parser::new(&mut tokens).parse()
+        let scope = Scope::new();
+        let mut tokens = BodyTokens::new(src);
+        Parser::new(&mut tokens, &scope).parse()
     }
 
     /// Compile a portable typesetted document from source code.
diff --git a/src/parsing.rs b/src/parsing.rs
index d79a942d9..8a3f075ae 100644
--- a/src/parsing.rs
+++ b/src/parsing.rs
@@ -3,7 +3,6 @@
 use std::collections::HashMap;
 use std::iter::Peekable;
 use std::mem::swap;
-use std::ops::Deref;
 use std::str::CharIndices;
 
 use unicode_xid::UnicodeXID;
@@ -90,8 +89,7 @@ impl<'s> Iterator for Tokens<'s> {
             }
         }
 
-        // Now all special cases are handled and we can finally look at the
-        // next words.
+        // Take the next char and peek at the one after it.
         let (next_pos, next) = self.chars.next()?;
         let afterwards = self.chars.peek().map(|p| p.1);
 
@@ -250,8 +248,8 @@ impl Iterator for PeekableChars<'_> {
 
 /// Transforms token streams to syntax trees.
 pub struct Parser<'s, 't> {
-    tokens: &'s mut ParseTokens<'t>,
-    scope: ParserScope<'s>,
+    tokens: &'s mut BodyTokens<'t>,
+    scope: &'s Scope,
     state: ParserState,
     tree: SyntaxTree,
 }
@@ -268,20 +266,9 @@ enum ParserState {
 }
 
 impl<'s, 't> Parser<'s, 't> {
-    /// Create a new parser from a stream of tokens.
+    /// Create a new parser from a stream of tokens and a scope of functions.
     #[inline]
-    pub fn new(tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> {
-        Parser::new_internal(tokens, ParserScope::Owned(Scope::new()))
-    }
-
-    /// Create a new parser with a scope containing function definitions.
-    #[inline]
-    pub fn with_scope(tokens: &'s mut ParseTokens<'t>, scope: &'s Scope) -> Parser<'s, 't> {
-        Parser::new_internal(tokens, ParserScope::Shared(scope))
-    }
-
-    /// Internal helper for construction.
-    fn new_internal(tokens: &'s mut ParseTokens<'t>, scope: ParserScope<'s>) -> Parser<'s, 't> {
+    pub fn new(tokens: &'s mut BodyTokens<'t>, scope: &'s Scope) -> Parser<'s, 't> {
         Parser {
             tokens,
             scope,
@@ -323,7 +310,10 @@ impl<'s, 't> Parser<'s, 't> {
             PS::Body => match token {
                 // Whitespace
                 Token::Space => self.append_space_consumed(),
-                Token::Newline => self.switch_consumed(PS::FirstNewline),
+                Token::Newline => {
+                    self.advance();
+                    self.switch(PS::FirstNewline);
+                },
 
                 // Text
                 Token::Text(word) => self.append_consumed(Node::Text(word.to_owned())),
@@ -356,7 +346,7 @@ impl<'s, 't> Parser<'s, 't> {
         // The next token should be the name of the function.
         let name = match self.tokens.next() {
             Some(Token::Text(word)) => {
-                if word.is_identifier() {
+                if is_identifier(word) {
                     Ok(word.to_owned())
                 } else {
                     Err(ParseError::new("invalid identifier"))
@@ -427,6 +417,11 @@ impl<'s, 't> Parser<'s, 't> {
         self.tokens.next();
     }
 
+    /// Switch the state.
+    fn switch(&mut self, state: ParserState) {
+        self.state = state;
+    }
+
     /// Append a node to the tree.
     fn append(&mut self, node: Node) {
         self.tree.nodes.push(node);
@@ -439,11 +434,6 @@ impl<'s, 't> Parser<'s, 't> {
         }
     }
 
-    /// Switch the state.
-    fn switch(&mut self, state: ParserState) {
-        self.state = state;
-    }
-
     /// Advance and return the given node.
     fn append_consumed(&mut self, node: Node) {
         self.advance();
@@ -456,12 +446,6 @@ impl<'s, 't> Parser<'s, 't> {
         self.append_space();
     }
 
-    /// Advance and switch the state.
-    fn switch_consumed(&mut self, state: ParserState) {
-        self.advance();
-        self.switch(state);
-    }
-
     /// Skip tokens until the condition is met.
     fn skip_while<F>(&mut self, f: F) where F: Fn(&Token) -> bool {
         while let Some(token) = self.tokens.peek() {
@@ -473,46 +457,47 @@ impl<'s, 't> Parser<'s, 't> {
     }
 }
 
-/// An owned or shared scope.
-#[derive(Debug)]
-enum ParserScope<'s> {
-    Owned(Scope),
-    Shared(&'s Scope)
-}
+/// Whether this word is a valid unicode identifier.
+fn is_identifier(string: &str) -> bool {
+    let mut chars = string.chars();
 
-impl Deref for ParserScope<'_> {
-    type Target = Scope;
+    match chars.next() {
+        Some(c) if !UnicodeXID::is_xid_start(c) => return false,
+        None => return false,
+        _ => (),
+    }
 
-    fn deref(&self) -> &Scope {
-        match self {
-            ParserScope::Owned(scope) => &scope,
-            ParserScope::Shared(scope) => scope,
+    while let Some(c) = chars.next() {
+        if !UnicodeXID::is_xid_continue(c) {
+            return false;
         }
     }
+
+    true
 }
 
 /// A token iterator that iterates over exactly one body.
 ///
 /// This iterator wraps [`Tokens`] and yields exactly the tokens of one
-/// function body or the complete top-level body and stops there.
+/// function body or the complete top-level body and then stops.
 #[derive(Debug, Clone)]
-pub struct ParseTokens<'s> {
+pub struct BodyTokens<'s> {
     tokens: Peekable<Tokens<'s>>,
     parens: Vec<Token<'s>>,
     blocked: bool,
 }
 
-impl<'s> ParseTokens<'s> {
+impl<'s> BodyTokens<'s> {
     /// Create a new iterator over text.
     #[inline]
-    pub fn new(source: &'s str) -> ParseTokens<'s> {
-        ParseTokens::from_tokens(Tokens::new(source))
+    pub fn new(source: &'s str) -> BodyTokens<'s> {
+        BodyTokens::from_tokens(Tokens::new(source))
     }
 
     /// Create a new iterator operating over an existing one.
     #[inline]
-    pub fn from_tokens(tokens: Tokens<'s>) -> ParseTokens<'s> {
-        ParseTokens {
+    pub fn from_tokens(tokens: Tokens<'s>) -> BodyTokens<'s> {
+        BodyTokens {
             tokens: tokens.peekable(),
             parens: vec![],
             blocked: false,
@@ -546,7 +531,7 @@ impl<'s> ParseTokens<'s> {
     }
 }
 
-impl<'s> Iterator for ParseTokens<'s> {
+impl<'s> Iterator for BodyTokens<'s> {
     type Item = Token<'s>;
 
     fn next(&mut self) -> Option<Token<'s>> {
@@ -577,44 +562,18 @@
     }
 }
 
-/// More useful functions on `str`'s.
-trait StrExt {
-    /// Whether this word is a valid unicode identifier.
-    fn is_identifier(&self) -> bool;
-}
-
-impl StrExt for str {
-    fn is_identifier(&self) -> bool {
-        let mut chars = self.chars();
-
-        match chars.next() {
-            Some(c) if !UnicodeXID::is_xid_start(c) => return false,
-            None => return false,
-            _ => (),
-        }
-
-        while let Some(c) = chars.next() {
-            if !UnicodeXID::is_xid_continue(c) {
-                return false;
-            }
-        }
-
-        true
-    }
-}
-
 /// The error type for parsing.
 pub struct ParseError(String);
 
+/// The result type for parsing.
+pub type ParseResult<T> = Result<T, ParseError>;
+
 impl ParseError {
     fn new<S: Into<String>>(message: S) -> ParseError {
         ParseError(message.into())
     }
 }
 
-/// The result type for parsing.
-pub type ParseResult<T> = Result<T, ParseError>;
-
 error_type! {
     err: ParseError,
     show: f => f.write_str(&err.0),
@@ -734,74 +693,44 @@ mod token_tests {
 #[cfg(test)]
 mod parse_tests {
     use super::*;
+    use funcs::*;
     use crate::func::{Function, Scope};
     use Node::{Space as S, Newline as N, Func as F};
 
-    #[allow(non_snake_case)]
-    fn T(s: &str) -> Node { Node::Text(s.to_owned()) }
+    /// Two test functions, one which parses its body as another syntax tree
+    /// and another one which does not expect a body.
+    mod funcs {
+        use super::*;
 
-    /// A testing function which just parses it's body into a syntax tree.
-    #[derive(Debug, PartialEq)]
-    struct TreeFn(SyntaxTree);
+        /// A testing function which just parses its body into a syntax tree.
+        #[derive(Debug, PartialEq)]
+        pub struct TreeFn(pub SyntaxTree);
 
-    impl Function for TreeFn {
-        fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
-            if let Some(tokens) = context.tokens {
-                Parser::with_scope(tokens, context.scope).parse().map(|tree| TreeFn(tree))
-            } else {
-                Err(ParseError::new("expected body for tree fn"))
+        impl Function for TreeFn {
+            fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
+                if let Some(tokens) = context.tokens {
+                    Parser::new(tokens, context.scope).parse().map(|tree| TreeFn(tree))
+                } else {
+                    Err(ParseError::new("expected body for tree fn"))
+                }
             }
+            fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
         }
-        fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
-    }
 
-    /// A testing function without a body.
-    #[derive(Debug, PartialEq)]
-    struct BodylessFn;
+        /// A testing function without a body.
+        #[derive(Debug, PartialEq)]
+        pub struct BodylessFn;
 
-    impl Function for BodylessFn {
-        fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
-            if context.tokens.is_none() {
-                Ok(BodylessFn)
-            } else {
-                Err(ParseError::new("unexpected body for bodyless fn"))
+        impl Function for BodylessFn {
+            fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
+                if context.tokens.is_none() {
+                    Ok(BodylessFn)
+                } else {
+                    Err(ParseError::new("unexpected body for bodyless fn"))
+                }
             }
+            fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
         }
-        fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
-    }
-
-    /// Shortcut macro to create a function.
-    macro_rules! func {
func { - (name => $name:expr, body => None $(,)*) => { - func!(@$name, Box::new(BodylessFn)) - }; - (name => $name:expr, body => $tree:expr $(,)*) => { - func!(@$name, Box::new(TreeFn($tree))) - }; - (@$name:expr, $body:expr) => { - FuncCall { - header: FuncHeader { - name: $name.to_string(), - args: vec![], - kwargs: HashMap::new(), - }, - body: $body, - } - } - } - - /// Shortcut macro to create a syntax tree. - /// Is `vec`-like and the elements are the nodes. - macro_rules! tree { - ($($x:expr),*) => ( - SyntaxTree { nodes: vec![$($x),*] } - ); - ($($x:expr,)*) => (tree![$($x),*]) - } - - fn parse(src: &str, scope: &Scope) -> ParseResult { - let mut tokens = ParseTokens::new(src); - Parser::with_scope(&mut tokens, scope).parse() } /// Test if the source code parses into the syntax tree. @@ -824,6 +753,45 @@ mod parse_tests { assert_eq!(parse(src, &scope).unwrap_err().to_string(), err); } + /// Parse the source code with the given scope. + fn parse(src: &str, scope: &Scope) -> ParseResult { + let mut tokens = BodyTokens::new(src); + Parser::new(&mut tokens, scope).parse() + } + + /// Create a text node. + #[allow(non_snake_case)] + fn T(s: &str) -> Node { Node::Text(s.to_owned()) } + + /// Shortcut macro to create a syntax tree. + /// Is `vec`-like and the elements are the nodes. + macro_rules! tree { + ($($x:expr),*) => ( + SyntaxTree { nodes: vec![$($x),*] } + ); + ($($x:expr,)*) => (tree![$($x),*]) + } + + /// Shortcut macro to create a function. + macro_rules! func { + (name => $name:expr, body => None $(,)*) => { + func!(@$name, Box::new(BodylessFn)) + }; + (name => $name:expr, body => $tree:expr $(,)*) => { + func!(@$name, Box::new(TreeFn($tree))) + }; + (@$name:expr, $body:expr) => { + FuncCall { + header: FuncHeader { + name: $name.to_string(), + args: vec![], + kwargs: HashMap::new(), + }, + body: $body, + } + } + } + /// Parse the basic cases. #[test] fn parse_base() {