mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Require scope for parser ♻
This commit is contained in:
parent
632bf2f2ef
commit
27947e212c
@ -5,7 +5,7 @@ use std::collections::HashMap;
|
|||||||
use std::fmt::{self, Debug, Formatter};
|
use std::fmt::{self, Debug, Formatter};
|
||||||
|
|
||||||
use crate::syntax::{FuncHeader, Expression};
|
use crate::syntax::{FuncHeader, Expression};
|
||||||
use crate::parsing::{ParseTokens, ParseResult};
|
use crate::parsing::{BodyTokens, ParseResult};
|
||||||
|
|
||||||
|
|
||||||
/// A function which transforms a parsing context into a boxed function.
|
/// A function which transforms a parsing context into a boxed function.
|
||||||
@ -66,7 +66,7 @@ pub struct ParseContext<'s, 't> {
|
|||||||
/// The header of the function to be parsed.
|
/// The header of the function to be parsed.
|
||||||
pub header: &'s FuncHeader,
|
pub header: &'s FuncHeader,
|
||||||
/// Tokens if the function has a body, otherwise nothing.
|
/// Tokens if the function has a body, otherwise nothing.
|
||||||
pub tokens: Option<&'s mut ParseTokens<'t>>,
|
pub tokens: Option<&'s mut BodyTokens<'t>>,
|
||||||
/// The current scope containing function definitions.
|
/// The current scope containing function definitions.
|
||||||
pub scope: &'s Scope,
|
pub scope: &'s Scope,
|
||||||
}
|
}
|
||||||
|
@ -47,8 +47,9 @@ use std::fmt::{self, Debug, Formatter};
|
|||||||
|
|
||||||
use crate::doc::Document;
|
use crate::doc::Document;
|
||||||
use crate::engine::{Engine, Style, TypesetError};
|
use crate::engine::{Engine, Style, TypesetError};
|
||||||
|
use crate::func::Scope;
|
||||||
use crate::font::FontProvider;
|
use crate::font::FontProvider;
|
||||||
use crate::parsing::{Parser, ParseTokens, ParseResult, ParseError};
|
use crate::parsing::{Parser, BodyTokens, ParseResult, ParseError};
|
||||||
use crate::syntax::SyntaxTree;
|
use crate::syntax::SyntaxTree;
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
@ -97,8 +98,9 @@ impl<'p> Compiler<'p> {
|
|||||||
/// Parse source code into a syntax tree.
|
/// Parse source code into a syntax tree.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn parse(&self, src: &str) -> ParseResult<SyntaxTree> {
|
pub fn parse(&self, src: &str) -> ParseResult<SyntaxTree> {
|
||||||
let mut tokens = ParseTokens::new(src);
|
let scope = Scope::new();
|
||||||
Parser::new(&mut tokens).parse()
|
let mut tokens = BodyTokens::new(src);
|
||||||
|
Parser::new(&mut tokens, &scope).parse()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compile a portable typesetted document from source code.
|
/// Compile a portable typesetted document from source code.
|
||||||
|
242
src/parsing.rs
242
src/parsing.rs
@ -3,7 +3,6 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
use std::mem::swap;
|
use std::mem::swap;
|
||||||
use std::ops::Deref;
|
|
||||||
use std::str::CharIndices;
|
use std::str::CharIndices;
|
||||||
|
|
||||||
use unicode_xid::UnicodeXID;
|
use unicode_xid::UnicodeXID;
|
||||||
@ -90,8 +89,7 @@ impl<'s> Iterator for Tokens<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now all special cases are handled and we can finally look at the
|
// Take the next char and peek at the one behind.
|
||||||
// next words.
|
|
||||||
let (next_pos, next) = self.chars.next()?;
|
let (next_pos, next) = self.chars.next()?;
|
||||||
let afterwards = self.chars.peek().map(|p| p.1);
|
let afterwards = self.chars.peek().map(|p| p.1);
|
||||||
|
|
||||||
@ -250,8 +248,8 @@ impl Iterator for PeekableChars<'_> {
|
|||||||
|
|
||||||
/// Transforms token streams to syntax trees.
|
/// Transforms token streams to syntax trees.
|
||||||
pub struct Parser<'s, 't> {
|
pub struct Parser<'s, 't> {
|
||||||
tokens: &'s mut ParseTokens<'t>,
|
tokens: &'s mut BodyTokens<'t>,
|
||||||
scope: ParserScope<'s>,
|
scope: &'s Scope,
|
||||||
state: ParserState,
|
state: ParserState,
|
||||||
tree: SyntaxTree,
|
tree: SyntaxTree,
|
||||||
}
|
}
|
||||||
@ -268,20 +266,9 @@ enum ParserState {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'s, 't> Parser<'s, 't> {
|
impl<'s, 't> Parser<'s, 't> {
|
||||||
/// Create a new parser from a stream of tokens.
|
/// Create a new parser from a stream of tokens and a scope of functions.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new(tokens: &'s mut ParseTokens<'t>) -> Parser<'s, 't> {
|
pub fn new(tokens: &'s mut BodyTokens<'t>, scope: &'s Scope) -> Parser<'s, 't> {
|
||||||
Parser::new_internal(tokens, ParserScope::Owned(Scope::new()))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a new parser with a scope containing function definitions.
|
|
||||||
#[inline]
|
|
||||||
pub fn with_scope(tokens: &'s mut ParseTokens<'t>, scope: &'s Scope) -> Parser<'s, 't> {
|
|
||||||
Parser::new_internal(tokens, ParserScope::Shared(scope))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Internal helper for construction.
|
|
||||||
fn new_internal(tokens: &'s mut ParseTokens<'t>, scope: ParserScope<'s>) -> Parser<'s, 't> {
|
|
||||||
Parser {
|
Parser {
|
||||||
tokens,
|
tokens,
|
||||||
scope,
|
scope,
|
||||||
@ -323,7 +310,10 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
PS::Body => match token {
|
PS::Body => match token {
|
||||||
// Whitespace
|
// Whitespace
|
||||||
Token::Space => self.append_space_consumed(),
|
Token::Space => self.append_space_consumed(),
|
||||||
Token::Newline => self.switch_consumed(PS::FirstNewline),
|
Token::Newline => {
|
||||||
|
self.advance();
|
||||||
|
self.switch(PS::FirstNewline);
|
||||||
|
},
|
||||||
|
|
||||||
// Text
|
// Text
|
||||||
Token::Text(word) => self.append_consumed(Node::Text(word.to_owned())),
|
Token::Text(word) => self.append_consumed(Node::Text(word.to_owned())),
|
||||||
@ -356,7 +346,7 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
// The next token should be the name of the function.
|
// The next token should be the name of the function.
|
||||||
let name = match self.tokens.next() {
|
let name = match self.tokens.next() {
|
||||||
Some(Token::Text(word)) => {
|
Some(Token::Text(word)) => {
|
||||||
if word.is_identifier() {
|
if is_identifier(word) {
|
||||||
Ok(word.to_owned())
|
Ok(word.to_owned())
|
||||||
} else {
|
} else {
|
||||||
Err(ParseError::new("invalid identifier"))
|
Err(ParseError::new("invalid identifier"))
|
||||||
@ -427,6 +417,11 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
self.tokens.next();
|
self.tokens.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Switch the state.
|
||||||
|
fn switch(&mut self, state: ParserState) {
|
||||||
|
self.state = state;
|
||||||
|
}
|
||||||
|
|
||||||
/// Append a node to the tree.
|
/// Append a node to the tree.
|
||||||
fn append(&mut self, node: Node) {
|
fn append(&mut self, node: Node) {
|
||||||
self.tree.nodes.push(node);
|
self.tree.nodes.push(node);
|
||||||
@ -439,11 +434,6 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Switch the state.
|
|
||||||
fn switch(&mut self, state: ParserState) {
|
|
||||||
self.state = state;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Advance and return the given node.
|
/// Advance and return the given node.
|
||||||
fn append_consumed(&mut self, node: Node) {
|
fn append_consumed(&mut self, node: Node) {
|
||||||
self.advance();
|
self.advance();
|
||||||
@ -456,12 +446,6 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
self.append_space();
|
self.append_space();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Advance and switch the state.
|
|
||||||
fn switch_consumed(&mut self, state: ParserState) {
|
|
||||||
self.advance();
|
|
||||||
self.switch(state);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Skip tokens until the condition is met.
|
/// Skip tokens until the condition is met.
|
||||||
fn skip_while<F>(&mut self, f: F) where F: Fn(&Token) -> bool {
|
fn skip_while<F>(&mut self, f: F) where F: Fn(&Token) -> bool {
|
||||||
while let Some(token) = self.tokens.peek() {
|
while let Some(token) = self.tokens.peek() {
|
||||||
@ -473,46 +457,47 @@ impl<'s, 't> Parser<'s, 't> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An owned or shared scope.
|
/// Whether this word is a valid unicode identifier.
|
||||||
#[derive(Debug)]
|
fn is_identifier(string: &str) -> bool {
|
||||||
enum ParserScope<'s> {
|
let mut chars = string.chars();
|
||||||
Owned(Scope),
|
|
||||||
Shared(&'s Scope)
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Deref for ParserScope<'_> {
|
match chars.next() {
|
||||||
type Target = Scope;
|
Some(c) if !UnicodeXID::is_xid_start(c) => return false,
|
||||||
|
None => return false,
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
fn deref(&self) -> &Scope {
|
while let Some(c) = chars.next() {
|
||||||
match self {
|
if !UnicodeXID::is_xid_continue(c) {
|
||||||
ParserScope::Owned(scope) => &scope,
|
return false;
|
||||||
ParserScope::Shared(scope) => scope,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A token iterator that iterates over exactly one body.
|
/// A token iterator that iterates over exactly one body.
|
||||||
///
|
///
|
||||||
/// This iterator wraps [`Tokens`] and yields exactly the tokens of one
|
/// This iterator wraps [`Tokens`] and yields exactly the tokens of one
|
||||||
/// function body or the complete top-level body and stops there.
|
/// function body or the complete top-level body and stops then.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct ParseTokens<'s> {
|
pub struct BodyTokens<'s> {
|
||||||
tokens: Peekable<Tokens<'s>>,
|
tokens: Peekable<Tokens<'s>>,
|
||||||
parens: Vec<u32>,
|
parens: Vec<u32>,
|
||||||
blocked: bool,
|
blocked: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> ParseTokens<'s> {
|
impl<'s> BodyTokens<'s> {
|
||||||
/// Create a new iterator over text.
|
/// Create a new iterator over text.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new(source: &'s str) -> ParseTokens<'s> {
|
pub fn new(source: &'s str) -> BodyTokens<'s> {
|
||||||
ParseTokens::from_tokens(Tokens::new(source))
|
BodyTokens::from_tokens(Tokens::new(source))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a new iterator operating over an existing one.
|
/// Create a new iterator operating over an existing one.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn from_tokens(tokens: Tokens<'s>) -> ParseTokens<'s> {
|
pub fn from_tokens(tokens: Tokens<'s>) -> BodyTokens<'s> {
|
||||||
ParseTokens {
|
BodyTokens {
|
||||||
tokens: tokens.peekable(),
|
tokens: tokens.peekable(),
|
||||||
parens: vec![],
|
parens: vec![],
|
||||||
blocked: false,
|
blocked: false,
|
||||||
@ -546,7 +531,7 @@ impl<'s> ParseTokens<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Iterator for ParseTokens<'s> {
|
impl<'s> Iterator for BodyTokens<'s> {
|
||||||
type Item = Token<'s>;
|
type Item = Token<'s>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Token<'s>> {
|
fn next(&mut self) -> Option<Token<'s>> {
|
||||||
@ -577,44 +562,18 @@ impl<'s> Iterator for ParseTokens<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// More useful functions on `str`'s.
|
|
||||||
trait StrExt {
|
|
||||||
/// Whether this word is a valid unicode identifier.
|
|
||||||
fn is_identifier(&self) -> bool;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StrExt for str {
|
|
||||||
fn is_identifier(&self) -> bool {
|
|
||||||
let mut chars = self.chars();
|
|
||||||
|
|
||||||
match chars.next() {
|
|
||||||
Some(c) if !UnicodeXID::is_xid_start(c) => return false,
|
|
||||||
None => return false,
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
while let Some(c) = chars.next() {
|
|
||||||
if !UnicodeXID::is_xid_continue(c) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The error type for parsing.
|
/// The error type for parsing.
|
||||||
pub struct ParseError(String);
|
pub struct ParseError(String);
|
||||||
|
|
||||||
|
/// The result type for parsing.
|
||||||
|
pub type ParseResult<T> = Result<T, ParseError>;
|
||||||
|
|
||||||
impl ParseError {
|
impl ParseError {
|
||||||
fn new<S: Into<String>>(message: S) -> ParseError {
|
fn new<S: Into<String>>(message: S) -> ParseError {
|
||||||
ParseError(message.into())
|
ParseError(message.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The result type for parsing.
|
|
||||||
pub type ParseResult<T> = Result<T, ParseError>;
|
|
||||||
|
|
||||||
error_type! {
|
error_type! {
|
||||||
err: ParseError,
|
err: ParseError,
|
||||||
show: f => f.write_str(&err.0),
|
show: f => f.write_str(&err.0),
|
||||||
@ -734,74 +693,44 @@ mod token_tests {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod parse_tests {
|
mod parse_tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use funcs::*;
|
||||||
use crate::func::{Function, Scope};
|
use crate::func::{Function, Scope};
|
||||||
use Node::{Space as S, Newline as N, Func as F};
|
use Node::{Space as S, Newline as N, Func as F};
|
||||||
|
|
||||||
#[allow(non_snake_case)]
|
/// Two test functions, one which parses it's body as another syntax tree
|
||||||
fn T(s: &str) -> Node { Node::Text(s.to_owned()) }
|
/// and another one which does not expect a body.
|
||||||
|
mod funcs {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
/// A testing function which just parses it's body into a syntax tree.
|
/// A testing function which just parses it's body into a syntax tree.
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
struct TreeFn(SyntaxTree);
|
pub struct TreeFn(pub SyntaxTree);
|
||||||
|
|
||||||
impl Function for TreeFn {
|
impl Function for TreeFn {
|
||||||
fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
|
fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
|
||||||
if let Some(tokens) = context.tokens {
|
if let Some(tokens) = context.tokens {
|
||||||
Parser::with_scope(tokens, context.scope).parse().map(|tree| TreeFn(tree))
|
Parser::new(tokens, context.scope).parse().map(|tree| TreeFn(tree))
|
||||||
} else {
|
} else {
|
||||||
Err(ParseError::new("expected body for tree fn"))
|
Err(ParseError::new("expected body for tree fn"))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
|
||||||
}
|
}
|
||||||
fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A testing function without a body.
|
/// A testing function without a body.
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
struct BodylessFn;
|
pub struct BodylessFn;
|
||||||
|
|
||||||
impl Function for BodylessFn {
|
impl Function for BodylessFn {
|
||||||
fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
|
fn parse(context: ParseContext) -> ParseResult<Self> where Self: Sized {
|
||||||
if context.tokens.is_none() {
|
if context.tokens.is_none() {
|
||||||
Ok(BodylessFn)
|
Ok(BodylessFn)
|
||||||
} else {
|
} else {
|
||||||
Err(ParseError::new("unexpected body for bodyless fn"))
|
Err(ParseError::new("unexpected body for bodyless fn"))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
|
||||||
}
|
}
|
||||||
fn typeset(&self, _header: &FuncHeader) -> Option<Expression> { None }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Shortcut macro to create a function.
|
|
||||||
macro_rules! func {
|
|
||||||
(name => $name:expr, body => None $(,)*) => {
|
|
||||||
func!(@$name, Box::new(BodylessFn))
|
|
||||||
};
|
|
||||||
(name => $name:expr, body => $tree:expr $(,)*) => {
|
|
||||||
func!(@$name, Box::new(TreeFn($tree)))
|
|
||||||
};
|
|
||||||
(@$name:expr, $body:expr) => {
|
|
||||||
FuncCall {
|
|
||||||
header: FuncHeader {
|
|
||||||
name: $name.to_string(),
|
|
||||||
args: vec![],
|
|
||||||
kwargs: HashMap::new(),
|
|
||||||
},
|
|
||||||
body: $body,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Shortcut macro to create a syntax tree.
|
|
||||||
/// Is `vec`-like and the elements are the nodes.
|
|
||||||
macro_rules! tree {
|
|
||||||
($($x:expr),*) => (
|
|
||||||
SyntaxTree { nodes: vec![$($x),*] }
|
|
||||||
);
|
|
||||||
($($x:expr,)*) => (tree![$($x),*])
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse(src: &str, scope: &Scope) -> ParseResult<SyntaxTree> {
|
|
||||||
let mut tokens = ParseTokens::new(src);
|
|
||||||
Parser::with_scope(&mut tokens, scope).parse()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test if the source code parses into the syntax tree.
|
/// Test if the source code parses into the syntax tree.
|
||||||
@ -824,6 +753,45 @@ mod parse_tests {
|
|||||||
assert_eq!(parse(src, &scope).unwrap_err().to_string(), err);
|
assert_eq!(parse(src, &scope).unwrap_err().to_string(), err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse the source code with the given scope.
|
||||||
|
fn parse(src: &str, scope: &Scope) -> ParseResult<SyntaxTree> {
|
||||||
|
let mut tokens = BodyTokens::new(src);
|
||||||
|
Parser::new(&mut tokens, scope).parse()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a text node.
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
fn T(s: &str) -> Node { Node::Text(s.to_owned()) }
|
||||||
|
|
||||||
|
/// Shortcut macro to create a syntax tree.
|
||||||
|
/// Is `vec`-like and the elements are the nodes.
|
||||||
|
macro_rules! tree {
|
||||||
|
($($x:expr),*) => (
|
||||||
|
SyntaxTree { nodes: vec![$($x),*] }
|
||||||
|
);
|
||||||
|
($($x:expr,)*) => (tree![$($x),*])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shortcut macro to create a function.
|
||||||
|
macro_rules! func {
|
||||||
|
(name => $name:expr, body => None $(,)*) => {
|
||||||
|
func!(@$name, Box::new(BodylessFn))
|
||||||
|
};
|
||||||
|
(name => $name:expr, body => $tree:expr $(,)*) => {
|
||||||
|
func!(@$name, Box::new(TreeFn($tree)))
|
||||||
|
};
|
||||||
|
(@$name:expr, $body:expr) => {
|
||||||
|
FuncCall {
|
||||||
|
header: FuncHeader {
|
||||||
|
name: $name.to_string(),
|
||||||
|
args: vec![],
|
||||||
|
kwargs: HashMap::new(),
|
||||||
|
},
|
||||||
|
body: $body,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse the basic cases.
|
/// Parse the basic cases.
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_base() {
|
fn parse_base() {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user