Parser testing prototype 🥥

This commit is contained in:
Laurenz 2020-01-14 20:17:50 +01:00
parent dde69276d4
commit 15ad30555b
12 changed files with 698 additions and 504 deletions

View File

@ -9,9 +9,9 @@ build = "build.rs"
toddle = { path = "../toddle", default-features = false } toddle = { path = "../toddle", default-features = false }
tide = { path = "../tide" } tide = { path = "../tide" }
byteorder = "1" byteorder = "1"
smallvec = "0.6.10" smallvec = "1"
unicode-xid = "0.1.0" unicode-xid = "0.2"
async-trait = "0.1.22" async-trait = "0.1"
futures-executor = { version = "0.3", optional = true } futures-executor = { version = "0.3", optional = true }
[features] [features]

View File

@ -75,6 +75,8 @@ macro_rules! function {
parse($args:ident, $body:pat, $ctx:pat, $metadata:pat) $code:block parse($args:ident, $body:pat, $ctx:pat, $metadata:pat) $code:block
$($rest:tt)* $($rest:tt)*
) => { ) => {
use $crate::func::prelude::*;
impl $crate::func::ParseFunc for $type { impl $crate::func::ParseFunc for $type {
type Meta = $meta; type Meta = $meta;
@ -88,7 +90,8 @@ macro_rules! function {
let mut $args = args; let mut $args = args;
let val = $code; let val = $code;
if !$args.is_empty() { if !$args.is_empty() {
error!(unexpected_argument); return Err($crate::TypesetError
::with_message("unexpected arguments"));
} }
Ok(val) Ok(val)
} }
@ -109,6 +112,8 @@ macro_rules! function {
// (2-arg) Parse a layout-definition with all arguments. // (2-arg) Parse a layout-definition with all arguments.
(@layout $type:ident | layout($this:ident, $ctx:pat) $code:block) => { (@layout $type:ident | layout($this:ident, $ctx:pat) $code:block) => {
use $crate::func::prelude::*;
impl LayoutFunc for $type { impl LayoutFunc for $type {
fn layout<'a, 'life0, 'life1, 'async_trait>( fn layout<'a, 'life0, 'life1, 'async_trait>(
&'a $this, &'a $this,
@ -138,13 +143,13 @@ macro_rules! function {
macro_rules! parse { macro_rules! parse {
(forbidden: $body:expr) => { (forbidden: $body:expr) => {
if $body.is_some() { if $body.is_some() {
error!("unexpected body"); return Err($crate::TypesetError::with_message("unexpected body"));
} }
}; };
(optional: $body:expr, $ctx:expr) => ( (optional: $body:expr, $ctx:expr) => (
if let Some(body) = $body { if let Some(body) = $body {
Some($crate::syntax::parse(body, $ctx)) Some($crate::syntax::parse(body, $ctx).0)
} else { } else {
None None
} }
@ -152,9 +157,9 @@ macro_rules! parse {
(expected: $body:expr, $ctx:expr) => ( (expected: $body:expr, $ctx:expr) => (
if let Some(body) = $body { if let Some(body) = $body {
$crate::syntax::parse(body, $ctx)? $crate::syntax::parse(body, $ctx).0
} else { } else {
error!("expected body"); Err($crate::TypesetError::with_message("unexpected body"))
} }
) )
} }

View File

@ -119,6 +119,7 @@ pub enum Command<'a> {
/// A map from identifiers to function parsers. /// A map from identifiers to function parsers.
pub struct Scope { pub struct Scope {
parsers: HashMap<String, Box<Parser>>, parsers: HashMap<String, Box<Parser>>,
debug: Option<Box<Parser>>
} }
/// A function which parses the source of a function into a function type which /// A function which parses the source of a function into a function type which
@ -129,11 +130,30 @@ type Parser = dyn Fn(
ParseContext ParseContext
) -> ParseResult<Box<dyn LayoutFunc>>; ) -> ParseResult<Box<dyn LayoutFunc>>;
fn make_parser<F>(metadata: <F as ParseFunc>::Meta) -> Box<Parser>
where F: ParseFunc + LayoutFunc + 'static {
Box::new(move |a, b, c| {
F::parse(a, b, c, metadata.clone())
.map(|f| Box::new(f) as Box<dyn LayoutFunc>)
})
}
impl Scope { impl Scope {
/// Create a new empty scope. /// Create a new empty scope.
pub fn new() -> Scope { pub fn new() -> Scope {
Scope { Scope {
parsers: HashMap::new(), parsers: HashMap::new(),
debug: None,
}
}
/// Create a new scope with a debug parser that is invoked if no other
/// match is found.
pub fn with_debug<F>() -> Scope
where F: ParseFunc<Meta=()> + LayoutFunc + 'static {
Scope {
parsers: HashMap::new(),
debug: Some(make_parser::<F>(())),
} }
} }
@ -154,16 +174,14 @@ impl Scope {
where F: ParseFunc + LayoutFunc + 'static { where F: ParseFunc + LayoutFunc + 'static {
self.parsers.insert( self.parsers.insert(
name.to_owned(), name.to_owned(),
Box::new(move |a, b, c| { make_parser::<F>(metadata),
F::parse(a, b, c, metadata.clone())
.map(|f| Box::new(f) as Box<dyn LayoutFunc>)
})
); );
} }
/// Return the parser with the given name if there is one. /// Return the parser with the given name if there is one.
pub(crate) fn get_parser(&self, name: &str) -> Option<&Parser> { pub(crate) fn get_parser(&self, name: &str) -> Option<&Parser> {
self.parsers.get(name).map(|x| &**x) self.parsers.get(name).map(|x| &**x)
.or(self.debug.as_ref().map(|x| &**x))
} }
} }

View File

@ -28,7 +28,7 @@ use toddle::Error as FontError;
use crate::func::Scope; use crate::func::Scope;
use crate::layout::{MultiLayout, LayoutResult}; use crate::layout::{MultiLayout, LayoutResult};
use crate::syntax::{parse, SyntaxTree, ParseContext, Span, ParseResult}; use crate::syntax::{parse, SyntaxTree, Colorization, ErrorMap, ParseContext, Span};
use crate::style::{LayoutStyle, PageStyle, TextStyle}; use crate::style::{LayoutStyle, PageStyle, TextStyle};
#[macro_use] #[macro_use]
@ -84,7 +84,7 @@ impl<'p> Typesetter<'p> {
} }
/// Parse source code into a syntax tree. /// Parse source code into a syntax tree.
pub fn parse(&self, src: &str) -> SyntaxTree { pub fn parse(&self, src: &str) -> (SyntaxTree, Colorization, ErrorMap) {
let scope = Scope::with_std(); let scope = Scope::with_std();
parse(src, ParseContext { scope: &scope }) parse(src, ParseContext { scope: &scope })
} }
@ -115,7 +115,7 @@ impl<'p> Typesetter<'p> {
/// Process source code directly into a layout. /// Process source code directly into a layout.
pub async fn typeset(&self, src: &str) -> TypesetResult<MultiLayout> { pub async fn typeset(&self, src: &str) -> TypesetResult<MultiLayout> {
let tree = self.parse(src); let tree = self.parse(src).0;
let layout = self.layout(&tree).await?; let layout = self.layout(&tree).await?;
Ok(layout) Ok(layout)
} }
@ -132,8 +132,8 @@ pub struct TypesetError {
impl TypesetError { impl TypesetError {
/// Create a new typesetting error. /// Create a new typesetting error.
pub fn with_message(message: String) -> TypesetError { pub fn with_message(message: impl Into<String>) -> TypesetError {
TypesetError { message, span: None } TypesetError { message: message.into(), span: None }
} }
} }

View File

@ -1,28 +1,3 @@
/// Entities which can be colored by syntax highlighting. use super::*;
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ColorToken {
Comment,
Bracket,
FuncName,
Colon,
Key,
Equals,
Comma,
Paren,
Brace,
ExprIdent,
ExprStr,
ExprNumber,
ExprSize,
ExprBool,
Bold,
Italic,
Monospace,
Invalid,
}

View File

@ -91,12 +91,6 @@ pub struct Object {
pub pairs: Vec<Pair>, pub pairs: Vec<Pair>,
} }
#[derive(Clone, PartialEq)]
pub struct Pair {
pub key: Spanned<Ident>,
pub value: Spanned<Expression>,
}
impl Object { impl Object {
pub fn new() -> Object { pub fn new() -> Object {
Object { pairs: vec![] } Object { pairs: vec![] }
@ -120,7 +114,7 @@ impl Display for Object {
if !first { if !first {
write!(f, ", ")?; write!(f, ", ")?;
} }
write!(f, "{}: {}", pair.key.v, pair.value.v)?; write!(f, "{}", pair)?;
first = false; first = false;
} }
@ -128,10 +122,23 @@ impl Display for Object {
} }
} }
#[derive(Clone, PartialEq)]
pub struct Pair {
pub key: Spanned<Ident>,
pub value: Spanned<Expression>,
}
impl Display for Pair {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{}: {}", self.key.v, self.value.v)
}
}
debug_display!(Ident); debug_display!(Ident);
debug_display!(Expression); debug_display!(Expression);
debug_display!(Tuple); debug_display!(Tuple);
debug_display!(Object); debug_display!(Object);
debug_display!(Pair);
/// Kinds of expressions. /// Kinds of expressions.

View File

@ -14,3 +14,247 @@ pub_use_mod!(expr);
pub_use_mod!(tokens); pub_use_mod!(tokens);
pub_use_mod!(parsing); pub_use_mod!(parsing);
pub_use_mod!(span); pub_use_mod!(span);
/// A minimal semantic entity of source code.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Whitespace(usize),
/// A line comment with inner string contents `//<&'s str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<&'s str>*/`. The comment
/// can contain nested block comments.
BlockComment(&'s str),
/// An erroneous `*/` without an opening block comment.
StarSlash,
/// A left bracket: `[`.
LeftBracket,
/// A right bracket: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
RightParen,
/// A left brace in a function header: `{`.
LeftBrace,
/// A right brace in a function header: `}`.
RightBrace,
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `,`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
/// An identifier in a function header: `center`.
ExprIdent(&'s str),
/// A quoted string in a function header: `"..."`.
ExprStr(&'s str),
/// A number in a function header: `3.14`.
ExprNumber(f64),
/// A size in a function header: `12pt`.
ExprSize(Size),
/// A boolean in a function header: `true | false`.
ExprBool(bool),
/// A star in body-text.
Star,
/// An underscore in body-text.
Underscore,
/// A backtick in body-text.
Backtick,
/// Any other consecutive string.
Text(&'s str),
}
/// A tree representation of source code.
#[derive(Debug, PartialEq)]
pub struct SyntaxTree {
pub nodes: Vec<Spanned<Node>>,
}
impl SyntaxTree {
/// Create an empty syntax tree.
pub fn new() -> SyntaxTree {
SyntaxTree { nodes: vec![] }
}
/// Add a node to the tree.
pub fn add(&mut self, node: Spanned<Node>) {
self.nodes.push(node);
}
}
/// A node in the syntax tree.
#[derive(Debug, PartialEq)]
pub enum Node {
/// A number of whitespace characters containing less than two newlines.
Space,
/// Whitespace characters with two or more newlines.
Newline,
/// Plain text.
Text(String),
/// Italics enabled / disabled.
ToggleItalic,
/// Bolder enabled / disabled.
ToggleBolder,
/// Monospace enabled / disabled.
ToggleMonospace,
/// A function invocation.
Func(FuncCall),
}
/// An invocation of a function.
#[derive(Debug)]
pub struct FuncCall(pub Box<dyn LayoutFunc>);
impl PartialEq for FuncCall {
fn eq(&self, other: &FuncCall) -> bool {
&self.0 == &other.0
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Colorization {
pub colors: Vec<Spanned<ColorToken>>,
}
/// Entities which can be colored by syntax highlighting.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ColorToken {
Comment,
Bracket,
FuncName,
Colon,
Key,
Equals,
Comma,
Paren,
Brace,
ExprIdent,
ExprStr,
ExprNumber,
ExprSize,
ExprBool,
Bold,
Italic,
Monospace,
Invalid,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ErrorMap {
pub errors: Vec<Spanned<String>>,
}
#[derive(Debug)]
pub struct FuncHeader {
pub name: Spanned<Ident>,
pub args: FuncArgs,
}
#[derive(Debug)]
pub struct FuncArgs {
positional: Tuple,
keyword: Object,
}
impl FuncArgs {
fn new() -> FuncArgs {
FuncArgs {
positional: Tuple::new(),
keyword: Object::new(),
}
}
/// Add a positional argument.
pub fn add_pos(&mut self, item: Spanned<Expression>) {
self.positional.add(item);
}
/// Force-extract the first positional argument.
pub fn get_pos<E: ExpressionKind>(&mut self) -> ParseResult<E> {
expect(self.get_pos_opt())
}
/// Extract the first positional argument.
pub fn get_pos_opt<E: ExpressionKind>(&mut self) -> ParseResult<Option<E>> {
Ok(if !self.positional.items.is_empty() {
let spanned = self.positional.items.remove(0);
Some(E::from_expr(spanned)?)
} else {
None
})
}
/// Add a keyword argument.
pub fn add_key(&mut self, key: Spanned<Ident>, value: Spanned<Expression>) {
self.keyword.add(key, value);
}
/// Add a keyword argument from an existing pair.
pub fn add_key_pair(&mut self, pair: Pair) {
self.keyword.add_pair(pair);
}
/// Force-extract a keyword argument.
pub fn get_key<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<E> {
expect(self.get_key_opt(name))
}
/// Extract a keyword argument.
pub fn get_key_opt<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<Option<E>> {
self.keyword.pairs.iter()
.position(|p| p.key.v.0 == name)
.map(|index| {
let value = self.keyword.pairs.swap_remove(index).value;
E::from_expr(value)
})
.transpose()
}
/// Iterator over positional arguments.
pub fn iter_pos(&mut self) -> std::vec::IntoIter<Spanned<Expression>> {
let tuple = std::mem::replace(&mut self.positional, Tuple::new());
tuple.items.into_iter()
}
/// Iterator over all keyword arguments.
pub fn iter_keys(&mut self) -> std::vec::IntoIter<Pair> {
let object = std::mem::replace(&mut self.keyword, Object::new());
object.pairs.into_iter()
}
/// Clear the argument lists.
pub fn clear(&mut self) {
self.positional.items.clear();
self.keyword.pairs.clear();
}
/// Whether both the positional and keyword argument lists are empty.
pub fn is_empty(&self) -> bool {
self.positional.items.is_empty() && self.keyword.pairs.is_empty()
}
}
/// Extract the option expression kind from the option or return an error.
fn expect<E: ExpressionKind>(opt: ParseResult<Option<E>>) -> ParseResult<E> {
match opt {
Ok(Some(spanned)) => Ok(spanned),
Ok(None) => error!("expected {}", E::NAME),
Err(e) => Err(e),
}
}

View File

@ -1,147 +1,10 @@
use std::iter::Peekable;
use crate::func::Scope; use crate::func::Scope;
use super::*; use super::*;
use Token::*; use Token::*;
/// A tree representation of source code.
#[derive(Debug, PartialEq)]
pub struct SyntaxTree {
pub nodes: Vec<Spanned<Node>>,
}
impl SyntaxTree {
/// Create an empty syntax tree.
pub fn new() -> SyntaxTree {
SyntaxTree { nodes: vec![] }
}
}
/// A node in the syntax tree.
#[derive(Debug, PartialEq)]
pub enum Node {
/// A number of whitespace characters containing less than two newlines.
Space,
/// Whitespace characters with more than two newlines.
Newline,
/// Plain text.
Text(String),
/// Italics enabled / disabled.
ToggleItalic,
/// Bolder enabled / disabled.
ToggleBolder,
/// Monospace enabled / disabled.
ToggleMonospace,
/// A function invocation.
Func(FuncCall),
}
/// An invocation of a function.
#[derive(Debug)]
pub struct FuncCall(pub Box<dyn LayoutFunc>);
impl PartialEq for FuncCall {
fn eq(&self, other: &FuncCall) -> bool {
&self.0 == &other.0
}
}
#[derive(Debug)]
pub struct FuncArgs {
positional: Tuple,
keyword: Object,
}
impl FuncArgs {
fn new() -> FuncArgs {
FuncArgs {
positional: Tuple::new(),
keyword: Object::new(),
}
}
/// Add a positional argument.
pub fn add_pos(&mut self, item: Spanned<Expression>) {
self.positional.add(item);
}
/// Force-extract the first positional argument.
pub fn get_pos<E: ExpressionKind>(&mut self) -> ParseResult<E> {
expect(self.get_pos_opt())
}
/// Extract the first positional argument.
pub fn get_pos_opt<E: ExpressionKind>(&mut self) -> ParseResult<Option<E>> {
Ok(if !self.positional.items.is_empty() {
let spanned = self.positional.items.remove(0);
Some(E::from_expr(spanned)?)
} else {
None
})
}
/// Add a keyword argument.
pub fn add_key(&mut self, key: Spanned<Ident>, value: Spanned<Expression>) {
self.keyword.add(key, value);
}
/// Add a keyword argument from an existing pair.
pub fn add_key_pair(&mut self, pair: Pair) {
self.keyword.add_pair(pair);
}
/// Force-extract a keyword argument.
pub fn get_key<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<E> {
expect(self.get_key_opt(name))
}
/// Extract a keyword argument.
pub fn get_key_opt<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<Option<E>> {
self.keyword.pairs.iter()
.position(|p| p.key.v.0 == name)
.map(|index| {
let value = self.keyword.pairs.swap_remove(index).value;
E::from_expr(value)
})
.transpose()
}
/// Iterator over positional arguments.
pub fn iter_pos(&mut self) -> std::vec::IntoIter<Spanned<Expression>> {
let tuple = std::mem::replace(&mut self.positional, Tuple::new());
tuple.items.into_iter()
}
/// Iterator over all keyword arguments.
pub fn iter_keys(&mut self) -> std::vec::IntoIter<Pair> {
let object = std::mem::replace(&mut self.keyword, Object::new());
object.pairs.into_iter()
}
/// Clear the argument lists.
pub fn clear(&mut self) {
self.positional.items.clear();
self.keyword.pairs.clear();
}
/// Whether both the positional and keyword argument lists are empty.
pub fn is_empty(&self) -> bool {
self.positional.items.is_empty() && self.keyword.pairs.is_empty()
}
}
/// Extract the option expression kind from the option or return an error.
fn expect<E: ExpressionKind>(opt: ParseResult<Option<E>>) -> ParseResult<E> {
match opt {
Ok(Some(spanned)) => Ok(spanned),
Ok(None) => error!("expected {}", E::NAME),
Err(e) => Err(e),
}
}
/// Parses source code into a syntax tree given a context. /// Parses source code into a syntax tree given a context.
pub fn parse(src: &str, ctx: ParseContext) -> SyntaxTree { pub fn parse(src: &str, ctx: ParseContext) -> (SyntaxTree, Colorization, ErrorMap) {
Parser::new(src, ctx).parse() Parser::new(src, ctx).parse()
} }
@ -155,16 +18,13 @@ pub struct ParseContext<'a> {
struct Parser<'s> { struct Parser<'s> {
src: &'s str, src: &'s str,
ctx: ParseContext<'s>, ctx: ParseContext<'s>,
tokens: Peekable<Tokens<'s>>, colorization: Colorization,
errors: Vec<Spanned<String>>, error_map: ErrorMap,
colored: Vec<Spanned<ColorToken>>,
span: Span,
}
macro_rules! defer { tokens: Tokens<'s>,
($($tts:tt)*) => ( peeked: Option<Option<Spanned<Token<'s>>>>,
unimplemented!() position: Position,
); last_position: Position,
} }
impl<'s> Parser<'s> { impl<'s> Parser<'s> {
@ -172,81 +32,128 @@ impl<'s> Parser<'s> {
Parser { Parser {
src, src,
ctx, ctx,
tokens: Tokens::new(src).peekable(), error_map: ErrorMap { errors: vec![] },
errors: vec![], colorization: Colorization { colors: vec![] },
colored: vec![],
span: Span::ZERO, tokens: Tokens::new(src),
peeked: None,
position: Position::ZERO,
last_position: Position::ZERO,
} }
} }
fn parse(mut self) -> SyntaxTree { fn parse(mut self) -> (SyntaxTree, Colorization, ErrorMap) {
let mut tree = SyntaxTree::new(); let mut tree = SyntaxTree::new();
loop { loop {
self.skip_whitespace(); if let Some(spanned) = self.eat() {
match spanned.v {
LineComment(_) | BlockComment(_) => {}
let start = self.position(); Whitespace(newlines) => {
tree.add(spanned.map_v(if newlines >= 2 {
Node::Newline
} else {
Node::Space
}));
}
let node = match self.next() { LeftBracket => {
Some(LeftBracket) => self.parse_func().map(|f| Node::Func(f)), if let Some(func) = self.parse_func() {
Some(Star) => Some(Node::ToggleBolder), tree.add(func);
Some(Underscore) => Some(Node::ToggleItalic), }
Some(Backtick) => Some(Node::ToggleMonospace), }
Some(Text(text)) => Some(Node::Text(text.to_owned())),
Some(other) => { self.unexpected(other); None },
None => break,
};
if let Some(node) = node { Star => tree.add(spanned.map_v(Node::ToggleBolder)),
let end = self.position(); Underscore => tree.add(spanned.map_v(Node::ToggleItalic)),
Backtick => tree.add(spanned.map_v(Node::ToggleMonospace)),
Text(text) => tree.add(spanned.map_v(Node::Text(text.to_owned()))),
_ => self.unexpected(spanned),
}
} else {
break;
}
}
(tree, self.colorization, self.error_map)
}
fn parse_func(&mut self) -> Option<Spanned<Node>> {
let start = self.last_pos();
let header = self.parse_func_header();
let call = self.parse_func_call(header)?;
let end = self.pos();
let span = Span { start, end }; let span = Span { start, end };
tree.nodes.push(Spanned { v: node, span }); Some(Spanned { v: Node::Func(call), span })
}
} }
tree fn parse_func_header(&mut self) -> Option<FuncHeader> {
}
fn parse_func(&mut self) -> Option<FuncCall> {
let (name, args) = self.parse_func_header()?;
self.parse_func_call(name, args)
}
fn parse_func_header(&mut self) -> Option<(Spanned<Ident>, FuncArgs)> {
defer! { self.eat_until(|t| t == RightBracket, true); }
self.skip_whitespace(); self.skip_whitespace();
let name = self.parse_func_name()?; let name = self.parse_func_name().or_else(|| {
self.eat_until(|t| t == RightBracket, true);
None
})?;
self.skip_whitespace(); self.skip_whitespace();
let args = match self.eat() {
let args = match self.next() { Some(Spanned { v: Colon, .. }) => self.parse_func_args(),
Some(Colon) => self.parse_func_args(), Some(Spanned { v: RightBracket, .. }) => FuncArgs::new(),
Some(RightBracket) => FuncArgs::new(),
other => { other => {
self.expected("colon or closing bracket", other); self.expected("colon or closing bracket", other);
self.eat_until(|t| t == RightBracket, true);
FuncArgs::new() FuncArgs::new()
} }
}; };
Some((name, args)) Some(FuncHeader { name, args })
} }
fn parse_func_call( fn parse_func_call(&mut self, header: Option<FuncHeader>) -> Option<FuncCall> {
&mut self, println!("peek: {:?}", self.peek());
name: Spanned<Ident>,
args: FuncArgs, let body = if self.peek() == Some(LeftBracket) {
) -> Option<FuncCall> { self.eat();
unimplemented!()
let start = self.tokens.index();
let found = self.tokens.move_to_closing_bracket();
let end = self.tokens.index();
self.last_position = self.position;
self.position = self.tokens.pos();
let body = &self.src[start .. end];
if found {
assert_eq!(self.eat().map(Spanned::value), Some(RightBracket));
} else {
self.error_here("expected closing bracket");
}
Some(body)
} else {
None
};
let header = header?;
let name = header.name;
let parser = self.ctx.scope.get_parser(name.v.as_str()).or_else(|| {
self.error(format!("unknown function: `{}`", name.v), name.span);
None
})?;
Some(FuncCall(parser(header.args, body, self.ctx).unwrap()))
} }
fn parse_func_name(&mut self) -> Option<Spanned<Ident>> { fn parse_func_name(&mut self) -> Option<Spanned<Ident>> {
match self.next() { match self.eat() {
Some(ExprIdent(ident)) => { Some(Spanned { v: ExprIdent(ident), span }) => {
self.color_span(ColorToken::FuncName, self.span(), true); self.color(Spanned { v: ColorToken::FuncName, span }, true);
Some(Spanned { v: Ident(ident.to_string()), span: self.span() }) Some(Spanned { v: Ident(ident.to_string()), span })
} }
other => { other => {
self.expected("identifier", other); self.expected("identifier", other);
@ -256,119 +163,16 @@ impl<'s> Parser<'s> {
} }
fn parse_func_args(&mut self) -> FuncArgs { fn parse_func_args(&mut self) -> FuncArgs {
enum State { // unimplemented!()
Start, FuncArgs::new()
Identifier(Spanned<Ident>),
Assignment(Spanned<Ident>),
Value,
} }
impl State { fn parse_tuple(&mut self) -> Spanned<Expression> {
fn expected(&self) -> &'static str { unimplemented!("parse_tuple")
match self {
State::Start => "value or key",
State::Identifier(_) => "comma or assignment",
State::Assignment(_) => "value",
State::Value => "comma",
}
}
} }
let mut args = FuncArgs::new(); fn parse_object(&mut self) -> Spanned<Expression> {
let mut state = State::Start; unimplemented!("parse_object")
loop {
self.skip_whitespace();
/*
let token = self.next();
match token {
Some(ExprIdent(ident)) => match state {
State::Start => {
state = State::Identifier(Spanned {
v: Ident(ident.to_string()),
span: self.span(),
});
}
State::Identifier(prev) => {
self.expected(state.expected(), token);
args.add_pos(prev.map(|id| Expression::Ident(id)));
state = State::Identifier(Spanned {
v: Ident(ident.to_string()),
span: self.span(),
});
}
State::Assignment(key) => {
let span = Span::merge(key.span, self.span());
args.add_key(Spanned::new(KeyArg {
key,
value: Spanned {
v: Expression::Ident(Ident(ident.to_string())),
span: self.span(),
},
}, span));
state = State::Value;
}
State::Value => {
self.expected(state.expected(), token);
state = State::Identifier(Spanned {
v: Ident(ident.to_string()),
span: self.span(),
});
}
}
// Handle expressions.
Some(Expr(_)) | Some(LeftParen) | Some(LeftBrace) => {
let expr = match token.unwrap() {
Expr(e) => e,
LeftParen => self.parse_tuple(),
LeftBrace => self.parse_object(),
_ => unreachable!(),
}
}
// Handle commas after values.
Some(Comma) => match state {
State::Identifier(ident) => {
args.add_pos(ident.map(|id| Expression::Ident(id)));
state = State::Start;
}
State::Value => state = State::Start,
_ => self.expected(state.expected(), token),
}
// Handle the end of the function header.
Some(RightBracket) => {
match state {
State::Identifier(ident) => {
args.add_pos(ident.map(|id| Expression::Ident(id)));
}
State::Assignment(_) => {
self.expected(state.expected(), token);
}
_ => {}
}
break;
}
}
*/
}
args
}
fn handle_expr(&mut self, expr: Spanned<Expression>) {
}
fn parse_tuple(&mut self) -> Spanned<Tuple> {
unimplemented!()
}
fn parse_object(&mut self) -> Spanned<Object> {
unimplemented!()
} }
fn skip_whitespace(&mut self) { fn skip_whitespace(&mut self) {
@ -378,58 +182,42 @@ impl<'s> Parser<'s> {
}, false) }, false)
} }
fn eat_until<F>(&mut self, mut f: F, eat_match: bool) fn expected(&mut self, thing: &str, found: Option<Spanned<Token>>) {
where F: FnMut(Token<'s>) -> bool { if let Some(Spanned { v: found, span }) = found {
while let Some(token) = self.tokens.peek() { self.error(
if f(token.v) { format!("expected {}, found {}", thing, name(found)),
if eat_match { span
self.next(); );
}
break;
}
self.next();
}
}
fn next(&mut self) -> Option<Token<'s>> {
self.tokens.next().map(|spanned| {
self.color_token(&spanned.v, spanned.span);
self.span = spanned.span;
spanned.v
})
}
fn span(&self) -> Span {
self.span
}
fn position(&self) -> Position {
self.span.end
}
fn unexpected(&mut self, found: Token) {
self.errors.push(Spanned {
v: format!("unexpected {}", name(found)),
span: self.span(),
});
}
fn expected(&mut self, thing: &str, found: Option<Token>) {
let message = if let Some(found) = found {
format!("expected {}, found {}", thing, name(found))
} else { } else {
format!("expected {}", thing) self.error_here(format!("expected {}", thing));
}; }
self.errors.push(Spanned {
v: message,
span: self.span(),
});
} }
fn color_token(&mut self, token: &Token<'s>, span: Span) { fn unexpected(&mut self, found: Spanned<Token>) {
let colored = match token { self.error_map.errors.push(found.map(|t| format!("unexpected {}", name(t))));
}
fn error(&mut self, message: impl Into<String>, span: Span) {
self.error_map.errors.push(Spanned { v: message.into(), span });
}
fn error_here(&mut self, message: impl Into<String>) {
self.error(message, Span::at(self.pos()));
}
fn color(&mut self, token: Spanned<ColorToken>, replace_last: bool) {
if replace_last {
if let Some(last) = self.colorization.colors.last_mut() {
*last = token;
return;
}
}
self.colorization.colors.push(token);
}
fn color_token(&mut self, token: Spanned<Token<'s>>) {
let colored = match token.v {
LineComment(_) | BlockComment(_) => Some(ColorToken::Comment), LineComment(_) | BlockComment(_) => Some(ColorToken::Comment),
StarSlash => Some(ColorToken::Invalid), StarSlash => Some(ColorToken::Invalid),
LeftBracket | RightBracket => Some(ColorToken::Bracket), LeftBracket | RightBracket => Some(ColorToken::Bracket),
@ -447,21 +235,49 @@ impl<'s> Parser<'s> {
}; };
if let Some(color) = colored { if let Some(color) = colored {
self.colored.push(Spanned { v: color, span }); self.colorization.colors.push(Spanned { v: color, span: token.span });
} }
} }
fn color_span(&mut self, color: ColorToken, span: Span, replace_last: bool) { fn eat_until<F>(&mut self, mut f: F, eat_match: bool)
let token = Spanned { v: color, span }; where F: FnMut(Token<'s>) -> bool {
while let Some(token) = self.peek() {
if f(token) {
if eat_match {
self.eat();
}
break;
}
if replace_last { self.eat();
if let Some(last) = self.colored.last_mut() {
*last = token;
return;
} }
} }
self.colored.push(token); fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
let token = self.peeked.take().unwrap_or_else(|| self.tokens.next());
self.last_position = self.position;
if let Some(spanned) = token {
self.color_token(spanned);
self.position = spanned.span.end;
}
token
}
fn peek(&mut self) -> Option<Token<'s>> {
let iter = &mut self.tokens;
self.peeked
.get_or_insert_with(|| iter.next())
.map(Spanned::value)
}
fn pos(&self) -> Position {
self.position
}
fn last_pos(&self) -> Position {
self.last_position
} }
} }

View File

@ -6,64 +6,6 @@ use Token::*;
use State::*; use State::*;
/// A minimal semantic entity of source code.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Whitespace(usize),
/// A line comment with inner string contents `//<&'s str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<&'s str>*/`. The comment
/// can contain nested block comments.
BlockComment(&'s str),
/// An erroneous `*/` without an opening block comment.
StarSlash,
/// A left bracket: `[`.
LeftBracket,
/// A right bracket: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
RightParen,
/// A left brace in a function header: `{`.
LeftBrace,
/// A right brace in a function header: `}`.
RightBrace,
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `:`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
/// An identifier in a function header: `center`.
ExprIdent(&'s str),
/// A quoted string in a function header: `"..."`.
ExprStr(&'s str),
/// A number in a function header: `3.14`.
ExprNumber(f64),
/// A size in a function header: `12pt`.
ExprSize(Size),
/// A boolean in a function header: `true | false`.
ExprBool(bool),
/// A star in body-text.
Star,
/// An underscore in body-text.
Underscore,
/// A backtick in body-text.
Backtick,
/// Any other consecutive string.
Text(&'s str),
}
/// Decomposes text into a sequence of semantic tokens. /// Decomposes text into a sequence of semantic tokens.
pub fn tokenize(src: &str) -> Tokens { pub fn tokenize(src: &str) -> Tokens {
Tokens::new(src) Tokens::new(src)
@ -97,6 +39,47 @@ impl<'s> Tokens<'s> {
index: 0, index: 0,
} }
} }
/// The index in the string at which the last token ends and next token will
/// start.
pub fn index(&self) -> usize {
self.index
}
/// The line-column position in the source at which the last token ends and
/// next token will start.
pub fn pos(&self) -> Position {
self.position
}
/// Move through the string until an unbalanced closing bracket is found
/// without tokenizing the contents.
///
/// Returns whether a closing bracket was found or the end of the string was
/// reached.
pub fn move_to_closing_bracket(&mut self) -> bool {
let mut escaped = false;
let mut depth = 0;
self.read_string_until(|n| {
match n {
'[' if !escaped => depth += 1,
']' if !escaped => {
if depth == 0 {
return true;
} else {
depth -= 1;
}
}
'\\' => escaped = !escaped,
_ => escaped = false,
}
false
}, false, 0, 0);
self.peek() == Some(']')
}
} }
impl<'s> Iterator for Tokens<'s> { impl<'s> Iterator for Tokens<'s> {
@ -118,8 +101,13 @@ impl<'s> Iterator for Tokens<'s> {
// Functions. // Functions.
'[' => { '[' => {
if self.state == Header || self.state == Body {
self.stack.push(self.state); self.stack.push(self.state);
self.state = Header; self.state = Header;
} else {
self.state = Body;
}
LeftBracket LeftBracket
} }
']' => { ']' => {
@ -221,12 +209,10 @@ impl<'s> Tokens<'s> {
fn parse_string(&mut self) -> Token<'s> { fn parse_string(&mut self) -> Token<'s> {
let mut escaped = false; let mut escaped = false;
ExprStr(self.read_string_until(|n| { ExprStr(self.read_string_until(|n| {
if n == '"' && !escaped { match n {
return true; '"' if !escaped => return true,
} else if n == '\\' { '\\' => escaped = !escaped,
escaped = !escaped; _ => escaped = false,
} else {
escaped = false;
} }
false false
@ -316,14 +302,6 @@ impl<'s> Tokens<'s> {
fn peek(&mut self) -> Option<char> { fn peek(&mut self) -> Option<char> {
self.iter.peek().copied() self.iter.peek().copied()
} }
fn index(&self) -> usize {
self.index
}
fn pos(&self) -> Position {
self.position
}
} }
fn parse_percentage(text: &str) -> Option<f64> { fn parse_percentage(text: &str) -> Option<f64> {

View File

@ -1,9 +1,15 @@
#![allow(unused_imports)] #![allow(unused_imports)]
#![allow(dead_code)]
#![allow(non_snake_case)] #![allow(non_snake_case)]
use typstc::func::Scope;
use typstc::size::Size; use typstc::size::Size;
use typstc::syntax::*; use typstc::syntax::*;
use Token::{ use typstc::{function, parse};
mod token_shorthands {
pub use super::Token::{
Whitespace as W, Whitespace as W,
LineComment as LC, BlockComment as BC, StarSlash as SS, LineComment as LC, BlockComment as BC, StarSlash as SS,
LeftBracket as LB, RightBracket as RB, LeftBracket as LB, RightBracket as RB,
@ -14,34 +20,140 @@ use Token::{
ExprNumber as NUM, ExprBool as BOOL, ExprNumber as NUM, ExprBool as BOOL,
Star as ST, Underscore as U, Backtick as B, Text as T, Star as ST, Underscore as U, Backtick as B, Text as T,
}; };
}
mod node_shorthands {
use super::Node;
pub use Node::{
Space as S, Newline as N, Text,
ToggleItalic as I, ToggleBolder as B, ToggleMonospace as M,
Func,
};
pub fn T(text: &str) -> Node { Node::Text(text.to_string()) }
}
/// Shorthand for an expected function node in a parse tree.
///
/// `F!(None)` and `F!(Some([..nodes..]))` expand to a `Func` node wrapping a
/// boxed `DebugFn` with empty positional and keyword argument lists and the
/// given optional body.
macro_rules! F {
// Internal rule: a function without a body.
(@body None) => (None);
// Internal rule: wrap every listed node in a `Spanned` with `Span::ZERO`
// and collect the result into the nodes of a `SyntaxTree`.
(@body Some([$($tts:tt)*])) => ({
let nodes = vec![$($tts)*].into_iter()
.map(|v| Spanned { v, span: Span::ZERO })
.collect();
Some(SyntaxTree { nodes })
});
// Entry point. It must come after the `@body` rules because it accepts any
// token sequence and would otherwise match the internal invocations too.
($($body:tt)*) => ({
Func(FuncCall(Box::new(DebugFn {
pos: vec![],
key: vec![],
body: F!(@body $($body)*),
})))
});
}
// Defines `DebugFn`, a function type for the parser tests that stores the
// positional arguments, keyword arguments and optional body it was parsed
// with, so that the parse result can be inspected afterwards.
function! {
#[derive(Debug, PartialEq)]
pub struct DebugFn {
pos: Vec<Spanned<Expression>>,
key: Vec<Pair>,
body: Option<SyntaxTree>,
}
// Collect everything yielded by `iter_pos` and `iter_keys`; the body is
// handed to `parse!(optional: ..)` (presumably parsed only when present;
// confirm against the `parse!` macro definition).
parse(args, body, ctx) {
DebugFn {
pos: args.iter_pos().collect(),
key: args.iter_keys().collect(),
body: parse!(optional: body, ctx),
}
}
// Layouting yields an empty list.
layout() { vec![] }
}
impl DebugFn {
    /// Compares two debug functions by value, ignoring all spans.
    ///
    /// Two functions are considered equal when they have the same number of
    /// positional arguments with pairwise equal values, the same number of
    /// keyword arguments with pairwise equal keys and values, and bodies
    /// that are either both absent or equal according to the free
    /// `compare` function.
    fn compare(&self, other: &DebugFn) -> bool {
        // `zip` stops at the shorter list, so the lengths have to be checked
        // explicitly; otherwise missing or surplus arguments go unnoticed.
        self.pos.len() == other.pos.len()
            && self.key.len() == other.key.len()
            && self.pos.iter().zip(&other.pos).all(|(a, b)| a.v == b.v)
            && self.key.iter().zip(&other.key)
                .all(|(a, b)| a.key.v == b.key.v && a.value.v == b.value.v)
            && match (&self.body, &other.body) {
                (Some(a), Some(b)) => compare(a, b),
                (None, None) => true,
                _ => false,
            }
    }
}
/// Views the function behind a `FuncCall` as a `DebugFn`.
///
/// Panics with "not a debug fn" if the downcast does not succeed.
fn downcast(func: &FuncCall) -> &DebugFn {
    let debug_fn = func.0.downcast::<DebugFn>();
    debug_fn.expect("not a debug fn")
}
/// Compares two syntax trees node by node, ignoring all spans.
///
/// Returns `true` only if both trees have the same number of nodes and each
/// pair of nodes matches: the same kind of space, newline or toggle, equal
/// text, or functions that are equal according to `DebugFn::compare`.
///
/// Function nodes are downcast to `DebugFn`; see `downcast` for what
/// happens when that is not possible.
fn compare(a: &SyntaxTree, b: &SyntaxTree) -> bool {
    use node_shorthands::*;

    // `zip` silently stops at the shorter tree, so without this check an
    // expected tree that is a prefix of the parsed one (or vice versa) would
    // be accepted as equal.
    if a.nodes.len() != b.nodes.len() {
        return false;
    }

    a.nodes.iter().zip(&b.nodes).all(|(x, y)| match (&x.v, &y.v) {
        (S, S) | (N, N) | (I, I) | (B, B) | (M, M) => true,
        (Text(t1), Text(t2)) => t1 == t2,
        (Func(f1), Func(f2)) => downcast(f1).compare(downcast(f2)),
        _ => false,
    })
}
/// Parses the test syntax. /// Parses the test syntax.
macro_rules! tokens { macro_rules! tokens {
($($task:ident $src:expr =>($line:expr)=> [$($target:tt)*])*) => ({ ($($task:ident $src:expr =>($line:expr)=> [$($tts:tt)*])*) => ({
#[allow(unused_mut)] #[allow(unused_mut)]
let mut cases = Vec::new(); let mut cases = Vec::new();
$(cases.push(($line, $src, tokens!(@$task [$($target)*])));)* $(cases.push(($line, $src, tokens!(@$task [$($tts)*])));)*
cases cases
}); });
(@t $tokens:expr) => ({ (@t [$($tts:tt)*]) => ({
Target::Tokenized($tokens.to_vec()) use token_shorthands::*;
Target::Tokenize(vec![$($tts)*])
}); });
(@ts [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $t:expr)),* $(,)?]) => ({ (@ts [$($tts:tt)*]) => ({
Target::TokenizedSpanned(vec![ use token_shorthands::*;
$(Spanned { v: $t, span: Span { Target::TokenizeSpanned(tokens!(@__spans [$($tts)*]))
});
(@p [$($tts:tt)*]) => ({
use node_shorthands::*;
let nodes = vec![$($tts)*].into_iter()
.map(|v| Spanned { v, span: Span::ZERO })
.collect();
Target::Parse(SyntaxTree { nodes })
});
(@ps [$($tts:tt)*]) => ({
use node_shorthands::*;
Target::ParseSpanned(tokens!(@__spans [$($tts)*]))
});
(@__spans [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $v:expr)),* $(,)?]) => ({
vec![
$(Spanned { v: $v, span: Span {
start: Position { line: $sl, column: $sc }, start: Position { line: $sl, column: $sc },
end: Position { line: $el, column: $ec }, end: Position { line: $el, column: $ec },
}}),* }}),*
]) ]
}); });
} }
#[derive(Debug)] #[derive(Debug)]
enum Target { enum Target {
Tokenized(Vec<Token<'static>>), Tokenize(Vec<Token<'static>>),
TokenizedSpanned(Vec<Spanned<Token<'static>>>), TokenizeSpanned(Vec<Spanned<Token<'static>>>),
Parse(SyntaxTree),
ParseSpanned(SyntaxTree),
} }
fn main() { fn main() {
@ -75,6 +187,7 @@ fn main() {
println!(" - Source: {:?}", src); println!(" - Source: {:?}", src);
println!(" - Expected: {:?}", expected); println!(" - Expected: {:?}", expected);
println!(" - Found: {:?}", found); println!(" - Found: {:?}", found);
println!();
failed += 1; failed += 1;
errors = true; errors = true;
@ -98,14 +211,26 @@ fn main() {
fn test_case(src: &str, target: Target) -> (bool, String, String) { fn test_case(src: &str, target: Target) -> (bool, String, String) {
match target { match target {
Target::Tokenized(tokens) => { Target::Tokenize(tokens) => {
let found: Vec<_> = tokenize(src).map(Spanned::value).collect(); let found: Vec<_> = tokenize(src).map(Spanned::value).collect();
(found == tokens, format!("{:?}", tokens), format!("{:?}", found)) (found == tokens, format!("{:?}", tokens), format!("{:?}", found))
} }
Target::TokenizedSpanned(tokens) => { Target::TokenizeSpanned(tokens) => {
let found: Vec<_> = tokenize(src).collect(); let found: Vec<_> = tokenize(src).collect();
(found == tokens, format!("{:?}", tokens), format!("{:?}", found)) (found == tokens, format!("{:?}", tokens), format!("{:?}", found))
} }
Target::Parse(tree) => {
let scope = Scope::with_debug::<DebugFn>();
let (found, _, errs) = parse(src, ParseContext { scope: &scope });
(compare(&tree, &found), format!("{:?}", tree), format!("{:?}", found))
}
Target::ParseSpanned(tree) => {
let scope = Scope::with_debug::<DebugFn>();
let (found, _, _) = parse(src, ParseContext { scope: &scope });
(tree == found, format!("{:?}", tree), format!("{:?}", found))
}
} }
} }

View File

@ -46,6 +46,12 @@ t "[func]*bold*" => [LB, ID("func"), RB, ST, T("bold"), ST]
t "[_*`]" => [LB, T("_"), T("*"), T("`"), RB] t "[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
t "hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")] t "hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
// Nested functions.
t "[f: [=][*]]" => [LB, ID("f"), CL, W(0), LB, EQ, RB, LB, ST, RB, RB]
t "[_][[,],]," => [LB, T("_"), RB, LB, LB, CM, RB, T(","), RB, T(",")]
t "[=][=][=]" => [LB, EQ, RB, LB, T("="), RB, LB, EQ, RB]
t "[=][[=][=][=]]" => [LB, EQ, RB, LB, LB, EQ, RB, LB, T("="), RB, LB, EQ, RB, RB]
// Escapes. // Escapes.
t r"\[" => [T("[")] t r"\[" => [T("[")]
t r"\]" => [T("]")] t r"\]" => [T("]")]

20
tests/parsing/trees.rs Normal file
View File

@ -0,0 +1,20 @@
p "" => []
p "hi" => [T("hi")]
p "hi you" => [T("hi"), S, T("you")]
p "\n\n 🌍" => [T(""), N, T("🌍")]
p "[func]" => [F!(None)]
p "[tree][hi *you*]" => [F!(Some([T("hi"), S, B, T("you"), B]))]
// p "from [align: left] to" => [
// T("from"), S,
// F!("align", pos=[ID("left")], None),
// S, T("to"),
// ]
// p "[box: x=1.2pt, false][a b c] bye" => [
// F!(
// "box",
// pos=[BOOL(false)],
// key=["x": SIZE(Size::pt(1.2))],
// Some([T("a"), S, T("b"), S, T("c")]),
// ),
// S, T("bye"),
// ]