Parser testing prototype 🥥

This commit is contained in:
Laurenz 2020-01-14 20:17:50 +01:00
parent dde69276d4
commit 15ad30555b
12 changed files with 698 additions and 504 deletions

View File

@ -9,9 +9,9 @@ build = "build.rs"
toddle = { path = "../toddle", default-features = false }
tide = { path = "../tide" }
byteorder = "1"
smallvec = "0.6.10"
unicode-xid = "0.1.0"
async-trait = "0.1.22"
smallvec = "1"
unicode-xid = "0.2"
async-trait = "0.1"
futures-executor = { version = "0.3", optional = true }
[features]

View File

@ -75,6 +75,8 @@ macro_rules! function {
parse($args:ident, $body:pat, $ctx:pat, $metadata:pat) $code:block
$($rest:tt)*
) => {
use $crate::func::prelude::*;
impl $crate::func::ParseFunc for $type {
type Meta = $meta;
@ -88,7 +90,8 @@ macro_rules! function {
let mut $args = args;
let val = $code;
if !$args.is_empty() {
error!(unexpected_argument);
return Err($crate::TypesetError
::with_message("unexpected arguments"));
}
Ok(val)
}
@ -109,6 +112,8 @@ macro_rules! function {
// (2-arg) Parse a layout-definition with all arguments.
(@layout $type:ident | layout($this:ident, $ctx:pat) $code:block) => {
use $crate::func::prelude::*;
impl LayoutFunc for $type {
fn layout<'a, 'life0, 'life1, 'async_trait>(
&'a $this,
@ -138,13 +143,13 @@ macro_rules! function {
macro_rules! parse {
(forbidden: $body:expr) => {
if $body.is_some() {
error!("unexpected body");
return Err($crate::TypesetError::with_message("unexpected body"));
}
};
(optional: $body:expr, $ctx:expr) => (
if let Some(body) = $body {
Some($crate::syntax::parse(body, $ctx))
Some($crate::syntax::parse(body, $ctx).0)
} else {
None
}
@ -152,9 +157,9 @@ macro_rules! parse {
(expected: $body:expr, $ctx:expr) => (
if let Some(body) = $body {
$crate::syntax::parse(body, $ctx)?
$crate::syntax::parse(body, $ctx).0
} else {
error!("expected body");
Err($crate::TypesetError::with_message("unexpected body"))
}
)
}

View File

@ -119,6 +119,7 @@ pub enum Command<'a> {
/// A map from identifiers to function parsers.
pub struct Scope {
// Parsers registered under the function name they handle.
parsers: HashMap<String, Box<Parser>>,
// Fallback parser returned by `get_parser` when `parsers` has no entry for
// the requested name; `None` if no fallback is installed.
debug: Option<Box<Parser>>
}
/// A function which parses the source of a function into a function type which
@ -129,11 +130,30 @@ type Parser = dyn Fn(
ParseContext
) -> ParseResult<Box<dyn LayoutFunc>>;
/// Wraps `F::parse` in a boxed, type-erased parser.
///
/// Every invocation hands a fresh clone of `metadata` to `F::parse` and boxes
/// the successfully parsed function as a `dyn LayoutFunc`; parse errors are
/// passed through untouched.
fn make_parser<F>(metadata: <F as ParseFunc>::Meta) -> Box<Parser>
where F: ParseFunc + LayoutFunc + 'static {
    Box::new(move |args, body, ctx| {
        match F::parse(args, body, ctx, metadata.clone()) {
            Ok(func) => Ok(Box::new(func) as Box<dyn LayoutFunc>),
            Err(err) => Err(err),
        }
    })
}
impl Scope {
/// Create a scope with no registered parsers and no debug fallback.
pub fn new() -> Scope {
    let parsers = HashMap::new();
    Scope { parsers, debug: None }
}
/// Create a new scope with a debug parser that is invoked if no other
/// match is found.
///
/// The scope starts without any named parsers; `F` is parsed with the unit
/// value as its metadata.
pub fn with_debug<F>() -> Scope
where F: ParseFunc<Meta=()> + LayoutFunc + 'static {
    let fallback = make_parser::<F>(());
    Scope {
        parsers: HashMap::new(),
        debug: Some(fallback),
    }
}
@ -154,16 +174,14 @@ impl Scope {
where F: ParseFunc + LayoutFunc + 'static {
self.parsers.insert(
name.to_owned(),
Box::new(move |a, b, c| {
F::parse(a, b, c, metadata.clone())
.map(|f| Box::new(f) as Box<dyn LayoutFunc>)
})
make_parser::<F>(metadata),
);
}
/// Look up the parser registered under `name`.
///
/// Falls back to the debug parser when no parser with that name exists and
/// returns `None` only if there is no debug parser either.
pub(crate) fn get_parser(&self, name: &str) -> Option<&Parser> {
    match self.parsers.get(name) {
        Some(parser) => Some(&**parser),
        None => self.debug.as_ref().map(|fallback| &**fallback),
    }
}
}

View File

@ -28,7 +28,7 @@ use toddle::Error as FontError;
use crate::func::Scope;
use crate::layout::{MultiLayout, LayoutResult};
use crate::syntax::{parse, SyntaxTree, ParseContext, Span, ParseResult};
use crate::syntax::{parse, SyntaxTree, Colorization, ErrorMap, ParseContext, Span};
use crate::style::{LayoutStyle, PageStyle, TextStyle};
#[macro_use]
@ -84,7 +84,7 @@ impl<'p> Typesetter<'p> {
}
/// Parse source code into a syntax tree.
pub fn parse(&self, src: &str) -> SyntaxTree {
pub fn parse(&self, src: &str) -> (SyntaxTree, Colorization, ErrorMap) {
let scope = Scope::with_std();
parse(src, ParseContext { scope: &scope })
}
@ -115,7 +115,7 @@ impl<'p> Typesetter<'p> {
/// Process source code directly into a layout.
pub async fn typeset(&self, src: &str) -> TypesetResult<MultiLayout> {
let tree = self.parse(src);
let tree = self.parse(src).0;
let layout = self.layout(&tree).await?;
Ok(layout)
}
@ -132,8 +132,8 @@ pub struct TypesetError {
impl TypesetError {
/// Create a new typesetting error.
pub fn with_message(message: String) -> TypesetError {
TypesetError { message, span: None }
pub fn with_message(message: impl Into<String>) -> TypesetError {
TypesetError { message: message.into(), span: None }
}
}

View File

@ -1,28 +1,3 @@
/// Entities which can be colored by syntax highlighting.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ColorToken {
Comment,
use super::*;
Bracket,
FuncName,
Colon,
Key,
Equals,
Comma,
Paren,
Brace,
ExprIdent,
ExprStr,
ExprNumber,
ExprSize,
ExprBool,
Bold,
Italic,
Monospace,
Invalid,
}

View File

@ -91,12 +91,6 @@ pub struct Object {
pub pairs: Vec<Pair>,
}
#[derive(Clone, PartialEq)]
pub struct Pair {
pub key: Spanned<Ident>,
pub value: Spanned<Expression>,
}
impl Object {
pub fn new() -> Object {
Object { pairs: vec![] }
@ -120,7 +114,7 @@ impl Display for Object {
if !first {
write!(f, ", ")?;
}
write!(f, "{}: {}", pair.key.v, pair.value.v)?;
write!(f, "{}", pair)?;
first = false;
}
@ -128,10 +122,23 @@ impl Display for Object {
}
}
/// A key-value pair as stored in an `Object`.
#[derive(Clone, PartialEq)]
pub struct Pair {
/// The identifier naming the entry, together with its span.
pub key: Spanned<Ident>,
/// The expression bound to the key, together with its span.
pub value: Spanned<Expression>,
}
impl Display for Pair {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{}: {}", self.key.v, self.value.v)
}
}
debug_display!(Ident);
debug_display!(Expression);
debug_display!(Tuple);
debug_display!(Object);
debug_display!(Pair);
/// Kinds of expressions.

View File

@ -14,3 +14,247 @@ pub_use_mod!(expr);
pub_use_mod!(tokens);
pub_use_mod!(parsing);
pub_use_mod!(span);
/// A minimal semantic entity of source code.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Whitespace(usize),
/// A line comment with inner string contents `//<&'s str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<&'s str>*/`. The comment
/// can contain nested block comments.
BlockComment(&'s str),
/// An erroneous `*/` without an opening block comment.
StarSlash,
/// A left bracket: `[`.
LeftBracket,
/// A right bracket: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
RightParen,
/// A left brace in a function header: `{`.
LeftBrace,
/// A right brace in a function header: `}`.
RightBrace,
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `,`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
/// An identifier in a function header: `center`.
ExprIdent(&'s str),
/// A quoted string in a function header: `"..."`.
ExprStr(&'s str),
/// A number in a function header: `3.14`.
ExprNumber(f64),
/// A size in a function header: `12pt`.
ExprSize(Size),
/// A boolean in a function header: `true | false`.
ExprBool(bool),
/// A star in body-text.
Star,
/// An underscore in body-text.
Underscore,
/// A backtick in body-text.
Backtick,
/// Any other consecutive string.
Text(&'s str),
}
/// A tree representation of source code.
#[derive(Debug, PartialEq)]
pub struct SyntaxTree {
/// The top-level nodes of the tree, each paired with its source span.
pub nodes: Vec<Spanned<Node>>,
}
impl SyntaxTree {
    /// Construct a syntax tree that contains no nodes.
    pub fn new() -> SyntaxTree {
        SyntaxTree { nodes: Vec::new() }
    }

    /// Append `node` to the end of the tree.
    pub fn add(&mut self, node: Spanned<Node>) {
        self.nodes.push(node)
    }
}
/// A node in the syntax tree.
#[derive(Debug, PartialEq)]
pub enum Node {
/// A number of whitespace characters containing fewer than two newlines.
Space,
/// Whitespace characters containing two or more newlines.
Newline,
/// Plain text.
Text(String),
/// Italics enabled / disabled.
ToggleItalic,
/// Bolder enabled / disabled.
ToggleBolder,
/// Monospace enabled / disabled.
ToggleMonospace,
/// A function invocation.
Func(FuncCall),
}
/// An invocation of a function.
///
/// Wraps the type-erased, already parsed function that will perform the
/// layouting.
#[derive(Debug)]
pub struct FuncCall(pub Box<dyn LayoutFunc>);
impl PartialEq for FuncCall {
// Two calls are equal when their boxed layout functions compare equal.
// NOTE(review): the comparison goes through references, presumably so the
// boxed trait objects are never moved out of `self`/`other` -- confirm
// before simplifying this to `self.0 == other.0` or deriving it.
fn eq(&self, other: &FuncCall) -> bool {
&self.0 == &other.0
}
}
/// Syntax highlighting information: a list of color tokens with their spans.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Colorization {
/// The colored entities paired with their source spans.
pub colors: Vec<Spanned<ColorToken>>,
}
/// Entities which can be colored by syntax highlighting.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ColorToken {
/// A line or block comment.
Comment,
/// A left or right bracket.
Bracket,
/// The identifier naming a function in its header.
FuncName,
Colon,
Key,
Equals,
Comma,
Paren,
Brace,
ExprIdent,
ExprStr,
ExprNumber,
ExprSize,
ExprBool,
Bold,
Italic,
Monospace,
/// Invalid syntax, such as a `*/` without an opening block comment.
Invalid,
}
/// A list of error messages, each attached to a span in the source.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ErrorMap {
/// The error messages paired with the spans they refer to.
pub errors: Vec<Spanned<String>>,
}
/// The header of a function invocation: its name and its arguments.
#[derive(Debug)]
pub struct FuncHeader {
/// The identifier naming the function, with its span.
pub name: Spanned<Ident>,
/// The positional and keyword arguments passed to the function.
pub args: FuncArgs,
}
/// The arguments passed to a function, split into positional and keyword
/// arguments.
#[derive(Debug)]
pub struct FuncArgs {
// Positional arguments in the order they were added.
positional: Tuple,
// Keyword arguments stored as key-value pairs.
keyword: Object,
}
impl FuncArgs {
    /// Create an argument list without positional or keyword arguments.
    fn new() -> FuncArgs {
        let positional = Tuple::new();
        let keyword = Object::new();
        FuncArgs { positional, keyword }
    }

    /// Append a positional argument.
    pub fn add_pos(&mut self, item: Spanned<Expression>) {
        self.positional.add(item);
    }

    /// Remove and convert the first positional argument, failing if there is
    /// none.
    pub fn get_pos<E: ExpressionKind>(&mut self) -> ParseResult<E> {
        let first = self.get_pos_opt();
        expect(first)
    }

    /// Remove and convert the first positional argument if there is one.
    ///
    /// The argument is removed even when the conversion to `E` fails.
    pub fn get_pos_opt<E: ExpressionKind>(&mut self) -> ParseResult<Option<E>> {
        if self.positional.items.is_empty() {
            return Ok(None);
        }

        let first = self.positional.items.remove(0);
        Ok(Some(E::from_expr(first)?))
    }

    /// Append a keyword argument built from a key and a value.
    pub fn add_key(&mut self, key: Spanned<Ident>, value: Spanned<Expression>) {
        self.keyword.add(key, value);
    }

    /// Append a keyword argument that already exists as a pair.
    pub fn add_key_pair(&mut self, pair: Pair) {
        self.keyword.add_pair(pair);
    }

    /// Remove and convert the keyword argument called `name`, failing if it
    /// does not exist.
    pub fn get_key<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<E> {
        let found = self.get_key_opt(name);
        expect(found)
    }

    /// Remove and convert the keyword argument called `name` if it exists.
    ///
    /// The pair is taken out with `swap_remove`, so the order of the
    /// remaining keyword arguments may change.
    pub fn get_key_opt<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<Option<E>> {
        let found = self.keyword.pairs.iter()
            .position(|pair| pair.key.v.0 == name);

        match found {
            Some(index) => {
                let pair = self.keyword.pairs.swap_remove(index);
                E::from_expr(pair.value).map(Some)
            }
            None => Ok(None),
        }
    }

    /// Take all positional arguments out of the list and iterate over them.
    pub fn iter_pos(&mut self) -> std::vec::IntoIter<Spanned<Expression>> {
        std::mem::replace(&mut self.positional, Tuple::new())
            .items
            .into_iter()
    }

    /// Take all keyword arguments out of the list and iterate over them.
    pub fn iter_keys(&mut self) -> std::vec::IntoIter<Pair> {
        std::mem::replace(&mut self.keyword, Object::new())
            .pairs
            .into_iter()
    }

    /// Drop all positional and keyword arguments.
    pub fn clear(&mut self) {
        self.positional.items.clear();
        self.keyword.pairs.clear();
    }

    /// Whether neither positional nor keyword arguments are left.
    pub fn is_empty(&self) -> bool {
        let no_positional = self.positional.items.is_empty();
        no_positional && self.keyword.pairs.is_empty()
    }
}
/// Extract the option expression kind from the option or return an error.
fn expect<E: ExpressionKind>(opt: ParseResult<Option<E>>) -> ParseResult<E> {
match opt {
Ok(Some(spanned)) => Ok(spanned),
Ok(None) => error!("expected {}", E::NAME),
Err(e) => Err(e),
}
}

View File

@ -1,147 +1,10 @@
use std::iter::Peekable;
use crate::func::Scope;
use super::*;
use Token::*;
/// A tree representation of source code.
#[derive(Debug, PartialEq)]
pub struct SyntaxTree {
pub nodes: Vec<Spanned<Node>>,
}
impl SyntaxTree {
/// Create an empty syntax tree.
pub fn new() -> SyntaxTree {
SyntaxTree { nodes: vec![] }
}
}
/// A node in the syntax tree.
#[derive(Debug, PartialEq)]
pub enum Node {
/// A number of whitespace characters containing less than two newlines.
Space,
/// Whitespace characters with more than two newlines.
Newline,
/// Plain text.
Text(String),
/// Italics enabled / disabled.
ToggleItalic,
/// Bolder enabled / disabled.
ToggleBolder,
/// Monospace enabled / disabled.
ToggleMonospace,
/// A function invocation.
Func(FuncCall),
}
/// An invocation of a function.
#[derive(Debug)]
pub struct FuncCall(pub Box<dyn LayoutFunc>);
impl PartialEq for FuncCall {
fn eq(&self, other: &FuncCall) -> bool {
&self.0 == &other.0
}
}
#[derive(Debug)]
pub struct FuncArgs {
positional: Tuple,
keyword: Object,
}
impl FuncArgs {
fn new() -> FuncArgs {
FuncArgs {
positional: Tuple::new(),
keyword: Object::new(),
}
}
/// Add a positional argument.
pub fn add_pos(&mut self, item: Spanned<Expression>) {
self.positional.add(item);
}
/// Force-extract the first positional argument.
pub fn get_pos<E: ExpressionKind>(&mut self) -> ParseResult<E> {
expect(self.get_pos_opt())
}
/// Extract the first positional argument.
pub fn get_pos_opt<E: ExpressionKind>(&mut self) -> ParseResult<Option<E>> {
Ok(if !self.positional.items.is_empty() {
let spanned = self.positional.items.remove(0);
Some(E::from_expr(spanned)?)
} else {
None
})
}
/// Add a keyword argument.
pub fn add_key(&mut self, key: Spanned<Ident>, value: Spanned<Expression>) {
self.keyword.add(key, value);
}
/// Add a keyword argument from an existing pair.
pub fn add_key_pair(&mut self, pair: Pair) {
self.keyword.add_pair(pair);
}
/// Force-extract a keyword argument.
pub fn get_key<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<E> {
expect(self.get_key_opt(name))
}
/// Extract a keyword argument.
pub fn get_key_opt<E: ExpressionKind>(&mut self, name: &str) -> ParseResult<Option<E>> {
self.keyword.pairs.iter()
.position(|p| p.key.v.0 == name)
.map(|index| {
let value = self.keyword.pairs.swap_remove(index).value;
E::from_expr(value)
})
.transpose()
}
/// Iterator over positional arguments.
pub fn iter_pos(&mut self) -> std::vec::IntoIter<Spanned<Expression>> {
let tuple = std::mem::replace(&mut self.positional, Tuple::new());
tuple.items.into_iter()
}
/// Iterator over all keyword arguments.
pub fn iter_keys(&mut self) -> std::vec::IntoIter<Pair> {
let object = std::mem::replace(&mut self.keyword, Object::new());
object.pairs.into_iter()
}
/// Clear the argument lists.
pub fn clear(&mut self) {
self.positional.items.clear();
self.keyword.pairs.clear();
}
/// Whether both the positional and keyword argument lists are empty.
pub fn is_empty(&self) -> bool {
self.positional.items.is_empty() && self.keyword.pairs.is_empty()
}
}
/// Extract the option expression kind from the option or return an error.
fn expect<E: ExpressionKind>(opt: ParseResult<Option<E>>) -> ParseResult<E> {
match opt {
Ok(Some(spanned)) => Ok(spanned),
Ok(None) => error!("expected {}", E::NAME),
Err(e) => Err(e),
}
}
/// Parses source code into a syntax tree given a context.
pub fn parse(src: &str, ctx: ParseContext) -> SyntaxTree {
pub fn parse(src: &str, ctx: ParseContext) -> (SyntaxTree, Colorization, ErrorMap) {
Parser::new(src, ctx).parse()
}
@ -155,16 +18,13 @@ pub struct ParseContext<'a> {
struct Parser<'s> {
src: &'s str,
ctx: ParseContext<'s>,
tokens: Peekable<Tokens<'s>>,
errors: Vec<Spanned<String>>,
colored: Vec<Spanned<ColorToken>>,
span: Span,
}
colorization: Colorization,
error_map: ErrorMap,
macro_rules! defer {
($($tts:tt)*) => (
unimplemented!()
);
tokens: Tokens<'s>,
peeked: Option<Option<Spanned<Token<'s>>>>,
position: Position,
last_position: Position,
}
impl<'s> Parser<'s> {
@ -172,81 +32,128 @@ impl<'s> Parser<'s> {
Parser {
src,
ctx,
tokens: Tokens::new(src).peekable(),
errors: vec![],
colored: vec![],
span: Span::ZERO,
error_map: ErrorMap { errors: vec![] },
colorization: Colorization { colors: vec![] },
tokens: Tokens::new(src),
peeked: None,
position: Position::ZERO,
last_position: Position::ZERO,
}
}
fn parse(mut self) -> SyntaxTree {
fn parse(mut self) -> (SyntaxTree, Colorization, ErrorMap) {
let mut tree = SyntaxTree::new();
loop {
self.skip_whitespace();
if let Some(spanned) = self.eat() {
match spanned.v {
LineComment(_) | BlockComment(_) => {}
let start = self.position();
Whitespace(newlines) => {
tree.add(spanned.map_v(if newlines >= 2 {
Node::Newline
} else {
Node::Space
}));
}
let node = match self.next() {
Some(LeftBracket) => self.parse_func().map(|f| Node::Func(f)),
Some(Star) => Some(Node::ToggleBolder),
Some(Underscore) => Some(Node::ToggleItalic),
Some(Backtick) => Some(Node::ToggleMonospace),
Some(Text(text)) => Some(Node::Text(text.to_owned())),
Some(other) => { self.unexpected(other); None },
None => break,
};
LeftBracket => {
if let Some(func) = self.parse_func() {
tree.add(func);
}
}
if let Some(node) = node {
let end = self.position();
Star => tree.add(spanned.map_v(Node::ToggleBolder)),
Underscore => tree.add(spanned.map_v(Node::ToggleItalic)),
Backtick => tree.add(spanned.map_v(Node::ToggleMonospace)),
Text(text) => tree.add(spanned.map_v(Node::Text(text.to_owned()))),
_ => self.unexpected(spanned),
}
} else {
break;
}
}
(tree, self.colorization, self.error_map)
}
fn parse_func(&mut self) -> Option<Spanned<Node>> {
let start = self.last_pos();
let header = self.parse_func_header();
let call = self.parse_func_call(header)?;
let end = self.pos();
let span = Span { start, end };
tree.nodes.push(Spanned { v: node, span });
}
Some(Spanned { v: Node::Func(call), span })
}
tree
}
fn parse_func(&mut self) -> Option<FuncCall> {
let (name, args) = self.parse_func_header()?;
self.parse_func_call(name, args)
}
fn parse_func_header(&mut self) -> Option<(Spanned<Ident>, FuncArgs)> {
defer! { self.eat_until(|t| t == RightBracket, true); }
fn parse_func_header(&mut self) -> Option<FuncHeader> {
self.skip_whitespace();
let name = self.parse_func_name()?;
let name = self.parse_func_name().or_else(|| {
self.eat_until(|t| t == RightBracket, true);
None
})?;
self.skip_whitespace();
let args = match self.next() {
Some(Colon) => self.parse_func_args(),
Some(RightBracket) => FuncArgs::new(),
let args = match self.eat() {
Some(Spanned { v: Colon, .. }) => self.parse_func_args(),
Some(Spanned { v: RightBracket, .. }) => FuncArgs::new(),
other => {
self.expected("colon or closing bracket", other);
self.eat_until(|t| t == RightBracket, true);
FuncArgs::new()
}
};
Some((name, args))
Some(FuncHeader { name, args })
}
fn parse_func_call(
&mut self,
name: Spanned<Ident>,
args: FuncArgs,
) -> Option<FuncCall> {
unimplemented!()
/// Parses the optional body that follows a function header and invokes the
/// parser registered for the function.
///
/// The body is consumed even when `header` is `None`, so that parsing can
/// continue after the call. Returns `None` if the header is missing, if no
/// parser is found for the function name, or if the parser rejects the call;
/// the latter two cases are recorded in the error map at the name's span.
fn parse_func_call(&mut self, header: Option<FuncHeader>) -> Option<FuncCall> {
    let body = if self.peek() == Some(LeftBracket) {
        self.eat();

        // Skip the raw body text without tokenizing it and remember which
        // part of the source it covers.
        let start = self.tokens.index();
        let found = self.tokens.move_to_closing_bracket();
        let end = self.tokens.index();

        // The tokenizer was advanced directly, so bring the parser's
        // positions back in sync with it.
        self.last_position = self.position;
        self.position = self.tokens.pos();

        let body = &self.src[start .. end];

        if found {
            assert_eq!(self.eat().map(Spanned::value), Some(RightBracket));
        } else {
            self.error_here("expected closing bracket");
        }

        Some(body)
    } else {
        None
    };

    let header = header?;
    let name = header.name;
    let parser = self.ctx.scope.get_parser(name.v.as_str()).or_else(|| {
        self.error(format!("unknown function: `{}`", name.v), name.span);
        None
    })?;

    // A function parser may reject its arguments or body. Record that as an
    // error instead of panicking on user input.
    match parser(header.args, body, self.ctx) {
        Ok(func) => Some(FuncCall(func)),
        Err(err) => {
            // NOTE(review): uses the error's `Debug` output because that is
            // the only formatting this block is known to support; switch to
            // the error's message if it is accessible here.
            self.error(format!("{:?}", err), name.span);
            None
        }
    }
}
fn parse_func_name(&mut self) -> Option<Spanned<Ident>> {
match self.next() {
Some(ExprIdent(ident)) => {
self.color_span(ColorToken::FuncName, self.span(), true);
Some(Spanned { v: Ident(ident.to_string()), span: self.span() })
match self.eat() {
Some(Spanned { v: ExprIdent(ident), span }) => {
self.color(Spanned { v: ColorToken::FuncName, span }, true);
Some(Spanned { v: Ident(ident.to_string()), span })
}
other => {
self.expected("identifier", other);
@ -256,119 +163,16 @@ impl<'s> Parser<'s> {
}
fn parse_func_args(&mut self) -> FuncArgs {
enum State {
Start,
Identifier(Spanned<Ident>),
Assignment(Spanned<Ident>),
Value,
// unimplemented!()
FuncArgs::new()
}
impl State {
fn expected(&self) -> &'static str {
match self {
State::Start => "value or key",
State::Identifier(_) => "comma or assignment",
State::Assignment(_) => "value",
State::Value => "comma",
}
}
fn parse_tuple(&mut self) -> Spanned<Expression> {
unimplemented!("parse_tuple")
}
let mut args = FuncArgs::new();
let mut state = State::Start;
loop {
self.skip_whitespace();
/*
let token = self.next();
match token {
Some(ExprIdent(ident)) => match state {
State::Start => {
state = State::Identifier(Spanned {
v: Ident(ident.to_string()),
span: self.span(),
});
}
State::Identifier(prev) => {
self.expected(state.expected(), token);
args.add_pos(prev.map(|id| Expression::Ident(id)));
state = State::Identifier(Spanned {
v: Ident(ident.to_string()),
span: self.span(),
});
}
State::Assignment(key) => {
let span = Span::merge(key.span, self.span());
args.add_key(Spanned::new(KeyArg {
key,
value: Spanned {
v: Expression::Ident(Ident(ident.to_string())),
span: self.span(),
},
}, span));
state = State::Value;
}
State::Value => {
self.expected(state.expected(), token);
state = State::Identifier(Spanned {
v: Ident(ident.to_string()),
span: self.span(),
});
}
}
// Handle expressions.
Some(Expr(_)) | Some(LeftParen) | Some(LeftBrace) => {
let expr = match token.unwrap() {
Expr(e) => e,
LeftParen => self.parse_tuple(),
LeftBrace => self.parse_object(),
_ => unreachable!(),
}
}
// Handle commas after values.
Some(Comma) => match state {
State::Identifier(ident) => {
args.add_pos(ident.map(|id| Expression::Ident(id)));
state = State::Start;
}
State::Value => state = State::Start,
_ => self.expected(state.expected(), token),
}
// Handle the end of the function header.
Some(RightBracket) => {
match state {
State::Identifier(ident) => {
args.add_pos(ident.map(|id| Expression::Ident(id)));
}
State::Assignment(_) => {
self.expected(state.expected(), token);
}
_ => {}
}
break;
}
}
*/
}
args
}
fn handle_expr(&mut self, expr: Spanned<Expression>) {
}
fn parse_tuple(&mut self) -> Spanned<Tuple> {
unimplemented!()
}
fn parse_object(&mut self) -> Spanned<Object> {
unimplemented!()
fn parse_object(&mut self) -> Spanned<Expression> {
unimplemented!("parse_object")
}
fn skip_whitespace(&mut self) {
@ -378,58 +182,42 @@ impl<'s> Parser<'s> {
}, false)
}
fn eat_until<F>(&mut self, mut f: F, eat_match: bool)
where F: FnMut(Token<'s>) -> bool {
while let Some(token) = self.tokens.peek() {
if f(token.v) {
if eat_match {
self.next();
}
break;
}
self.next();
}
}
fn next(&mut self) -> Option<Token<'s>> {
self.tokens.next().map(|spanned| {
self.color_token(&spanned.v, spanned.span);
self.span = spanned.span;
spanned.v
})
}
fn span(&self) -> Span {
self.span
}
fn position(&self) -> Position {
self.span.end
}
fn unexpected(&mut self, found: Token) {
self.errors.push(Spanned {
v: format!("unexpected {}", name(found)),
span: self.span(),
});
}
fn expected(&mut self, thing: &str, found: Option<Token>) {
let message = if let Some(found) = found {
format!("expected {}, found {}", thing, name(found))
fn expected(&mut self, thing: &str, found: Option<Spanned<Token>>) {
if let Some(Spanned { v: found, span }) = found {
self.error(
format!("expected {}, found {}", thing, name(found)),
span
);
} else {
format!("expected {}", thing)
};
self.errors.push(Spanned {
v: message,
span: self.span(),
});
self.error_here(format!("expected {}", thing));
}
}
fn color_token(&mut self, token: &Token<'s>, span: Span) {
let colored = match token {
fn unexpected(&mut self, found: Spanned<Token>) {
self.error_map.errors.push(found.map(|t| format!("unexpected {}", name(t))));
}
/// Records an error with the given message at `span`.
fn error(&mut self, message: impl Into<String>, span: Span) {
    let v = message.into();
    self.error_map.errors.push(Spanned { v, span });
}
/// Records an error with the given message at the parser's current position.
fn error_here(&mut self, message: impl Into<String>) {
    let here = Span::at(self.pos());
    self.error(message, here);
}
/// Adds a color token to the colorization.
///
/// With `replace_last` set, the most recently added color token is
/// overwritten instead; if there is none yet, the token is appended as usual.
fn color(&mut self, token: Spanned<ColorToken>, replace_last: bool) {
    let colors = &mut self.colorization.colors;

    if replace_last && !colors.is_empty() {
        let last = colors.len() - 1;
        colors[last] = token;
    } else {
        colors.push(token);
    }
}
fn color_token(&mut self, token: Spanned<Token<'s>>) {
let colored = match token.v {
LineComment(_) | BlockComment(_) => Some(ColorToken::Comment),
StarSlash => Some(ColorToken::Invalid),
LeftBracket | RightBracket => Some(ColorToken::Bracket),
@ -447,21 +235,49 @@ impl<'s> Parser<'s> {
};
if let Some(color) = colored {
self.colored.push(Spanned { v: color, span });
self.colorization.colors.push(Spanned { v: color, span: token.span });
}
}
fn color_span(&mut self, color: ColorToken, span: Span, replace_last: bool) {
let token = Spanned { v: color, span };
fn eat_until<F>(&mut self, mut f: F, eat_match: bool)
where F: FnMut(Token<'s>) -> bool {
while let Some(token) = self.peek() {
if f(token) {
if eat_match {
self.eat();
}
break;
}
if replace_last {
if let Some(last) = self.colored.last_mut() {
*last = token;
return;
self.eat();
}
}
self.colored.push(token);
/// Consumes and returns the next token, or `None` at the end of the source.
///
/// A previously peeked token is used up first. The old position is saved as
/// the last position; a consumed token is colored and moves the position to
/// the end of its span.
fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
    let next = match self.peeked.take() {
        Some(peeked) => peeked,
        None => self.tokens.next(),
    };

    self.last_position = self.position;

    if let Some(token) = next {
        self.color_token(token);
        self.position = token.span.end;
    }

    next
}
/// Returns the next token without consuming it, or `None` at the end of the
/// source.
///
/// The token is pulled from the tokenizer on the first call and cached in
/// `self.peeked`, where `eat` picks it up later.
fn peek(&mut self) -> Option<Token<'s>> {
// Borrow the tokenizer on its own so that the closure below does not need
// `self` while `self.peeked` is mutably borrowed.
let iter = &mut self.tokens;
self.peeked
.get_or_insert_with(|| iter.next())
.map(Spanned::value)
}
/// The parser's current position, as last updated by `eat` or by skipping
/// over a function body.
fn pos(&self) -> Position {
self.position
}
/// The position the parser was at before the current position was last
/// updated.
fn last_pos(&self) -> Position {
self.last_position
}
}

View File

@ -6,64 +6,6 @@ use Token::*;
use State::*;
/// A minimal semantic entity of source code.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Whitespace(usize),
/// A line comment with inner string contents `//<&'s str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<&'s str>*/`. The comment
/// can contain nested block comments.
BlockComment(&'s str),
/// An erroneous `*/` without an opening block comment.
StarSlash,
/// A left bracket: `[`.
LeftBracket,
/// A right bracket: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
RightParen,
/// A left brace in a function header: `{`.
LeftBrace,
/// A right brace in a function header: `}`.
RightBrace,
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `:`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
/// An identifier in a function header: `center`.
ExprIdent(&'s str),
/// A quoted string in a function header: `"..."`.
ExprStr(&'s str),
/// A number in a function header: `3.14`.
ExprNumber(f64),
/// A size in a function header: `12pt`.
ExprSize(Size),
/// A boolean in a function header: `true | false`.
ExprBool(bool),
/// A star in body-text.
Star,
/// An underscore in body-text.
Underscore,
/// A backtick in body-text.
Backtick,
/// Any other consecutive string.
Text(&'s str),
}
/// Decomposes text into a sequence of semantic tokens.
pub fn tokenize(src: &str) -> Tokens {
Tokens::new(src)
@ -97,6 +39,47 @@ impl<'s> Tokens<'s> {
index: 0,
}
}
/// The byte index in the source string at which the last token ends and the
/// next token will start.
pub fn index(&self) -> usize {
self.index
}
/// The line-column position in the source at which the last token ends and
/// the next token will start.
pub fn pos(&self) -> Position {
self.position
}
/// Move through the string until an unbalanced closing bracket is found
/// without tokenizing the contents.
///
/// Brackets preceded by an unescaped backslash are ignored and nested
/// bracket pairs are skipped as a whole.
///
/// Returns `true` if the next character after the skipped text is `]` and
/// `false` otherwise.
/// NOTE(review): `false` presumably means the end of the string was reached
/// first -- confirm against `read_string_until`.
pub fn move_to_closing_bracket(&mut self) -> bool {
let mut escaped = false;
let mut depth = 0;
self.read_string_until(|n| {
match n {
// An unescaped `[` opens a nested pair that has to be closed again.
'[' if !escaped => depth += 1,
']' if !escaped => {
// Only a `]` outside of all nested pairs ends the search.
if depth == 0 {
return true;
} else {
depth -= 1;
}
}
// A backslash toggles the escape state, so `\\` escapes nothing.
'\\' => escaped = !escaped,
// Any other character, including an escaped bracket, resets it.
_ => escaped = false,
}
false
}, false, 0, 0);
self.peek() == Some(']')
}
}
impl<'s> Iterator for Tokens<'s> {
@ -118,8 +101,13 @@ impl<'s> Iterator for Tokens<'s> {
// Functions.
'[' => {
if self.state == Header || self.state == Body {
self.stack.push(self.state);
self.state = Header;
} else {
self.state = Body;
}
LeftBracket
}
']' => {
@ -221,12 +209,10 @@ impl<'s> Tokens<'s> {
fn parse_string(&mut self) -> Token<'s> {
let mut escaped = false;
ExprStr(self.read_string_until(|n| {
if n == '"' && !escaped {
return true;
} else if n == '\\' {
escaped = !escaped;
} else {
escaped = false;
match n {
'"' if !escaped => return true,
'\\' => escaped = !escaped,
_ => escaped = false,
}
false
@ -316,14 +302,6 @@ impl<'s> Tokens<'s> {
fn peek(&mut self) -> Option<char> {
self.iter.peek().copied()
}
fn index(&self) -> usize {
self.index
}
fn pos(&self) -> Position {
self.position
}
}
fn parse_percentage(text: &str) -> Option<f64> {

View File

@ -1,9 +1,15 @@
#![allow(unused_imports)]
#![allow(dead_code)]
#![allow(non_snake_case)]
use typstc::func::Scope;
use typstc::size::Size;
use typstc::syntax::*;
use Token::{
use typstc::{function, parse};
mod token_shorthands {
pub use super::Token::{
Whitespace as W,
LineComment as LC, BlockComment as BC, StarSlash as SS,
LeftBracket as LB, RightBracket as RB,
@ -14,34 +20,140 @@ use Token::{
ExprNumber as NUM, ExprBool as BOOL,
Star as ST, Underscore as U, Backtick as B, Text as T,
};
}
// Short aliases for syntax nodes, used to write expected parse trees
// compactly in the test cases.
mod node_shorthands {
use super::Node;
pub use Node::{
Space as S, Newline as N, Text,
ToggleItalic as I, ToggleBolder as B, ToggleMonospace as M,
Func,
};
// Shorthand for building a text node from a string slice.
pub fn T(text: &str) -> Node { Node::Text(text.to_string()) }
}
// Builds an expected function node wrapping a `DebugFn` without arguments.
//
// `F!(None)` expects a function without a body, `F!(Some([nodes...]))` a
// function whose body parses to the given nodes.
macro_rules! F {
// No body.
(@body None) => (None);
// A body: the listed nodes become a syntax tree with zeroed spans.
(@body Some([$($tts:tt)*])) => ({
let nodes = vec![$($tts)*].into_iter()
.map(|v| Spanned { v, span: Span::ZERO })
.collect();
Some(SyntaxTree { nodes })
});
// Entry point: the positional and keyword argument lists are always empty.
($($body:tt)*) => ({
Func(FuncCall(Box::new(DebugFn {
pos: vec![],
key: vec![],
body: F!(@body $($body)*),
})))
});
}
// A function for testing the parser: it stores every positional argument,
// every keyword argument and the parsed body it was invoked with, and lays
// out nothing.
function! {
#[derive(Debug, PartialEq)]
pub struct DebugFn {
pos: Vec<Spanned<Expression>>,
key: Vec<Pair>,
body: Option<SyntaxTree>,
}
parse(args, body, ctx) {
DebugFn {
pos: args.iter_pos().collect(),
key: args.iter_keys().collect(),
body: parse!(optional: body, ctx),
}
}
layout() { vec![] }
}
impl DebugFn {
    /// Compares two debug functions while ignoring all spans.
    ///
    /// The functions are equal if they have the same positional arguments,
    /// the same keyword arguments in the same order, and bodies that are
    /// either both absent or equal according to the free function `compare`.
    fn compare(&self, other: &DebugFn) -> bool {
        // `zip` stops at the shorter list, so the lengths have to be checked
        // separately; otherwise a list would equal any of its prefixes.
        self.pos.len() == other.pos.len()
            && self.key.len() == other.key.len()
            && self.pos.iter().zip(&other.pos).all(|(a, b)| a.v == b.v)
            && self.key.iter().zip(&other.key)
                .all(|(a, b)| a.key.v == b.key.v && a.value.v == b.value.v)
            && match (&self.body, &other.body) {
                (Some(a), Some(b)) => compare(a, b),
                (None, None) => true,
                _ => false,
            }
    }
}
// Returns the `DebugFn` behind a function call.
//
// Panics with "not a debug fn" if the call wraps a different function type.
fn downcast(func: &FuncCall) -> &DebugFn {
func.0.downcast::<DebugFn>().expect("not a debug fn")
}
/// Compares two syntax trees node by node while ignoring all spans.
///
/// Function nodes are compared through `DebugFn::compare`, so both trees may
/// only contain debug functions. Trees with a different number of nodes are
/// never equal.
fn compare(a: &SyntaxTree, b: &SyntaxTree) -> bool {
    use node_shorthands::*;

    // `zip` stops at the shorter list. Without this check an expected tree
    // that is only a prefix of the parsed one (or empty) would pass.
    if a.nodes.len() != b.nodes.len() {
        return false;
    }

    a.nodes.iter().zip(&b.nodes).all(|(x, y)| match (&x.v, &y.v) {
        (S, S) | (N, N) | (I, I) | (B, B) | (M, M) => true,
        (Text(t1), Text(t2)) => t1 == t2,
        (Func(f1), Func(f2)) => downcast(f1).compare(downcast(f2)),
        _ => false,
    })
}
/// Parses the test syntax.
macro_rules! tokens {
($($task:ident $src:expr =>($line:expr)=> [$($target:tt)*])*) => ({
($($task:ident $src:expr =>($line:expr)=> [$($tts:tt)*])*) => ({
#[allow(unused_mut)]
let mut cases = Vec::new();
$(cases.push(($line, $src, tokens!(@$task [$($target)*])));)*
$(cases.push(($line, $src, tokens!(@$task [$($tts)*])));)*
cases
});
(@t $tokens:expr) => ({
Target::Tokenized($tokens.to_vec())
(@t [$($tts:tt)*]) => ({
use token_shorthands::*;
Target::Tokenize(vec![$($tts)*])
});
(@ts [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $t:expr)),* $(,)?]) => ({
Target::TokenizedSpanned(vec![
$(Spanned { v: $t, span: Span {
(@ts [$($tts:tt)*]) => ({
use token_shorthands::*;
Target::TokenizeSpanned(tokens!(@__spans [$($tts)*]))
});
(@p [$($tts:tt)*]) => ({
use node_shorthands::*;
let nodes = vec![$($tts)*].into_iter()
.map(|v| Spanned { v, span: Span::ZERO })
.collect();
Target::Parse(SyntaxTree { nodes })
});
(@ps [$($tts:tt)*]) => ({
use node_shorthands::*;
Target::ParseSpanned(tokens!(@__spans [$($tts)*]))
});
(@__spans [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $v:expr)),* $(,)?]) => ({
vec![
$(Spanned { v: $v, span: Span {
start: Position { line: $sl, column: $sc },
end: Position { line: $el, column: $ec },
}}),*
])
]
});
}
#[derive(Debug)]
enum Target {
Tokenized(Vec<Token<'static>>),
TokenizedSpanned(Vec<Spanned<Token<'static>>>),
Tokenize(Vec<Token<'static>>),
TokenizeSpanned(Vec<Spanned<Token<'static>>>),
Parse(SyntaxTree),
ParseSpanned(SyntaxTree),
}
fn main() {
@ -75,6 +187,7 @@ fn main() {
println!(" - Source: {:?}", src);
println!(" - Expected: {:?}", expected);
println!(" - Found: {:?}", found);
println!();
failed += 1;
errors = true;
@ -98,14 +211,26 @@ fn main() {
fn test_case(src: &str, target: Target) -> (bool, String, String) {
match target {
Target::Tokenized(tokens) => {
Target::Tokenize(tokens) => {
let found: Vec<_> = tokenize(src).map(Spanned::value).collect();
(found == tokens, format!("{:?}", tokens), format!("{:?}", found))
}
Target::TokenizedSpanned(tokens) => {
Target::TokenizeSpanned(tokens) => {
let found: Vec<_> = tokenize(src).collect();
(found == tokens, format!("{:?}", tokens), format!("{:?}", found))
}
Target::Parse(tree) => {
let scope = Scope::with_debug::<DebugFn>();
let (found, _, errs) = parse(src, ParseContext { scope: &scope });
(compare(&tree, &found), format!("{:?}", tree), format!("{:?}", found))
}
Target::ParseSpanned(tree) => {
let scope = Scope::with_debug::<DebugFn>();
let (found, _, _) = parse(src, ParseContext { scope: &scope });
(tree == found, format!("{:?}", tree), format!("{:?}", found))
}
}
}

View File

@ -46,6 +46,12 @@ t "[func]*bold*" => [LB, ID("func"), RB, ST, T("bold"), ST]
t "[_*`]" => [LB, T("_"), T("*"), T("`"), RB]
t "hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
// Nested functions.
t "[f: [=][*]]" => [LB, ID("f"), CL, W(0), LB, EQ, RB, LB, ST, RB, RB]
t "[_][[,],]," => [LB, T("_"), RB, LB, LB, CM, RB, T(","), RB, T(",")]
t "[=][=][=]" => [LB, EQ, RB, LB, T("="), RB, LB, EQ, RB]
t "[=][[=][=][=]]" => [LB, EQ, RB, LB, LB, EQ, RB, LB, T("="), RB, LB, EQ, RB, RB]
// Escapes.
t r"\[" => [T("[")]
t r"\]" => [T("]")]

20
tests/parsing/trees.rs Normal file
View File

@ -0,0 +1,20 @@
p "" => []
p "hi" => [T("hi")]
p "hi you" => [T("hi"), S, T("you")]
p "\n\n 🌍" => [T(""), N, T("🌍")]
p "[func]" => [F!(None)]
p "[tree][hi *you*]" => [F!(Some([T("hi"), S, B, T("you"), B]))]
// p "from [align: left] to" => [
// T("from"), S,
// F!("align", pos=[ID("left")], None),
// S, T("to"),
// ]
// p "[box: x=1.2pt, false][a b c] bye" => [
// F!(
// "box",
// pos=[BOOL(false)],
// key=["x": SIZE(Size::pt(1.2))],
// Some([T("a"), S, T("b"), S, T("c")]),
// ),
// S, T("bye"),
// ]