Basic node spans

This commit is contained in:
Laurenz 2019-10-30 22:52:25 +01:00
parent b5d8b8f4a5
commit 65ec3764e5
5 changed files with 161 additions and 95 deletions

View File

@ -42,6 +42,13 @@ pub trait Function: FunctionBounds {
fn layout(&self, ctx: LayoutContext) -> LayoutResult<CommandList>; fn layout(&self, ctx: LayoutContext) -> LayoutResult<CommandList>;
} }
impl dyn Function {
    /// Try to view this dynamic function as the concrete function type `F`.
    ///
    /// The result is whatever `downcast_ref::<F>()` yields on the value obtained
    /// from `help_cast_as_any()` (presumably a `&dyn Any`, so `None` when the
    /// underlying type is not `F` — confirm against `FunctionBounds`).
    pub fn downcast<F: Function + 'static>(&self) -> Option<&F> {
        let any = self.help_cast_as_any();
        any.downcast_ref::<F>()
    }
}
impl PartialEq for dyn Function { impl PartialEq for dyn Function {
fn eq(&self, other: &dyn Function) -> bool { fn eq(&self, other: &dyn Function) -> bool {
self.help_eq(other) self.help_eq(other)

View File

@ -38,7 +38,7 @@ impl<'a, 'p> TreeLayouter<'a, 'p> {
/// Layout the tree into a box. /// Layout the tree into a box.
fn layout(&mut self, tree: &SyntaxTree) -> LayoutResult<()> { fn layout(&mut self, tree: &SyntaxTree) -> LayoutResult<()> {
for node in &tree.nodes { for node in &tree.nodes {
match node { match &node.val {
Node::Text(text) => { Node::Text(text) => {
let layout = self.layout_text(text)?; let layout = self.layout_text(text)?;
self.flex.add(layout); self.flex.add(layout);

View File

@ -56,12 +56,11 @@ pub enum Token<'s> {
/// A tree representation of source code. /// A tree representation of source code.
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct SyntaxTree { pub struct SyntaxTree {
pub nodes: Vec<Node>, pub nodes: Vec<Spanned<Node>>,
} }
impl SyntaxTree { impl SyntaxTree {
/// Create an empty syntax tree. /// Create an empty syntax tree.
#[inline]
pub fn new() -> SyntaxTree { pub fn new() -> SyntaxTree {
SyntaxTree { nodes: vec![] } SyntaxTree { nodes: vec![] }
} }
@ -130,6 +129,8 @@ impl Display for Expression {
} }
} }
/// Annotates a value with the part of the source code it corresponds to.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Spanned<T> { pub struct Spanned<T> {
pub val: T, pub val: T,
pub span: Span, pub span: Span,
@ -141,6 +142,8 @@ impl<T> Spanned<T> {
} }
} }
/// Describes a slice of source code.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Span { pub struct Span {
pub start: usize, pub start: usize,
pub end: usize, pub end: usize,
@ -154,4 +157,13 @@ impl Span {
pub fn at(index: usize) -> Span { pub fn at(index: usize) -> Span {
Span { start: index, end: index + 1 } Span { start: index, end: index + 1 }
} }
pub fn pair(&self) -> (usize, usize) {
(self.start, self.end)
}
/// Grows this span in place so that it also covers `other`.
///
/// The start moves down to the smaller of both starts and the end moves up
/// to the larger of both ends; a span already covering `other` is unchanged.
pub fn expand(&mut self, other: Span) {
    if other.start < self.start {
        self.start = other.start;
    }
    if other.end > self.end {
        self.end = other.end;
    }
}
} }

View File

@ -24,21 +24,20 @@ pub struct ParseContext<'a> {
struct Parser<'s> { struct Parser<'s> {
src: &'s str, src: &'s str,
tokens: PeekableTokens<'s>, tokens: PeekableTokens<'s>,
state: ParserState,
ctx: ParseContext<'s>, ctx: ParseContext<'s>,
tree: SyntaxTree, tree: SyntaxTree,
} }
/// The state the parser is in.
#[derive(Debug, Copy, Clone, Eq, PartialEq)] #[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum ParserState { enum NewlineState {
/// The base state of the parser. /// No newline yet.
Body, Zero,
/// We saw one newline already and are looking for another. /// We saw one newline with the given span already and are
FirstNewline, /// looking for another.
One(Span),
/// We saw at least two newlines and wrote one, thus not /// We saw at least two newlines and wrote one, thus not
/// writing another one for more newlines. /// writing another one for more newlines.
WroteNewline, TwoOrMore,
} }
impl<'s> Parser<'s> { impl<'s> Parser<'s> {
@ -47,7 +46,6 @@ impl<'s> Parser<'s> {
Parser { Parser {
src, src,
tokens: PeekableTokens::new(tokenize(src)), tokens: PeekableTokens::new(tokenize(src)),
state: ParserState::Body,
ctx, ctx,
tree: SyntaxTree::new(), tree: SyntaxTree::new(),
} }
@ -68,18 +66,18 @@ impl<'s> Parser<'s> {
use Token::*; use Token::*;
if let Some(token) = self.tokens.peek() { if let Some(token) = self.tokens.peek() {
match token { match token.val {
// Functions. // Functions.
LeftBracket => self.parse_func()?, LeftBracket => self.parse_func()?,
RightBracket => return Err(ParseError::new("unexpected closing bracket")), RightBracket => return Err(ParseError::new("unexpected closing bracket")),
// Modifiers. // Modifiers.
Underscore => self.append_consumed(Node::ToggleItalics), Underscore => self.append_consumed(Node::ToggleItalics, token.span),
Star => self.append_consumed(Node::ToggleBold), Star => self.append_consumed(Node::ToggleBold, token.span),
Backtick => self.append_consumed(Node::ToggleMonospace), Backtick => self.append_consumed(Node::ToggleMonospace, token.span),
// Normal text. // Normal text.
Text(word) => self.append_consumed(Node::Text(word.to_owned())), Text(word) => self.append_consumed(Node::Text(word.to_owned()), token.span),
// The rest is handled elsewhere or should not happen, because `Tokens` does not // The rest is handled elsewhere or should not happen, because `Tokens` does not
// yield these in a body. // yield these in a body.
@ -95,22 +93,27 @@ impl<'s> Parser<'s> {
/// Parse a complete function from the current position. /// Parse a complete function from the current position.
fn parse_func(&mut self) -> ParseResult<()> { fn parse_func(&mut self) -> ParseResult<()> {
// This should only be called if a left bracket was seen. // This should only be called if a left bracket was seen.
assert!(self.tokens.next() == Some(Token::LeftBracket)); let token = self.tokens.next().expect("parse_func: expected token");
assert!(token.val == Token::LeftBracket);
let mut span = token.span;
let header = self.parse_func_header()?; let header = self.parse_func_header()?;
let body = self.parse_func_body(&header)?; let body = self.parse_func_body(&header)?;
// Finally this function is parsed to the end. span.end = self.tokens.string_index();
self.append(Node::Func(FuncCall { header, body }));
Ok(self.switch(ParserState::Body)) // Finally this function is parsed to the end.
self.append(Node::Func(FuncCall { header, body }), span);
Ok(())
} }
/// Parse a function header. /// Parse a function header.
fn parse_func_header(&mut self) -> ParseResult<FuncHeader> { fn parse_func_header(&mut self) -> ParseResult<FuncHeader> {
// The next token should be the name of the function.
self.skip_white(); self.skip_white();
let name = match self.tokens.next() {
let name = match self.tokens.next().map(|token| token.val) {
Some(Token::Text(word)) => { Some(Token::Text(word)) => {
if is_identifier(word) { if is_identifier(word) {
Ok(word.to_owned()) Ok(word.to_owned())
@ -130,7 +133,7 @@ impl<'s> Parser<'s> {
self.skip_white(); self.skip_white();
// Check for arguments // Check for arguments
match self.tokens.next() { match self.tokens.next().map(|token| token.val) {
Some(Token::RightBracket) => {} Some(Token::RightBracket) => {}
Some(Token::Colon) => { Some(Token::Colon) => {
let (args, kwargs) = self.parse_func_args()?; let (args, kwargs) = self.parse_func_args()?;
@ -157,7 +160,7 @@ impl<'s> Parser<'s> {
loop { loop {
self.skip_white(); self.skip_white();
match self.tokens.peek() { match self.tokens.peek().map(|token| token.val) {
Some(Token::Text(_)) | Some(Token::Quoted(_)) if !comma => { Some(Token::Text(_)) | Some(Token::Quoted(_)) if !comma => {
args.push(self.parse_expression()?); args.push(self.parse_expression()?);
comma = true; comma = true;
@ -182,7 +185,7 @@ impl<'s> Parser<'s> {
/// Parse an expression. /// Parse an expression.
fn parse_expression(&mut self) -> ParseResult<Expression> { fn parse_expression(&mut self) -> ParseResult<Expression> {
Ok(match self.tokens.next() { Ok(match self.tokens.next().map(|token| token.val) {
Some(Token::Quoted(text)) => Expression::Str(text.to_owned()), Some(Token::Quoted(text)) => Expression::Str(text.to_owned()),
Some(Token::Text(text)) => { Some(Token::Text(text)) => {
if let Ok(b) = text.parse::<bool>() { if let Ok(b) = text.parse::<bool>() {
@ -202,7 +205,7 @@ impl<'s> Parser<'s> {
/// Parse the body of a function. /// Parse the body of a function.
fn parse_func_body(&mut self, header: &FuncHeader) -> ParseResult<Box<dyn Function>> { fn parse_func_body(&mut self, header: &FuncHeader) -> ParseResult<Box<dyn Function>> {
// Whether the function has a body. // Whether the function has a body.
let has_body = self.tokens.peek() == Some(Token::LeftBracket); let has_body = self.tokens.peek().map(|token| token.val) == Some(Token::LeftBracket);
if has_body { if has_body {
self.advance(); self.advance();
} }
@ -230,7 +233,8 @@ impl<'s> Parser<'s> {
self.tokens.set_string_index(end); self.tokens.set_string_index(end);
// Now the body should be closed. // Now the body should be closed.
assert!(self.tokens.next() == Some(Token::RightBracket)); let token = self.tokens.next().expect("parse_func_body: expected token");
assert!(token.val == Token::RightBracket);
body body
} else { } else {
@ -240,40 +244,45 @@ impl<'s> Parser<'s> {
/// Parse whitespace (as long as there is any) and skip over comments. /// Parse whitespace (as long as there is any) and skip over comments.
fn parse_white(&mut self) -> ParseResult<()> { fn parse_white(&mut self) -> ParseResult<()> {
let mut state = NewlineState::Zero;
while let Some(token) = self.tokens.peek() { while let Some(token) = self.tokens.peek() {
match self.state { match token.val {
ParserState::FirstNewline => match token { Token::Space => {
Token::Newline => { self.advance();
self.append_consumed(Node::Newline); match state {
self.switch(ParserState::WroteNewline); NewlineState::Zero | NewlineState::TwoOrMore => {
self.append_space(token.span);
} }
Token::Space => self.append_space_consumed(), _ => {}
_ => {
self.append_space();
self.switch(ParserState::Body);
} }
}, }
ParserState::WroteNewline => match token {
Token::Newline | Token::Space => self.append_space_consumed(),
_ => self.switch(ParserState::Body),
},
ParserState::Body => match token {
// Whitespace
Token::Space => self.append_space_consumed(),
Token::Newline => { Token::Newline => {
self.advance(); self.advance();
self.switch(ParserState::FirstNewline); match state {
} NewlineState::Zero => state = NewlineState::One(token.span),
NewlineState::One(mut span) => {
// Comments span.expand(token.span);
Token::LineComment(_) | Token::BlockComment(_) => self.advance(), state = NewlineState::TwoOrMore;
Token::StarSlash => { self.append(Node::Newline, span);
return Err(ParseError::new("unexpected end of block comment"));
}
// Anything else skips out of the function.
_ => break,
}, },
NewlineState::TwoOrMore => self.append_space(token.span),
}
}
_ => {
if let NewlineState::One(span) = state {
self.append_space(span);
}
state = NewlineState::Zero;
match token.val {
Token::LineComment(_) | Token::BlockComment(_) => self.advance(),
Token::StarSlash => err!("unexpected end of block comment"),
_ => break,
}
}
} }
} }
@ -283,10 +292,9 @@ impl<'s> Parser<'s> {
/// Skip over whitespace and comments. /// Skip over whitespace and comments.
fn skip_white(&mut self) { fn skip_white(&mut self) {
while let Some(token) = self.tokens.peek() { while let Some(token) = self.tokens.peek() {
match token { match token.val {
Token::Space | Token::Newline | Token::LineComment(_) | Token::BlockComment(_) => { Token::Space | Token::Newline |
self.advance() Token::LineComment(_) | Token::BlockComment(_) => self.advance(),
}
_ => break, _ => break,
} }
} }
@ -297,33 +305,23 @@ impl<'s> Parser<'s> {
self.tokens.next(); self.tokens.next();
} }
/// Switch the state.
fn switch(&mut self, state: ParserState) {
self.state = state;
}
/// Append a node to the tree. /// Append a node to the tree.
fn append(&mut self, node: Node) { fn append(&mut self, node: Node, span: Span) {
self.tree.nodes.push(node); self.tree.nodes.push(Spanned::new(node, span));
} }
/// Append a space if there is not one already. /// Append a space, merging with a previous space if there is one.
fn append_space(&mut self) { fn append_space(&mut self, span: Span) {
if self.tree.nodes.last() != Some(&Node::Space) { match self.tree.nodes.last_mut() {
self.append(Node::Space); Some(ref mut node) if node.val == Node::Space => node.span.expand(span),
_ => self.append(Node::Space, span),
} }
} }
/// Advance and append the given node. /// Advance and append the given node.
fn append_consumed(&mut self, node: Node) { fn append_consumed(&mut self, node: Node, span: Span) {
self.advance(); self.advance();
self.append(node); self.append(node, span);
}
/// Advance and append a space if there is not one already.
fn append_space_consumed(&mut self) {
self.advance();
self.append_space();
} }
} }
@ -352,7 +350,7 @@ fn find_closing_bracket(src: &str) -> Option<usize> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
struct PeekableTokens<'s> { struct PeekableTokens<'s> {
tokens: Tokens<'s>, tokens: Tokens<'s>,
peeked: Option<Option<Token<'s>>>, peeked: Option<Option<Spanned<Token<'s>>>>,
} }
impl<'s> PeekableTokens<'s> { impl<'s> PeekableTokens<'s> {
@ -365,9 +363,9 @@ impl<'s> PeekableTokens<'s> {
} }
/// Peek at the next element. /// Peek at the next element.
fn peek(&mut self) -> Option<Token<'s>> { fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
let iter = &mut self.tokens; let iter = &mut self.tokens;
*self.peeked.get_or_insert_with(|| iter.next().map(|token| token.val)) *self.peeked.get_or_insert_with(|| iter.next())
} }
fn string_index(&mut self) -> usize { fn string_index(&mut self) -> usize {
@ -381,12 +379,12 @@ impl<'s> PeekableTokens<'s> {
} }
impl<'s> Iterator for PeekableTokens<'s> { impl<'s> Iterator for PeekableTokens<'s> {
type Item = Token<'s>; type Item = Spanned<Token<'s>>;
fn next(&mut self) -> Option<Token<'s>> { fn next(&mut self) -> Option<Self::Item> {
match self.peeked.take() { match self.peeked.take() {
Some(value) => value, Some(value) => value,
None => self.tokens.next().map(|token| token.val), None => self.tokens.next(),
} }
} }
} }
@ -442,7 +440,7 @@ mod tests {
use super::*; use super::*;
/// A testing function which just parses its body into a syntax tree. /// A testing function which just parses its body into a syntax tree.
#[derive(Debug, PartialEq)] #[derive(Debug)]
pub struct TreeFn(pub SyntaxTree); pub struct TreeFn(pub SyntaxTree);
function! { function! {
@ -452,8 +450,12 @@ mod tests {
layout(_, _) { Ok(commands![]) } layout(_, _) { Ok(commands![]) }
} }
impl PartialEq for TreeFn {
fn eq(&self, other: &TreeFn) -> bool { tree_equal(&self.0, &other.0) }
}
/// A testing function without a body. /// A testing function without a body.
#[derive(Debug, PartialEq)] #[derive(Debug)]
pub struct BodylessFn; pub struct BodylessFn;
function! { function! {
@ -462,6 +464,14 @@ mod tests {
parse(_args, body, _ctx) { parse!(forbidden: body); Ok(BodylessFn) } parse(_args, body, _ctx) { parse!(forbidden: body); Ok(BodylessFn) }
layout(_, _) { Ok(commands![]) } layout(_, _) { Ok(commands![]) }
} }
impl PartialEq for BodylessFn {
    /// `BodylessFn` carries no data, so any two values compare equal.
    fn eq(&self, _other: &BodylessFn) -> bool {
        true
    }
}
}
fn tree_equal(a: &SyntaxTree, b: &SyntaxTree) -> bool {
a.nodes.iter().zip(&b.nodes).all(|(x, y)| x.val == y.val)
} }
/// Test if the source code parses into the syntax tree. /// Test if the source code parses into the syntax tree.
@ -469,13 +479,13 @@ mod tests {
let ctx = ParseContext { let ctx = ParseContext {
scope: &Scope::new(), scope: &Scope::new(),
}; };
assert_eq!(parse(src, ctx).unwrap(), tree); assert!(tree_equal(&parse(src, ctx).unwrap(), &tree));
} }
/// Test with a scope containing function definitions. /// Test with a scope containing function definitions.
fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) { fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) {
let ctx = ParseContext { scope }; let ctx = ParseContext { scope };
assert_eq!(parse(src, ctx).unwrap(), tree); assert!(tree_equal(&parse(src, ctx).unwrap(), &tree));
} }
/// Test if the source parses into the error. /// Test if the source parses into the error.
@ -499,11 +509,15 @@ mod tests {
} }
/// Shortcut macro to create a syntax tree. Is `vec`-like and the elements /// Shortcut macro to create a syntax tree. Is `vec`-like and the elements
/// are the nodes. /// are the nodes without spans.
macro_rules! tree { macro_rules! tree {
($($x:expr),*) => ( ($($x:expr),*) => ({
SyntaxTree { nodes: vec![$($x),*] } #[allow(unused_mut)] let mut nodes = vec![];
); $(
nodes.push(Spanned::new($x, Span::new(0, 0)));
)*
SyntaxTree { nodes }
});
($($x:expr,)*) => (tree![$($x),*]) ($($x:expr,)*) => (tree![$($x),*])
} }
@ -545,7 +559,8 @@ mod tests {
test("Hello \n\nWorld", tree! [ T("Hello"), S, N, T("World") ]); test("Hello \n\nWorld", tree! [ T("Hello"), S, N, T("World") ]);
test("Hello\n\n World", tree! [ T("Hello"), N, S, T("World") ]); test("Hello\n\n World", tree! [ T("Hello"), N, S, T("World") ]);
test("Hello \n \n \n World", tree! [ T("Hello"), S, N, S, T("World") ]); test("Hello \n \n \n World", tree! [ T("Hello"), S, N, S, T("World") ]);
test("Hello\n \n\n World", tree! [ T("Hello"), S, N, S, T("World") ]); test("Hello\n \n\n World", tree! [ T("Hello"), N, S, T("World") ]);
test("Hello\n \nWorld", tree! [ T("Hello"), N, T("World") ]);
} }
/// Parse things dealing with functions. /// Parse things dealing with functions.
@ -686,6 +701,38 @@ mod tests {
]); ]);
} }
/// Checks that parsed nodes carry the expected source spans.
#[test]
#[rustfmt::skip]
fn parse_spans() {
    let mut scope = Scope::new();
    scope.add::<TreeFn>("hello");

    // Parses a source string within `scope` and hands back only the nodes.
    let nodes_of = |src| {
        let ctx = ParseContext { scope: &scope };
        parse(src, ctx).unwrap().nodes
    };

    // Two words separated by a single space.
    let words = nodes_of("hello world");
    assert_eq!(words[0].span.pair(), (0, 5));
    assert_eq!(words[2].span.pair(), (6, 11));

    // Newline, space, newline: the second node is expected to span all three.
    let paragraphs = nodes_of("p1\n \np2");
    assert_eq!(paragraphs[1].span.pair(), (2, 5));

    // A function call with header and body following a word and a space.
    let call = nodes_of("func [hello: pos, other][body _🌍_]");
    assert_eq!(call[0].span.pair(), (0, 4));
    assert_eq!(call[1].span.pair(), (4, 5));
    assert_eq!(call[2].span.pair(), (5, 37));

    // The expected body spans start at zero again; the emoji takes four bytes.
    let inner = match &call[2].val {
        Node::Func(func) => &func.body.downcast::<TreeFn>().unwrap().0.nodes,
        _ => panic!(),
    };
    assert_eq!(inner[0].span.pair(), (0, 4));
    assert_eq!(inner[1].span.pair(), (4, 5));
    assert_eq!(inner[2].span.pair(), (5, 6));
    assert_eq!(inner[3].span.pair(), (6, 10));
    assert_eq!(inner[4].span.pair(), (10, 11));
}
/// Tests whether errors get reported correctly. /// Tests whether errors get reported correctly.
#[test] #[test]
#[rustfmt::skip] #[rustfmt::skip]

View File

@ -361,7 +361,7 @@ mod tests {
/// Test if the tokens of the source code have the correct spans. /// Test if the tokens of the source code have the correct spans.
fn test_span(src: &str, spans: Vec<(usize, usize)>) { fn test_span(src: &str, spans: Vec<(usize, usize)>) {
assert_eq!(Tokens::new(src) assert_eq!(Tokens::new(src)
.map(|token| (token.span.start, token.span.end)) .map(|token| token.span.pair())
.collect::<Vec<_>>(), spans); .collect::<Vec<_>>(), spans);
} }