From 41bdafb5785dd85d20a3e79900b18e0010f6d71d Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sat, 6 Nov 2021 12:12:02 +0100 Subject: [PATCH] Faster parser --- src/parse/mod.rs | 61 +-- src/parse/parser.rs | 888 ++++++++++++++++++++----------------------- src/parse/resolve.rs | 4 +- src/parse/tokens.rs | 14 +- src/syntax/mod.rs | 83 ++-- 5 files changed, 487 insertions(+), 563 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 90be73f9c..aa616fdf8 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -13,13 +13,16 @@ pub use tokens::*; use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; -use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; +use crate::syntax::{ErrorPosition, Green, GreenNode, NodeKind}; /// Parse a source file. pub fn parse(source: &str) -> Rc { let mut p = Parser::new(source); markup(&mut p); - p.finish() + match p.finish().into_iter().next() { + Some(Green::Node(node)) => node, + _ => unreachable!(), + } } /// Parse markup. @@ -36,7 +39,7 @@ fn markup_indented(p: &mut Parser, column: usize) { }); markup_while(p, false, &mut |p| match p.peek() { - Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.next_end()) >= column, + Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column, _ => true, }) } @@ -114,7 +117,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { let stmt = matches!(token, NodeKind::Let | NodeKind::Import); let group = if stmt { Group::Stmt } else { Group::Expr }; - p.start_group(group, TokenMode::Code); + p.start_group(group); let res = expr_prec(p, true, 0); if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); @@ -177,8 +180,9 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. - match p.eat_map(|x| UnOp::from_token(&x)) { + match p.peek().and_then(UnOp::from_token) { Some(op) => { + p.eat(); let prec = op.precedence(); expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); @@ -201,7 +205,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { break; } - if p.peek() == Some(&NodeKind::With) { + if p.at(&NodeKind::With) { with_expr(p, &marker)?; } @@ -242,7 +246,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult { p.eat(); // Arrow means this is a closure's lone parameter. - if !atomic && p.peek() == Some(&NodeKind::Arrow) { + if !atomic && p.at(&NodeKind::Arrow) { marker.end(p, NodeKind::ClosureParams); p.eat(); marker.perform(p, NodeKind::Closure, expr) @@ -315,7 +319,7 @@ fn literal(p: &mut Parser) -> bool { fn parenthesized(p: &mut Parser) -> ParseResult { let marker = p.marker(); - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; p.end_group(); @@ -327,14 +331,14 @@ fn parenthesized(p: &mut Parser) -> ParseResult { } // Arrow means this is a closure's parameter list. - if p.peek() == Some(&NodeKind::Arrow) { + if p.at(&NodeKind::Arrow) { params(p, &marker, true); marker.end(p, NodeKind::ClosureParams); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); } - // Find out which kind of collection this is. + // Transform into the identified collection. match kind { CollectionKind::Group => marker.end(p, NodeKind::Group), CollectionKind::Positional => array(p, &marker), @@ -402,7 +406,8 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) { (kind, items) } -/// Parse an expression or a named pair. Returns if this is a named pair. +/// Parse an expression or a named pair, returning whether it's a spread or a +/// named pair. fn item(p: &mut Parser) -> ParseResult { let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { @@ -412,25 +417,24 @@ fn item(p: &mut Parser) -> ParseResult { expr(p)?; - if p.peek() == Some(&NodeKind::Colon) { + if p.at(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) { p.eat(); expr(p) } else { - marker.end( - p, - NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), - ); + let error = + NodeKind::Error(ErrorPosition::Full, "expected identifier".into()); + marker.end(p, error); p.eat(); - expr(p).ok(); Err(()) } })?; + Ok(NodeKind::Named) } else { - Ok(p.last_child().unwrap().kind().clone()) + Ok(NodeKind::None) } } @@ -488,7 +492,7 @@ fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { // Parse a template block: `[...]`. fn template(p: &mut Parser) { p.perform(NodeKind::Template, |p| { - p.start_group(Group::Bracket, TokenMode::Markup); + p.start_group(Group::Bracket); markup(p); p.end_group(); }); @@ -497,9 +501,9 @@ fn template(p: &mut Parser) { /// Parse a code block: `{...}`. fn block(p: &mut Parser) { p.perform(NodeKind::Block, |p| { - p.start_group(Group::Brace, TokenMode::Code); + p.start_group(Group::Brace); while !p.eof() { - p.start_group(Group::Stmt, TokenMode::Code); + p.start_group(Group::Stmt); if expr(p).is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } @@ -515,7 +519,7 @@ fn block(p: &mut Parser) { /// Parse a function call. fn call(p: &mut Parser, callee: &Marker) -> ParseResult { callee.perform(p, NodeKind::Call, |p| match p.peek_direct() { - Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => { + Some(NodeKind::LeftParen | NodeKind::LeftBracket) => { args(p, true); Ok(()) } @@ -530,7 +534,7 @@ fn call(p: &mut Parser, callee: &Marker) -> ParseResult { fn args(p: &mut Parser, allow_template: bool) { p.perform(NodeKind::CallArgs, |p| { if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); collection(p); p.end_group(); } @@ -546,7 +550,7 @@ fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { marker.perform(p, NodeKind::WithExpr, |p| { p.eat_assert(&NodeKind::With); - if p.peek() == Some(&NodeKind::LeftParen) { + if p.at(&NodeKind::LeftParen) { args(p, false); Ok(()) } else { @@ -564,14 +568,14 @@ fn let_expr(p: &mut Parser) -> ParseResult { let marker = p.marker(); ident(p)?; - if p.peek() == Some(&NodeKind::With) { + if p.at(&NodeKind::With) { with_expr(p, &marker)?; } else { // If a parenthesis follows, this is a function definition. let has_params = p.peek_direct() == Some(&NodeKind::LeftParen); if has_params { p.perform(NodeKind::ClosureParams, |p| { - p.start_group(Group::Paren, TokenMode::Code); + p.start_group(Group::Paren); let marker = p.marker(); collection(p); params(p, &marker, true); @@ -605,7 +609,7 @@ fn if_expr(p: &mut Parser) -> ParseResult { body(p)?; if p.eat_if(&NodeKind::Else) { - if p.peek() == Some(&NodeKind::If) { + if p.at(&NodeKind::If) { if_expr(p)?; } else { body(p)?; @@ -657,7 +661,7 @@ fn import_expr(p: &mut Parser) -> ParseResult { if !p.eat_if(&NodeKind::Star) { // This is the list of identifiers scenario. p.perform(NodeKind::ImportItems, |p| { - p.start_group(Group::Imports, TokenMode::Code); + p.start_group(Group::Imports); let marker = p.marker(); let items = collection(p).1; if items == 0 { @@ -712,6 +716,5 @@ fn body(p: &mut Parser) -> ParseResult { return Err(()); } } - Ok(()) } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 4f181821a..5d26ff636 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,5 +1,4 @@ -use std::ops::Range; -use std::rc::Rc; +use std::mem; use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; @@ -11,27 +10,425 @@ pub(crate) type ParseResult = Result; /// A convenient token-based parser. pub struct Parser<'s> { - /// The parsed file. - src: &'s str, /// An iterator over the source tokens. tokens: Tokens<'s>, + /// Whether we are at the end of the file or of a group. + eof: bool, + /// The current token. + current: Option, + /// The end byte index of the last (non-whitespace if in code mode) token. + prev_end: usize, + /// The start byte index of the peeked token. + current_start: usize, /// The stack of open groups. groups: Vec, - /// The next token. - next: Option, - /// The peeked token. - /// (Same as `next` except if we are at the end of group, then `None`). - peeked: Option, - /// The end index of the last (non-whitespace if in code mode) token. - prev_end: usize, - /// The start index of the peeked token. - next_start: usize, - /// A stack of outer children vectors. - stack: Vec>, /// The children of the currently built node. children: Vec, } +impl<'s> Parser<'s> { + /// Create a new parser for the source string. + pub fn new(src: &'s str) -> Self { + let mut tokens = Tokens::new(src, TokenMode::Markup); + let current = tokens.next(); + Self { + tokens, + eof: current.is_none(), + current, + prev_end: 0, + current_start: 0, + groups: vec![], + children: vec![], + } + } + + /// End the parsing process and return the last child. + pub fn finish(self) -> Vec { + self.children + } + + /// Create a new marker. + pub fn marker(&mut self) -> Marker { + Marker(self.children.len()) + } + + /// Perform a subparse that wraps its result in a node with the given kind. + pub fn perform(&mut self, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Self) -> T, + { + let prev = mem::take(&mut self.children); + let output = f(self); + let mut children = mem::replace(&mut self.children, prev); + + // Trailing trivia should not be wrapped into the new node. + let mut remains = vec![]; + if self.tokens.mode() == TokenMode::Code { + let len = children.len(); + for n in (0 .. len).rev() { + if !self.is_trivia(&children[n].kind()) { + break; + } + + remains.push(children.pop().unwrap()); + } + remains.reverse(); + } + + self.children.push(GreenNode::with_children(kind, children).into()); + self.children.extend(remains); + + output + } + + /// Whether the end of the source string or group is reached. + pub fn eof(&self) -> bool { + self.eof + } + + /// Consume the current token and also trailing trivia if in code mode. + pub fn eat(&mut self) { + self.prev_end = self.tokens.index(); + self.bump(); + + if self.tokens.mode() == TokenMode::Code { + // Skip whitespace and comments. + while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) { + self.bump(); + } + } + + self.repeek(); + } + + /// Eat if the current token it is the given one. + pub fn eat_if(&mut self, t: &NodeKind) -> bool { + let at = self.at(t); + if at { + self.eat(); + } + at + } + + /// Eat if the current token is the given one and produce an error if not. + pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { + let eaten = self.eat_if(t); + if !eaten { + self.expected_at(t.as_str()); + } + if eaten { Ok(()) } else { Err(()) } + } + + /// Eat, debug-asserting that the token is the given one. + pub fn eat_assert(&mut self, t: &NodeKind) { + debug_assert_eq!(self.peek(), Some(t)); + self.eat(); + } + + /// Eat tokens while the condition is true. + pub fn eat_while(&mut self, mut f: F) + where + F: FnMut(&NodeKind) -> bool, + { + while self.peek().map_or(false, |t| f(t)) { + self.eat(); + } + } + + /// Eat the current token, but change its type. + pub fn convert(&mut self, kind: NodeKind) { + let idx = self.children.len(); + self.eat(); + if let Some(child) = self.children.get_mut(idx) { + child.set_kind(kind); + } + } + + /// Whether the current token is of the given type. + pub fn at(&self, kind: &NodeKind) -> bool { + self.peek() == Some(kind) + } + + /// Peek at the current token without consuming it. + pub fn peek(&self) -> Option<&NodeKind> { + if self.eof { None } else { self.current.as_ref() } + } + + /// Peek at the current token, if it follows immediately after the last one + /// without any trivia in between. + pub fn peek_direct(&self) -> Option<&NodeKind> { + if self.prev_end() == self.current_start() { + self.peek() + } else { + None + } + } + + /// Peek at the source of the current token. + pub fn peek_src(&self) -> &'s str { + self.tokens.scanner().get(self.current_start() .. self.current_end()) + } + + /// The byte index at which the last token ended. + /// + /// Refers to the end of the last non-trivia token in code mode. + pub fn prev_end(&self) -> usize { + self.prev_end + } + + /// The byte index at which the current token starts. + pub fn current_start(&self) -> usize { + self.current_start + } + + /// The byte index at which the current token ends. + pub fn current_end(&self) -> usize { + self.tokens.index() + } + + /// Determine the column index for the given byte index. + pub fn column(&self, index: usize) -> usize { + self.tokens.scanner().column(index) + } + + /// Continue parsing in a group. + /// + /// When the end delimiter of the group is reached, all subsequent calls to + /// `peek()` return `None`. Parsing can only continue with a matching call + /// to `end_group`. + /// + /// This panics if the current token does not start the given group. + pub fn start_group(&mut self, kind: Group) { + self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); + self.tokens.set_mode(match kind { + Group::Bracket => TokenMode::Markup, + _ => TokenMode::Code, + }); + + self.repeek(); + match kind { + Group::Paren => self.eat_assert(&NodeKind::LeftParen), + Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), + Group::Brace => self.eat_assert(&NodeKind::LeftBrace), + Group::Stmt => {} + Group::Expr => {} + Group::Imports => {} + } + } + + /// End the parsing of a group. + /// + /// This panics if no group was started. + pub fn end_group(&mut self) { + let group_mode = self.tokens.mode(); + let group = self.groups.pop().expect("no started group"); + self.tokens.set_mode(group.prev_mode); + self.repeek(); + + let mut rescan = self.tokens.mode() != group_mode; + + // Eat the end delimiter if there is one. + if let Some((end, required)) = match group.kind { + Group::Paren => Some((NodeKind::RightParen, true)), + Group::Bracket => Some((NodeKind::RightBracket, true)), + Group::Brace => Some((NodeKind::RightBrace, true)), + Group::Stmt => Some((NodeKind::Semicolon, false)), + Group::Expr => None, + Group::Imports => None, + } { + if self.current.as_ref() == Some(&end) { + // Bump the delimeter and return. No need to rescan in this case. + self.eat(); + rescan = false; + } else if required { + self.push_error(format!("expected {}", end)); + } + } + + // Rescan the peeked token if the mode changed. + if rescan { + if group_mode == TokenMode::Code { + let len = self.children.len(); + for n in (0 .. len).rev() { + if !self.is_trivia(self.children[n].kind()) { + break; + } + + self.children.pop(); + } + } + + self.tokens.jump(self.prev_end()); + self.prev_end = self.tokens.index(); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); + self.repeek(); + } + } + + /// Low-level bump that consumes exactly one token without special trivia + /// handling. + fn bump(&mut self) { + let kind = self.current.take().unwrap(); + let len = self.tokens.index() - self.current_start; + self.children.push(GreenData::new(kind, len).into()); + self.current_start = self.tokens.index(); + self.current = self.tokens.next(); + } + + /// Take another look at the current token to recheck whether it ends a + /// group. + fn repeek(&mut self) { + self.eof = match &self.current { + Some(NodeKind::RightParen) => self.inside(Group::Paren), + Some(NodeKind::RightBracket) => self.inside(Group::Bracket), + Some(NodeKind::RightBrace) => self.inside(Group::Brace), + Some(NodeKind::Semicolon) => self.inside(Group::Stmt), + Some(NodeKind::From) => self.inside(Group::Imports), + Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(), + Some(_) => false, + None => true, + }; + } + + /// Returns whether the given type can be skipped over. + fn is_trivia(&self, token: &NodeKind) -> bool { + Self::is_trivia_ext(token, self.stop_at_newline()) + } + + /// Returns whether the given type can be skipped over given the current + /// newline mode. + fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool { + match token { + NodeKind::Space(n) => *n == 0 || !stop_at_newline, + NodeKind::LineComment => true, + NodeKind::BlockComment => true, + _ => false, + } + } + + /// Whether the active group must end at a newline. + fn stop_at_newline(&self) -> bool { + matches!( + self.groups.last().map(|group| group.kind), + Some(Group::Stmt | Group::Expr | Group::Imports) + ) + } + + /// Whether we are inside the given group. + fn inside(&self, kind: Group) -> bool { + self.groups.iter().any(|g| g.kind == kind) + } +} + +/// Error handling. +impl Parser<'_> { + /// Push an error into the children list. + pub fn push_error(&mut self, msg: impl Into) { + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.children.push(GreenData::new(error, 0).into()); + } + + /// Eat the current token and add an error that it is unexpected. + pub fn unexpected(&mut self) { + match self.peek() { + Some(found) => { + let msg = format!("unexpected {}", found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.push_error("unexpected end of file"), + } + } + + /// Eat the current token and add an error that it is not the expected `thing`. + pub fn expected(&mut self, thing: &str) { + match self.peek() { + Some(found) => { + let msg = format!("expected {}, found {}", thing, found); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + self.perform(error, Self::eat); + } + None => self.expected_at(thing), + } + } + + /// Add an error that the `thing` was expected at the end of the last + /// non-trivia token. + pub fn expected_at(&mut self, thing: &str) { + let mut found = self.children.len(); + for (i, node) in self.children.iter().enumerate().rev() { + if !self.is_trivia(node.kind()) { + break; + } + found = i; + } + + Marker(found).expected_at(self, thing); + } +} + +/// A marker that indicates where a node may start. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct Marker(usize); + +impl Marker { + /// Perform a subparse that wraps all children after the marker in a node + /// with the given kind. + pub fn perform(self, p: &mut Parser, kind: NodeKind, f: F) -> T + where + F: FnOnce(&mut Parser) -> T, + { + let success = f(p); + self.end(p, kind); + success + } + + /// Wrap all children after the marker in a node with the given `kind`. + pub fn end(self, p: &mut Parser, kind: NodeKind) { + let end = (self.0 .. p.children.len()) + .rev() + .find(|&i| !p.is_trivia(p.children[i].kind())) + .unwrap_or(self.0) + + 1; + + let children: Vec<_> = p.children.drain(self.0 .. end).collect(); + p.children + .insert(self.0, GreenNode::with_children(kind, children).into()); + } + + /// Wrap all children that do not fulfill the predicate in error nodes. + pub fn filter_children(self, p: &mut Parser, f: F) + where + F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, + { + for child in &mut p.children[self.0 ..] { + if (p.tokens.mode() == TokenMode::Markup + || !Parser::is_trivia_ext(child.kind(), false)) + && !child.kind().is_error() + { + if let Err((pos, msg)) = f(child) { + let error = NodeKind::Error(pos, msg); + let inner = mem::take(child); + *child = GreenNode::with_child(error, inner).into(); + } + } + } + } + + /// Insert an error message that `what` was expected at the marker position. + pub fn expected_at(self, p: &mut Parser, what: &str) { + let msg = format!("expected {}", what); + let error = NodeKind::Error(ErrorPosition::Full, msg.into()); + p.children.insert(self.0, GreenData::new(error, 0).into()); + } + + /// Return a reference to the child directly after the marker. + pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> { + p.children.get(self.0) + } +} + /// A logical group of tokens, e.g. `[...]`. struct GroupEntry { /// The kind of group this is. This decides which tokens will end the group. @@ -46,12 +443,12 @@ struct GroupEntry { /// A group, confined by optional start and end delimiters. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Group { - /// A parenthesized group: `(...)`. - Paren, /// A bracketed group: `[...]`. Bracket, /// A curly-braced group: `{...}`. Brace, + /// A parenthesized group: `(...)`. + Paren, /// A group ended by a semicolon or a line break: `;`, `\n`. Stmt, /// A group for a single expression, ended by a line break. @@ -59,460 +456,3 @@ pub enum Group { /// A group for import items, ended by a semicolon, line break or `from`. Imports, } - -impl<'s> Parser<'s> { - /// Create a new parser for the source string. - pub fn new(src: &'s str) -> Self { - let mut tokens = Tokens::new(src, TokenMode::Markup); - let next = tokens.next(); - Self { - src, - tokens, - groups: vec![], - next: next.clone(), - peeked: next, - prev_end: 0, - next_start: 0, - stack: vec![], - children: vec![], - } - } - - /// Perform a subparse that wraps its result in a node with the given kind. - pub fn perform(&mut self, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Self) -> T, - { - let prev = std::mem::take(&mut self.children); - let output = f(self); - let mut children = std::mem::replace(&mut self.children, prev); - - // Trailing trivia should not be wrapped into the new node. - let mut remains = vec![]; - if self.tokens.mode() == TokenMode::Code { - let len = children.len(); - for n in (0 .. len).rev() { - if !self.skip_type(&children[n].kind()) { - break; - } - - remains.push(children.pop().unwrap()); - } - remains.reverse(); - } - - self.children.push(GreenNode::with_children(kind, children).into()); - self.children.extend(remains); - - output - } - - /// Eat and wrap the next token. - pub fn convert(&mut self, kind: NodeKind) { - self.eat(); - self.children.last_mut().unwrap().set_kind(kind); - } - - /// End the current node and undo its existence, inling all accumulated - /// children into its parent. - pub fn lift(&mut self) { - let outer = self.stack.pop().unwrap(); - let children = std::mem::replace(&mut self.children, outer); - self.children.extend(children); - } - - /// Add an error to the current children list. - fn push_error(&mut self, msg: impl Into) { - self.children.push( - GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0) - .into(), - ); - } - - /// End the parsing process and return the last child. - pub fn finish(&mut self) -> Rc { - match self.children.pop().unwrap() { - Green::Node(n) => n, - _ => panic!(), - } - } - - /// Whether the end of the source string or group is reached. - pub fn eof(&self) -> bool { - self.peek().is_none() - } - - /// Consume the next token if it is the given one. - pub fn eat_if(&mut self, t: &NodeKind) -> bool { - if self.peek() == Some(t) { - self.eat(); - true - } else { - false - } - } - - /// Consume the next token if the closure maps it a to `Some`-variant. - pub fn eat_map(&mut self, f: F) -> Option - where - F: FnOnce(&NodeKind) -> Option, - { - let token = self.peek()?; - let mapped = f(token); - if mapped.is_some() { - self.eat(); - } - mapped - } - - /// Consume the next token if it is the given one and produce an error if - /// not. - pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult { - let eaten = self.eat_if(t); - if !eaten { - self.expected_at(t.as_str()); - } - if eaten { Ok(()) } else { Err(()) } - } - - /// Consume the next token, debug-asserting that it is one of the given ones. - pub fn eat_assert(&mut self, t: &NodeKind) { - let next = self.peek(); - debug_assert_eq!(next, Some(t)); - self.eat(); - } - - /// Consume tokens while the condition is true. - pub fn eat_while(&mut self, mut f: F) - where - F: FnMut(&NodeKind) -> bool, - { - while self.peek().map_or(false, |t| f(t)) { - self.eat(); - } - } - - /// Peek at the next token without consuming it. - pub fn peek(&self) -> Option<&NodeKind> { - self.peeked.as_ref() - } - - /// Peek at the next token if it follows immediately after the last one - /// without any whitespace in between. - pub fn peek_direct(&self) -> Option<&NodeKind> { - if self.next_start() == self.prev_end() { - self.peeked.as_ref() - } else { - None - } - } - - /// Peek at the source of the next token. - pub fn peek_src(&self) -> &'s str { - self.get(self.next_start() .. self.next_end()) - } - - /// The byte index at which the last token ended. - /// - /// Refers to the end of the last _non-whitespace_ token in code mode. - pub fn prev_end(&self) -> usize { - self.prev_end - } - - /// The byte index at which the next token starts. - pub fn next_start(&self) -> usize { - self.next_start - } - - /// The byte index at which the next token will end. - /// - /// Is the same as [`next_start()`][Self::next_start] if `peek()` returns - /// `None`. - pub fn next_end(&self) -> usize { - self.tokens.index() - } - - /// Determine the column index for the given byte index. - pub fn column(&self, index: usize) -> usize { - self.tokens.column(index) - } - - /// Slice out part of the source string. - pub fn get(&self, range: Range) -> &'s str { - self.src.get(range).unwrap() - } - - /// Continue parsing in a group. - /// - /// When the end delimiter of the group is reached, all subsequent calls to - /// `eat()` and `peek()` return `None`. Parsing can only continue with - /// a matching call to `end_group`. - /// - /// This panics if the next token does not start the given group. - pub fn start_group(&mut self, kind: Group, mode: TokenMode) { - self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() }); - self.tokens.set_mode(mode); - self.repeek(); - - match kind { - Group::Paren => self.eat_assert(&NodeKind::LeftParen), - Group::Bracket => self.eat_assert(&NodeKind::LeftBracket), - Group::Brace => self.eat_assert(&NodeKind::LeftBrace), - Group::Stmt => {} - Group::Expr => {} - Group::Imports => {} - } - } - - /// End the parsing of a group. - /// - /// This panics if no group was started. - pub fn end_group(&mut self) { - let prev_mode = self.tokens.mode(); - let group = self.groups.pop().expect("no started group"); - self.tokens.set_mode(group.prev_mode); - self.repeek(); - - let mut rescan = self.tokens.mode() != prev_mode; - - // Eat the end delimiter if there is one. - if let Some((end, required)) = match group.kind { - Group::Paren => Some((NodeKind::RightParen, true)), - Group::Bracket => Some((NodeKind::RightBracket, true)), - Group::Brace => Some((NodeKind::RightBrace, true)), - Group::Stmt => Some((NodeKind::Semicolon, false)), - Group::Expr => None, - Group::Imports => None, - } { - if self.next == Some(end.clone()) { - // Bump the delimeter and return. No need to rescan in this case. - self.eat(); - rescan = false; - } else if required { - self.push_error(format!("expected {}", end)); - } - } - - // Rescan the peeked token if the mode changed. - if rescan { - if prev_mode == TokenMode::Code { - let len = self.children.len(); - for n in (0 .. len).rev() { - if !self.skip_type(self.children[n].kind()) { - break; - } - - self.children.pop(); - } - } - - self.tokens.jump(self.prev_end()); - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - self.repeek(); - } - } - - /// Add an error that `what` was expected at the given span. - pub fn expected_at(&mut self, what: &str) { - let mut found = self.children.len(); - for (i, node) in self.children.iter().enumerate().rev() { - if !self.skip_type(node.kind()) { - break; - } - found = i; - } - - Marker(found).expected_at(self, what); - } - - /// Eat the next token and add an error that it is not the expected `thing`. - pub fn expected(&mut self, what: &str) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("expected {}, found {}", what, found).into(), - ), - Self::eat, - ); - } - None => self.expected_at(what), - } - } - - /// Eat the next token and add an error that it is unexpected. - pub fn unexpected(&mut self) { - match self.peek().cloned() { - Some(found) => { - self.perform( - NodeKind::Error( - ErrorPosition::Full, - format!("unexpected {}", found).into(), - ), - Self::eat, - ); - } - None => self.push_error("unexpected end of file"), - } - } - - /// Returns whether the given type can be skipped over given the current - /// newline mode. - pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool { - match token { - NodeKind::Space(n) => n < &1 || !stop_at_newline, - NodeKind::LineComment => true, - NodeKind::BlockComment => true, - _ => false, - } - } - - /// Returns whether the given type can be skipped over. - fn skip_type(&self, token: &NodeKind) -> bool { - Self::skip_type_ext(token, self.stop_at_newline()) - } - - /// Consume the next token. - pub fn eat(&mut self) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.prev_end = self.tokens.index().into(); - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - - if self.tokens.mode() == TokenMode::Code { - // Skip whitespace and comments. - while self.next.as_ref().map_or(false, |x| self.skip_type(x)) { - self.children.push( - GreenData::new( - self.next.clone().unwrap(), - self.tokens.index() - self.next_start, - ) - .into(), - ); - - self.next_start = self.tokens.index().into(); - self.next = self.tokens.next(); - } - } - - self.repeek(); - } - - /// Take another look at the next token to recheck whether it ends a group. - fn repeek(&mut self) { - self.peeked = self.next.clone(); - let token = match self.next.as_ref() { - Some(token) => token, - None => return, - }; - - if match token { - NodeKind::RightParen => self.inside(Group::Paren), - NodeKind::RightBracket => self.inside(Group::Bracket), - NodeKind::RightBrace => self.inside(Group::Brace), - NodeKind::Semicolon => self.inside(Group::Stmt), - NodeKind::From => self.inside(Group::Imports), - NodeKind::Space(n) => n > &0 && self.stop_at_newline(), - _ => false, - } { - self.peeked = None; - } - } - - /// Whether the active group ends at a newline. - fn stop_at_newline(&self) -> bool { - matches!( - self.groups.last().map(|group| group.kind), - Some(Group::Stmt | Group::Expr | Group::Imports) - ) - } - - /// Whether we are inside the given group. - fn inside(&self, kind: Group) -> bool { - self.groups.iter().any(|g| g.kind == kind) - } - - /// Returns the last child of the current stack frame. - pub fn last_child(&self) -> Option<&Green> { - self.children.last() - } - - /// Create a new marker. - pub fn marker(&mut self) -> Marker { - Marker(self.children.len()) - } -} - -/// A marker that indicates where a child may start. -pub struct Marker(usize); - -impl Marker { - /// Wraps all children in front of the marker. - pub fn end(&self, p: &mut Parser, kind: NodeKind) { - let stop_nl = p.stop_at_newline(); - let end = (self.0 .. p.children.len()) - .rev() - .find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl)) - .unwrap_or(self.0) - + 1; - - let children: Vec<_> = p.children.drain(self.0 .. end).collect(); - p.children - .insert(self.0, GreenNode::with_children(kind, children).into()); - } - - /// Wrap all children that do not fulfill the predicate in error nodes. - pub fn filter_children(&self, p: &mut Parser, f: F) - where - F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>, - { - for child in &mut p.children[self.0 ..] { - if !((p.tokens.mode() != TokenMode::Code - || Parser::skip_type_ext(child.kind(), false)) - || child.kind().is_error()) - { - if let Err((pos, msg)) = f(child) { - let inner = std::mem::take(child); - *child = - GreenNode::with_child(NodeKind::Error(pos, msg), inner).into(); - } - } - } - } - - /// Insert an error message that `what` was expected at the marker position. - pub fn expected_at(&self, p: &mut Parser, what: &str) { - p.children.insert( - self.0, - GreenData::new( - NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()), - 0, - ) - .into(), - ); - } - - /// Return a reference to the child after the marker. - pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> { - p.children.get(self.0) - } - - pub fn perform(&self, p: &mut Parser, kind: NodeKind, f: F) -> T - where - F: FnOnce(&mut Parser) -> T, - { - let success = f(p); - self.end(p, kind); - success - } -} diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 3fab98a4b..b330dbd6a 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -172,8 +172,8 @@ mod tests { test("typst\n it!", "typst", "\n it!"); test("typst\n it!", "typst", "\n it!"); test("abc`", "abc", "`"); - test(" hi", "", " hi"); - test("`", "", "`"); + test(" hi", "", " hi"); + test("`", "", "`"); } #[test] diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index aa28e1f50..494a9f0b9 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -57,12 +57,6 @@ impl<'s> Tokens<'s> { self.s.jump(index); } - /// The column of a given index in the source string. - #[inline] - pub fn column(&self, index: usize) -> usize { - self.s.column(index) - } - /// The underlying scanner. #[inline] pub fn scanner(&self) -> Scanner<'s> { @@ -314,7 +308,7 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.column(self.s.index() - 1); + let column = self.s.column(self.s.index() - 1); let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { @@ -342,10 +336,8 @@ impl<'s> Tokens<'s> { } } - let terminated = found == backticks; - let end = self.s.index() - if terminated { found as usize } else { 0 }; - - if terminated { + if found == backticks { + let end = self.s.index() - found as usize; NodeKind::Raw(Rc::new(resolve_raw( column, backticks, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 363cbe6e9..022b51de0 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -42,11 +42,10 @@ impl Green { /// Set the type of the node. pub fn set_kind(&mut self, kind: NodeKind) { - let data = match self { - Self::Node(node) => &mut Rc::make_mut(node).data, - Self::Token(data) => data, - }; - data.set_kind(kind); + match self { + Self::Node(node) => Rc::make_mut(node).data.set_kind(kind), + Self::Token(data) => data.set_kind(kind), + } } /// The length of the node. @@ -56,7 +55,10 @@ impl Green { /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { - self.data().erroneous() + match self { + Self::Node(node) => node.erroneous, + Self::Token(data) => data.kind.is_error(), + } } /// The node's children. @@ -94,26 +96,32 @@ pub struct GreenNode { data: GreenData, /// This node's children, losslessly make up this node. children: Vec, + /// Whether this node or any of its children are erroneous. + erroneous: bool, } impl GreenNode { - /// Creates a new node with the given kind and children. - pub fn with_children(kind: NodeKind, children: Vec) -> Self { - let mut data = GreenData::new(kind, 0); - let len = children - .iter() - .inspect(|c| data.erroneous |= c.erroneous()) - .map(Green::len) - .sum(); - data.len = len; - Self { data, children } - } - /// Creates a new node with the given kind and a single child. pub fn with_child(kind: NodeKind, child: impl Into) -> Self { Self::with_children(kind, vec![child.into()]) } + /// Creates a new node with the given kind and children. + pub fn with_children(kind: NodeKind, children: Vec) -> Self { + let mut erroneous = kind.is_error(); + let len = children + .iter() + .inspect(|c| erroneous |= c.erroneous()) + .map(Green::len) + .sum(); + + Self { + data: GreenData::new(kind, len), + children, + erroneous, + } + } + /// The node's children. pub fn children(&self) -> &[Green] { &self.children @@ -140,14 +148,12 @@ pub struct GreenData { kind: NodeKind, /// The byte length of the node in the source. len: usize, - /// Whether this node or any of its children contain an error. - erroneous: bool, } impl GreenData { /// Create new node metadata. pub fn new(kind: NodeKind, len: usize) -> Self { - Self { len, erroneous: kind.is_error(), kind } + Self { len, kind } } /// The type of the node. @@ -164,11 +170,6 @@ impl GreenData { pub fn len(&self) -> usize { self.len } - - /// Whether the node or its children contain an error. - pub fn erroneous(&self) -> bool { - self.erroneous - } } impl From for Green { @@ -219,7 +220,7 @@ impl<'a> RedRef<'a> { /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { - if !self.green.erroneous() { + if !self.erroneous() { return vec![]; } @@ -235,7 +236,7 @@ impl<'a> RedRef<'a> { } _ => self .children() - .filter(|red| red.green.erroneous()) + .filter(|red| red.erroneous()) .flat_map(|red| red.errors()) .collect(), } @@ -256,11 +257,11 @@ impl<'a> RedRef<'a> { Green::Token(_) => &[], }; - let mut offset = self.offset; + let mut cursor = self.offset; children.iter().map(move |green| { - let child_offset = offset; - offset += green.len(); - RedRef { id: self.id, offset: child_offset, green } + let offset = cursor; + cursor += green.len(); + RedRef { id: self.id, offset, green } }) } @@ -623,29 +624,17 @@ pub enum ErrorPosition { impl NodeKind { /// Whether this is some kind of parenthesis. pub fn is_paren(&self) -> bool { - match self { - Self::LeftParen => true, - Self::RightParen => true, - _ => false, - } + matches!(self, Self::LeftParen | Self::RightParen) } /// Whether this is some kind of bracket. pub fn is_bracket(&self) -> bool { - match self { - Self::LeftBracket => true, - Self::RightBracket => true, - _ => false, - } + matches!(self, Self::LeftBracket | Self::RightBracket) } /// Whether this is some kind of brace. pub fn is_brace(&self) -> bool { - match self { - Self::LeftBrace => true, - Self::RightBrace => true, - _ => false, - } + matches!(self, Self::LeftBrace | Self::RightBrace) } /// Whether this is some kind of error.