mirror of
https://github.com/typst/typst
synced 2025-06-30 01:02:52 +08:00
Faster parser
This commit is contained in:
parent
515fe89c5e
commit
41bdafb578
@ -13,13 +13,16 @@ pub use tokens::*;
|
|||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
use crate::syntax::ast::{Associativity, BinOp, UnOp};
|
use crate::syntax::ast::{Associativity, BinOp, UnOp};
|
||||||
use crate::syntax::{ErrorPosition, GreenNode, NodeKind};
|
use crate::syntax::{ErrorPosition, Green, GreenNode, NodeKind};
|
||||||
|
|
||||||
/// Parse a source file.
|
/// Parse a source file.
|
||||||
pub fn parse(source: &str) -> Rc<GreenNode> {
|
pub fn parse(source: &str) -> Rc<GreenNode> {
|
||||||
let mut p = Parser::new(source);
|
let mut p = Parser::new(source);
|
||||||
markup(&mut p);
|
markup(&mut p);
|
||||||
p.finish()
|
match p.finish().into_iter().next() {
|
||||||
|
Some(Green::Node(node)) => node,
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse markup.
|
/// Parse markup.
|
||||||
@ -36,7 +39,7 @@ fn markup_indented(p: &mut Parser, column: usize) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
markup_while(p, false, &mut |p| match p.peek() {
|
markup_while(p, false, &mut |p| match p.peek() {
|
||||||
Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.next_end()) >= column,
|
Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.current_end()) >= column,
|
||||||
_ => true,
|
_ => true,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -114,7 +117,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
|
|||||||
let stmt = matches!(token, NodeKind::Let | NodeKind::Import);
|
let stmt = matches!(token, NodeKind::Let | NodeKind::Import);
|
||||||
let group = if stmt { Group::Stmt } else { Group::Expr };
|
let group = if stmt { Group::Stmt } else { Group::Expr };
|
||||||
|
|
||||||
p.start_group(group, TokenMode::Code);
|
p.start_group(group);
|
||||||
let res = expr_prec(p, true, 0);
|
let res = expr_prec(p, true, 0);
|
||||||
if stmt && res.is_ok() && !p.eof() {
|
if stmt && res.is_ok() && !p.eof() {
|
||||||
p.expected_at("semicolon or line break");
|
p.expected_at("semicolon or line break");
|
||||||
@ -177,8 +180,9 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
|
|||||||
let marker = p.marker();
|
let marker = p.marker();
|
||||||
|
|
||||||
// Start the unary expression.
|
// Start the unary expression.
|
||||||
match p.eat_map(|x| UnOp::from_token(&x)) {
|
match p.peek().and_then(UnOp::from_token) {
|
||||||
Some(op) => {
|
Some(op) => {
|
||||||
|
p.eat();
|
||||||
let prec = op.precedence();
|
let prec = op.precedence();
|
||||||
expr_prec(p, atomic, prec)?;
|
expr_prec(p, atomic, prec)?;
|
||||||
marker.end(p, NodeKind::Unary);
|
marker.end(p, NodeKind::Unary);
|
||||||
@ -201,7 +205,7 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.peek() == Some(&NodeKind::With) {
|
if p.at(&NodeKind::With) {
|
||||||
with_expr(p, &marker)?;
|
with_expr(p, &marker)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -242,7 +246,7 @@ fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
|
|||||||
p.eat();
|
p.eat();
|
||||||
|
|
||||||
// Arrow means this is a closure's lone parameter.
|
// Arrow means this is a closure's lone parameter.
|
||||||
if !atomic && p.peek() == Some(&NodeKind::Arrow) {
|
if !atomic && p.at(&NodeKind::Arrow) {
|
||||||
marker.end(p, NodeKind::ClosureParams);
|
marker.end(p, NodeKind::ClosureParams);
|
||||||
p.eat();
|
p.eat();
|
||||||
marker.perform(p, NodeKind::Closure, expr)
|
marker.perform(p, NodeKind::Closure, expr)
|
||||||
@ -315,7 +319,7 @@ fn literal(p: &mut Parser) -> bool {
|
|||||||
fn parenthesized(p: &mut Parser) -> ParseResult {
|
fn parenthesized(p: &mut Parser) -> ParseResult {
|
||||||
let marker = p.marker();
|
let marker = p.marker();
|
||||||
|
|
||||||
p.start_group(Group::Paren, TokenMode::Code);
|
p.start_group(Group::Paren);
|
||||||
let colon = p.eat_if(&NodeKind::Colon);
|
let colon = p.eat_if(&NodeKind::Colon);
|
||||||
let kind = collection(p).0;
|
let kind = collection(p).0;
|
||||||
p.end_group();
|
p.end_group();
|
||||||
@ -327,14 +331,14 @@ fn parenthesized(p: &mut Parser) -> ParseResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Arrow means this is a closure's parameter list.
|
// Arrow means this is a closure's parameter list.
|
||||||
if p.peek() == Some(&NodeKind::Arrow) {
|
if p.at(&NodeKind::Arrow) {
|
||||||
params(p, &marker, true);
|
params(p, &marker, true);
|
||||||
marker.end(p, NodeKind::ClosureParams);
|
marker.end(p, NodeKind::ClosureParams);
|
||||||
p.eat_assert(&NodeKind::Arrow);
|
p.eat_assert(&NodeKind::Arrow);
|
||||||
return marker.perform(p, NodeKind::Closure, expr);
|
return marker.perform(p, NodeKind::Closure, expr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find out which kind of collection this is.
|
// Transform into the identified collection.
|
||||||
match kind {
|
match kind {
|
||||||
CollectionKind::Group => marker.end(p, NodeKind::Group),
|
CollectionKind::Group => marker.end(p, NodeKind::Group),
|
||||||
CollectionKind::Positional => array(p, &marker),
|
CollectionKind::Positional => array(p, &marker),
|
||||||
@ -402,7 +406,8 @@ fn collection(p: &mut Parser) -> (CollectionKind, usize) {
|
|||||||
(kind, items)
|
(kind, items)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse an expression or a named pair. Returns if this is a named pair.
|
/// Parse an expression or a named pair, returning whether it's a spread or a
|
||||||
|
/// named pair.
|
||||||
fn item(p: &mut Parser) -> ParseResult<NodeKind> {
|
fn item(p: &mut Parser) -> ParseResult<NodeKind> {
|
||||||
let marker = p.marker();
|
let marker = p.marker();
|
||||||
if p.eat_if(&NodeKind::Dots) {
|
if p.eat_if(&NodeKind::Dots) {
|
||||||
@ -412,25 +417,24 @@ fn item(p: &mut Parser) -> ParseResult<NodeKind> {
|
|||||||
|
|
||||||
expr(p)?;
|
expr(p)?;
|
||||||
|
|
||||||
if p.peek() == Some(&NodeKind::Colon) {
|
if p.at(&NodeKind::Colon) {
|
||||||
marker.perform(p, NodeKind::Named, |p| {
|
marker.perform(p, NodeKind::Named, |p| {
|
||||||
if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) {
|
if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) {
|
||||||
p.eat();
|
p.eat();
|
||||||
expr(p)
|
expr(p)
|
||||||
} else {
|
} else {
|
||||||
marker.end(
|
let error =
|
||||||
p,
|
NodeKind::Error(ErrorPosition::Full, "expected identifier".into());
|
||||||
NodeKind::Error(ErrorPosition::Full, "expected identifier".into()),
|
marker.end(p, error);
|
||||||
);
|
|
||||||
p.eat();
|
p.eat();
|
||||||
|
|
||||||
expr(p).ok();
|
expr(p).ok();
|
||||||
Err(())
|
Err(())
|
||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
Ok(NodeKind::Named)
|
Ok(NodeKind::Named)
|
||||||
} else {
|
} else {
|
||||||
Ok(p.last_child().unwrap().kind().clone())
|
Ok(NodeKind::None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -488,7 +492,7 @@ fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) {
|
|||||||
// Parse a template block: `[...]`.
|
// Parse a template block: `[...]`.
|
||||||
fn template(p: &mut Parser) {
|
fn template(p: &mut Parser) {
|
||||||
p.perform(NodeKind::Template, |p| {
|
p.perform(NodeKind::Template, |p| {
|
||||||
p.start_group(Group::Bracket, TokenMode::Markup);
|
p.start_group(Group::Bracket);
|
||||||
markup(p);
|
markup(p);
|
||||||
p.end_group();
|
p.end_group();
|
||||||
});
|
});
|
||||||
@ -497,9 +501,9 @@ fn template(p: &mut Parser) {
|
|||||||
/// Parse a code block: `{...}`.
|
/// Parse a code block: `{...}`.
|
||||||
fn block(p: &mut Parser) {
|
fn block(p: &mut Parser) {
|
||||||
p.perform(NodeKind::Block, |p| {
|
p.perform(NodeKind::Block, |p| {
|
||||||
p.start_group(Group::Brace, TokenMode::Code);
|
p.start_group(Group::Brace);
|
||||||
while !p.eof() {
|
while !p.eof() {
|
||||||
p.start_group(Group::Stmt, TokenMode::Code);
|
p.start_group(Group::Stmt);
|
||||||
if expr(p).is_ok() && !p.eof() {
|
if expr(p).is_ok() && !p.eof() {
|
||||||
p.expected_at("semicolon or line break");
|
p.expected_at("semicolon or line break");
|
||||||
}
|
}
|
||||||
@ -515,7 +519,7 @@ fn block(p: &mut Parser) {
|
|||||||
/// Parse a function call.
|
/// Parse a function call.
|
||||||
fn call(p: &mut Parser, callee: &Marker) -> ParseResult {
|
fn call(p: &mut Parser, callee: &Marker) -> ParseResult {
|
||||||
callee.perform(p, NodeKind::Call, |p| match p.peek_direct() {
|
callee.perform(p, NodeKind::Call, |p| match p.peek_direct() {
|
||||||
Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => {
|
Some(NodeKind::LeftParen | NodeKind::LeftBracket) => {
|
||||||
args(p, true);
|
args(p, true);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -530,7 +534,7 @@ fn call(p: &mut Parser, callee: &Marker) -> ParseResult {
|
|||||||
fn args(p: &mut Parser, allow_template: bool) {
|
fn args(p: &mut Parser, allow_template: bool) {
|
||||||
p.perform(NodeKind::CallArgs, |p| {
|
p.perform(NodeKind::CallArgs, |p| {
|
||||||
if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) {
|
if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) {
|
||||||
p.start_group(Group::Paren, TokenMode::Code);
|
p.start_group(Group::Paren);
|
||||||
collection(p);
|
collection(p);
|
||||||
p.end_group();
|
p.end_group();
|
||||||
}
|
}
|
||||||
@ -546,7 +550,7 @@ fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult {
|
|||||||
marker.perform(p, NodeKind::WithExpr, |p| {
|
marker.perform(p, NodeKind::WithExpr, |p| {
|
||||||
p.eat_assert(&NodeKind::With);
|
p.eat_assert(&NodeKind::With);
|
||||||
|
|
||||||
if p.peek() == Some(&NodeKind::LeftParen) {
|
if p.at(&NodeKind::LeftParen) {
|
||||||
args(p, false);
|
args(p, false);
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else {
|
||||||
@ -564,14 +568,14 @@ fn let_expr(p: &mut Parser) -> ParseResult {
|
|||||||
let marker = p.marker();
|
let marker = p.marker();
|
||||||
ident(p)?;
|
ident(p)?;
|
||||||
|
|
||||||
if p.peek() == Some(&NodeKind::With) {
|
if p.at(&NodeKind::With) {
|
||||||
with_expr(p, &marker)?;
|
with_expr(p, &marker)?;
|
||||||
} else {
|
} else {
|
||||||
// If a parenthesis follows, this is a function definition.
|
// If a parenthesis follows, this is a function definition.
|
||||||
let has_params = p.peek_direct() == Some(&NodeKind::LeftParen);
|
let has_params = p.peek_direct() == Some(&NodeKind::LeftParen);
|
||||||
if has_params {
|
if has_params {
|
||||||
p.perform(NodeKind::ClosureParams, |p| {
|
p.perform(NodeKind::ClosureParams, |p| {
|
||||||
p.start_group(Group::Paren, TokenMode::Code);
|
p.start_group(Group::Paren);
|
||||||
let marker = p.marker();
|
let marker = p.marker();
|
||||||
collection(p);
|
collection(p);
|
||||||
params(p, &marker, true);
|
params(p, &marker, true);
|
||||||
@ -605,7 +609,7 @@ fn if_expr(p: &mut Parser) -> ParseResult {
|
|||||||
body(p)?;
|
body(p)?;
|
||||||
|
|
||||||
if p.eat_if(&NodeKind::Else) {
|
if p.eat_if(&NodeKind::Else) {
|
||||||
if p.peek() == Some(&NodeKind::If) {
|
if p.at(&NodeKind::If) {
|
||||||
if_expr(p)?;
|
if_expr(p)?;
|
||||||
} else {
|
} else {
|
||||||
body(p)?;
|
body(p)?;
|
||||||
@ -657,7 +661,7 @@ fn import_expr(p: &mut Parser) -> ParseResult {
|
|||||||
if !p.eat_if(&NodeKind::Star) {
|
if !p.eat_if(&NodeKind::Star) {
|
||||||
// This is the list of identifiers scenario.
|
// This is the list of identifiers scenario.
|
||||||
p.perform(NodeKind::ImportItems, |p| {
|
p.perform(NodeKind::ImportItems, |p| {
|
||||||
p.start_group(Group::Imports, TokenMode::Code);
|
p.start_group(Group::Imports);
|
||||||
let marker = p.marker();
|
let marker = p.marker();
|
||||||
let items = collection(p).1;
|
let items = collection(p).1;
|
||||||
if items == 0 {
|
if items == 0 {
|
||||||
@ -712,6 +716,5 @@ fn body(p: &mut Parser) -> ParseResult {
|
|||||||
return Err(());
|
return Err(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
use std::ops::Range;
|
use std::mem;
|
||||||
use std::rc::Rc;
|
|
||||||
|
|
||||||
use super::{TokenMode, Tokens};
|
use super::{TokenMode, Tokens};
|
||||||
use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
|
use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
|
||||||
@ -11,27 +10,425 @@ pub(crate) type ParseResult<T = ()> = Result<T, ()>;
|
|||||||
|
|
||||||
/// A convenient token-based parser.
|
/// A convenient token-based parser.
|
||||||
pub struct Parser<'s> {
|
pub struct Parser<'s> {
|
||||||
/// The parsed file.
|
|
||||||
src: &'s str,
|
|
||||||
/// An iterator over the source tokens.
|
/// An iterator over the source tokens.
|
||||||
tokens: Tokens<'s>,
|
tokens: Tokens<'s>,
|
||||||
|
/// Whether we are at the end of the file or of a group.
|
||||||
|
eof: bool,
|
||||||
|
/// The current token.
|
||||||
|
current: Option<NodeKind>,
|
||||||
|
/// The end byte index of the last (non-whitespace if in code mode) token.
|
||||||
|
prev_end: usize,
|
||||||
|
/// The start byte index of the current token.
|
||||||
|
current_start: usize,
|
||||||
/// The stack of open groups.
|
/// The stack of open groups.
|
||||||
groups: Vec<GroupEntry>,
|
groups: Vec<GroupEntry>,
|
||||||
/// The next token.
|
|
||||||
next: Option<NodeKind>,
|
|
||||||
/// The peeked token.
|
|
||||||
/// (Same as `next` except if we are at the end of group, then `None`).
|
|
||||||
peeked: Option<NodeKind>,
|
|
||||||
/// The end index of the last (non-whitespace if in code mode) token.
|
|
||||||
prev_end: usize,
|
|
||||||
/// The start index of the peeked token.
|
|
||||||
next_start: usize,
|
|
||||||
/// A stack of outer children vectors.
|
|
||||||
stack: Vec<Vec<Green>>,
|
|
||||||
/// The children of the currently built node.
|
/// The children of the currently built node.
|
||||||
children: Vec<Green>,
|
children: Vec<Green>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'s> Parser<'s> {
|
||||||
|
/// Create a new parser for the source string.
|
||||||
|
pub fn new(src: &'s str) -> Self {
|
||||||
|
let mut tokens = Tokens::new(src, TokenMode::Markup);
|
||||||
|
let current = tokens.next();
|
||||||
|
Self {
|
||||||
|
tokens,
|
||||||
|
eof: current.is_none(),
|
||||||
|
current,
|
||||||
|
prev_end: 0,
|
||||||
|
current_start: 0,
|
||||||
|
groups: vec![],
|
||||||
|
children: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// End the parsing process and return the parsed children.
|
||||||
|
pub fn finish(self) -> Vec<Green> {
|
||||||
|
self.children
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new marker.
|
||||||
|
pub fn marker(&mut self) -> Marker {
|
||||||
|
Marker(self.children.len())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Perform a subparse that wraps its result in a node with the given kind.
|
||||||
|
pub fn perform<F, T>(&mut self, kind: NodeKind, f: F) -> T
|
||||||
|
where
|
||||||
|
F: FnOnce(&mut Self) -> T,
|
||||||
|
{
|
||||||
|
let prev = mem::take(&mut self.children);
|
||||||
|
let output = f(self);
|
||||||
|
let mut children = mem::replace(&mut self.children, prev);
|
||||||
|
|
||||||
|
// Trailing trivia should not be wrapped into the new node.
|
||||||
|
let mut remains = vec![];
|
||||||
|
if self.tokens.mode() == TokenMode::Code {
|
||||||
|
let len = children.len();
|
||||||
|
for n in (0 .. len).rev() {
|
||||||
|
if !self.is_trivia(&children[n].kind()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
remains.push(children.pop().unwrap());
|
||||||
|
}
|
||||||
|
remains.reverse();
|
||||||
|
}
|
||||||
|
|
||||||
|
self.children.push(GreenNode::with_children(kind, children).into());
|
||||||
|
self.children.extend(remains);
|
||||||
|
|
||||||
|
output
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether the end of the source string or group is reached.
|
||||||
|
pub fn eof(&self) -> bool {
|
||||||
|
self.eof
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consume the current token and also trailing trivia if in code mode.
|
||||||
|
pub fn eat(&mut self) {
|
||||||
|
self.prev_end = self.tokens.index();
|
||||||
|
self.bump();
|
||||||
|
|
||||||
|
if self.tokens.mode() == TokenMode::Code {
|
||||||
|
// Skip whitespace and comments.
|
||||||
|
while self.current.as_ref().map_or(false, |x| self.is_trivia(x)) {
|
||||||
|
self.bump();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.repeek();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Eat the current token if it is the given one.
|
||||||
|
pub fn eat_if(&mut self, t: &NodeKind) -> bool {
|
||||||
|
let at = self.at(t);
|
||||||
|
if at {
|
||||||
|
self.eat();
|
||||||
|
}
|
||||||
|
at
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Eat if the current token is the given one and produce an error if not.
|
||||||
|
pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult {
|
||||||
|
let eaten = self.eat_if(t);
|
||||||
|
if !eaten {
|
||||||
|
self.expected_at(t.as_str());
|
||||||
|
}
|
||||||
|
if eaten { Ok(()) } else { Err(()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Eat, debug-asserting that the token is the given one.
|
||||||
|
pub fn eat_assert(&mut self, t: &NodeKind) {
|
||||||
|
debug_assert_eq!(self.peek(), Some(t));
|
||||||
|
self.eat();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Eat tokens while the condition is true.
|
||||||
|
pub fn eat_while<F>(&mut self, mut f: F)
|
||||||
|
where
|
||||||
|
F: FnMut(&NodeKind) -> bool,
|
||||||
|
{
|
||||||
|
while self.peek().map_or(false, |t| f(t)) {
|
||||||
|
self.eat();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Eat the current token, but change its type.
|
||||||
|
pub fn convert(&mut self, kind: NodeKind) {
|
||||||
|
let idx = self.children.len();
|
||||||
|
self.eat();
|
||||||
|
if let Some(child) = self.children.get_mut(idx) {
|
||||||
|
child.set_kind(kind);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether the current token is of the given type.
|
||||||
|
pub fn at(&self, kind: &NodeKind) -> bool {
|
||||||
|
self.peek() == Some(kind)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Peek at the current token without consuming it.
|
||||||
|
pub fn peek(&self) -> Option<&NodeKind> {
|
||||||
|
if self.eof { None } else { self.current.as_ref() }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Peek at the current token, if it follows immediately after the last one
|
||||||
|
/// without any trivia in between.
|
||||||
|
pub fn peek_direct(&self) -> Option<&NodeKind> {
|
||||||
|
if self.prev_end() == self.current_start() {
|
||||||
|
self.peek()
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Peek at the source of the current token.
|
||||||
|
pub fn peek_src(&self) -> &'s str {
|
||||||
|
self.tokens.scanner().get(self.current_start() .. self.current_end())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The byte index at which the last token ended.
|
||||||
|
///
|
||||||
|
/// Refers to the end of the last non-trivia token in code mode.
|
||||||
|
pub fn prev_end(&self) -> usize {
|
||||||
|
self.prev_end
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The byte index at which the current token starts.
|
||||||
|
pub fn current_start(&self) -> usize {
|
||||||
|
self.current_start
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The byte index at which the current token ends.
|
||||||
|
pub fn current_end(&self) -> usize {
|
||||||
|
self.tokens.index()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Determine the column index for the given byte index.
|
||||||
|
pub fn column(&self, index: usize) -> usize {
|
||||||
|
self.tokens.scanner().column(index)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Continue parsing in a group.
|
||||||
|
///
|
||||||
|
/// When the end delimiter of the group is reached, all subsequent calls to
|
||||||
|
/// `peek()` return `None`. Parsing can only continue with a matching call
|
||||||
|
/// to `end_group`.
|
||||||
|
///
|
||||||
|
/// This panics if the current token does not start the given group.
|
||||||
|
pub fn start_group(&mut self, kind: Group) {
|
||||||
|
self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
|
||||||
|
self.tokens.set_mode(match kind {
|
||||||
|
Group::Bracket => TokenMode::Markup,
|
||||||
|
_ => TokenMode::Code,
|
||||||
|
});
|
||||||
|
|
||||||
|
self.repeek();
|
||||||
|
match kind {
|
||||||
|
Group::Paren => self.eat_assert(&NodeKind::LeftParen),
|
||||||
|
Group::Bracket => self.eat_assert(&NodeKind::LeftBracket),
|
||||||
|
Group::Brace => self.eat_assert(&NodeKind::LeftBrace),
|
||||||
|
Group::Stmt => {}
|
||||||
|
Group::Expr => {}
|
||||||
|
Group::Imports => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// End the parsing of a group.
|
||||||
|
///
|
||||||
|
/// This panics if no group was started.
|
||||||
|
pub fn end_group(&mut self) {
|
||||||
|
let group_mode = self.tokens.mode();
|
||||||
|
let group = self.groups.pop().expect("no started group");
|
||||||
|
self.tokens.set_mode(group.prev_mode);
|
||||||
|
self.repeek();
|
||||||
|
|
||||||
|
let mut rescan = self.tokens.mode() != group_mode;
|
||||||
|
|
||||||
|
// Eat the end delimiter if there is one.
|
||||||
|
if let Some((end, required)) = match group.kind {
|
||||||
|
Group::Paren => Some((NodeKind::RightParen, true)),
|
||||||
|
Group::Bracket => Some((NodeKind::RightBracket, true)),
|
||||||
|
Group::Brace => Some((NodeKind::RightBrace, true)),
|
||||||
|
Group::Stmt => Some((NodeKind::Semicolon, false)),
|
||||||
|
Group::Expr => None,
|
||||||
|
Group::Imports => None,
|
||||||
|
} {
|
||||||
|
if self.current.as_ref() == Some(&end) {
|
||||||
|
// Bump the delimiter and return. No need to rescan in this case.
|
||||||
|
self.eat();
|
||||||
|
rescan = false;
|
||||||
|
} else if required {
|
||||||
|
self.push_error(format!("expected {}", end));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rescan the peeked token if the mode changed.
|
||||||
|
if rescan {
|
||||||
|
if group_mode == TokenMode::Code {
|
||||||
|
let len = self.children.len();
|
||||||
|
for n in (0 .. len).rev() {
|
||||||
|
if !self.is_trivia(self.children[n].kind()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.children.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.tokens.jump(self.prev_end());
|
||||||
|
self.prev_end = self.tokens.index();
|
||||||
|
self.current_start = self.tokens.index();
|
||||||
|
self.current = self.tokens.next();
|
||||||
|
self.repeek();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Low-level bump that consumes exactly one token without special trivia
|
||||||
|
/// handling.
|
||||||
|
fn bump(&mut self) {
|
||||||
|
let kind = self.current.take().unwrap();
|
||||||
|
let len = self.tokens.index() - self.current_start;
|
||||||
|
self.children.push(GreenData::new(kind, len).into());
|
||||||
|
self.current_start = self.tokens.index();
|
||||||
|
self.current = self.tokens.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Take another look at the current token to recheck whether it ends a
|
||||||
|
/// group.
|
||||||
|
fn repeek(&mut self) {
|
||||||
|
self.eof = match &self.current {
|
||||||
|
Some(NodeKind::RightParen) => self.inside(Group::Paren),
|
||||||
|
Some(NodeKind::RightBracket) => self.inside(Group::Bracket),
|
||||||
|
Some(NodeKind::RightBrace) => self.inside(Group::Brace),
|
||||||
|
Some(NodeKind::Semicolon) => self.inside(Group::Stmt),
|
||||||
|
Some(NodeKind::From) => self.inside(Group::Imports),
|
||||||
|
Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(),
|
||||||
|
Some(_) => false,
|
||||||
|
None => true,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns whether the given type can be skipped over.
|
||||||
|
fn is_trivia(&self, token: &NodeKind) -> bool {
|
||||||
|
Self::is_trivia_ext(token, self.stop_at_newline())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns whether the given type can be skipped over given the current
|
||||||
|
/// newline mode.
|
||||||
|
fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool {
|
||||||
|
match token {
|
||||||
|
NodeKind::Space(n) => *n == 0 || !stop_at_newline,
|
||||||
|
NodeKind::LineComment => true,
|
||||||
|
NodeKind::BlockComment => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether the active group must end at a newline.
|
||||||
|
fn stop_at_newline(&self) -> bool {
|
||||||
|
matches!(
|
||||||
|
self.groups.last().map(|group| group.kind),
|
||||||
|
Some(Group::Stmt | Group::Expr | Group::Imports)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether we are inside the given group.
|
||||||
|
fn inside(&self, kind: Group) -> bool {
|
||||||
|
self.groups.iter().any(|g| g.kind == kind)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Error handling.
|
||||||
|
impl Parser<'_> {
|
||||||
|
/// Push an error into the children list.
|
||||||
|
pub fn push_error(&mut self, msg: impl Into<EcoString>) {
|
||||||
|
let error = NodeKind::Error(ErrorPosition::Full, msg.into());
|
||||||
|
self.children.push(GreenData::new(error, 0).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Eat the current token and add an error that it is unexpected.
|
||||||
|
pub fn unexpected(&mut self) {
|
||||||
|
match self.peek() {
|
||||||
|
Some(found) => {
|
||||||
|
let msg = format!("unexpected {}", found);
|
||||||
|
let error = NodeKind::Error(ErrorPosition::Full, msg.into());
|
||||||
|
self.perform(error, Self::eat);
|
||||||
|
}
|
||||||
|
None => self.push_error("unexpected end of file"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Eat the current token and add an error that it is not the expected `thing`.
|
||||||
|
pub fn expected(&mut self, thing: &str) {
|
||||||
|
match self.peek() {
|
||||||
|
Some(found) => {
|
||||||
|
let msg = format!("expected {}, found {}", thing, found);
|
||||||
|
let error = NodeKind::Error(ErrorPosition::Full, msg.into());
|
||||||
|
self.perform(error, Self::eat);
|
||||||
|
}
|
||||||
|
None => self.expected_at(thing),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add an error that the `thing` was expected at the end of the last
|
||||||
|
/// non-trivia token.
|
||||||
|
pub fn expected_at(&mut self, thing: &str) {
|
||||||
|
let mut found = self.children.len();
|
||||||
|
for (i, node) in self.children.iter().enumerate().rev() {
|
||||||
|
if !self.is_trivia(node.kind()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
found = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
Marker(found).expected_at(self, thing);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A marker that indicates where a node may start.
|
||||||
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||||
|
pub struct Marker(usize);
|
||||||
|
|
||||||
|
impl Marker {
|
||||||
|
/// Perform a subparse that wraps all children after the marker in a node
|
||||||
|
/// with the given kind.
|
||||||
|
pub fn perform<T, F>(self, p: &mut Parser, kind: NodeKind, f: F) -> T
|
||||||
|
where
|
||||||
|
F: FnOnce(&mut Parser) -> T,
|
||||||
|
{
|
||||||
|
let success = f(p);
|
||||||
|
self.end(p, kind);
|
||||||
|
success
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wrap all children after the marker in a node with the given `kind`.
|
||||||
|
pub fn end(self, p: &mut Parser, kind: NodeKind) {
|
||||||
|
let end = (self.0 .. p.children.len())
|
||||||
|
.rev()
|
||||||
|
.find(|&i| !p.is_trivia(p.children[i].kind()))
|
||||||
|
.unwrap_or(self.0)
|
||||||
|
+ 1;
|
||||||
|
|
||||||
|
let children: Vec<_> = p.children.drain(self.0 .. end).collect();
|
||||||
|
p.children
|
||||||
|
.insert(self.0, GreenNode::with_children(kind, children).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wrap all children that do not fulfill the predicate in error nodes.
|
||||||
|
pub fn filter_children<F>(self, p: &mut Parser, f: F)
|
||||||
|
where
|
||||||
|
F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>,
|
||||||
|
{
|
||||||
|
for child in &mut p.children[self.0 ..] {
|
||||||
|
if (p.tokens.mode() == TokenMode::Markup
|
||||||
|
|| !Parser::is_trivia_ext(child.kind(), false))
|
||||||
|
&& !child.kind().is_error()
|
||||||
|
{
|
||||||
|
if let Err((pos, msg)) = f(child) {
|
||||||
|
let error = NodeKind::Error(pos, msg);
|
||||||
|
let inner = mem::take(child);
|
||||||
|
*child = GreenNode::with_child(error, inner).into();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert an error message that `what` was expected at the marker position.
|
||||||
|
pub fn expected_at(self, p: &mut Parser, what: &str) {
|
||||||
|
let msg = format!("expected {}", what);
|
||||||
|
let error = NodeKind::Error(ErrorPosition::Full, msg.into());
|
||||||
|
p.children.insert(self.0, GreenData::new(error, 0).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return a reference to the child directly after the marker.
|
||||||
|
pub fn child_at<'a>(self, p: &'a Parser) -> Option<&'a Green> {
|
||||||
|
p.children.get(self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A logical group of tokens, e.g. `[...]`.
|
/// A logical group of tokens, e.g. `[...]`.
|
||||||
struct GroupEntry {
|
struct GroupEntry {
|
||||||
/// The kind of group this is. This decides which tokens will end the group.
|
/// The kind of group this is. This decides which tokens will end the group.
|
||||||
@ -46,12 +443,12 @@ struct GroupEntry {
|
|||||||
/// A group, confined by optional start and end delimiters.
|
/// A group, confined by optional start and end delimiters.
|
||||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||||
pub enum Group {
|
pub enum Group {
|
||||||
/// A parenthesized group: `(...)`.
|
|
||||||
Paren,
|
|
||||||
/// A bracketed group: `[...]`.
|
/// A bracketed group: `[...]`.
|
||||||
Bracket,
|
Bracket,
|
||||||
/// A curly-braced group: `{...}`.
|
/// A curly-braced group: `{...}`.
|
||||||
Brace,
|
Brace,
|
||||||
|
/// A parenthesized group: `(...)`.
|
||||||
|
Paren,
|
||||||
/// A group ended by a semicolon or a line break: `;`, `\n`.
|
/// A group ended by a semicolon or a line break: `;`, `\n`.
|
||||||
Stmt,
|
Stmt,
|
||||||
/// A group for a single expression, ended by a line break.
|
/// A group for a single expression, ended by a line break.
|
||||||
@ -59,460 +456,3 @@ pub enum Group {
|
|||||||
/// A group for import items, ended by a semicolon, line break or `from`.
|
/// A group for import items, ended by a semicolon, line break or `from`.
|
||||||
Imports,
|
Imports,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Parser<'s> {
|
|
||||||
/// Create a new parser for the source string.
|
|
||||||
pub fn new(src: &'s str) -> Self {
|
|
||||||
let mut tokens = Tokens::new(src, TokenMode::Markup);
|
|
||||||
let next = tokens.next();
|
|
||||||
Self {
|
|
||||||
src,
|
|
||||||
tokens,
|
|
||||||
groups: vec![],
|
|
||||||
next: next.clone(),
|
|
||||||
peeked: next,
|
|
||||||
prev_end: 0,
|
|
||||||
next_start: 0,
|
|
||||||
stack: vec![],
|
|
||||||
children: vec![],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Perform a subparse that wraps its result in a node with the given kind.
|
|
||||||
pub fn perform<T, F>(&mut self, kind: NodeKind, f: F) -> T
|
|
||||||
where
|
|
||||||
F: FnOnce(&mut Self) -> T,
|
|
||||||
{
|
|
||||||
let prev = std::mem::take(&mut self.children);
|
|
||||||
let output = f(self);
|
|
||||||
let mut children = std::mem::replace(&mut self.children, prev);
|
|
||||||
|
|
||||||
// Trailing trivia should not be wrapped into the new node.
|
|
||||||
let mut remains = vec![];
|
|
||||||
if self.tokens.mode() == TokenMode::Code {
|
|
||||||
let len = children.len();
|
|
||||||
for n in (0 .. len).rev() {
|
|
||||||
if !self.skip_type(&children[n].kind()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
remains.push(children.pop().unwrap());
|
|
||||||
}
|
|
||||||
remains.reverse();
|
|
||||||
}
|
|
||||||
|
|
||||||
self.children.push(GreenNode::with_children(kind, children).into());
|
|
||||||
self.children.extend(remains);
|
|
||||||
|
|
||||||
output
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Eat and wrap the next token.
|
|
||||||
pub fn convert(&mut self, kind: NodeKind) {
|
|
||||||
self.eat();
|
|
||||||
self.children.last_mut().unwrap().set_kind(kind);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// End the current node and undo its existence, inling all accumulated
|
|
||||||
/// children into its parent.
|
|
||||||
pub fn lift(&mut self) {
|
|
||||||
let outer = self.stack.pop().unwrap();
|
|
||||||
let children = std::mem::replace(&mut self.children, outer);
|
|
||||||
self.children.extend(children);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Add an error to the current children list.
|
|
||||||
fn push_error(&mut self, msg: impl Into<String>) {
|
|
||||||
self.children.push(
|
|
||||||
GreenData::new(NodeKind::Error(ErrorPosition::Full, msg.into().into()), 0)
|
|
||||||
.into(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// End the parsing process and return the last child.
|
|
||||||
pub fn finish(&mut self) -> Rc<GreenNode> {
|
|
||||||
match self.children.pop().unwrap() {
|
|
||||||
Green::Node(n) => n,
|
|
||||||
_ => panic!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Whether the end of the source string or group is reached.
|
|
||||||
pub fn eof(&self) -> bool {
|
|
||||||
self.peek().is_none()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consume the next token if it is the given one.
|
|
||||||
pub fn eat_if(&mut self, t: &NodeKind) -> bool {
|
|
||||||
if self.peek() == Some(t) {
|
|
||||||
self.eat();
|
|
||||||
true
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consume the next token if the closure maps it a to `Some`-variant.
|
|
||||||
pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
|
|
||||||
where
|
|
||||||
F: FnOnce(&NodeKind) -> Option<T>,
|
|
||||||
{
|
|
||||||
let token = self.peek()?;
|
|
||||||
let mapped = f(token);
|
|
||||||
if mapped.is_some() {
|
|
||||||
self.eat();
|
|
||||||
}
|
|
||||||
mapped
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consume the next token if it is the given one and produce an error if
|
|
||||||
/// not.
|
|
||||||
pub fn eat_expect(&mut self, t: &NodeKind) -> ParseResult {
|
|
||||||
let eaten = self.eat_if(t);
|
|
||||||
if !eaten {
|
|
||||||
self.expected_at(t.as_str());
|
|
||||||
}
|
|
||||||
if eaten { Ok(()) } else { Err(()) }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consume the next token, debug-asserting that it is one of the given ones.
|
|
||||||
pub fn eat_assert(&mut self, t: &NodeKind) {
|
|
||||||
let next = self.peek();
|
|
||||||
debug_assert_eq!(next, Some(t));
|
|
||||||
self.eat();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consume tokens while the condition is true.
|
|
||||||
pub fn eat_while<F>(&mut self, mut f: F)
|
|
||||||
where
|
|
||||||
F: FnMut(&NodeKind) -> bool,
|
|
||||||
{
|
|
||||||
while self.peek().map_or(false, |t| f(t)) {
|
|
||||||
self.eat();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Peek at the next token without consuming it.
|
|
||||||
pub fn peek(&self) -> Option<&NodeKind> {
|
|
||||||
self.peeked.as_ref()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Peek at the next token if it follows immediately after the last one
|
|
||||||
/// without any whitespace in between.
|
|
||||||
pub fn peek_direct(&self) -> Option<&NodeKind> {
|
|
||||||
if self.next_start() == self.prev_end() {
|
|
||||||
self.peeked.as_ref()
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Peek at the source of the next token.
|
|
||||||
pub fn peek_src(&self) -> &'s str {
|
|
||||||
self.get(self.next_start() .. self.next_end())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The byte index at which the last token ended.
|
|
||||||
///
|
|
||||||
/// Refers to the end of the last _non-whitespace_ token in code mode.
|
|
||||||
pub fn prev_end(&self) -> usize {
|
|
||||||
self.prev_end
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The byte index at which the next token starts.
|
|
||||||
pub fn next_start(&self) -> usize {
|
|
||||||
self.next_start
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The byte index at which the next token will end.
|
|
||||||
///
|
|
||||||
/// Is the same as [`next_start()`][Self::next_start] if `peek()` returns
|
|
||||||
/// `None`.
|
|
||||||
pub fn next_end(&self) -> usize {
|
|
||||||
self.tokens.index()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Determine the column index for the given byte index.
|
|
||||||
pub fn column(&self, index: usize) -> usize {
|
|
||||||
self.tokens.column(index)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Slice out part of the source string.
|
|
||||||
pub fn get(&self, range: Range<usize>) -> &'s str {
|
|
||||||
self.src.get(range).unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Continue parsing in a group.
|
|
||||||
///
|
|
||||||
/// When the end delimiter of the group is reached, all subsequent calls to
|
|
||||||
/// `eat()` and `peek()` return `None`. Parsing can only continue with
|
|
||||||
/// a matching call to `end_group`.
|
|
||||||
///
|
|
||||||
/// This panics if the next token does not start the given group.
|
|
||||||
pub fn start_group(&mut self, kind: Group, mode: TokenMode) {
|
|
||||||
self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
|
|
||||||
self.tokens.set_mode(mode);
|
|
||||||
self.repeek();
|
|
||||||
|
|
||||||
match kind {
|
|
||||||
Group::Paren => self.eat_assert(&NodeKind::LeftParen),
|
|
||||||
Group::Bracket => self.eat_assert(&NodeKind::LeftBracket),
|
|
||||||
Group::Brace => self.eat_assert(&NodeKind::LeftBrace),
|
|
||||||
Group::Stmt => {}
|
|
||||||
Group::Expr => {}
|
|
||||||
Group::Imports => {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// End the parsing of a group.
|
|
||||||
///
|
|
||||||
/// This panics if no group was started.
|
|
||||||
pub fn end_group(&mut self) {
|
|
||||||
let prev_mode = self.tokens.mode();
|
|
||||||
let group = self.groups.pop().expect("no started group");
|
|
||||||
self.tokens.set_mode(group.prev_mode);
|
|
||||||
self.repeek();
|
|
||||||
|
|
||||||
let mut rescan = self.tokens.mode() != prev_mode;
|
|
||||||
|
|
||||||
// Eat the end delimiter if there is one.
|
|
||||||
if let Some((end, required)) = match group.kind {
|
|
||||||
Group::Paren => Some((NodeKind::RightParen, true)),
|
|
||||||
Group::Bracket => Some((NodeKind::RightBracket, true)),
|
|
||||||
Group::Brace => Some((NodeKind::RightBrace, true)),
|
|
||||||
Group::Stmt => Some((NodeKind::Semicolon, false)),
|
|
||||||
Group::Expr => None,
|
|
||||||
Group::Imports => None,
|
|
||||||
} {
|
|
||||||
if self.next == Some(end.clone()) {
|
|
||||||
// Bump the delimeter and return. No need to rescan in this case.
|
|
||||||
self.eat();
|
|
||||||
rescan = false;
|
|
||||||
} else if required {
|
|
||||||
self.push_error(format!("expected {}", end));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Rescan the peeked token if the mode changed.
|
|
||||||
if rescan {
|
|
||||||
if prev_mode == TokenMode::Code {
|
|
||||||
let len = self.children.len();
|
|
||||||
for n in (0 .. len).rev() {
|
|
||||||
if !self.skip_type(self.children[n].kind()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.children.pop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.tokens.jump(self.prev_end());
|
|
||||||
self.prev_end = self.tokens.index().into();
|
|
||||||
self.next_start = self.tokens.index().into();
|
|
||||||
self.next = self.tokens.next();
|
|
||||||
self.repeek();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Add an error that `what` was expected at the given span.
|
|
||||||
pub fn expected_at(&mut self, what: &str) {
|
|
||||||
let mut found = self.children.len();
|
|
||||||
for (i, node) in self.children.iter().enumerate().rev() {
|
|
||||||
if !self.skip_type(node.kind()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
found = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
Marker(found).expected_at(self, what);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Eat the next token and add an error that it is not the expected `thing`.
|
|
||||||
pub fn expected(&mut self, what: &str) {
|
|
||||||
match self.peek().cloned() {
|
|
||||||
Some(found) => {
|
|
||||||
self.perform(
|
|
||||||
NodeKind::Error(
|
|
||||||
ErrorPosition::Full,
|
|
||||||
format!("expected {}, found {}", what, found).into(),
|
|
||||||
),
|
|
||||||
Self::eat,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
None => self.expected_at(what),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Eat the next token and add an error that it is unexpected.
|
|
||||||
pub fn unexpected(&mut self) {
|
|
||||||
match self.peek().cloned() {
|
|
||||||
Some(found) => {
|
|
||||||
self.perform(
|
|
||||||
NodeKind::Error(
|
|
||||||
ErrorPosition::Full,
|
|
||||||
format!("unexpected {}", found).into(),
|
|
||||||
),
|
|
||||||
Self::eat,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
None => self.push_error("unexpected end of file"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns whether the given type can be skipped over given the current
|
|
||||||
/// newline mode.
|
|
||||||
pub fn skip_type_ext(token: &NodeKind, stop_at_newline: bool) -> bool {
|
|
||||||
match token {
|
|
||||||
NodeKind::Space(n) => n < &1 || !stop_at_newline,
|
|
||||||
NodeKind::LineComment => true,
|
|
||||||
NodeKind::BlockComment => true,
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns whether the given type can be skipped over.
|
|
||||||
fn skip_type(&self, token: &NodeKind) -> bool {
|
|
||||||
Self::skip_type_ext(token, self.stop_at_newline())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consume the next token.
|
|
||||||
pub fn eat(&mut self) {
|
|
||||||
self.children.push(
|
|
||||||
GreenData::new(
|
|
||||||
self.next.clone().unwrap(),
|
|
||||||
self.tokens.index() - self.next_start,
|
|
||||||
)
|
|
||||||
.into(),
|
|
||||||
);
|
|
||||||
|
|
||||||
self.prev_end = self.tokens.index().into();
|
|
||||||
self.next_start = self.tokens.index().into();
|
|
||||||
self.next = self.tokens.next();
|
|
||||||
|
|
||||||
if self.tokens.mode() == TokenMode::Code {
|
|
||||||
// Skip whitespace and comments.
|
|
||||||
while self.next.as_ref().map_or(false, |x| self.skip_type(x)) {
|
|
||||||
self.children.push(
|
|
||||||
GreenData::new(
|
|
||||||
self.next.clone().unwrap(),
|
|
||||||
self.tokens.index() - self.next_start,
|
|
||||||
)
|
|
||||||
.into(),
|
|
||||||
);
|
|
||||||
|
|
||||||
self.next_start = self.tokens.index().into();
|
|
||||||
self.next = self.tokens.next();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.repeek();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Take another look at the next token to recheck whether it ends a group.
|
|
||||||
fn repeek(&mut self) {
|
|
||||||
self.peeked = self.next.clone();
|
|
||||||
let token = match self.next.as_ref() {
|
|
||||||
Some(token) => token,
|
|
||||||
None => return,
|
|
||||||
};
|
|
||||||
|
|
||||||
if match token {
|
|
||||||
NodeKind::RightParen => self.inside(Group::Paren),
|
|
||||||
NodeKind::RightBracket => self.inside(Group::Bracket),
|
|
||||||
NodeKind::RightBrace => self.inside(Group::Brace),
|
|
||||||
NodeKind::Semicolon => self.inside(Group::Stmt),
|
|
||||||
NodeKind::From => self.inside(Group::Imports),
|
|
||||||
NodeKind::Space(n) => n > &0 && self.stop_at_newline(),
|
|
||||||
_ => false,
|
|
||||||
} {
|
|
||||||
self.peeked = None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Whether the active group ends at a newline.
|
|
||||||
fn stop_at_newline(&self) -> bool {
|
|
||||||
matches!(
|
|
||||||
self.groups.last().map(|group| group.kind),
|
|
||||||
Some(Group::Stmt | Group::Expr | Group::Imports)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Whether we are inside the given group.
|
|
||||||
fn inside(&self, kind: Group) -> bool {
|
|
||||||
self.groups.iter().any(|g| g.kind == kind)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the last child of the current stack frame.
|
|
||||||
pub fn last_child(&self) -> Option<&Green> {
|
|
||||||
self.children.last()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a new marker.
|
|
||||||
pub fn marker(&mut self) -> Marker {
|
|
||||||
Marker(self.children.len())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A marker that indicates where a child may start.
|
|
||||||
pub struct Marker(usize);
|
|
||||||
|
|
||||||
impl Marker {
|
|
||||||
/// Wraps all children in front of the marker.
|
|
||||||
pub fn end(&self, p: &mut Parser, kind: NodeKind) {
|
|
||||||
let stop_nl = p.stop_at_newline();
|
|
||||||
let end = (self.0 .. p.children.len())
|
|
||||||
.rev()
|
|
||||||
.find(|&i| !Parser::skip_type_ext(p.children[i].kind(), stop_nl))
|
|
||||||
.unwrap_or(self.0)
|
|
||||||
+ 1;
|
|
||||||
|
|
||||||
let children: Vec<_> = p.children.drain(self.0 .. end).collect();
|
|
||||||
p.children
|
|
||||||
.insert(self.0, GreenNode::with_children(kind, children).into());
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Wrap all children that do not fulfill the predicate in error nodes.
|
|
||||||
pub fn filter_children<F>(&self, p: &mut Parser, f: F)
|
|
||||||
where
|
|
||||||
F: Fn(&Green) -> Result<(), (ErrorPosition, EcoString)>,
|
|
||||||
{
|
|
||||||
for child in &mut p.children[self.0 ..] {
|
|
||||||
if !((p.tokens.mode() != TokenMode::Code
|
|
||||||
|| Parser::skip_type_ext(child.kind(), false))
|
|
||||||
|| child.kind().is_error())
|
|
||||||
{
|
|
||||||
if let Err((pos, msg)) = f(child) {
|
|
||||||
let inner = std::mem::take(child);
|
|
||||||
*child =
|
|
||||||
GreenNode::with_child(NodeKind::Error(pos, msg), inner).into();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Insert an error message that `what` was expected at the marker position.
|
|
||||||
pub fn expected_at(&self, p: &mut Parser, what: &str) {
|
|
||||||
p.children.insert(
|
|
||||||
self.0,
|
|
||||||
GreenData::new(
|
|
||||||
NodeKind::Error(ErrorPosition::Full, format!("expected {}", what).into()),
|
|
||||||
0,
|
|
||||||
)
|
|
||||||
.into(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return a reference to the child after the marker.
|
|
||||||
pub fn child_at<'a>(&self, p: &'a Parser) -> Option<&'a Green> {
|
|
||||||
p.children.get(self.0)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn perform<T, F>(&self, p: &mut Parser, kind: NodeKind, f: F) -> T
|
|
||||||
where
|
|
||||||
F: FnOnce(&mut Parser) -> T,
|
|
||||||
{
|
|
||||||
let success = f(p);
|
|
||||||
self.end(p, kind);
|
|
||||||
success
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -172,8 +172,8 @@ mod tests {
|
|||||||
test("typst\n it!", "typst", "\n it!");
|
test("typst\n it!", "typst", "\n it!");
|
||||||
test("typst\n it!", "typst", "\n it!");
|
test("typst\n it!", "typst", "\n it!");
|
||||||
test("abc`", "abc", "`");
|
test("abc`", "abc", "`");
|
||||||
test(" hi", "", " hi");
|
test(" hi", "", " hi");
|
||||||
test("`", "", "`");
|
test("`", "", "`");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -57,12 +57,6 @@ impl<'s> Tokens<'s> {
|
|||||||
self.s.jump(index);
|
self.s.jump(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The column of a given index in the source string.
|
|
||||||
#[inline]
|
|
||||||
pub fn column(&self, index: usize) -> usize {
|
|
||||||
self.s.column(index)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The underlying scanner.
|
/// The underlying scanner.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn scanner(&self) -> Scanner<'s> {
|
pub fn scanner(&self) -> Scanner<'s> {
|
||||||
@ -314,7 +308,7 @@ impl<'s> Tokens<'s> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn raw(&mut self) -> NodeKind {
|
fn raw(&mut self) -> NodeKind {
|
||||||
let column = self.column(self.s.index() - 1);
|
let column = self.s.column(self.s.index() - 1);
|
||||||
|
|
||||||
let mut backticks = 1;
|
let mut backticks = 1;
|
||||||
while self.s.eat_if('`') && backticks < u8::MAX {
|
while self.s.eat_if('`') && backticks < u8::MAX {
|
||||||
@ -342,10 +336,8 @@ impl<'s> Tokens<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let terminated = found == backticks;
|
if found == backticks {
|
||||||
let end = self.s.index() - if terminated { found as usize } else { 0 };
|
let end = self.s.index() - found as usize;
|
||||||
|
|
||||||
if terminated {
|
|
||||||
NodeKind::Raw(Rc::new(resolve_raw(
|
NodeKind::Raw(Rc::new(resolve_raw(
|
||||||
column,
|
column,
|
||||||
backticks,
|
backticks,
|
||||||
|
@ -42,11 +42,10 @@ impl Green {
|
|||||||
|
|
||||||
/// Set the type of the node.
|
/// Set the type of the node.
|
||||||
pub fn set_kind(&mut self, kind: NodeKind) {
|
pub fn set_kind(&mut self, kind: NodeKind) {
|
||||||
let data = match self {
|
match self {
|
||||||
Self::Node(node) => &mut Rc::make_mut(node).data,
|
Self::Node(node) => Rc::make_mut(node).data.set_kind(kind),
|
||||||
Self::Token(data) => data,
|
Self::Token(data) => data.set_kind(kind),
|
||||||
};
|
}
|
||||||
data.set_kind(kind);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The length of the node.
|
/// The length of the node.
|
||||||
@ -56,7 +55,10 @@ impl Green {
|
|||||||
|
|
||||||
/// Whether the node or its children contain an error.
|
/// Whether the node or its children contain an error.
|
||||||
pub fn erroneous(&self) -> bool {
|
pub fn erroneous(&self) -> bool {
|
||||||
self.data().erroneous()
|
match self {
|
||||||
|
Self::Node(node) => node.erroneous,
|
||||||
|
Self::Token(data) => data.kind.is_error(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The node's children.
|
/// The node's children.
|
||||||
@ -94,26 +96,32 @@ pub struct GreenNode {
|
|||||||
data: GreenData,
|
data: GreenData,
|
||||||
/// This node's children, losslessly make up this node.
|
/// This node's children, losslessly make up this node.
|
||||||
children: Vec<Green>,
|
children: Vec<Green>,
|
||||||
|
/// Whether this node or any of its children are erroneous.
|
||||||
|
erroneous: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GreenNode {
|
impl GreenNode {
|
||||||
/// Creates a new node with the given kind and children.
|
|
||||||
pub fn with_children(kind: NodeKind, children: Vec<Green>) -> Self {
|
|
||||||
let mut data = GreenData::new(kind, 0);
|
|
||||||
let len = children
|
|
||||||
.iter()
|
|
||||||
.inspect(|c| data.erroneous |= c.erroneous())
|
|
||||||
.map(Green::len)
|
|
||||||
.sum();
|
|
||||||
data.len = len;
|
|
||||||
Self { data, children }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Creates a new node with the given kind and a single child.
|
/// Creates a new node with the given kind and a single child.
|
||||||
pub fn with_child(kind: NodeKind, child: impl Into<Green>) -> Self {
|
pub fn with_child(kind: NodeKind, child: impl Into<Green>) -> Self {
|
||||||
Self::with_children(kind, vec![child.into()])
|
Self::with_children(kind, vec![child.into()])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a new node with the given kind and children.
|
||||||
|
pub fn with_children(kind: NodeKind, children: Vec<Green>) -> Self {
|
||||||
|
let mut erroneous = kind.is_error();
|
||||||
|
let len = children
|
||||||
|
.iter()
|
||||||
|
.inspect(|c| erroneous |= c.erroneous())
|
||||||
|
.map(Green::len)
|
||||||
|
.sum();
|
||||||
|
|
||||||
|
Self {
|
||||||
|
data: GreenData::new(kind, len),
|
||||||
|
children,
|
||||||
|
erroneous,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// The node's children.
|
/// The node's children.
|
||||||
pub fn children(&self) -> &[Green] {
|
pub fn children(&self) -> &[Green] {
|
||||||
&self.children
|
&self.children
|
||||||
@ -140,14 +148,12 @@ pub struct GreenData {
|
|||||||
kind: NodeKind,
|
kind: NodeKind,
|
||||||
/// The byte length of the node in the source.
|
/// The byte length of the node in the source.
|
||||||
len: usize,
|
len: usize,
|
||||||
/// Whether this node or any of its children contain an error.
|
|
||||||
erroneous: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GreenData {
|
impl GreenData {
|
||||||
/// Create new node metadata.
|
/// Create new node metadata.
|
||||||
pub fn new(kind: NodeKind, len: usize) -> Self {
|
pub fn new(kind: NodeKind, len: usize) -> Self {
|
||||||
Self { len, erroneous: kind.is_error(), kind }
|
Self { len, kind }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The type of the node.
|
/// The type of the node.
|
||||||
@ -164,11 +170,6 @@ impl GreenData {
|
|||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
self.len
|
self.len
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether the node or its children contain an error.
|
|
||||||
pub fn erroneous(&self) -> bool {
|
|
||||||
self.erroneous
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<GreenData> for Green {
|
impl From<GreenData> for Green {
|
||||||
@ -219,7 +220,7 @@ impl<'a> RedRef<'a> {
|
|||||||
|
|
||||||
/// The error messages for this node and its descendants.
|
/// The error messages for this node and its descendants.
|
||||||
pub fn errors(self) -> Vec<Error> {
|
pub fn errors(self) -> Vec<Error> {
|
||||||
if !self.green.erroneous() {
|
if !self.erroneous() {
|
||||||
return vec![];
|
return vec![];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,7 +236,7 @@ impl<'a> RedRef<'a> {
|
|||||||
}
|
}
|
||||||
_ => self
|
_ => self
|
||||||
.children()
|
.children()
|
||||||
.filter(|red| red.green.erroneous())
|
.filter(|red| red.erroneous())
|
||||||
.flat_map(|red| red.errors())
|
.flat_map(|red| red.errors())
|
||||||
.collect(),
|
.collect(),
|
||||||
}
|
}
|
||||||
@ -256,11 +257,11 @@ impl<'a> RedRef<'a> {
|
|||||||
Green::Token(_) => &[],
|
Green::Token(_) => &[],
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut offset = self.offset;
|
let mut cursor = self.offset;
|
||||||
children.iter().map(move |green| {
|
children.iter().map(move |green| {
|
||||||
let child_offset = offset;
|
let offset = cursor;
|
||||||
offset += green.len();
|
cursor += green.len();
|
||||||
RedRef { id: self.id, offset: child_offset, green }
|
RedRef { id: self.id, offset, green }
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -623,29 +624,17 @@ pub enum ErrorPosition {
|
|||||||
impl NodeKind {
|
impl NodeKind {
|
||||||
/// Whether this is some kind of parenthesis.
|
/// Whether this is some kind of parenthesis.
|
||||||
pub fn is_paren(&self) -> bool {
|
pub fn is_paren(&self) -> bool {
|
||||||
match self {
|
matches!(self, Self::LeftParen | Self::RightParen)
|
||||||
Self::LeftParen => true,
|
|
||||||
Self::RightParen => true,
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether this is some kind of bracket.
|
/// Whether this is some kind of bracket.
|
||||||
pub fn is_bracket(&self) -> bool {
|
pub fn is_bracket(&self) -> bool {
|
||||||
match self {
|
matches!(self, Self::LeftBracket | Self::RightBracket)
|
||||||
Self::LeftBracket => true,
|
|
||||||
Self::RightBracket => true,
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether this is some kind of brace.
|
/// Whether this is some kind of brace.
|
||||||
pub fn is_brace(&self) -> bool {
|
pub fn is_brace(&self) -> bool {
|
||||||
match self {
|
matches!(self, Self::LeftBrace | Self::RightBrace)
|
||||||
Self::LeftBrace => true,
|
|
||||||
Self::RightBrace => true,
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether this is some kind of error.
|
/// Whether this is some kind of error.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user