diff --git a/src/parse/parser.rs b/src/parse/parser.rs index d0735931a..9d5eb0a9d 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -2,12 +2,13 @@ use std::fmt::{self, Debug, Formatter}; use super::{Scanner, TokenMode, Tokens}; use crate::diagnostic::Diagnostic; -use crate::syntax::{Decoration, Pos, Span, SpanWith, Spanned, Token}; +use crate::syntax::{Decoration, Pos, Span, Spanned, Token}; use crate::Feedback; /// A convenient token-based parser. pub struct Parser<'s> { tokens: Tokens<'s>, + peeked: Option>>, modes: Vec, groups: Vec<(Pos, Group)>, f: Feedback, @@ -18,6 +19,7 @@ impl<'s> Parser<'s> { pub fn new(src: &'s str) -> Self { Self { tokens: Tokens::new(src, TokenMode::Body), + peeked: None, modes: vec![], groups: vec![], f: Feedback::new(), @@ -34,7 +36,8 @@ impl<'s> Parser<'s> { self.f.diagnostics.push(diag); } - /// Eat the next token and add a diagnostic that it was not expected thing. + /// Eat the next token and add a diagnostic that it was not the expected + /// `thing`. pub fn diag_expected(&mut self, thing: &str) { if let Some(found) = self.eat() { self.diag(error!( @@ -48,12 +51,12 @@ impl<'s> Parser<'s> { } } - /// Add a diagnostic that the thing was expected at the given position. + /// Add a diagnostic that the `thing` was expected at the given position. pub fn diag_expected_at(&mut self, thing: &str, pos: Pos) { self.diag(error!(pos, "expected {}", thing)); } - /// Add a diagnostic that the given token was unexpected. + /// Add a diagnostic that the given `token` was unexpected. pub fn diag_unexpected(&mut self, token: Spanned) { self.diag(error!(token.span, "unexpected {}", token.v.name())); } @@ -101,6 +104,7 @@ impl<'s> Parser<'s> { /// # Panics /// This panics if no group was started. pub fn end_group(&mut self) -> Span { + // Check that we are indeed at the end of the group. 
debug_assert_eq!(self.peek(), None, "unfinished group"); let (start, group) = self.groups.pop().expect("unstarted group"); @@ -112,9 +116,11 @@ impl<'s> Parser<'s> { }; if let Some(token) = end { - let next = self.tokens.clone().next().map(|s| s.v); - if next == Some(token) { - self.tokens.next(); + // This `peek()` can't be used directly because it hides the end of + // group token. To circumvent this, we drop down to `self.peeked`. + self.peek(); + if self.peeked.map(|s| s.v) == Some(token) { + self.peeked = None; } else { self.diag(error!(self.pos(), "expected {}", token.name())); } @@ -123,37 +129,33 @@ impl<'s> Parser<'s> { Span::new(start, self.pos()) } + /// Skip whitespace tokens. + pub fn skip_white(&mut self) { + self.eat_while(|t| { + matches!(t, Token::Space(_) | Token::LineComment(_) | Token::BlockComment(_)) + }); + } + /// Consume the next token. pub fn eat(&mut self) -> Option>> { - next_group_aware(&mut self.tokens, &self.groups) + self.peek()?; + self.peeked.take() } /// Consume the next token if it is the given one. pub fn eat_if(&mut self, t: Token) -> Option>> { - // Don't call eat() twice if it suceeds. - // - // TODO: Benchmark this vs. the naive version. - let before = self.pos(); - let token = self.eat()?; - if token.v == t { - Some(token) - } else { - self.jump(before); - None - } + if self.peek()? == t { self.peeked.take() } else { None } } - /// Consume the next token if the closure maps to `Some`. + /// Consume the next token if the closure maps it to a `Some`-variant. pub fn eat_map( &mut self, mut f: impl FnMut(Token<'s>) -> Option, ) -> Option> { - let before = self.pos(); - let token = self.eat()?; - if let Some(t) = f(token.v) { - Some(t.span_with(token.span)) + let token = self.peek()?; + if let Some(t) = f(token) { + self.peeked.take().map(|spanned| spanned.map(|_| t)) } else { - self.jump(before); None } } @@ -176,100 +178,77 @@ impl<'s> Parser<'s> { /// Returns how many tokens were eaten. 
pub fn eat_until(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize { let mut count = 0; - let mut before = self.pos(); - while let Some(t) = self.eat() { - if f(t.v) { - // Undo the last eat by jumping. This prevents - // double-tokenization by not peeking all the time. - // - // TODO: Benchmark this vs. the naive peeking version. - self.jump(before); + while let Some(t) = self.peek() { + if f(t) { break; } - before = self.pos(); + self.peeked = None; count += 1; } count } /// Peek at the next token without consuming it. - pub fn peek(&self) -> Option> { - next_group_aware(&mut self.tokens.clone(), &self.groups).map(|s| s.v) + pub fn peek(&mut self) -> Option> { + let token = match self.peeked { + Some(token) => token.v, + None => { + let token = self.tokens.next()?; + self.peeked = Some(token); + token.v + } + }; + + let group = match token { + Token::RightParen => Group::Paren, + Token::RightBracket => Group::Bracket, + Token::RightBrace => Group::Brace, + Token::Chain => Group::Subheader, + _ => return Some(token), + }; + + if self.groups.iter().rev().any(|&(_, g)| g == group) { + None + } else { + Some(token) + } } /// Checks whether the next token fulfills a condition. /// /// Returns `false` if there is no next token. - pub fn check(&self, f: impl FnMut(Token<'s>) -> bool) -> bool { + pub fn check(&mut self, f: impl FnMut(Token<'s>) -> bool) -> bool { self.peek().map(f).unwrap_or(false) } - /// Whether the there is no next token. - pub fn eof(&self) -> bool { + /// Whether the end of the source string or group is reached. + pub fn eof(&mut self) -> bool { self.peek().is_none() } - /// Skip whitespace tokens. - pub fn skip_white(&mut self) { - self.eat_while(|t| { - matches!(t, - Token::Space(_) | - Token::LineComment(_) | - Token::BlockComment(_)) - }); - } - /// The position in the string at which the last token ends and next token /// will start. 
pub fn pos(&self) -> Pos { - self.tokens.pos() + self.peeked.map(|s| s.span.start).unwrap_or_else(|| self.tokens.pos()) } /// Jump to a position in the source string. pub fn jump(&mut self, pos: Pos) { self.tokens.jump(pos); + self.peeked = None; } - /// The full source string. - pub fn src(&self) -> &'s str { - self.scanner().src() - } - - /// The part of the source string that is spanned by the given span. + /// Returns the part of the source string that is spanned by the given span. pub fn get(&self, span: Span) -> &'s str { self.scanner().get(span.start.to_usize() .. span.end.to_usize()) } /// The underlying scanner. - pub fn scanner(&self) -> &Scanner<'s> { + pub fn scanner(&self) -> Scanner<'s> { self.tokens.scanner() } } -/// Wraps `tokens.next()`, but is group-aware. -fn next_group_aware<'s>( - tokens: &mut Tokens<'s>, - groups: &[(Pos, Group)], -) -> Option>> { - let pos = tokens.pos(); - let token = tokens.next(); - - let group = match token?.v { - Token::RightParen => Group::Paren, - Token::RightBracket => Group::Bracket, - Token::RightBrace => Group::Brace, - Token::Chain => Group::Subheader, - _ => return token, - }; - - if groups.iter().rev().any(|&(_, g)| g == group) { - tokens.jump(pos); - None - } else { - token - } -} - impl Debug for Parser<'_> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { let s = self.scanner(); diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 6036a74e7..1b289b1e0 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -14,7 +14,7 @@ pub fn resolve_string(string: &str) -> String { continue; } - let start = s.prev_index(); + let start = s.last_index(); match s.eat() { Some('\\') => out.push('\\'), Some('"') => out.push('"'), diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index 6ff8c801e..38c8736f5 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -81,9 +81,9 @@ impl<'s> Scanner<'s> { &self.src[start .. self.index] } - /// Uneat the last eaten character. 
+ /// Uneat the last eaten char. pub fn uneat(&mut self) { - self.index = self.prev_index(); + self.index = self.last_index(); self.reset(); } @@ -97,9 +97,9 @@ impl<'s> Scanner<'s> { self.iter.clone().nth(n) } - /// Checks whether the next character fulfills a condition. + /// Checks whether the next char fulfills a condition. /// - /// Returns `false` if there is no next character. + /// Returns `false` if there is no next char. pub fn check(&self, f: impl FnMut(char) -> bool) -> bool { self.peek().map(f).unwrap_or(false) } @@ -109,13 +109,8 @@ impl<'s> Scanner<'s> { self.iter.as_str().is_empty() } - /// The current index in the source string. - pub fn index(&self) -> usize { - self.index - } - /// The previous index in the source string. - pub fn prev_index(&self) -> usize { + pub fn last_index(&self) -> usize { self.src[.. self.index] .chars() .next_back() @@ -123,6 +118,11 @@ impl<'s> Scanner<'s> { .unwrap_or(0) } + /// The current index in the source string. + pub fn index(&self) -> usize { + self.index + } + /// Jump to an index in the source string. pub fn jump(&mut self, index: usize) { self.index = index; diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 72d7b2d96..354aae7df 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -53,8 +53,8 @@ impl<'s> Tokens<'s> { } /// The underlying scanner. - pub fn scanner(&self) -> &Scanner<'s> { - &self.s + pub fn scanner(&self) -> Scanner<'s> { + self.s.clone() } }