Optimize parser by remembering peeked token

This commit is contained in:
Laurenz 2020-10-02 16:52:01 +02:00
parent 3533268b1f
commit 343982c56f
4 changed files with 73 additions and 94 deletions

View File

@ -2,12 +2,13 @@ use std::fmt::{self, Debug, Formatter};
use super::{Scanner, TokenMode, Tokens};
use crate::diagnostic::Diagnostic;
use crate::syntax::{Decoration, Pos, Span, SpanWith, Spanned, Token};
use crate::syntax::{Decoration, Pos, Span, Spanned, Token};
use crate::Feedback;
/// A convenient token-based parser.
pub struct Parser<'s> {
tokens: Tokens<'s>,
peeked: Option<Spanned<Token<'s>>>,
modes: Vec<TokenMode>,
groups: Vec<(Pos, Group)>,
f: Feedback,
@ -18,6 +19,7 @@ impl<'s> Parser<'s> {
pub fn new(src: &'s str) -> Self {
Self {
tokens: Tokens::new(src, TokenMode::Body),
peeked: None,
modes: vec![],
groups: vec![],
f: Feedback::new(),
@ -34,7 +36,8 @@ impl<'s> Parser<'s> {
self.f.diagnostics.push(diag);
}
/// Eat the next token and add a diagnostic that it was not expected thing.
/// Eat the next token and add a diagnostic that it was not the expected
/// `thing`.
pub fn diag_expected(&mut self, thing: &str) {
if let Some(found) = self.eat() {
self.diag(error!(
@ -48,12 +51,12 @@ impl<'s> Parser<'s> {
}
}
/// Add a diagnostic that the thing was expected at the given position.
/// Add a diagnostic that the `thing` was expected at the given position.
pub fn diag_expected_at(&mut self, thing: &str, pos: Pos) {
self.diag(error!(pos, "expected {}", thing));
}
/// Add a diagnostic that the given token was unexpected.
/// Add a diagnostic that the given `token` was unexpected.
pub fn diag_unexpected(&mut self, token: Spanned<Token>) {
self.diag(error!(token.span, "unexpected {}", token.v.name()));
}
@ -101,6 +104,7 @@ impl<'s> Parser<'s> {
/// # Panics
/// This panics if no group was started.
pub fn end_group(&mut self) -> Span {
// Check that we are indeed at the end of the group.
debug_assert_eq!(self.peek(), None, "unfinished group");
let (start, group) = self.groups.pop().expect("unstarted group");
@ -112,9 +116,11 @@ impl<'s> Parser<'s> {
};
if let Some(token) = end {
let next = self.tokens.clone().next().map(|s| s.v);
if next == Some(token) {
self.tokens.next();
// This `peek()` can't be used directly because it hides the end of
// group token. To circumvent this, we drop down to `self.peeked`.
self.peek();
if self.peeked.map(|s| s.v) == Some(token) {
self.peeked = None;
} else {
self.diag(error!(self.pos(), "expected {}", token.name()));
}
@ -123,37 +129,33 @@ impl<'s> Parser<'s> {
Span::new(start, self.pos())
}
/// Skip whitespace tokens.
pub fn skip_white(&mut self) {
self.eat_while(|t| {
matches!(t, Token::Space(_) | Token::LineComment(_) | Token::BlockComment(_))
});
}
/// Consume the next token.
pub fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
next_group_aware(&mut self.tokens, &self.groups)
self.peek()?;
self.peeked.take()
}
/// Consume the next token if it is the given one.
pub fn eat_if(&mut self, t: Token) -> Option<Spanned<Token<'s>>> {
// Don't call eat() twice if it suceeds.
//
// TODO: Benchmark this vs. the naive version.
let before = self.pos();
let token = self.eat()?;
if token.v == t {
Some(token)
} else {
self.jump(before);
None
}
if self.peek()? == t { self.peeked.take() } else { None }
}
/// Consume the next token if the closure maps to `Some`.
/// Consume the next token if the closure maps it a to `Some`-variant.
pub fn eat_map<T>(
&mut self,
mut f: impl FnMut(Token<'s>) -> Option<T>,
) -> Option<Spanned<T>> {
let before = self.pos();
let token = self.eat()?;
if let Some(t) = f(token.v) {
Some(t.span_with(token.span))
let token = self.peek()?;
if let Some(t) = f(token) {
self.peeked.take().map(|spanned| spanned.map(|_| t))
} else {
self.jump(before);
None
}
}
@ -176,100 +178,77 @@ impl<'s> Parser<'s> {
/// Returns how many tokens were eaten.
pub fn eat_until(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize {
let mut count = 0;
let mut before = self.pos();
while let Some(t) = self.eat() {
if f(t.v) {
// Undo the last eat by jumping. This prevents
// double-tokenization by not peeking all the time.
//
// TODO: Benchmark this vs. the naive peeking version.
self.jump(before);
while let Some(t) = self.peek() {
if f(t) {
break;
}
before = self.pos();
self.peeked = None;
count += 1;
}
count
}
/// Peek at the next token without consuming it.
pub fn peek(&self) -> Option<Token<'s>> {
next_group_aware(&mut self.tokens.clone(), &self.groups).map(|s| s.v)
pub fn peek(&mut self) -> Option<Token<'s>> {
let token = match self.peeked {
Some(token) => token.v,
None => {
let token = self.tokens.next()?;
self.peeked = Some(token);
token.v
}
};
let group = match token {
Token::RightParen => Group::Paren,
Token::RightBracket => Group::Bracket,
Token::RightBrace => Group::Brace,
Token::Chain => Group::Subheader,
_ => return Some(token),
};
if self.groups.iter().rev().any(|&(_, g)| g == group) {
None
} else {
Some(token)
}
}
/// Checks whether the next token fulfills a condition.
///
/// Returns `false` if there is no next token.
pub fn check(&self, f: impl FnMut(Token<'s>) -> bool) -> bool {
pub fn check(&mut self, f: impl FnMut(Token<'s>) -> bool) -> bool {
self.peek().map(f).unwrap_or(false)
}
/// Whether the there is no next token.
pub fn eof(&self) -> bool {
/// Whether the end of the source string or group is reached.
pub fn eof(&mut self) -> bool {
self.peek().is_none()
}
/// Skip whitespace tokens.
pub fn skip_white(&mut self) {
self.eat_while(|t| {
matches!(t,
Token::Space(_) |
Token::LineComment(_) |
Token::BlockComment(_))
});
}
/// The position in the string at which the last token ends and next token
/// will start.
pub fn pos(&self) -> Pos {
self.tokens.pos()
self.peeked.map(|s| s.span.start).unwrap_or_else(|| self.tokens.pos())
}
/// Jump to a position in the source string.
pub fn jump(&mut self, pos: Pos) {
self.tokens.jump(pos);
self.peeked = None;
}
/// The full source string.
pub fn src(&self) -> &'s str {
self.scanner().src()
}
/// The part of the source string that is spanned by the given span.
/// Returns the part of the source string that is spanned by the given span.
pub fn get(&self, span: Span) -> &'s str {
self.scanner().get(span.start.to_usize() .. span.end.to_usize())
}
/// The underlying scanner.
pub fn scanner(&self) -> &Scanner<'s> {
pub fn scanner(&self) -> Scanner<'s> {
self.tokens.scanner()
}
}
/// Wraps `tokens.next()`, but is group-aware.
fn next_group_aware<'s>(
tokens: &mut Tokens<'s>,
groups: &[(Pos, Group)],
) -> Option<Spanned<Token<'s>>> {
let pos = tokens.pos();
let token = tokens.next();
let group = match token?.v {
Token::RightParen => Group::Paren,
Token::RightBracket => Group::Bracket,
Token::RightBrace => Group::Brace,
Token::Chain => Group::Subheader,
_ => return token,
};
if groups.iter().rev().any(|&(_, g)| g == group) {
tokens.jump(pos);
None
} else {
token
}
}
impl Debug for Parser<'_> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let s = self.scanner();

View File

@ -14,7 +14,7 @@ pub fn resolve_string(string: &str) -> String {
continue;
}
let start = s.prev_index();
let start = s.last_index();
match s.eat() {
Some('\\') => out.push('\\'),
Some('"') => out.push('"'),

View File

@ -81,9 +81,9 @@ impl<'s> Scanner<'s> {
&self.src[start .. self.index]
}
/// Uneat the last eaten character.
/// Uneat the last eaten char.
pub fn uneat(&mut self) {
self.index = self.prev_index();
self.index = self.last_index();
self.reset();
}
@ -97,9 +97,9 @@ impl<'s> Scanner<'s> {
self.iter.clone().nth(n)
}
/// Checks whether the next character fulfills a condition.
/// Checks whether the next char fulfills a condition.
///
/// Returns `false` if there is no next character.
/// Returns `false` if there is no next char.
pub fn check(&self, f: impl FnMut(char) -> bool) -> bool {
self.peek().map(f).unwrap_or(false)
}
@ -109,13 +109,8 @@ impl<'s> Scanner<'s> {
self.iter.as_str().is_empty()
}
/// The current index in the source string.
pub fn index(&self) -> usize {
self.index
}
/// The previous index in the source string.
pub fn prev_index(&self) -> usize {
pub fn last_index(&self) -> usize {
self.src[.. self.index]
.chars()
.next_back()
@ -123,6 +118,11 @@ impl<'s> Scanner<'s> {
.unwrap_or(0)
}
/// The current index in the source string.
pub fn index(&self) -> usize {
self.index
}
/// Jump to an index in the source string.
pub fn jump(&mut self, index: usize) {
self.index = index;

View File

@ -53,8 +53,8 @@ impl<'s> Tokens<'s> {
}
/// The underlying scanner.
pub fn scanner(&self) -> &Scanner<'s> {
&self.s
pub fn scanner(&self) -> Scanner<'s> {
self.s.clone()
}
}