Optimize parser by remembering peeked token

This commit is contained in:
Laurenz 2020-10-02 16:52:01 +02:00
parent 3533268b1f
commit 343982c56f
4 changed files with 73 additions and 94 deletions

View File

@ -2,12 +2,13 @@ use std::fmt::{self, Debug, Formatter};
use super::{Scanner, TokenMode, Tokens}; use super::{Scanner, TokenMode, Tokens};
use crate::diagnostic::Diagnostic; use crate::diagnostic::Diagnostic;
use crate::syntax::{Decoration, Pos, Span, SpanWith, Spanned, Token}; use crate::syntax::{Decoration, Pos, Span, Spanned, Token};
use crate::Feedback; use crate::Feedback;
/// A convenient token-based parser. /// A convenient token-based parser.
pub struct Parser<'s> { pub struct Parser<'s> {
tokens: Tokens<'s>, tokens: Tokens<'s>,
peeked: Option<Spanned<Token<'s>>>,
modes: Vec<TokenMode>, modes: Vec<TokenMode>,
groups: Vec<(Pos, Group)>, groups: Vec<(Pos, Group)>,
f: Feedback, f: Feedback,
@ -18,6 +19,7 @@ impl<'s> Parser<'s> {
pub fn new(src: &'s str) -> Self { pub fn new(src: &'s str) -> Self {
Self { Self {
tokens: Tokens::new(src, TokenMode::Body), tokens: Tokens::new(src, TokenMode::Body),
peeked: None,
modes: vec![], modes: vec![],
groups: vec![], groups: vec![],
f: Feedback::new(), f: Feedback::new(),
@ -34,7 +36,8 @@ impl<'s> Parser<'s> {
self.f.diagnostics.push(diag); self.f.diagnostics.push(diag);
} }
/// Eat the next token and add a diagnostic that it was not expected thing. /// Eat the next token and add a diagnostic that it was not the expected
/// `thing`.
pub fn diag_expected(&mut self, thing: &str) { pub fn diag_expected(&mut self, thing: &str) {
if let Some(found) = self.eat() { if let Some(found) = self.eat() {
self.diag(error!( self.diag(error!(
@ -48,12 +51,12 @@ impl<'s> Parser<'s> {
} }
} }
/// Add a diagnostic that the thing was expected at the given position. /// Add a diagnostic that the `thing` was expected at the given position.
pub fn diag_expected_at(&mut self, thing: &str, pos: Pos) { pub fn diag_expected_at(&mut self, thing: &str, pos: Pos) {
self.diag(error!(pos, "expected {}", thing)); self.diag(error!(pos, "expected {}", thing));
} }
/// Add a diagnostic that the given token was unexpected. /// Add a diagnostic that the given `token` was unexpected.
pub fn diag_unexpected(&mut self, token: Spanned<Token>) { pub fn diag_unexpected(&mut self, token: Spanned<Token>) {
self.diag(error!(token.span, "unexpected {}", token.v.name())); self.diag(error!(token.span, "unexpected {}", token.v.name()));
} }
@ -101,6 +104,7 @@ impl<'s> Parser<'s> {
/// # Panics /// # Panics
/// This panics if no group was started. /// This panics if no group was started.
pub fn end_group(&mut self) -> Span { pub fn end_group(&mut self) -> Span {
// Check that we are indeed at the end of the group.
debug_assert_eq!(self.peek(), None, "unfinished group"); debug_assert_eq!(self.peek(), None, "unfinished group");
let (start, group) = self.groups.pop().expect("unstarted group"); let (start, group) = self.groups.pop().expect("unstarted group");
@ -112,9 +116,11 @@ impl<'s> Parser<'s> {
}; };
if let Some(token) = end { if let Some(token) = end {
let next = self.tokens.clone().next().map(|s| s.v); // This `peek()` can't be used directly because it hides the end of
if next == Some(token) { // group token. To circumvent this, we drop down to `self.peeked`.
self.tokens.next(); self.peek();
if self.peeked.map(|s| s.v) == Some(token) {
self.peeked = None;
} else { } else {
self.diag(error!(self.pos(), "expected {}", token.name())); self.diag(error!(self.pos(), "expected {}", token.name()));
} }
@ -123,37 +129,33 @@ impl<'s> Parser<'s> {
Span::new(start, self.pos()) Span::new(start, self.pos())
} }
/// Skip whitespace tokens.
pub fn skip_white(&mut self) {
self.eat_while(|t| {
matches!(t, Token::Space(_) | Token::LineComment(_) | Token::BlockComment(_))
});
}
/// Consume the next token. /// Consume the next token.
pub fn eat(&mut self) -> Option<Spanned<Token<'s>>> { pub fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
next_group_aware(&mut self.tokens, &self.groups) self.peek()?;
self.peeked.take()
} }
/// Consume the next token if it is the given one. /// Consume the next token if it is the given one.
pub fn eat_if(&mut self, t: Token) -> Option<Spanned<Token<'s>>> { pub fn eat_if(&mut self, t: Token) -> Option<Spanned<Token<'s>>> {
// Don't call eat() twice if it suceeds. if self.peek()? == t { self.peeked.take() } else { None }
//
// TODO: Benchmark this vs. the naive version.
let before = self.pos();
let token = self.eat()?;
if token.v == t {
Some(token)
} else {
self.jump(before);
None
}
} }
/// Consume the next token if the closure maps to `Some`. /// Consume the next token if the closure maps it to a `Some`-variant.
pub fn eat_map<T>( pub fn eat_map<T>(
&mut self, &mut self,
mut f: impl FnMut(Token<'s>) -> Option<T>, mut f: impl FnMut(Token<'s>) -> Option<T>,
) -> Option<Spanned<T>> { ) -> Option<Spanned<T>> {
let before = self.pos(); let token = self.peek()?;
let token = self.eat()?; if let Some(t) = f(token) {
if let Some(t) = f(token.v) { self.peeked.take().map(|spanned| spanned.map(|_| t))
Some(t.span_with(token.span))
} else { } else {
self.jump(before);
None None
} }
} }
@ -176,100 +178,77 @@ impl<'s> Parser<'s> {
/// Returns how many tokens were eaten. /// Returns how many tokens were eaten.
pub fn eat_until(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize { pub fn eat_until(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize {
let mut count = 0; let mut count = 0;
let mut before = self.pos(); while let Some(t) = self.peek() {
while let Some(t) = self.eat() { if f(t) {
if f(t.v) {
// Undo the last eat by jumping. This prevents
// double-tokenization by not peeking all the time.
//
// TODO: Benchmark this vs. the naive peeking version.
self.jump(before);
break; break;
} }
before = self.pos(); self.peeked = None;
count += 1; count += 1;
} }
count count
} }
/// Peek at the next token without consuming it. /// Peek at the next token without consuming it.
pub fn peek(&self) -> Option<Token<'s>> { pub fn peek(&mut self) -> Option<Token<'s>> {
next_group_aware(&mut self.tokens.clone(), &self.groups).map(|s| s.v) let token = match self.peeked {
Some(token) => token.v,
None => {
let token = self.tokens.next()?;
self.peeked = Some(token);
token.v
}
};
let group = match token {
Token::RightParen => Group::Paren,
Token::RightBracket => Group::Bracket,
Token::RightBrace => Group::Brace,
Token::Chain => Group::Subheader,
_ => return Some(token),
};
if self.groups.iter().rev().any(|&(_, g)| g == group) {
None
} else {
Some(token)
}
} }
/// Checks whether the next token fulfills a condition. /// Checks whether the next token fulfills a condition.
/// ///
/// Returns `false` if there is no next token. /// Returns `false` if there is no next token.
pub fn check(&self, f: impl FnMut(Token<'s>) -> bool) -> bool { pub fn check(&mut self, f: impl FnMut(Token<'s>) -> bool) -> bool {
self.peek().map(f).unwrap_or(false) self.peek().map(f).unwrap_or(false)
} }
/// Whether the there is no next token. /// Whether the end of the source string or group is reached.
pub fn eof(&self) -> bool { pub fn eof(&mut self) -> bool {
self.peek().is_none() self.peek().is_none()
} }
/// Skip whitespace tokens.
pub fn skip_white(&mut self) {
self.eat_while(|t| {
matches!(t,
Token::Space(_) |
Token::LineComment(_) |
Token::BlockComment(_))
});
}
/// The position in the string at which the last token ends and next token /// The position in the string at which the last token ends and next token
/// will start. /// will start.
pub fn pos(&self) -> Pos { pub fn pos(&self) -> Pos {
self.tokens.pos() self.peeked.map(|s| s.span.start).unwrap_or_else(|| self.tokens.pos())
} }
/// Jump to a position in the source string. /// Jump to a position in the source string.
pub fn jump(&mut self, pos: Pos) { pub fn jump(&mut self, pos: Pos) {
self.tokens.jump(pos); self.tokens.jump(pos);
self.peeked = None;
} }
/// The full source string. /// Returns the part of the source string that is spanned by the given span.
pub fn src(&self) -> &'s str {
self.scanner().src()
}
/// The part of the source string that is spanned by the given span.
pub fn get(&self, span: Span) -> &'s str { pub fn get(&self, span: Span) -> &'s str {
self.scanner().get(span.start.to_usize() .. span.end.to_usize()) self.scanner().get(span.start.to_usize() .. span.end.to_usize())
} }
/// The underlying scanner. /// The underlying scanner.
pub fn scanner(&self) -> &Scanner<'s> { pub fn scanner(&self) -> Scanner<'s> {
self.tokens.scanner() self.tokens.scanner()
} }
} }
/// Wraps `tokens.next()`, but is group-aware.
fn next_group_aware<'s>(
tokens: &mut Tokens<'s>,
groups: &[(Pos, Group)],
) -> Option<Spanned<Token<'s>>> {
let pos = tokens.pos();
let token = tokens.next();
let group = match token?.v {
Token::RightParen => Group::Paren,
Token::RightBracket => Group::Bracket,
Token::RightBrace => Group::Brace,
Token::Chain => Group::Subheader,
_ => return token,
};
if groups.iter().rev().any(|&(_, g)| g == group) {
tokens.jump(pos);
None
} else {
token
}
}
impl Debug for Parser<'_> { impl Debug for Parser<'_> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let s = self.scanner(); let s = self.scanner();

View File

@ -14,7 +14,7 @@ pub fn resolve_string(string: &str) -> String {
continue; continue;
} }
let start = s.prev_index(); let start = s.last_index();
match s.eat() { match s.eat() {
Some('\\') => out.push('\\'), Some('\\') => out.push('\\'),
Some('"') => out.push('"'), Some('"') => out.push('"'),

View File

@ -81,9 +81,9 @@ impl<'s> Scanner<'s> {
&self.src[start .. self.index] &self.src[start .. self.index]
} }
/// Uneat the last eaten character. /// Uneat the last eaten char.
pub fn uneat(&mut self) { pub fn uneat(&mut self) {
self.index = self.prev_index(); self.index = self.last_index();
self.reset(); self.reset();
} }
@ -97,9 +97,9 @@ impl<'s> Scanner<'s> {
self.iter.clone().nth(n) self.iter.clone().nth(n)
} }
/// Checks whether the next character fulfills a condition. /// Checks whether the next char fulfills a condition.
/// ///
/// Returns `false` if there is no next character. /// Returns `false` if there is no next char.
pub fn check(&self, f: impl FnMut(char) -> bool) -> bool { pub fn check(&self, f: impl FnMut(char) -> bool) -> bool {
self.peek().map(f).unwrap_or(false) self.peek().map(f).unwrap_or(false)
} }
@ -109,13 +109,8 @@ impl<'s> Scanner<'s> {
self.iter.as_str().is_empty() self.iter.as_str().is_empty()
} }
/// The current index in the source string.
pub fn index(&self) -> usize {
self.index
}
/// The previous index in the source string. /// The previous index in the source string.
pub fn prev_index(&self) -> usize { pub fn last_index(&self) -> usize {
self.src[.. self.index] self.src[.. self.index]
.chars() .chars()
.next_back() .next_back()
@ -123,6 +118,11 @@ impl<'s> Scanner<'s> {
.unwrap_or(0) .unwrap_or(0)
} }
/// The current index in the source string.
pub fn index(&self) -> usize {
self.index
}
/// Jump to an index in the source string. /// Jump to an index in the source string.
pub fn jump(&mut self, index: usize) { pub fn jump(&mut self, index: usize) {
self.index = index; self.index = index;

View File

@ -53,8 +53,8 @@ impl<'s> Tokens<'s> {
} }
/// The underlying scanner. /// The underlying scanner.
pub fn scanner(&self) -> &Scanner<'s> { pub fn scanner(&self) -> Scanner<'s> {
&self.s self.s.clone()
} }
} }