Mirror of https://github.com/typst/typst, synced 2025-05-14 04:56:26 +08:00
Optimize parser by remembering peeked token ⚡
parent 3533268b1f
commit 343982c56f
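The change itself is the classic single-token lookahead cache: `peek()` stashes the next token in a `peeked` field and `eat()` drains that field, so repeated peeking no longer re-tokenizes or rewinds the token stream. A minimal, self-contained sketch of that pattern (simplified, hypothetical types, not the actual typst API):

```rust
// Illustrative sketch only: a toy token type and parser, not the typst code.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Token {
    Word(&'static str),
    Comma,
}

struct Parser {
    tokens: std::vec::IntoIter<Token>,
    peeked: Option<Token>,
}

impl Parser {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens: tokens.into_iter(), peeked: None }
    }

    /// Look at the next token, pulling it from the stream at most once.
    fn peek(&mut self) -> Option<Token> {
        if self.peeked.is_none() {
            // Fill the cache only if it is empty.
            self.peeked = self.tokens.next();
        }
        self.peeked
    }

    /// Consume the next token, reusing the cached one if present.
    fn eat(&mut self) -> Option<Token> {
        self.peek()?;
        self.peeked.take()
    }
}

fn main() {
    let mut p = Parser::new(vec![Token::Word("a"), Token::Comma, Token::Word("b")]);
    assert_eq!(p.peek(), Some(Token::Word("a"))); // cached, not re-tokenized
    assert_eq!(p.eat(), Some(Token::Word("a")));  // cache is drained
    assert_eq!(p.eat(), Some(Token::Comma));
    assert_eq!(p.eat(), Some(Token::Word("b")));
    assert_eq!(p.eat(), None);
}
```

The diff below applies the same idea, with the extra wrinkles that the real parser caches `Spanned<Token>` values and that its `peek()` also hides tokens that would close the current group.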
@@ -2,12 +2,13 @@ use std::fmt::{self, Debug, Formatter};
 use super::{Scanner, TokenMode, Tokens};
 use crate::diagnostic::Diagnostic;
-use crate::syntax::{Decoration, Pos, Span, SpanWith, Spanned, Token};
+use crate::syntax::{Decoration, Pos, Span, Spanned, Token};
 use crate::Feedback;
 
 /// A convenient token-based parser.
 pub struct Parser<'s> {
     tokens: Tokens<'s>,
+    peeked: Option<Spanned<Token<'s>>>,
     modes: Vec<TokenMode>,
     groups: Vec<(Pos, Group)>,
     f: Feedback,
@@ -18,6 +19,7 @@ impl<'s> Parser<'s> {
     pub fn new(src: &'s str) -> Self {
         Self {
             tokens: Tokens::new(src, TokenMode::Body),
+            peeked: None,
             modes: vec![],
             groups: vec![],
             f: Feedback::new(),
@@ -34,7 +36,8 @@ impl<'s> Parser<'s> {
         self.f.diagnostics.push(diag);
     }
 
-    /// Eat the next token and add a diagnostic that it was not expected thing.
+    /// Eat the next token and add a diagnostic that it was not the expected
+    /// `thing`.
     pub fn diag_expected(&mut self, thing: &str) {
         if let Some(found) = self.eat() {
             self.diag(error!(
@@ -48,12 +51,12 @@ impl<'s> Parser<'s> {
         }
     }
 
-    /// Add a diagnostic that the thing was expected at the given position.
+    /// Add a diagnostic that the `thing` was expected at the given position.
     pub fn diag_expected_at(&mut self, thing: &str, pos: Pos) {
         self.diag(error!(pos, "expected {}", thing));
     }
 
-    /// Add a diagnostic that the given token was unexpected.
+    /// Add a diagnostic that the given `token` was unexpected.
     pub fn diag_unexpected(&mut self, token: Spanned<Token>) {
         self.diag(error!(token.span, "unexpected {}", token.v.name()));
     }
@@ -101,6 +104,7 @@ impl<'s> Parser<'s> {
     /// # Panics
     /// This panics if no group was started.
     pub fn end_group(&mut self) -> Span {
+        // Check that we are indeed at the end of the group.
         debug_assert_eq!(self.peek(), None, "unfinished group");
 
         let (start, group) = self.groups.pop().expect("unstarted group");
@@ -112,9 +116,11 @@ impl<'s> Parser<'s> {
         };
 
         if let Some(token) = end {
-            let next = self.tokens.clone().next().map(|s| s.v);
-            if next == Some(token) {
-                self.tokens.next();
+            // This `peek()` can't be used directly because it hides the end of
+            // group token. To circumvent this, we drop down to `self.peeked`.
+            self.peek();
+            if self.peeked.map(|s| s.v) == Some(token) {
+                self.peeked = None;
             } else {
                 self.diag(error!(self.pos(), "expected {}", token.name()));
             }
@@ -123,37 +129,33 @@ impl<'s> Parser<'s> {
         Span::new(start, self.pos())
     }
 
+    /// Skip whitespace tokens.
+    pub fn skip_white(&mut self) {
+        self.eat_while(|t| {
+            matches!(t, Token::Space(_) | Token::LineComment(_) | Token::BlockComment(_))
+        });
+    }
+
     /// Consume the next token.
     pub fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
-        next_group_aware(&mut self.tokens, &self.groups)
+        self.peek()?;
+        self.peeked.take()
     }
 
     /// Consume the next token if it is the given one.
     pub fn eat_if(&mut self, t: Token) -> Option<Spanned<Token<'s>>> {
-        // Don't call eat() twice if it suceeds.
-        //
-        // TODO: Benchmark this vs. the naive version.
-        let before = self.pos();
-        let token = self.eat()?;
-        if token.v == t {
-            Some(token)
-        } else {
-            self.jump(before);
-            None
-        }
+        if self.peek()? == t { self.peeked.take() } else { None }
     }
 
-    /// Consume the next token if the closure maps to `Some`.
+    /// Consume the next token if the closure maps it a to `Some`-variant.
     pub fn eat_map<T>(
         &mut self,
         mut f: impl FnMut(Token<'s>) -> Option<T>,
     ) -> Option<Spanned<T>> {
-        let before = self.pos();
-        let token = self.eat()?;
-        if let Some(t) = f(token.v) {
-            Some(t.span_with(token.span))
+        let token = self.peek()?;
+        if let Some(t) = f(token) {
+            self.peeked.take().map(|spanned| spanned.map(|_| t))
         } else {
-            self.jump(before);
             None
         }
     }
@@ -176,100 +178,77 @@ impl<'s> Parser<'s> {
     /// Returns how many tokens were eaten.
     pub fn eat_until(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize {
         let mut count = 0;
-        let mut before = self.pos();
-        while let Some(t) = self.eat() {
-            if f(t.v) {
-                // Undo the last eat by jumping. This prevents
-                // double-tokenization by not peeking all the time.
-                //
-                // TODO: Benchmark this vs. the naive peeking version.
-                self.jump(before);
+        while let Some(t) = self.peek() {
+            if f(t) {
                 break;
             }
-            before = self.pos();
+            self.peeked = None;
             count += 1;
         }
         count
     }
 
     /// Peek at the next token without consuming it.
-    pub fn peek(&self) -> Option<Token<'s>> {
-        next_group_aware(&mut self.tokens.clone(), &self.groups).map(|s| s.v)
+    pub fn peek(&mut self) -> Option<Token<'s>> {
+        let token = match self.peeked {
+            Some(token) => token.v,
+            None => {
+                let token = self.tokens.next()?;
+                self.peeked = Some(token);
+                token.v
+            }
+        };
+
+        let group = match token {
+            Token::RightParen => Group::Paren,
+            Token::RightBracket => Group::Bracket,
+            Token::RightBrace => Group::Brace,
+            Token::Chain => Group::Subheader,
+            _ => return Some(token),
+        };
+
+        if self.groups.iter().rev().any(|&(_, g)| g == group) {
+            None
+        } else {
+            Some(token)
+        }
     }
 
     /// Checks whether the next token fulfills a condition.
     ///
     /// Returns `false` if there is no next token.
-    pub fn check(&self, f: impl FnMut(Token<'s>) -> bool) -> bool {
+    pub fn check(&mut self, f: impl FnMut(Token<'s>) -> bool) -> bool {
        self.peek().map(f).unwrap_or(false)
     }
 
-    /// Whether the there is no next token.
-    pub fn eof(&self) -> bool {
+    /// Whether the end of the source string or group is reached.
+    pub fn eof(&mut self) -> bool {
         self.peek().is_none()
     }
 
-    /// Skip whitespace tokens.
-    pub fn skip_white(&mut self) {
-        self.eat_while(|t| {
-            matches!(t,
-                Token::Space(_) |
-                Token::LineComment(_) |
-                Token::BlockComment(_))
-        });
-    }
-
     /// The position in the string at which the last token ends and next token
     /// will start.
     pub fn pos(&self) -> Pos {
-        self.tokens.pos()
+        self.peeked.map(|s| s.span.start).unwrap_or_else(|| self.tokens.pos())
     }
 
     /// Jump to a position in the source string.
     pub fn jump(&mut self, pos: Pos) {
         self.tokens.jump(pos);
+        self.peeked = None;
     }
 
     /// The full source string.
     pub fn src(&self) -> &'s str {
         self.scanner().src()
     }
 
-    /// The part of the source string that is spanned by the given span.
+    /// Returns the part of the source string that is spanned by the given span.
     pub fn get(&self, span: Span) -> &'s str {
         self.scanner().get(span.start.to_usize() .. span.end.to_usize())
     }
 
     /// The underlying scanner.
-    pub fn scanner(&self) -> &Scanner<'s> {
+    pub fn scanner(&self) -> Scanner<'s> {
         self.tokens.scanner()
     }
 }
 
-/// Wraps `tokens.next()`, but is group-aware.
-fn next_group_aware<'s>(
-    tokens: &mut Tokens<'s>,
-    groups: &[(Pos, Group)],
-) -> Option<Spanned<Token<'s>>> {
-    let pos = tokens.pos();
-    let token = tokens.next();
-
-    let group = match token?.v {
-        Token::RightParen => Group::Paren,
-        Token::RightBracket => Group::Bracket,
-        Token::RightBrace => Group::Brace,
-        Token::Chain => Group::Subheader,
-        _ => return token,
-    };
-
-    if groups.iter().rev().any(|&(_, g)| g == group) {
-        tokens.jump(pos);
-        None
-    } else {
-        token
-    }
-}
-
 impl Debug for Parser<'_> {
     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
         let s = self.scanner();
@@ -14,7 +14,7 @@ pub fn resolve_string(string: &str) -> String {
             continue;
         }
 
-        let start = s.prev_index();
+        let start = s.last_index();
         match s.eat() {
             Some('\\') => out.push('\\'),
             Some('"') => out.push('"'),
@@ -81,9 +81,9 @@ impl<'s> Scanner<'s> {
         &self.src[start .. self.index]
     }
 
-    /// Uneat the last eaten character.
+    /// Uneat the last eaten char.
     pub fn uneat(&mut self) {
-        self.index = self.prev_index();
+        self.index = self.last_index();
         self.reset();
     }
@@ -97,9 +97,9 @@ impl<'s> Scanner<'s> {
         self.iter.clone().nth(n)
     }
 
-    /// Checks whether the next character fulfills a condition.
+    /// Checks whether the next char fulfills a condition.
     ///
-    /// Returns `false` if there is no next character.
+    /// Returns `false` if there is no next char.
     pub fn check(&self, f: impl FnMut(char) -> bool) -> bool {
         self.peek().map(f).unwrap_or(false)
     }
@@ -109,13 +109,8 @@ impl<'s> Scanner<'s> {
         self.iter.as_str().is_empty()
     }
 
-    /// The current index in the source string.
-    pub fn index(&self) -> usize {
-        self.index
-    }
-
     /// The previous index in the source string.
-    pub fn prev_index(&self) -> usize {
+    pub fn last_index(&self) -> usize {
         self.src[.. self.index]
             .chars()
             .next_back()
@@ -123,6 +118,11 @@ impl<'s> Scanner<'s> {
             .unwrap_or(0)
     }
 
+    /// The current index in the source string.
+    pub fn index(&self) -> usize {
+        self.index
+    }
+
     /// Jump to an index in the source string.
     pub fn jump(&mut self, index: usize) {
         self.index = index;
@@ -53,8 +53,8 @@ impl<'s> Tokens<'s> {
     }
 
     /// The underlying scanner.
-    pub fn scanner(&self) -> &Scanner<'s> {
-        &self.s
+    pub fn scanner(&self) -> Scanner<'s> {
+        self.s.clone()
     }
 }
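One subtlety worth noting from the parser hunk above: once a token is buffered in `peeked`, the tokenizer has already advanced past it, which is why the new `pos()` prefers the cached token's start over `self.tokens.pos()`. A toy sketch of that interaction (hypothetical names, not the typst code):

```rust
// Illustrative sketch only: a whitespace-separated "word" tokenizer with a
// one-token cache, showing why the logical position must consult the cache.
struct Spanned<T> { v: T, start: usize, end: usize }

struct Toy<'s> {
    src: &'s str,
    index: usize,                      // tokenizer position
    peeked: Option<Spanned<&'s str>>,  // cached word token
}

impl<'s> Toy<'s> {
    fn next_word(&mut self) -> Option<Spanned<&'s str>> {
        let src = self.src;
        while src[self.index ..].starts_with(' ') { self.index += 1; }
        let start = self.index;
        let rest = &src[start ..];
        if rest.is_empty() { return None; }
        let len = rest.find(' ').unwrap_or(rest.len());
        self.index = start + len;
        Some(Spanned { v: &rest[.. len], start, end: self.index })
    }

    fn peek(&mut self) -> Option<&'s str> {
        if self.peeked.is_none() { self.peeked = self.next_word(); }
        self.peeked.as_ref().map(|s| s.v)
    }

    /// Logical position: start of the cached token if any, else the tokenizer index.
    fn pos(&self) -> usize {
        self.peeked.as_ref().map(|s| s.start).unwrap_or(self.index)
    }
}

fn main() {
    let mut t = Toy { src: "hello world", index: 0, peeked: None };
    assert_eq!(t.peek(), Some("hello"));
    assert_eq!(t.index, 5);  // the tokenizer has moved past "hello"...
    assert_eq!(t.pos(), 0);  // ...but the logical position is still 0
    assert_eq!(t.peeked.as_ref().map(|s| s.end), Some(5));
}
```

The same reasoning explains why `jump()` in the diff clears `peeked`: after a jump, any cached token would report a stale span.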