Mirror of https://github.com/typst/typst, synced 2025-05-14 17:15:28 +08:00
Optimize parser by remembering peeked token ⚡
This commit is contained in:
parent 3533268b1f, commit 343982c56f
@@ -2,12 +2,13 @@ use std::fmt::{self, Debug, Formatter};
 
 use super::{Scanner, TokenMode, Tokens};
 use crate::diagnostic::Diagnostic;
-use crate::syntax::{Decoration, Pos, Span, SpanWith, Spanned, Token};
+use crate::syntax::{Decoration, Pos, Span, Spanned, Token};
 use crate::Feedback;
 
 /// A convenient token-based parser.
 pub struct Parser<'s> {
     tokens: Tokens<'s>,
+    peeked: Option<Spanned<Token<'s>>>,
     modes: Vec<TokenMode>,
     groups: Vec<(Pos, Group)>,
     f: Feedback,
@@ -18,6 +19,7 @@ impl<'s> Parser<'s> {
     pub fn new(src: &'s str) -> Self {
         Self {
             tokens: Tokens::new(src, TokenMode::Body),
+            peeked: None,
             modes: vec![],
             groups: vec![],
             f: Feedback::new(),
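The core of the optimization is the new `peeked` field set up above: the parser now tokenizes each token at most once and keeps it around until it is consumed. Below is a minimal standalone sketch of that caching pattern, with made-up `Tok` and `Lexer` types rather than the crate's real `Tokens`/`Token`; it illustrates the idea, not the actual implementation.

// A minimal sketch of single-token lookahead caching (illustrative types only).
#[derive(Clone, Copy, Debug, PartialEq)]
enum Tok { Word, Space }

struct Lexer {
    tokens: std::vec::IntoIter<Tok>,
    peeked: Option<Tok>,
}

impl Lexer {
    fn new(tokens: Vec<Tok>) -> Self {
        Self { tokens: tokens.into_iter(), peeked: None }
    }

    /// Tokenize at most once: the first peek stores the token, later peeks
    /// and the following eat just reuse it.
    fn peek(&mut self) -> Option<Tok> {
        if self.peeked.is_none() {
            self.peeked = self.tokens.next();
        }
        self.peeked
    }

    fn eat(&mut self) -> Option<Tok> {
        self.peek();
        self.peeked.take()
    }
}

fn main() {
    let mut lexer = Lexer::new(vec![Tok::Word, Tok::Space]);
    assert_eq!(lexer.peek(), Some(Tok::Word)); // tokenizes once
    assert_eq!(lexer.peek(), Some(Tok::Word)); // cached, no re-tokenization
    assert_eq!(lexer.eat(), Some(Tok::Word));  // consumes the cached token
    assert_eq!(lexer.eat(), Some(Tok::Space));
}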
@@ -34,7 +36,8 @@ impl<'s> Parser<'s> {
         self.f.diagnostics.push(diag);
     }
 
-    /// Eat the next token and add a diagnostic that it was not expected thing.
+    /// Eat the next token and add a diagnostic that it was not the expected
+    /// `thing`.
    pub fn diag_expected(&mut self, thing: &str) {
        if let Some(found) = self.eat() {
            self.diag(error!(
@@ -48,12 +51,12 @@ impl<'s> Parser<'s> {
         }
     }
 
-    /// Add a diagnostic that the thing was expected at the given position.
+    /// Add a diagnostic that the `thing` was expected at the given position.
     pub fn diag_expected_at(&mut self, thing: &str, pos: Pos) {
         self.diag(error!(pos, "expected {}", thing));
     }
 
-    /// Add a diagnostic that the given token was unexpected.
+    /// Add a diagnostic that the given `token` was unexpected.
     pub fn diag_unexpected(&mut self, token: Spanned<Token>) {
         self.diag(error!(token.span, "unexpected {}", token.v.name()));
     }
@@ -101,6 +104,7 @@ impl<'s> Parser<'s> {
     /// # Panics
     /// This panics if no group was started.
     pub fn end_group(&mut self) -> Span {
+        // Check that we are indeed at the end of the group.
         debug_assert_eq!(self.peek(), None, "unfinished group");
 
         let (start, group) = self.groups.pop().expect("unstarted group");
@@ -112,9 +116,11 @@ impl<'s> Parser<'s> {
         };
 
         if let Some(token) = end {
-            let next = self.tokens.clone().next().map(|s| s.v);
-            if next == Some(token) {
-                self.tokens.next();
+            // This `peek()` can't be used directly because it hides the end of
+            // group token. To circumvent this, we drop down to `self.peeked`.
+            self.peek();
+            if self.peeked.map(|s| s.v) == Some(token) {
+                self.peeked = None;
             } else {
                 self.diag(error!(self.pos(), "expected {}", token.name()));
             }
@@ -123,37 +129,33 @@ impl<'s> Parser<'s> {
         Span::new(start, self.pos())
     }
 
+    /// Skip whitespace tokens.
+    pub fn skip_white(&mut self) {
+        self.eat_while(|t| {
+            matches!(t, Token::Space(_) | Token::LineComment(_) | Token::BlockComment(_))
+        });
+    }
+
     /// Consume the next token.
     pub fn eat(&mut self) -> Option<Spanned<Token<'s>>> {
-        next_group_aware(&mut self.tokens, &self.groups)
+        self.peek()?;
+        self.peeked.take()
     }
 
     /// Consume the next token if it is the given one.
     pub fn eat_if(&mut self, t: Token) -> Option<Spanned<Token<'s>>> {
-        // Don't call eat() twice if it suceeds.
-        //
-        // TODO: Benchmark this vs. the naive version.
-        let before = self.pos();
-        let token = self.eat()?;
-        if token.v == t {
-            Some(token)
-        } else {
-            self.jump(before);
-            None
-        }
+        if self.peek()? == t { self.peeked.take() } else { None }
     }
 
-    /// Consume the next token if the closure maps to `Some`.
+    /// Consume the next token if the closure maps it to a `Some`-variant.
     pub fn eat_map<T>(
         &mut self,
         mut f: impl FnMut(Token<'s>) -> Option<T>,
     ) -> Option<Spanned<T>> {
-        let before = self.pos();
-        let token = self.eat()?;
-        if let Some(t) = f(token.v) {
-            Some(t.span_with(token.span))
+        let token = self.peek()?;
+        if let Some(t) = f(token) {
+            self.peeked.take().map(|spanned| spanned.map(|_| t))
         } else {
-            self.jump(before);
             None
         }
     }
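With the cache in place, `eat_if` and `eat_map` no longer save a position and jump back on a mismatch: they peek (which fills the cache) and only take the cached token when the test succeeds. A rough sketch of that contract, using std's `Peekable` as a stand-in for the parser's own cache (illustration only, not the crate's API):

use std::iter::Peekable;

// Consume the next item only if it equals `want`; a failed test leaves the
// cached item in place, so nothing is re-tokenized and no jump is needed.
fn eat_if<I: Iterator<Item = char>>(iter: &mut Peekable<I>, want: char) -> Option<char> {
    if iter.peek() == Some(&want) { iter.next() } else { None }
}

// Consume the next item only if the closure maps it to `Some`.
fn eat_map<I, T>(iter: &mut Peekable<I>, f: impl Fn(char) -> Option<T>) -> Option<T>
where
    I: Iterator<Item = char>,
{
    let t = f(*iter.peek()?)?;
    iter.next();
    Some(t)
}

fn main() {
    let mut chars = "a1".chars().peekable();
    assert_eq!(eat_if(&mut chars, 'x'), None);      // no match: nothing consumed
    assert_eq!(eat_if(&mut chars, 'a'), Some('a')); // match: cached item consumed
    assert_eq!(eat_map(&mut chars, |c| c.to_digit(10)), Some(1));
}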
@@ -176,100 +178,77 @@ impl<'s> Parser<'s> {
     /// Returns how many tokens were eaten.
     pub fn eat_until(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize {
         let mut count = 0;
-        let mut before = self.pos();
-        while let Some(t) = self.eat() {
-            if f(t.v) {
-                // Undo the last eat by jumping. This prevents
-                // double-tokenization by not peeking all the time.
-                //
-                // TODO: Benchmark this vs. the naive peeking version.
-                self.jump(before);
+        while let Some(t) = self.peek() {
+            if f(t) {
                 break;
             }
-            before = self.pos();
+            self.peeked = None;
             count += 1;
         }
         count
     }
 
     /// Peek at the next token without consuming it.
-    pub fn peek(&self) -> Option<Token<'s>> {
-        next_group_aware(&mut self.tokens.clone(), &self.groups).map(|s| s.v)
+    pub fn peek(&mut self) -> Option<Token<'s>> {
+        let token = match self.peeked {
+            Some(token) => token.v,
+            None => {
+                let token = self.tokens.next()?;
+                self.peeked = Some(token);
+                token.v
+            }
+        };
+
+        let group = match token {
+            Token::RightParen => Group::Paren,
+            Token::RightBracket => Group::Bracket,
+            Token::RightBrace => Group::Brace,
+            Token::Chain => Group::Subheader,
+            _ => return Some(token),
+        };
+
+        if self.groups.iter().rev().any(|&(_, g)| g == group) {
+            None
+        } else {
+            Some(token)
+        }
     }
 
     /// Checks whether the next token fulfills a condition.
     ///
     /// Returns `false` if there is no next token.
-    pub fn check(&self, f: impl FnMut(Token<'s>) -> bool) -> bool {
+    pub fn check(&mut self, f: impl FnMut(Token<'s>) -> bool) -> bool {
         self.peek().map(f).unwrap_or(false)
     }
 
-    /// Whether the there is no next token.
-    pub fn eof(&self) -> bool {
+    /// Whether the end of the source string or group is reached.
+    pub fn eof(&mut self) -> bool {
         self.peek().is_none()
     }
 
-    /// Skip whitespace tokens.
-    pub fn skip_white(&mut self) {
-        self.eat_while(|t| {
-            matches!(t,
-                Token::Space(_) |
-                Token::LineComment(_) |
-                Token::BlockComment(_))
-        });
-    }
-
     /// The position in the string at which the last token ends and next token
     /// will start.
     pub fn pos(&self) -> Pos {
-        self.tokens.pos()
+        self.peeked.map(|s| s.span.start).unwrap_or_else(|| self.tokens.pos())
     }
 
     /// Jump to a position in the source string.
     pub fn jump(&mut self, pos: Pos) {
         self.tokens.jump(pos);
+        self.peeked = None;
     }
 
-    /// The full source string.
-    pub fn src(&self) -> &'s str {
-        self.scanner().src()
-    }
-
-    /// The part of the source string that is spanned by the given span.
+    /// Returns the part of the source string that is spanned by the given span.
     pub fn get(&self, span: Span) -> &'s str {
         self.scanner().get(span.start.to_usize() .. span.end.to_usize())
     }
 
     /// The underlying scanner.
-    pub fn scanner(&self) -> &Scanner<'s> {
+    pub fn scanner(&self) -> Scanner<'s> {
         self.tokens.scanner()
     }
 }
 
-/// Wraps `tokens.next()`, but is group-aware.
-fn next_group_aware<'s>(
-    tokens: &mut Tokens<'s>,
-    groups: &[(Pos, Group)],
-) -> Option<Spanned<Token<'s>>> {
-    let pos = tokens.pos();
-    let token = tokens.next();
-
-    let group = match token?.v {
-        Token::RightParen => Group::Paren,
-        Token::RightBracket => Group::Bracket,
-        Token::RightBrace => Group::Brace,
-        Token::Chain => Group::Subheader,
-        _ => return token,
-    };
-
-    if groups.iter().rev().any(|&(_, g)| g == group) {
-        tokens.jump(pos);
-        None
-    } else {
-        token
-    }
-}
-
 impl Debug for Parser<'_> {
     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
         let s = self.scanner();
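The rewritten `peek` above does two jobs: it fills the cache from the tokenizer when needed, and it hides a closing token that would end one of the currently open groups by returning `None`. The group-filtering half is sketched below with simplified stand-in names (`Tok`, `Group`, and `visible` are illustrative, not the crate's):

// Sketch of the group filter: a closing token that matches any open group is
// hidden from the caller; everything else passes through unchanged.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Tok { Ident, RightParen, RightBracket }

#[derive(Clone, Copy, Debug, PartialEq)]
enum Group { Paren, Bracket }

fn visible(token: Tok, open_groups: &[Group]) -> Option<Tok> {
    let group = match token {
        Tok::RightParen => Group::Paren,
        Tok::RightBracket => Group::Bracket,
        _ => return Some(token),
    };
    // Scan the stack of open groups from the innermost outwards.
    if open_groups.iter().rev().any(|&g| g == group) {
        None
    } else {
        Some(token)
    }
}

fn main() {
    let groups = vec![Group::Paren];
    assert_eq!(visible(Tok::Ident, &groups), Some(Tok::Ident));
    assert_eq!(visible(Tok::RightParen, &groups), None); // ends an open group
    assert_eq!(visible(Tok::RightBracket, &groups), Some(Tok::RightBracket));
}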
@@ -14,7 +14,7 @@ pub fn resolve_string(string: &str) -> String {
             continue;
         }
 
-        let start = s.prev_index();
+        let start = s.last_index();
         match s.eat() {
             Some('\\') => out.push('\\'),
             Some('"') => out.push('"'),
@@ -81,9 +81,9 @@ impl<'s> Scanner<'s> {
         &self.src[start .. self.index]
     }
 
-    /// Uneat the last eaten character.
+    /// Uneat the last eaten char.
     pub fn uneat(&mut self) {
-        self.index = self.prev_index();
+        self.index = self.last_index();
         self.reset();
     }
 
@@ -97,9 +97,9 @@ impl<'s> Scanner<'s> {
         self.iter.clone().nth(n)
     }
 
-    /// Checks whether the next character fulfills a condition.
+    /// Checks whether the next char fulfills a condition.
     ///
-    /// Returns `false` if there is no next character.
+    /// Returns `false` if there is no next char.
     pub fn check(&self, f: impl FnMut(char) -> bool) -> bool {
         self.peek().map(f).unwrap_or(false)
     }
@@ -109,13 +109,8 @@ impl<'s> Scanner<'s> {
         self.iter.as_str().is_empty()
     }
 
-    /// The current index in the source string.
-    pub fn index(&self) -> usize {
-        self.index
-    }
-
     /// The previous index in the source string.
-    pub fn prev_index(&self) -> usize {
+    pub fn last_index(&self) -> usize {
         self.src[.. self.index]
             .chars()
             .next_back()
@@ -123,6 +118,11 @@ impl<'s> Scanner<'s> {
             .unwrap_or(0)
     }
 
+    /// The current index in the source string.
+    pub fn index(&self) -> usize {
+        self.index
+    }
+
     /// Jump to an index in the source string.
     pub fn jump(&mut self, index: usize) {
         self.index = index;
@@ -53,8 +53,8 @@ impl<'s> Tokens<'s> {
     }
 
     /// The underlying scanner.
-    pub fn scanner(&self) -> &Scanner<'s> {
-        &self.s
+    pub fn scanner(&self) -> Scanner<'s> {
+        self.s.clone()
    }
 }
 
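The final hunk changes `scanner()` to return the scanner by value instead of `&Scanner<'s>`. Presumably this is cheap because a scanner over a string slice is only a few words of state, and the caller gets an independent cursor without keeping `self` borrowed. A minimal sketch with stand-in types (`MiniScanner`/`MiniTokens` are illustrative, not the crate's real `Scanner`/`Tokens`):

// Returning a clone hands out an independent cursor over the same string.
#[derive(Clone)]
struct MiniScanner<'s> {
    src: &'s str,
    iter: std::str::Chars<'s>,
    index: usize,
}

impl<'s> MiniScanner<'s> {
    fn new(src: &'s str) -> Self {
        Self { src, iter: src.chars(), index: 0 }
    }
}

struct MiniTokens<'s> {
    s: MiniScanner<'s>,
}

impl<'s> MiniTokens<'s> {
    // By-value return: no long-lived borrow of `self`, just a cheap copy of
    // a slice reference, a char iterator, and an index.
    fn scanner(&self) -> MiniScanner<'s> {
        self.s.clone()
    }
}

fn main() {
    let tokens = MiniTokens { s: MiniScanner::new("hello") };
    let mut scanner = tokens.scanner();
    assert_eq!(scanner.iter.next(), Some('h')); // advancing the copy…
    assert_eq!(tokens.s.index, 0);              // …leaves the original untouched
    let _ = scanner.src;
}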