use std::fmt::{self, Debug, Formatter}; use super::{Scanner, TokenMode, Tokens}; use crate::diag::Diag; use crate::diag::{Deco, Feedback}; use crate::syntax::{Pos, Span, SpanWith, Spanned, Token}; /// A convenient token-based parser. pub struct Parser<'s> { /// An iterator over the source tokens. tokens: Tokens<'s>, /// The next token. /// (Only `None` if we are at the end of group or end of file). peeked: Option>, /// The start position of the peeked token. next_start: Pos, /// The end position of the last (non-whitespace if in header) token. last_end: Pos, /// The stack of modes we were in. modes: Vec, /// The stack of open groups. groups: Vec, /// Accumulated feedback. f: Feedback, } impl<'s> Parser<'s> { /// Create a new parser for the source string. pub fn new(src: &'s str) -> Self { let mut tokens = Tokens::new(src, TokenMode::Body); let peeked = tokens.next(); Self { tokens, peeked, next_start: Pos::ZERO, last_end: Pos::ZERO, modes: vec![], groups: vec![], f: Feedback::new(), } } /// Finish parsing and return the accumulated feedback. pub fn finish(self) -> Feedback { self.f } /// Add a diagnostic to the feedback. pub fn diag(&mut self, diag: Spanned) { self.f.diags.push(diag); } /// Eat the next token and add a diagnostic that it is not the expected /// `thing`. pub fn diag_expected(&mut self, what: &str) { let before = self.next_start; if let Some(found) = self.eat() { let after = self.last_end; self.diag(error!( before .. after, "expected {}, found {}", what, found.name(), )); } else { self.diag_expected_at(what, self.next_start); } } /// Add a diagnostic that the `thing` was expected at the given position. pub fn diag_expected_at(&mut self, what: &str, pos: Pos) { self.diag(error!(pos, "expected {}", what)); } /// Eat the next token and add a diagnostic that it is unexpected. pub fn diag_unexpected(&mut self) { let before = self.next_start; if let Some(found) = self.eat() { let after = self.last_end; self.diag(match found { Token::Invalid(_) => error!(before .. after, "invalid token"), _ => error!(before .. after, "unexpected {}", found.name()), }); } } /// Add a decoration to the feedback. pub fn deco(&mut self, deco: Spanned) { self.f.decos.push(deco); } /// Update the token mode and push the previous mode onto a stack. pub fn push_mode(&mut self, mode: TokenMode) { self.modes.push(self.tokens.mode()); self.tokens.set_mode(mode); } /// Pop the topmost token mode from the stack. /// /// # Panics /// This panics if there is no mode on the stack. pub fn pop_mode(&mut self) { self.tokens.set_mode(self.modes.pop().expect("no pushed mode")); } /// Continues parsing in a group. /// /// When the end delimiter of the group is reached, all subsequent calls to /// `eat()` and `peek()` return `None`. Parsing can only continue with /// a matching call to `end_group`. /// /// # Panics /// This panics if the next token does not start the given group. pub fn start_group(&mut self, group: Group) { match group { Group::Paren => self.eat_assert(Token::LeftParen), Group::Bracket => self.eat_assert(Token::LeftBracket), Group::Brace => self.eat_assert(Token::LeftBrace), Group::Subheader => {} } self.groups.push(group); } /// Ends the parsing of a group and returns the span of the whole group. /// /// # Panics /// This panics if no group was started. pub fn end_group(&mut self) { // Check that we are indeed at the end of the group. debug_assert_eq!(self.peek(), None, "unfinished group"); let group = self.groups.pop().expect("no started group"); let end = match group { Group::Paren => Some(Token::RightParen), Group::Bracket => Some(Token::RightBracket), Group::Brace => Some(Token::RightBrace), Group::Subheader => None, }; if let Some(token) = end { if self.peeked == Some(token) { self.bump(); } else { self.diag(error!(self.next_start, "expected {}", token.name())); } } } /// Execute `f` and return the result alongside the span of everything `f` /// ate. Excludes leading and trailing whitespace in header mode. pub fn span(&mut self, f: F) -> Spanned where F: FnOnce(&mut Self) -> T, { let start = self.next_start; let output = f(self); let end = self.last_end; output.span_with(start .. end) } /// A version of [`span`](Self::span) that works better with options. pub fn span_if(&mut self, f: F) -> Option> where F: FnOnce(&mut Self) -> Option, { self.span(|p| f(p)).transpose() } /// Consume the next token. pub fn eat(&mut self) -> Option> { let token = self.peek()?; self.bump(); Some(token) } /// Consume the next token if it is the given one. pub fn eat_if(&mut self, t: Token) -> bool { if self.peek() == Some(t) { self.bump(); true } else { false } } /// Consume the next token if the closure maps it a to `Some`-variant. pub fn eat_map(&mut self, f: F) -> Option where F: FnOnce(Token<'s>) -> Option, { let token = self.peek()?; let mapped = f(token); if mapped.is_some() { self.bump(); } mapped } /// Consume the next token, debug-asserting that it is the given one. pub fn eat_assert(&mut self, t: Token) { let next = self.eat(); debug_assert_eq!(next, Some(t)); } /// Peek at the next token without consuming it. pub fn peek(&mut self) -> Option> { let group = match self.peeked { Some(Token::RightParen) => Group::Paren, Some(Token::RightBracket) => Group::Bracket, Some(Token::RightBrace) => Group::Brace, Some(Token::Pipe) => Group::Subheader, other => return other, }; if self.groups.contains(&group) { return None; } self.peeked } /// Checks whether the next token fulfills a condition. /// /// Returns `false` if there is no next token. pub fn check(&mut self, f: F) -> bool where F: FnOnce(Token<'s>) -> bool, { self.peek().map_or(false, f) } /// Whether the end of the source string or group is reached. pub fn eof(&mut self) -> bool { self.peek().is_none() } /// The position at which the next token starts. pub fn next_start(&self) -> Pos { self.next_start } /// The position at which the last token ended. /// /// Refers to the end of the last _non-whitespace_ token in header mode. pub fn last_end(&self) -> Pos { self.last_end } /// Jump to a position in the source string. pub fn jump(&mut self, pos: Pos) { self.tokens.jump(pos); self.bump(); } /// Slice a part out of the source string. pub fn get(&self, span: impl Into) -> &'s str { self.tokens.scanner().get(span.into().to_range()) } /// The full source string up to the end of the last token. pub fn eaten(&self) -> &'s str { self.tokens.scanner().get(.. self.last_end.to_usize()) } /// The source string from `start` to the end of the last token. pub fn eaten_from(&self, start: Pos) -> &'s str { self.tokens .scanner() .get(start.to_usize() .. self.last_end.to_usize()) } /// The remaining source string after the start of the next token. pub fn rest(&self) -> &'s str { self.tokens.scanner().get(self.next_start.to_usize() ..) } /// The underlying scanner. pub fn scanner(&self) -> Scanner<'s> { let mut scanner = self.tokens.scanner().clone(); scanner.jump(self.next_start.to_usize()); scanner } fn bump(&mut self) { self.last_end = self.tokens.pos(); self.next_start = self.tokens.pos(); self.peeked = self.tokens.next(); match self.tokens.mode() { TokenMode::Body => {} TokenMode::Header => { while matches!( self.peeked, Some(Token::Space(_)) | Some(Token::LineComment(_)) | Some(Token::BlockComment(_)) ) { self.next_start = self.tokens.pos(); self.peeked = self.tokens.next(); } } } } } impl Debug for Parser<'_> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "Parser({}|{})", self.eaten(), self.rest()) } } /// A group, confined by optional start and end delimiters. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Group { /// A parenthesized group: `(...)`. Paren, /// A bracketed group: `[...]`. Bracket, /// A curly-braced group: `{...}`. Brace, /// A group ended by a chained subheader or a closing bracket: /// `... >>`, `...]`. Subheader, }