10. Change parser modes using closures instead of manual stacks

This commit is contained in:
Ian Wrzesinski 2024-10-10 20:30:33 -04:00
parent 88d86714a1
commit 2ae1e1627f

View File

@ -226,11 +226,11 @@ fn whitespace_line(p: &mut Parser) {
/// Parses a mathematical equation: `$x$`, `$ x^2 $`. /// Parses a mathematical equation: `$x$`, `$ x^2 $`.
fn equation(p: &mut Parser) { fn equation(p: &mut Parser) {
let m = p.marker(); let m = p.marker();
p.enter(LexMode::Math); p.with_mode(LexMode::Math, |p| {
p.assert(SyntaxKind::Dollar); p.assert(SyntaxKind::Dollar);
math(p, |p| p.at(SyntaxKind::Dollar)); math(p, |p| p.at(SyntaxKind::Dollar));
p.expect_closing_delimiter(m, SyntaxKind::Dollar); p.expect_closing_delimiter(m, SyntaxKind::Dollar);
p.exit(); });
p.wrap(m, SyntaxKind::Equation); p.wrap(m, SyntaxKind::Equation);
} }
@ -586,10 +586,11 @@ fn code(p: &mut Parser, stop: impl FnMut(&Parser) -> bool) {
/// Parses a sequence of code expressions. /// Parses a sequence of code expressions.
fn code_exprs(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) { fn code_exprs(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) {
while !p.end() && !stop(p) { while !p.end() && !stop(p) {
p.enter_newline_mode(NewlineMode::Contextual); p.with_nl_mode(AtNewline::Contextual, |p| {
if !p.at_set(set::CODE_EXPR) {
let at_expr = p.at_set(set::CODE_EXPR); p.unexpected();
if at_expr { return;
}
code_expr(p); code_expr(p);
if !p.end() && !stop(p) && !p.eat_if(SyntaxKind::Semicolon) { if !p.end() && !stop(p) && !p.eat_if(SyntaxKind::Semicolon) {
p.expected("semicolon or line break"); p.expected("semicolon or line break");
@ -598,12 +599,7 @@ fn code_exprs(p: &mut Parser, mut stop: impl FnMut(&Parser) -> bool) {
p.hint("try wrapping your code in a markup block (`[ ]`)"); p.hint("try wrapping your code in a markup block (`[ ]`)");
} }
} }
} });
p.exit_newline_mode();
if !at_expr && !p.end() {
p.unexpected();
}
} }
} }
@ -614,29 +610,28 @@ fn code_expr(p: &mut Parser) {
/// Parses an atomic code expression embedded in markup or math. /// Parses an atomic code expression embedded in markup or math.
fn embedded_code_expr(p: &mut Parser) { fn embedded_code_expr(p: &mut Parser) {
p.enter_newline_mode(NewlineMode::Stop); p.with_mode(LexMode::Code, |p| {
p.enter(LexMode::Code); p.with_nl_mode(AtNewline::Stop, |p| {
p.assert(SyntaxKind::Hash); p.assert(SyntaxKind::Hash);
p.unskip(); p.unskip();
let stmt = p.at_set(set::STMT); let stmt = p.at_set(set::STMT);
let at = p.at_set(set::ATOMIC_CODE_EXPR); let at = p.at_set(set::ATOMIC_CODE_EXPR);
code_expr_prec(p, true, 0); code_expr_prec(p, true, 0);
// Consume error for things like `#12p` or `#"abc\"`.# // Consume error for things like `#12p` or `#"abc\"`.#
if !at && !p.current().is_trivia() && !p.end() { if !at && !p.current().is_trivia() && !p.end() {
p.unexpected(); p.unexpected();
} }
let semi = let semi = (stmt || p.directly_at(SyntaxKind::Semicolon))
(stmt || p.directly_at(SyntaxKind::Semicolon)) && p.eat_if(SyntaxKind::Semicolon); && p.eat_if(SyntaxKind::Semicolon);
if stmt && !semi && !p.end() && !p.at(SyntaxKind::RightBracket) { if stmt && !semi && !p.end() && !p.at(SyntaxKind::RightBracket) {
p.expected("semicolon or line break"); p.expected("semicolon or line break");
} }
});
p.exit(); });
p.exit_newline_mode();
} }
/// Parses a code expression with at least the given precedence. /// Parses a code expression with at least the given precedence.
@ -790,24 +785,24 @@ pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNod
/// Parses a code block: `{ let x = 1; x + 2 }`. /// Parses a code block: `{ let x = 1; x + 2 }`.
fn code_block(p: &mut Parser) { fn code_block(p: &mut Parser) {
let m = p.marker(); let m = p.marker();
p.enter(LexMode::Code); p.with_mode(LexMode::Code, |p| {
p.enter_newline_mode(NewlineMode::Continue); p.with_nl_mode(AtNewline::Continue, |p| {
p.assert(SyntaxKind::LeftBrace); p.assert(SyntaxKind::LeftBrace);
code(p, |p| p.at_set(syntax_set!(RightBrace, RightBracket, RightParen))); code(p, |p| p.at_set(syntax_set!(RightBrace, RightBracket, RightParen)));
p.expect_closing_delimiter(m, SyntaxKind::RightBrace); p.expect_closing_delimiter(m, SyntaxKind::RightBrace);
p.exit(); });
p.exit_newline_mode(); });
p.wrap(m, SyntaxKind::CodeBlock); p.wrap(m, SyntaxKind::CodeBlock);
} }
/// Parses a content block: `[*Hi* there!]`. /// Parses a content block: `[*Hi* there!]`.
fn content_block(p: &mut Parser) { fn content_block(p: &mut Parser) {
let m = p.marker(); let m = p.marker();
p.enter(LexMode::Markup); p.with_mode(LexMode::Markup, |p| {
p.assert(SyntaxKind::LeftBracket); p.assert(SyntaxKind::LeftBracket);
markup(p, true, 0, |p| p.at(SyntaxKind::RightBracket)); markup(p, true, 0, |p| p.at(SyntaxKind::RightBracket));
p.expect_closing_delimiter(m, SyntaxKind::RightBracket); p.expect_closing_delimiter(m, SyntaxKind::RightBracket);
p.exit(); });
p.wrap(m, SyntaxKind::ContentBlock); p.wrap(m, SyntaxKind::ContentBlock);
} }
@ -950,14 +945,14 @@ fn module_import(p: &mut Parser) {
if p.eat_if(SyntaxKind::Colon) { if p.eat_if(SyntaxKind::Colon) {
if p.at(SyntaxKind::LeftParen) { if p.at(SyntaxKind::LeftParen) {
let m1 = p.marker(); p.with_nl_mode(AtNewline::Continue, |p| {
p.enter_newline_mode(NewlineMode::Continue); let m2 = p.marker();
p.assert(SyntaxKind::LeftParen); p.assert(SyntaxKind::LeftParen);
import_items(p); import_items(p);
p.expect_closing_delimiter(m1, SyntaxKind::RightParen); p.expect_closing_delimiter(m2, SyntaxKind::RightParen);
p.exit_newline_mode(); });
} else if !p.eat_if(SyntaxKind::Star) { } else if !p.eat_if(SyntaxKind::Star) {
import_items(p); import_items(p);
} }
@ -1098,10 +1093,6 @@ fn expr_with_paren(p: &mut Parser, atomic: bool) {
/// - an array: `(1, "hi", 12cm)`, or /// - an array: `(1, "hi", 12cm)`, or
/// - a dictionary: `(thickness: 3pt, pattern: dashed)`. /// - a dictionary: `(thickness: 3pt, pattern: dashed)`.
fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind { fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind {
let m = p.marker();
p.enter_newline_mode(NewlineMode::Continue);
p.assert(SyntaxKind::LeftParen);
let mut state = GroupState { let mut state = GroupState {
count: 0, count: 0,
maybe_just_parens: true, maybe_just_parens: true,
@ -1124,27 +1115,29 @@ fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind {
// //
// This does allow some unexpected expressions, such as `(: key: val)`, but // This does allow some unexpected expressions, such as `(: key: val)`, but
// it's currently intentional. // it's currently intentional.
if p.eat_if(SyntaxKind::Colon) { let m = p.marker();
state.kind = Some(SyntaxKind::Dict); p.with_nl_mode(AtNewline::Continue, |p| {
state.maybe_just_parens = false; p.assert(SyntaxKind::LeftParen);
} if p.eat_if(SyntaxKind::Colon) {
state.kind = Some(SyntaxKind::Dict);
while !p.current().is_terminator() {
if !p.at_set(set::ARRAY_OR_DICT_ITEM) {
p.unexpected();
continue;
} }
array_or_dict_item(p, &mut state); while !p.current().is_terminator() {
state.count += 1; if !p.at_set(set::ARRAY_OR_DICT_ITEM) {
p.unexpected();
continue;
}
if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) { array_or_dict_item(p, &mut state);
state.maybe_just_parens = false; state.count += 1;
if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
state.maybe_just_parens = false;
}
} }
}
p.expect_closing_delimiter(m, SyntaxKind::RightParen); p.expect_closing_delimiter(m, SyntaxKind::RightParen);
p.exit_newline_mode(); });
let kind = if state.maybe_just_parens && state.count == 1 { let kind = if state.maybe_just_parens && state.count == 1 {
SyntaxKind::Parenthesized SyntaxKind::Parenthesized
@ -1230,25 +1223,25 @@ fn args(p: &mut Parser) {
let m = p.marker(); let m = p.marker();
if p.at(SyntaxKind::LeftParen) { if p.at(SyntaxKind::LeftParen) {
let m2 = p.marker(); let m2 = p.marker();
p.enter_newline_mode(NewlineMode::Continue); p.with_nl_mode(AtNewline::Continue, |p| {
p.assert(SyntaxKind::LeftParen); p.assert(SyntaxKind::LeftParen);
let mut seen = HashSet::new(); let mut seen = HashSet::new();
while !p.current().is_terminator() { while !p.current().is_terminator() {
if !p.at_set(set::ARG) { if !p.at_set(set::ARG) {
p.unexpected(); p.unexpected();
continue; continue;
}
arg(p, &mut seen);
if !p.current().is_terminator() {
p.expect(SyntaxKind::Comma);
}
} }
arg(p, &mut seen); p.expect_closing_delimiter(m2, SyntaxKind::RightParen);
});
if !p.current().is_terminator() {
p.expect(SyntaxKind::Comma);
}
}
p.expect_closing_delimiter(m2, SyntaxKind::RightParen);
p.exit_newline_mode();
} }
while p.directly_at(SyntaxKind::LeftBracket) { while p.directly_at(SyntaxKind::LeftBracket) {
@ -1293,27 +1286,27 @@ fn arg<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>) {
/// Parses a closure's parameters: `(x, y)`. /// Parses a closure's parameters: `(x, y)`.
fn params(p: &mut Parser) { fn params(p: &mut Parser) {
let m = p.marker(); let m = p.marker();
p.enter_newline_mode(NewlineMode::Continue); p.with_nl_mode(AtNewline::Continue, |p| {
p.assert(SyntaxKind::LeftParen); p.assert(SyntaxKind::LeftParen);
let mut seen = HashSet::new(); let mut seen = HashSet::new();
let mut sink = false; let mut sink = false;
while !p.current().is_terminator() { while !p.current().is_terminator() {
if !p.at_set(set::PARAM) { if !p.at_set(set::PARAM) {
p.unexpected(); p.unexpected();
continue; continue;
}
param(p, &mut seen, &mut sink);
if !p.current().is_terminator() {
p.expect(SyntaxKind::Comma);
}
} }
param(p, &mut seen, &mut sink); p.expect_closing_delimiter(m, SyntaxKind::RightParen);
});
if !p.current().is_terminator() {
p.expect(SyntaxKind::Comma);
}
}
p.expect_closing_delimiter(m, SyntaxKind::RightParen);
p.exit_newline_mode();
p.wrap(m, SyntaxKind::Params); p.wrap(m, SyntaxKind::Params);
} }
@ -1374,25 +1367,25 @@ fn destructuring_or_parenthesized<'s>(
let mut maybe_just_parens = true; let mut maybe_just_parens = true;
let m = p.marker(); let m = p.marker();
p.enter_newline_mode(NewlineMode::Continue); p.with_nl_mode(AtNewline::Continue, |p| {
p.assert(SyntaxKind::LeftParen); p.assert(SyntaxKind::LeftParen);
while !p.current().is_terminator() { while !p.current().is_terminator() {
if !p.at_set(set::DESTRUCTURING_ITEM) { if !p.at_set(set::DESTRUCTURING_ITEM) {
p.unexpected(); p.unexpected();
continue; continue;
}
destructuring_item(p, reassignment, seen, &mut maybe_just_parens, &mut sink);
count += 1;
if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
maybe_just_parens = false;
}
} }
destructuring_item(p, reassignment, seen, &mut maybe_just_parens, &mut sink); p.expect_closing_delimiter(m, SyntaxKind::RightParen);
count += 1; });
if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
maybe_just_parens = false;
}
}
p.expect_closing_delimiter(m, SyntaxKind::RightParen);
p.exit_newline_mode();
if maybe_just_parens && count == 1 && !sink { if maybe_just_parens && count == 1 && !sink {
p.wrap(m, SyntaxKind::Parenthesized); p.wrap(m, SyntaxKind::Parenthesized);
@ -1510,7 +1503,7 @@ fn pattern_leaf<'s>(
/// ### Modes /// ### Modes
/// ///
/// The parser manages the transitions between the three modes of Typst through /// The parser manages the transitions between the three modes of Typst through
/// stacks of [lexer modes](`LexMode`) and [newline modes](`NewlineMode`). /// [lexer modes](`LexMode`) and [newline modes](`AtNewline`).
/// ///
/// The lexer modes map to the three Typst modes and are stored in the lexer, /// The lexer modes map to the three Typst modes and are stored in the lexer,
/// changing which`SyntaxKind`s it will generate. The mode also affects how the /// changing which`SyntaxKind`s it will generate. The mode also affects how the
@ -1527,8 +1520,11 @@ struct Parser<'s> {
/// The source text shared with the lexer. /// The source text shared with the lexer.
text: &'s str, text: &'s str,
/// A lexer over the source text with multiple modes. Defines the boundaries /// A lexer over the source text with multiple modes. Defines the boundaries
/// of tokens and determines their [`SyntaxKind`]. /// of tokens and determines their [`SyntaxKind`]. Contains the [`LexMode`]
/// defining our current Typst mode.
lexer: Lexer<'s>, lexer: Lexer<'s>,
/// The newline mode: whether to insert a temporary end at newlines in Code.
nl_mode: AtNewline,
/// The index into `text` of the end of the previous token. /// The index into `text` of the end of the previous token.
prev_end: usize, prev_end: usize,
/// The index into `text` of the start of our current token (the end is /// The index into `text` of the start of our current token (the end is
@ -1545,12 +1541,6 @@ struct Parser<'s> {
/// Nodes representing the concrete syntax tree of previously parsed text. /// Nodes representing the concrete syntax tree of previously parsed text.
/// In Code and Math, includes previously parsed trivia, but not `current`. /// In Code and Math, includes previously parsed trivia, but not `current`.
nodes: Vec<SyntaxNode>, nodes: Vec<SyntaxNode>,
/// Stack of lexer modes to be pushed/popped. The current mode is implicitly
/// stored in the lexer.
modes: Vec<LexMode>,
/// Stack of newline modes to be pushed/popped. The current mode is the tail
/// of the vector.
newline_modes: Vec<NewlineMode>,
/// Parser checkpoints for a given text index. Used for efficient parser /// Parser checkpoints for a given text index. Used for efficient parser
/// backtracking similar to packrat parsing. See comments above in /// backtracking similar to packrat parsing. See comments above in
/// [`expr_with_paren`]. /// [`expr_with_paren`].
@ -1558,14 +1548,28 @@ struct Parser<'s> {
} }
/// How to proceed with parsing when at a newline in Code. /// How to proceed with parsing when at a newline in Code.
#[derive(Clone)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NewlineMode { enum AtNewline {
/// Continue at newlines.
Continue,
/// Stop at any newline. /// Stop at any newline.
Stop, Stop,
/// Continue only if there is no continuation with `else` or `.`. /// Continue only if there is no continuation with `else` or `.`.
Contextual, Contextual,
/// Continue at newlines. }
Continue,
impl AtNewline {
/// Whether to stop at a newline or continue based on the current context.
fn stop(self, kind: impl FnOnce() -> SyntaxKind) -> bool {
match self {
AtNewline::Continue => false,
AtNewline::Stop => true,
AtNewline::Contextual => match kind() {
SyntaxKind::Else | SyntaxKind::Dot => false,
_ => true,
},
}
}
} }
/// A marker representing a node's position in the parser. Mainly used for /// A marker representing a node's position in the parser. Mainly used for
@ -1581,16 +1585,15 @@ impl<'s> Parser<'s> {
lexer.jump(offset); lexer.jump(offset);
let (current, current_node) = lexer.next(); let (current, current_node) = lexer.next();
Self { Self {
lexer,
text, text,
lexer,
nl_mode: AtNewline::Continue,
prev_end: offset, prev_end: offset,
current_start: offset, current_start: offset,
current, current,
current_node, current_node,
balanced: true, balanced: true,
nodes: vec![], nodes: vec![],
modes: vec![],
newline_modes: vec![],
memo: Default::default(), memo: Default::default(),
} }
} }
@ -1767,35 +1770,33 @@ impl<'s> Parser<'s> {
self.nodes.insert(from, SyntaxNode::inner(kind, children)); self.nodes.insert(from, SyntaxNode::inner(kind, children));
} }
/// Enter a new [`LexMode`] that will affect subsequent tokens (does not /// Parse within the [`LexMode`] for subsequent tokens (does not change the
/// modify the current token). /// current token). This may re-lex the final token on exit.
fn enter(&mut self, mode: LexMode) { ///
self.modes.push(self.lexer.mode()); /// This function effectively repurposes the call stack as a stack of modes.
fn with_mode(&mut self, mode: LexMode, func: impl FnOnce(&mut Parser<'s>)) {
let previous = self.lexer.mode();
self.lexer.set_mode(mode); self.lexer.set_mode(mode);
} func(self);
if mode != previous {
/// Exit the current [`LexMode`], possibly re-lexing the current token.
fn exit(&mut self) {
let mode = self.modes.pop().unwrap();
if mode != self.lexer.mode() {
self.unskip(); self.unskip();
self.lexer.set_mode(mode); self.lexer.set_mode(previous);
self.lexer.jump(self.current_start); self.lexer.jump(self.current_start);
self.lex(); self.lex();
self.skip(); self.skip();
} }
} }
/// Enter a new [`NewlineMode`] that will affect subsequent tokens (does not /// Parse within the [`AtNewline`] mode for subsequent tokens (does not
/// modify the current token). /// change the current token). This may re-lex the final token on exit.
fn enter_newline_mode(&mut self, stop: NewlineMode) { ///
self.newline_modes.push(stop); /// This function effectively repurposes the call stack as a stack of modes.
} fn with_nl_mode(&mut self, mode: AtNewline, func: impl FnOnce(&mut Parser<'s>)) {
let previous = self.nl_mode;
/// Exit the current [`NewlineMode`], possibly re-lexing the current token. self.nl_mode = mode;
fn exit_newline_mode(&mut self) { func(self);
self.unskip(); self.unskip();
self.newline_modes.pop(); self.nl_mode = previous;
self.lexer.jump(self.prev_end); self.lexer.jump(self.prev_end);
self.lex(); self.lex();
self.skip(); self.skip();
@ -1853,15 +1854,7 @@ impl<'s> Parser<'s> {
// Special cases to handle newlines in Code. // Special cases to handle newlines in Code.
if self.lexer.mode() == LexMode::Code if self.lexer.mode() == LexMode::Code
&& self.lexer.newline() && self.lexer.newline()
&& match self.newline_modes.last() { && self.nl_mode.stop(|| Self::next_non_trivia(&mut self.lexer.clone()))
Some(NewlineMode::Continue) => false,
Some(NewlineMode::Contextual) => !matches!(
Self::next_non_trivia(&mut self.lexer.clone()),
SyntaxKind::Else | SyntaxKind::Dot
),
Some(NewlineMode::Stop) => true,
None => false,
}
{ {
self.current = SyntaxKind::End; self.current = SyntaxKind::End;
} }