mirror of
https://github.com/typst/typst
synced 2025-05-13 20:46:23 +08:00
3. Add typst-syntax README and parser comments
This commit is contained in:
parent
a2761ab75a
commit
a764aa4192
40
crates/typst-syntax/README.md
Normal file
40
crates/typst-syntax/README.md
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# typst-syntax
|
||||||
|
|
||||||
|
Welcome to the Typst Syntax crate! This crate manages the syntactical structure
|
||||||
|
of Typst by holding some core abstractions like assigning source file ids,
|
||||||
|
parsing Typst syntax, creating an Abstract Syntax Tree (AST), initializing
|
||||||
|
source "spans" (for linking AST elements to their outputs in a document), and
|
||||||
|
syntax highlighting.
|
||||||
|
|
||||||
|
Below are quick descriptions of the files you might be editing if you find
|
||||||
|
yourself here :)
|
||||||
|
|
||||||
|
- `lexer.rs`: The lexical foundation of the parser, which converts a string of
|
||||||
|
characters into tokens.
|
||||||
|
- `parser.rs`: The main parser definition, preparing a Concrete Syntax Tree made
|
||||||
|
of nested vectors of `SyntaxNode`s.
|
||||||
|
- `reparser.rs`: The algorithm for reparsing the minimal required amount of
|
||||||
|
source text for efficient incremental compilation.
|
||||||
|
- `ast.rs`: The conversion layer between the Concrete Syntax Tree of the parser
|
||||||
|
and the Abstract Syntax Tree used for code evaluation.
|
||||||
|
- `node.rs` & `span.rs`: The underlying data structure for the Concrete Syntax
|
||||||
|
Tree and the definitions of source spans used for efficiently pointing to a
|
||||||
|
syntax node in things like diagnostics.
|
||||||
|
- `kind.rs` & `set.rs`: An enum with all syntactical tokens and nodes and
|
||||||
|
bit-set data structure for sets of `SyntaxKind`s.
|
||||||
|
- `highlight.rs`: Extraction of syntax highlighting information out of the
|
||||||
|
Concrete Syntax Tree (and outputting as HTML).
|
||||||
|
- `path.rs`, `file.rs`, `package.rs`: The system for interning project and
|
||||||
|
package paths as unique file IDs and resolving them in a virtual filesystem
|
||||||
|
(not actually for _opening_ files).
|
||||||
|
|
||||||
|
The structure of the parser is largely adapted from Rust Analyzer. Their
|
||||||
|
[documentation][ra] is a good reference for a number of the design decisions
|
||||||
|
around the parser and AST.
|
||||||
|
|
||||||
|
The reparsing algorithm is explained in Section 4 of [Martin's thesis][thesis]
|
||||||
|
(though it changed a bit since).
|
||||||
|
|
||||||
|
[ra]: https://github.com/rust-lang/rust-analyzer/blob/master/docs/dev/syntax.md
|
||||||
|
[thesis]:
|
||||||
|
https://www.researchgate.net/publication/364622490_Fast_Typesetting_with_Incremental_Compilation
|
@ -10,7 +10,7 @@ use crate::{
|
|||||||
ast, is_ident, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode,
|
ast, is_ident, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Parses a source file.
|
/// Parses a source file as top-level markup.
|
||||||
pub fn parse(text: &str) -> SyntaxNode {
|
pub fn parse(text: &str) -> SyntaxNode {
|
||||||
let _scope = typst_timing::TimingScope::new("parse");
|
let _scope = typst_timing::TimingScope::new("parse");
|
||||||
let mut p = Parser::new(text, 0, LexMode::Markup);
|
let mut p = Parser::new(text, 0, LexMode::Markup);
|
||||||
@ -37,7 +37,7 @@ pub fn parse_math(text: &str) -> SyntaxNode {
|
|||||||
p.finish().into_iter().next().unwrap()
|
p.finish().into_iter().next().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses the contents of a file or content block.
|
/// Parses markup expressions until a stop condition is met.
|
||||||
fn markup(
|
fn markup(
|
||||||
p: &mut Parser,
|
p: &mut Parser,
|
||||||
mut at_start: bool,
|
mut at_start: bool,
|
||||||
@ -96,7 +96,7 @@ pub(super) fn reparse_markup(
|
|||||||
(p.balanced && p.current_start() == range.end).then(|| p.finish())
|
(p.balanced && p.current_start() == range.end).then(|| p.finish())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses a single markup expression: This includes markup elements like
|
/// Parses a single markup expression. This includes markup elements like
|
||||||
/// spaces, text, and headings, and embedded code expressions.
|
/// spaces, text, and headings, and embedded code expressions.
|
||||||
fn markup_expr(p: &mut Parser, at_start: &mut bool) {
|
fn markup_expr(p: &mut Parser, at_start: &mut bool) {
|
||||||
match p.current() {
|
match p.current() {
|
||||||
@ -414,6 +414,7 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Try to parse delimiters based on the current token's unicode math class.
|
||||||
fn maybe_delimited(p: &mut Parser) -> bool {
|
fn maybe_delimited(p: &mut Parser) -> bool {
|
||||||
let open = math_class(p.current_text()) == Some(MathClass::Opening);
|
let open = math_class(p.current_text()) == Some(MathClass::Opening);
|
||||||
if open {
|
if open {
|
||||||
@ -422,6 +423,7 @@ fn maybe_delimited(p: &mut Parser) -> bool {
|
|||||||
open
|
open
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse matched delimiters in math: `[x + y]`.
|
||||||
fn math_delimited(p: &mut Parser) {
|
fn math_delimited(p: &mut Parser) {
|
||||||
let m = p.marker();
|
let m = p.marker();
|
||||||
p.eat();
|
p.eat();
|
||||||
@ -444,6 +446,8 @@ fn math_delimited(p: &mut Parser) {
|
|||||||
p.wrap(m, SyntaxKind::Math);
|
p.wrap(m, SyntaxKind::Math);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Remove one set of parentheses (if any) from a previously parsed expression
|
||||||
|
/// by converting to non-expression SyntaxKinds.
|
||||||
fn math_unparen(p: &mut Parser, m: Marker) {
|
fn math_unparen(p: &mut Parser, m: Marker) {
|
||||||
let Some(node) = p.nodes.get_mut(m.0) else { return };
|
let Some(node) = p.nodes.get_mut(m.0) else { return };
|
||||||
if node.kind() != SyntaxKind::MathDelimited {
|
if node.kind() != SyntaxKind::MathDelimited {
|
||||||
@ -460,6 +464,10 @@ fn math_unparen(p: &mut Parser, m: Marker) {
|
|||||||
node.convert_to_kind(SyntaxKind::Math);
|
node.convert_to_kind(SyntaxKind::Math);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The unicode math class of a string. Only returns `Some` if `text` has
|
||||||
|
/// exactly one unicode character or is a math shorthand string (currently just
|
||||||
|
/// `[|`, `||`, `|]`) and then only returns `Some` if there is a math class
|
||||||
|
/// defined for that character.
|
||||||
fn math_class(text: &str) -> Option<MathClass> {
|
fn math_class(text: &str) -> Option<MathClass> {
|
||||||
match text {
|
match text {
|
||||||
"[|" => return Some(MathClass::Opening),
|
"[|" => return Some(MathClass::Opening),
|
||||||
@ -475,6 +483,7 @@ fn math_class(text: &str) -> Option<MathClass> {
|
|||||||
.and_then(unicode_math_class::class)
|
.and_then(unicode_math_class::class)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Precedence and wrapper kinds for the binary math operators.
|
||||||
fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usize)> {
|
fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usize)> {
|
||||||
match kind {
|
match kind {
|
||||||
SyntaxKind::Underscore => {
|
SyntaxKind::Underscore => {
|
||||||
@ -490,6 +499,7 @@ fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usiz
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse an argument list in math: `(a, b; c, d; size: #50%)`.
|
||||||
fn math_args(p: &mut Parser) {
|
fn math_args(p: &mut Parser) {
|
||||||
let m = p.marker();
|
let m = p.marker();
|
||||||
p.convert(SyntaxKind::LeftParen);
|
p.convert(SyntaxKind::LeftParen);
|
||||||
@ -629,7 +639,7 @@ fn code_expr(p: &mut Parser) {
|
|||||||
code_expr_prec(p, false, 0)
|
code_expr_prec(p, false, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses a code expression embedded in markup or math.
|
/// Parses an atomic code expression embedded in markup or math.
|
||||||
fn embedded_code_expr(p: &mut Parser) {
|
fn embedded_code_expr(p: &mut Parser) {
|
||||||
p.enter_newline_mode(NewlineMode::Stop);
|
p.enter_newline_mode(NewlineMode::Stop);
|
||||||
p.enter(LexMode::Code);
|
p.enter(LexMode::Code);
|
||||||
@ -1130,6 +1140,21 @@ fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind {
|
|||||||
seen: HashSet::new(),
|
seen: HashSet::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// An edge case with parens is whether we can interpret a leading spread
|
||||||
|
// expression as a dictionary, e.g. if we want `(..dict1, ..dict2)` to join
|
||||||
|
// the two dicts.
|
||||||
|
//
|
||||||
|
// The issue is that we decide on the type of the parenthesized expression
|
||||||
|
// here in the parser by the `SyntaxKind` we wrap with, instead of in eval
|
||||||
|
// based on the type of the spread item.
|
||||||
|
//
|
||||||
|
// The current fix is that we allow a leading colon to force the
|
||||||
|
// parenthesized value into a dict:
|
||||||
|
// - `(..arr1, ..arr2)` is wrapped as an `Array`.
|
||||||
|
// - `(: ..dict1, ..dict2)` is wrapped as a `Dict`.
|
||||||
|
//
|
||||||
|
// This does allow some unexpected expressions, such as `(: key: val)`, but
|
||||||
|
// it's currently intentional.
|
||||||
if p.eat_if(SyntaxKind::Colon) {
|
if p.eat_if(SyntaxKind::Colon) {
|
||||||
state.kind = Some(SyntaxKind::Dict);
|
state.kind = Some(SyntaxKind::Dict);
|
||||||
state.maybe_just_parens = false;
|
state.maybe_just_parens = false;
|
||||||
@ -1165,8 +1190,13 @@ fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind {
|
|||||||
/// State for array/dictionary parsing.
|
/// State for array/dictionary parsing.
|
||||||
struct GroupState {
|
struct GroupState {
|
||||||
count: usize,
|
count: usize,
|
||||||
|
/// Whether this is just a single expression in parens: `(a)`. Single
|
||||||
|
/// element arrays require an explicit comma: `(a,)`, unless we're
|
||||||
|
/// spreading: `(..a)`.
|
||||||
maybe_just_parens: bool,
|
maybe_just_parens: bool,
|
||||||
|
/// The `SyntaxKind` to wrap as (if we've figured it out yet).
|
||||||
kind: Option<SyntaxKind>,
|
kind: Option<SyntaxKind>,
|
||||||
|
/// Store named arguments so we can give an error if they're repeated.
|
||||||
seen: HashSet<EcoString>,
|
seen: HashSet<EcoString>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1484,32 +1514,90 @@ fn pattern_leaf<'s>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Manages parsing of a stream of tokens.
|
/// Manages parsing a stream of tokens into a tree of [`SyntaxNode`]s.
|
||||||
|
///
|
||||||
|
/// The implementation presents an interface that investigates a `current` token
|
||||||
|
/// and can take one of the following actions:
|
||||||
|
///
|
||||||
|
/// 1. Eat a token, pushing `current` into the `nodes` vector as a [leaf
|
||||||
|
/// node](`SyntaxNode::leaf`) and prepare a new `current` by calling into the
|
||||||
|
/// lexer.
|
||||||
|
/// 2. Wrap nodes from a marker to the end of `nodes` (excluding `current`) into
|
||||||
|
/// an [inner node](`SyntaxNode::inner`) of a specific [`SyntaxKind`].
|
||||||
|
/// 3. Produce or convert nodes into an [error node](`SyntaxNode::error`) when
|
||||||
|
/// something expected is missing or something unexpected is found.
|
||||||
|
///
|
||||||
|
/// Overall the parser produces a nested tree of SyntaxNodes as a "_Concrete_
|
||||||
|
/// Syntax Tree." The raw Concrete Syntax Tree should contain the entire source
|
||||||
|
/// text, and is used as-is for e.g. syntax highlighting and IDE features. In
|
||||||
|
/// `ast.rs` the CST is interpreted as a lazy view over an "_Abstract_ Syntax
|
||||||
|
/// Tree." The AST module skips over irrelevant tokens -- whitespace, comments,
|
||||||
|
/// code parens, commas in function args, etc. -- as it iterates through the
|
||||||
|
/// tree.
|
||||||
|
///
|
||||||
|
/// ### Modes
|
||||||
|
///
|
||||||
|
/// The parser manages the transitions between the three modes of Typst through
|
||||||
|
/// stacks of [lexer modes](`LexMode`) and [newline modes](`NewlineMode`).
|
||||||
|
///
|
||||||
|
/// The lexer modes map to the three Typst modes and are stored in the lexer,
|
||||||
|
/// changing which`SyntaxKind`s it will generate. The mode also affects how the
|
||||||
|
/// parser treats trivia tokens (comments and whitespace). In Markup, trivia is
|
||||||
|
/// handled manually to deal with list indentation and must be explicitly eaten.
|
||||||
|
/// In Code and Math, trivia is managed internally and is implicitly eaten by
|
||||||
|
/// pushing onto the end of the `nodes` vector until a non-trivia kind is found.
|
||||||
|
///
|
||||||
|
/// The newline mode is used in Code to determine whether a newline should end
|
||||||
|
/// the current expression. If so, the parser temporarily changes the current
|
||||||
|
/// token's kind to a fake [`SyntaxKind::End`]. When the parser exits the mode
|
||||||
|
/// the original `SyntaxKind` is restored.
|
||||||
struct Parser<'s> {
|
struct Parser<'s> {
|
||||||
|
/// The source text shared with the lexer.
|
||||||
text: &'s str,
|
text: &'s str,
|
||||||
|
/// A lexer over the source text with multiple modes. Defines the boundaries
|
||||||
|
/// of tokens and determines their [`SyntaxKind`].
|
||||||
lexer: Lexer<'s>,
|
lexer: Lexer<'s>,
|
||||||
|
/// The index into `text` of the end of the previous token.
|
||||||
prev_end: usize,
|
prev_end: usize,
|
||||||
|
/// The index into `text` of the start of our current token (the end is
|
||||||
|
/// stored as the lexer's cursor).
|
||||||
current_start: usize,
|
current_start: usize,
|
||||||
|
/// The [`SyntaxKind`] of the current token.
|
||||||
current: SyntaxKind,
|
current: SyntaxKind,
|
||||||
|
/// Whether the parser has the expected set of open/close delimiters. This
|
||||||
|
/// only ever transitions from `true` to `false`.
|
||||||
balanced: bool,
|
balanced: bool,
|
||||||
|
/// Nodes representing the concrete syntax tree of previously parsed text.
|
||||||
|
/// In Code and Math, includes previously parsed trivia, but not `current`.
|
||||||
nodes: Vec<SyntaxNode>,
|
nodes: Vec<SyntaxNode>,
|
||||||
|
/// Stack of lexer modes to be pushed/popped. The current mode is implicitly
|
||||||
|
/// stored in the lexer.
|
||||||
modes: Vec<LexMode>,
|
modes: Vec<LexMode>,
|
||||||
|
/// Stack of newline modes to be pushed/popped. The current mode is the tail
|
||||||
|
/// of the vector.
|
||||||
newline_modes: Vec<NewlineMode>,
|
newline_modes: Vec<NewlineMode>,
|
||||||
|
/// Parser checkpoints for a given text index. Used for efficient parser
|
||||||
|
/// backtracking similar to packrat parsing. See comments above in
|
||||||
|
/// [`expr_with_paren`].
|
||||||
memo: HashMap<usize, (Range<usize>, Checkpoint<'s>)>,
|
memo: HashMap<usize, (Range<usize>, Checkpoint<'s>)>,
|
||||||
|
/// The stored parse results at each checkpoint.
|
||||||
memo_arena: Vec<SyntaxNode>,
|
memo_arena: Vec<SyntaxNode>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// How to proceed with parsing when seeing a newline.
|
/// How to proceed with parsing when at a newline in Code.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
enum NewlineMode {
|
enum NewlineMode {
|
||||||
/// Stop always.
|
/// Stop at any newline.
|
||||||
Stop,
|
Stop,
|
||||||
/// Proceed if there is no continuation with `else` or `.`
|
/// Continue only if there is no continuation with `else` or `.`.
|
||||||
Contextual,
|
Contextual,
|
||||||
/// Just proceed like with normal whitespace.
|
/// Continue at newlines.
|
||||||
Continue,
|
Continue,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A marker representing a node's position in the parser. Mainly used for
|
||||||
|
/// wrapping, but can also index into the parser to access the node, like
|
||||||
|
/// `p[m]`.
|
||||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||||
struct Marker(usize);
|
struct Marker(usize);
|
||||||
|
|
||||||
@ -1523,6 +1611,7 @@ struct Checkpoint<'s> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Parser<'s> {
|
impl<'s> Parser<'s> {
|
||||||
|
/// Create a new parser starting from the given text offset and lexer mode.
|
||||||
fn new(text: &'s str, offset: usize, mode: LexMode) -> Self {
|
fn new(text: &'s str, offset: usize, mode: LexMode) -> Self {
|
||||||
let mut lexer = Lexer::new(text, mode);
|
let mut lexer = Lexer::new(text, mode);
|
||||||
lexer.jump(offset);
|
lexer.jump(offset);
|
||||||
@ -1542,52 +1631,68 @@ impl<'s> Parser<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Consume the parser, yielding the full vector of parsed SyntaxNodes.
|
||||||
fn finish(self) -> Vec<SyntaxNode> {
|
fn finish(self) -> Vec<SyntaxNode> {
|
||||||
self.nodes
|
self.nodes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The offset into `text` of the previous token's end.
|
||||||
fn prev_end(&self) -> usize {
|
fn prev_end(&self) -> usize {
|
||||||
self.prev_end
|
self.prev_end
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Similar to a `peek()` function: returns the `kind` of the next token to
|
||||||
|
/// be eaten.
|
||||||
fn current(&self) -> SyntaxKind {
|
fn current(&self) -> SyntaxKind {
|
||||||
self.current
|
self.current
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The offset into `text` of the current token's start.
|
||||||
fn current_start(&self) -> usize {
|
fn current_start(&self) -> usize {
|
||||||
self.current_start
|
self.current_start
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The offset into `text` of the current token's end.
|
||||||
fn current_end(&self) -> usize {
|
fn current_end(&self) -> usize {
|
||||||
self.lexer.cursor()
|
self.lexer.cursor()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The current token's text.
|
||||||
fn current_text(&self) -> &'s str {
|
fn current_text(&self) -> &'s str {
|
||||||
&self.text[self.current_start..self.current_end()]
|
&self.text[self.current_start..self.current_end()]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether the current token is a given [`SyntaxKind`].
|
||||||
fn at(&self, kind: SyntaxKind) -> bool {
|
fn at(&self, kind: SyntaxKind) -> bool {
|
||||||
self.current == kind
|
self.current == kind
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether the current token is contained in a [`SyntaxSet`].
|
||||||
fn at_set(&self, set: SyntaxSet) -> bool {
|
fn at_set(&self, set: SyntaxSet) -> bool {
|
||||||
set.contains(self.current)
|
set.contains(self.current)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether we're at the end of the token stream.
|
||||||
|
///
|
||||||
|
/// Note: This might be a fake end due to the newline mode.
|
||||||
fn end(&self) -> bool {
|
fn end(&self) -> bool {
|
||||||
self.at(SyntaxKind::End)
|
self.at(SyntaxKind::End)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// If we're at the given `kind` with no preceding trivia tokens.
|
||||||
fn directly_at(&self, kind: SyntaxKind) -> bool {
|
fn directly_at(&self, kind: SyntaxKind) -> bool {
|
||||||
self.current == kind && self.prev_end == self.current_start
|
self.current == kind && self.prev_end == self.current_start
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Eat the current token by saving it to the `nodes` vector, then move
|
||||||
|
/// the lexer forward to prepare a new token.
|
||||||
fn eat(&mut self) {
|
fn eat(&mut self) {
|
||||||
self.save();
|
self.save();
|
||||||
self.lex();
|
self.lex();
|
||||||
self.skip();
|
self.skip();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Eat the current node and return a reference for in-place mutation.
|
||||||
#[track_caller]
|
#[track_caller]
|
||||||
fn eat_and_get(&mut self) -> &mut SyntaxNode {
|
fn eat_and_get(&mut self) -> &mut SyntaxNode {
|
||||||
let offset = self.nodes.len();
|
let offset = self.nodes.len();
|
||||||
@ -1597,9 +1702,9 @@ impl<'s> Parser<'s> {
|
|||||||
&mut self.nodes[offset]
|
&mut self.nodes[offset]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Eats if at `kind`.
|
/// Eat the token if at `kind`. Returns `true` if eaten.
|
||||||
///
|
///
|
||||||
/// Note: In math and code mode, this will ignore trivia in front of the
|
/// Note: In Math and Code, this will ignore trivia in front of the
|
||||||
/// `kind`. To forbid skipping trivia, consider using `eat_if_direct`.
|
/// `kind`. To forbid skipping trivia, consider using `eat_if_direct`.
|
||||||
fn eat_if(&mut self, kind: SyntaxKind) -> bool {
|
fn eat_if(&mut self, kind: SyntaxKind) -> bool {
|
||||||
let at = self.at(kind);
|
let at = self.at(kind);
|
||||||
@ -1609,7 +1714,8 @@ impl<'s> Parser<'s> {
|
|||||||
at
|
at
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Eats only if currently at the start of `kind`.
|
/// Eat the token only if at `kind` with no preceding trivia. Returns `true`
|
||||||
|
/// if eaten.
|
||||||
fn eat_if_direct(&mut self, kind: SyntaxKind) -> bool {
|
fn eat_if_direct(&mut self, kind: SyntaxKind) -> bool {
|
||||||
let at = self.directly_at(kind);
|
let at = self.directly_at(kind);
|
||||||
if at {
|
if at {
|
||||||
@ -1618,30 +1724,39 @@ impl<'s> Parser<'s> {
|
|||||||
at
|
at
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Assert that we are at the given [`SyntaxKind`] and eat it. This should
|
||||||
|
/// be used when moving between functions that expect to start with a
|
||||||
|
/// specific token.
|
||||||
#[track_caller]
|
#[track_caller]
|
||||||
fn assert(&mut self, kind: SyntaxKind) {
|
fn assert(&mut self, kind: SyntaxKind) {
|
||||||
assert_eq!(self.current, kind);
|
assert_eq!(self.current, kind);
|
||||||
self.eat();
|
self.eat();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Convert the current token's [`SyntaxKind`] and eat it.
|
||||||
fn convert(&mut self, kind: SyntaxKind) {
|
fn convert(&mut self, kind: SyntaxKind) {
|
||||||
self.current = kind;
|
self.current = kind;
|
||||||
self.eat();
|
self.eat();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether the current token is a newline, only used in Markup.
|
||||||
fn newline(&mut self) -> bool {
|
fn newline(&mut self) -> bool {
|
||||||
self.lexer.newline()
|
self.lexer.newline()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The number of characters until the most recent newline in `text`.
|
||||||
fn column(&self, at: usize) -> usize {
|
fn column(&self, at: usize) -> usize {
|
||||||
self.text[..at].chars().rev().take_while(|&c| !is_newline(c)).count()
|
self.text[..at].chars().rev().take_while(|&c| !is_newline(c)).count()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A marker that will point to the current token in the parser once it's
|
||||||
|
/// been eaten.
|
||||||
fn marker(&self) -> Marker {
|
fn marker(&self) -> Marker {
|
||||||
Marker(self.nodes.len())
|
Marker(self.nodes.len())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get a marker after the last non-trivia node.
|
/// A marker that will point to first trivia before this token in the
|
||||||
|
/// parser (or the token itself if no trivia precede it).
|
||||||
fn before_trivia(&self) -> Marker {
|
fn before_trivia(&self) -> Marker {
|
||||||
let mut i = self.nodes.len();
|
let mut i = self.nodes.len();
|
||||||
if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start {
|
if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start {
|
||||||
@ -1658,6 +1773,7 @@ impl<'s> Parser<'s> {
|
|||||||
m.0 > 0 && self.nodes[m.0 - 1].kind().is_error()
|
m.0 > 0 && self.nodes[m.0 - 1].kind().is_error()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Iterate over the non-trivia tokens following the marker.
|
||||||
#[track_caller]
|
#[track_caller]
|
||||||
fn post_process(&mut self, m: Marker) -> impl Iterator<Item = &mut SyntaxNode> {
|
fn post_process(&mut self, m: Marker) -> impl Iterator<Item = &mut SyntaxNode> {
|
||||||
self.nodes[m.0..]
|
self.nodes[m.0..]
|
||||||
@ -1665,10 +1781,15 @@ impl<'s> Parser<'s> {
|
|||||||
.filter(|child| !child.kind().is_error() && !child.kind().is_trivia())
|
.filter(|child| !child.kind().is_error() && !child.kind().is_trivia())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Wrap the nodes from a marker up to (but excluding) the current token in
|
||||||
|
/// a new [inner node](`SyntaxNode::inner`) of the given kind. This is an
|
||||||
|
/// easy interface for creating nested syntax nodes _after_ having parsed
|
||||||
|
/// their children.
|
||||||
fn wrap(&mut self, from: Marker, kind: SyntaxKind) {
|
fn wrap(&mut self, from: Marker, kind: SyntaxKind) {
|
||||||
self.wrap_within(from, self.before_trivia(), kind);
|
self.wrap_within(from, self.before_trivia(), kind);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Wrap including any trailing trivia nodes.
|
||||||
fn wrap_all(&mut self, from: Marker, kind: SyntaxKind) {
|
fn wrap_all(&mut self, from: Marker, kind: SyntaxKind) {
|
||||||
self.wrap_within(from, Marker(self.nodes.len()), kind)
|
self.wrap_within(from, Marker(self.nodes.len()), kind)
|
||||||
}
|
}
|
||||||
@ -1681,11 +1802,14 @@ impl<'s> Parser<'s> {
|
|||||||
self.nodes.insert(from, SyntaxNode::inner(kind, children));
|
self.nodes.insert(from, SyntaxNode::inner(kind, children));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Enter a new [`LexMode`] that will affect subsequent tokens (does not
|
||||||
|
/// modify the current token).
|
||||||
fn enter(&mut self, mode: LexMode) {
|
fn enter(&mut self, mode: LexMode) {
|
||||||
self.modes.push(self.lexer.mode());
|
self.modes.push(self.lexer.mode());
|
||||||
self.lexer.set_mode(mode);
|
self.lexer.set_mode(mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Exit the current [`LexMode`], possibly re-lexing the current token.
|
||||||
fn exit(&mut self) {
|
fn exit(&mut self) {
|
||||||
let mode = self.modes.pop().unwrap();
|
let mode = self.modes.pop().unwrap();
|
||||||
if mode != self.lexer.mode() {
|
if mode != self.lexer.mode() {
|
||||||
@ -1697,10 +1821,13 @@ impl<'s> Parser<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Enter a new [`NewlineMode`] that will affect subsequent tokens (does not
|
||||||
|
/// modify the current token).
|
||||||
fn enter_newline_mode(&mut self, stop: NewlineMode) {
|
fn enter_newline_mode(&mut self, stop: NewlineMode) {
|
||||||
self.newline_modes.push(stop);
|
self.newline_modes.push(stop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Exit the current [`NewlineMode`], possibly re-lexing the current token.
|
||||||
fn exit_newline_mode(&mut self) {
|
fn exit_newline_mode(&mut self) {
|
||||||
self.unskip();
|
self.unskip();
|
||||||
self.newline_modes.pop();
|
self.newline_modes.pop();
|
||||||
@ -1709,6 +1836,7 @@ impl<'s> Parser<'s> {
|
|||||||
self.skip();
|
self.skip();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Save a checkpoint of the parser state.
|
||||||
fn checkpoint(&self) -> Checkpoint<'s> {
|
fn checkpoint(&self) -> Checkpoint<'s> {
|
||||||
Checkpoint {
|
Checkpoint {
|
||||||
lexer: self.lexer.clone(),
|
lexer: self.lexer.clone(),
|
||||||
@ -1719,6 +1847,7 @@ impl<'s> Parser<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reset the parser from a checkpoint.
|
||||||
fn restore(&mut self, checkpoint: Checkpoint<'s>) {
|
fn restore(&mut self, checkpoint: Checkpoint<'s>) {
|
||||||
self.lexer = checkpoint.lexer;
|
self.lexer = checkpoint.lexer;
|
||||||
self.prev_end = checkpoint.prev_end;
|
self.prev_end = checkpoint.prev_end;
|
||||||
@ -1727,6 +1856,7 @@ impl<'s> Parser<'s> {
|
|||||||
self.nodes.truncate(checkpoint.nodes);
|
self.nodes.truncate(checkpoint.nodes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Move past trivia nodes in Code/Math.
|
||||||
fn skip(&mut self) {
|
fn skip(&mut self) {
|
||||||
if self.lexer.mode() != LexMode::Markup {
|
if self.lexer.mode() != LexMode::Markup {
|
||||||
while self.current.is_trivia() {
|
while self.current.is_trivia() {
|
||||||
@ -1736,6 +1866,8 @@ impl<'s> Parser<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Move the parser back to the start of this token or its leading trivia
|
||||||
|
/// (in Code/Math).
|
||||||
fn unskip(&mut self) {
|
fn unskip(&mut self) {
|
||||||
if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start {
|
if self.lexer.mode() != LexMode::Markup && self.prev_end != self.current_start {
|
||||||
while self.nodes.last().is_some_and(|last| last.kind().is_trivia()) {
|
while self.nodes.last().is_some_and(|last| last.kind().is_trivia()) {
|
||||||
@ -1747,6 +1879,7 @@ impl<'s> Parser<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Save the current token to the `nodes` vector as an Inner or Error node.
|
||||||
fn save(&mut self) {
|
fn save(&mut self) {
|
||||||
let text = self.current_text();
|
let text = self.current_text();
|
||||||
if self.at(SyntaxKind::Error) {
|
if self.at(SyntaxKind::Error) {
|
||||||
@ -1761,21 +1894,24 @@ impl<'s> Parser<'s> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Find the kind of the next non-trivia token in the lexer.
|
||||||
fn next_non_trivia(lexer: &mut Lexer<'s>) -> SyntaxKind {
|
fn next_non_trivia(lexer: &mut Lexer<'s>) -> SyntaxKind {
|
||||||
loop {
|
loop {
|
||||||
let next = lexer.next();
|
let next = lexer.next();
|
||||||
// Loop is terminable, because SyntaxKind::End is not a trivia.
|
// Loop is terminable, because `SyntaxKind::End` is not a trivia.
|
||||||
if !next.is_trivia() {
|
if !next.is_trivia() {
|
||||||
break next;
|
break next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Move the lexer forward and prepare the current token. In Code, this
|
||||||
|
/// might insert a temporary [`SyntaxKind::End`] based on our newline mode.
|
||||||
fn lex(&mut self) {
|
fn lex(&mut self) {
|
||||||
self.current_start = self.lexer.cursor();
|
self.current_start = self.lexer.cursor();
|
||||||
self.current = self.lexer.next();
|
self.current = self.lexer.next();
|
||||||
|
|
||||||
// Special cases to handle newlines in code mode.
|
// Special cases to handle newlines in Code.
|
||||||
if self.lexer.mode() == LexMode::Code
|
if self.lexer.mode() == LexMode::Code
|
||||||
&& self.lexer.newline()
|
&& self.lexer.newline()
|
||||||
&& match self.newline_modes.last() {
|
&& match self.newline_modes.last() {
|
||||||
@ -1794,7 +1930,7 @@ impl<'s> Parser<'s> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Parser<'s> {
|
impl<'s> Parser<'s> {
|
||||||
/// Consume the given syntax `kind` or produce an error.
|
/// Consume the given `kind` or produce an error.
|
||||||
fn expect(&mut self, kind: SyntaxKind) -> bool {
|
fn expect(&mut self, kind: SyntaxKind) -> bool {
|
||||||
let at = self.at(kind);
|
let at = self.at(kind);
|
||||||
if at {
|
if at {
|
||||||
@ -1833,7 +1969,7 @@ impl<'s> Parser<'s> {
|
|||||||
self.nodes.insert(m.0, error);
|
self.nodes.insert(m.0, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Produce a hint.
|
/// Add a hint to a trailing error.
|
||||||
fn hint(&mut self, hint: &str) {
|
fn hint(&mut self, hint: &str) {
|
||||||
let m = self.before_trivia();
|
let m = self.before_trivia();
|
||||||
if let Some(error) = self.nodes.get_mut(m.0 - 1) {
|
if let Some(error) = self.nodes.get_mut(m.0 - 1) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user