use lexer subtrees to parse decorators

This commit is contained in:
PgBiel 2024-06-20 22:20:39 -03:00
parent 3e69560267
commit 9f9e8770b8
5 changed files with 44 additions and 59 deletions

View File

@ -286,7 +286,6 @@ pub fn highlight(node: &LinkedNode) -> Option<Tag> {
SyntaxKind::Destructuring => None, SyntaxKind::Destructuring => None,
SyntaxKind::DestructAssignment => None, SyntaxKind::DestructAssignment => None,
SyntaxKind::DecoratorMarker => Some(Tag::Comment),
SyntaxKind::Decorator => None, SyntaxKind::Decorator => None,
SyntaxKind::LineComment => Some(Tag::Comment), SyntaxKind::LineComment => Some(Tag::Comment),

View File

@ -13,8 +13,8 @@ pub enum SyntaxKind {
LineComment, LineComment,
/// A block comment: `/* ... */`. /// A block comment: `/* ... */`.
BlockComment, BlockComment,
/// A decorator's marker: `/!` /// A decorator: `/! allow("warning")`
DecoratorMarker, Decorator,
/// The contents of a file or content block. /// The contents of a file or content block.
Markup, Markup,
@ -280,9 +280,6 @@ pub enum SyntaxKind {
Destructuring, Destructuring,
/// A destructuring assignment expression: `(x, y) = (1, 2)`. /// A destructuring assignment expression: `(x, y) = (1, 2)`.
DestructAssignment, DestructAssignment,
/// A decorator: `/! allow("amogus")`
Decorator,
} }
impl SyntaxKind { impl SyntaxKind {
@ -360,7 +357,11 @@ impl SyntaxKind {
pub fn is_trivia(self) -> bool { pub fn is_trivia(self) -> bool {
matches!( matches!(
self, self,
Self::LineComment | Self::BlockComment | Self::Space | Self::Parbreak Self::LineComment
| Self::BlockComment
| Self::Space
| Self::Parbreak
| Self::Decorator
) )
} }
@ -376,7 +377,7 @@ impl SyntaxKind {
Self::Error => "syntax error", Self::Error => "syntax error",
Self::LineComment => "line comment", Self::LineComment => "line comment",
Self::BlockComment => "block comment", Self::BlockComment => "block comment",
Self::DecoratorMarker => "decorator marker", Self::Decorator => "decorator",
Self::Markup => "markup", Self::Markup => "markup",
Self::Text => "text", Self::Text => "text",
Self::Space => "space", Self::Space => "space",
@ -504,7 +505,6 @@ impl SyntaxKind {
Self::FuncReturn => "`return` expression", Self::FuncReturn => "`return` expression",
Self::Destructuring => "destructuring pattern", Self::Destructuring => "destructuring pattern",
Self::DestructAssignment => "destructuring assignment expression", Self::DestructAssignment => "destructuring assignment expression",
Self::Decorator => "decorator",
} }
} }
} }

View File

@ -18,8 +18,10 @@ pub(super) struct Lexer<'s> {
newline: bool, newline: bool,
/// The state held by raw line lexing. /// The state held by raw line lexing.
raw: Vec<(SyntaxKind, usize)>, raw: Vec<(SyntaxKind, usize)>,
/// The state held by decorator lexing. /// The subtree of tokens associated with this token.
decorator: Vec<(SyntaxKind, usize)>, /// The parser is responsible for converting this subtree into syntax nodes
/// matching this structure.
subtree: Vec<(SyntaxKind, usize)>,
/// An error for the last token. /// An error for the last token.
error: Option<SyntaxError>, error: Option<SyntaxError>,
} }
@ -35,8 +37,6 @@ pub(super) enum LexMode {
Code, Code,
/// The contents of a raw block. /// The contents of a raw block.
Raw, Raw,
/// The contents of a decorator.
Decorator,
} }
impl<'s> Lexer<'s> { impl<'s> Lexer<'s> {
@ -49,7 +49,7 @@ impl<'s> Lexer<'s> {
newline: false, newline: false,
error: None, error: None,
raw: Vec::new(), raw: Vec::new(),
decorator: Vec::new(), subtree: Vec::new(),
} }
} }
@ -113,16 +113,9 @@ impl Lexer<'_> {
return kind; return kind;
} }
if self.mode == LexMode::Decorator {
let Some((kind, end)) = self.decorator.pop() else {
return SyntaxKind::End;
};
self.s.jump(end);
return kind;
}
self.newline = false; self.newline = false;
self.error = None; self.error = None;
self.subtree.clear();
let start = self.s.cursor(); let start = self.s.cursor();
match self.s.eat() { match self.s.eat() {
Some(c) if is_space(c, self.mode) => self.whitespace(start, c), Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
@ -142,13 +135,17 @@ impl Lexer<'_> {
LexMode::Math => self.math(start, c), LexMode::Math => self.math(start, c),
LexMode::Code => self.code(start, c), LexMode::Code => self.code(start, c),
LexMode::Raw => unreachable!(), LexMode::Raw => unreachable!(),
LexMode::Decorator => unreachable!(),
}, },
None => SyntaxKind::End, None => SyntaxKind::End,
} }
} }
/// Takes the subtree associated with the latest token.
///
/// The stored list is moved out and an empty vector is left in its place.
/// Each entry is a `(kind, end)` pair, where `end` is the scanner cursor
/// position recorded right after that sub-token was lexed.
pub fn take_subtree(&mut self) -> Vec<(SyntaxKind, usize)> {
std::mem::take(&mut self.subtree)
}
/// Eat whitespace characters greedily. /// Eat whitespace characters greedily.
fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind { fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
let more = self.s.eat_while(|c| is_space(c, self.mode)); let more = self.s.eat_while(|c| is_space(c, self.mode));
@ -194,15 +191,8 @@ impl Lexer<'_> {
SyntaxKind::BlockComment SyntaxKind::BlockComment
} }
}
/// Decorators.
impl Lexer<'_> {
fn decorator(&mut self) -> SyntaxKind { fn decorator(&mut self) -> SyntaxKind {
let start = self.s.cursor() - 2;
self.decorator.clear();
while !self.s.eat_newline() { while !self.s.eat_newline() {
let start = self.s.cursor(); let start = self.s.cursor();
let token = match self.s.eat() { let token = match self.s.eat() {
@ -215,25 +205,20 @@ impl Lexer<'_> {
Some(c @ '0'..='9') => self.number(start, c), Some(c @ '0'..='9') => self.number(start, c),
Some(',') => SyntaxKind::Comma, Some(',') => SyntaxKind::Comma,
Some(c) if is_id_start(c) => self.ident(start), Some(c) if is_id_start(c) => self.ident(start),
Some(c) => { Some(c) => self
return self.error(eco_format!( .error(eco_format!("the character {c} is not valid in a decorator")),
"the character {c} is not valid in a decorator"
))
}
None => break, None => break,
}; };
let end = self.s.cursor(); if token.is_error() {
self.decorator.push((token, end)); return token;
} }
// The saved tokens will be removed in reverse. let end = self.s.cursor();
self.decorator.reverse(); self.subtree.push((token, end));
}
// Already collected all we need from the decorator. SyntaxKind::Decorator
self.s.jump(start + 2);
SyntaxKind::DecoratorMarker
} }
} }

View File

@ -108,7 +108,8 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
SyntaxKind::Space SyntaxKind::Space
| SyntaxKind::Parbreak | SyntaxKind::Parbreak
| SyntaxKind::LineComment | SyntaxKind::LineComment
| SyntaxKind::BlockComment => { | SyntaxKind::BlockComment
| SyntaxKind::Decorator => {
p.eat(); p.eat();
return; return;
} }
@ -121,7 +122,6 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::Link | SyntaxKind::Link
| SyntaxKind::Label => p.eat(), | SyntaxKind::Label => p.eat(),
SyntaxKind::DecoratorMarker => decorator(p),
SyntaxKind::Hash => embedded_code_expr(p), SyntaxKind::Hash => embedded_code_expr(p),
SyntaxKind::Star => strong(p), SyntaxKind::Star => strong(p),
SyntaxKind::Underscore => emph(p), SyntaxKind::Underscore => emph(p),
@ -147,19 +147,6 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
*at_start = false; *at_start = false;
} }
/// Parses a decorator that starts at a `DecoratorMarker` token and wraps
/// the result in a `Decorator` node.
fn decorator(p: &mut Parser) {
// Remember where the decorator begins so it can be wrapped at the end.
let m = p.marker();
// Switch the lexer to decorator mode before handling the marker.
p.enter(LexMode::Decorator);
// NOTE(review): presumably this checks for and consumes the marker token;
// confirm against `Parser::assert`.
p.assert(SyntaxKind::DecoratorMarker);
// Eat every remaining token until the parser reports the end.
while !p.end() {
p.eat();
}
// Leave decorator mode; presumably restores the previous lexer mode.
p.exit();
p.wrap(m, SyntaxKind::Decorator);
}
/// Parses strong content: `*Strong*`. /// Parses strong content: `*Strong*`.
fn strong(p: &mut Parser) { fn strong(p: &mut Parser) {
const END: SyntaxSet = SyntaxSet::new() const END: SyntaxSet = SyntaxSet::new()
@ -1775,9 +1762,23 @@ impl<'s> Parser<'s> {
fn save(&mut self) { fn save(&mut self) {
let text = self.current_text(); let text = self.current_text();
let subtree = self.lexer.take_subtree();
if self.at(SyntaxKind::Error) { if self.at(SyntaxKind::Error) {
let error = self.lexer.take_error().unwrap(); let error = self.lexer.take_error().unwrap();
self.nodes.push(SyntaxNode::error(error, text)); self.nodes.push(SyntaxNode::error(error, text));
} else if !subtree.is_empty() {
let mut text_cursor = self.current_start;
let mut children = Vec::with_capacity(subtree.len());
for (kind, end) in subtree {
// Ensure no errors in the subtree
assert!(!kind.is_error());
children.push(SyntaxNode::leaf(kind, &self.text[text_cursor..end]));
text_cursor = end;
}
self.nodes.push(SyntaxNode::inner(self.current, children));
} else { } else {
self.nodes.push(SyntaxNode::leaf(self.current, text)); self.nodes.push(SyntaxNode::leaf(self.current, text));
} }

View File

@ -54,7 +54,7 @@ pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Parbreak) .add(SyntaxKind::Parbreak)
.add(SyntaxKind::LineComment) .add(SyntaxKind::LineComment)
.add(SyntaxKind::BlockComment) .add(SyntaxKind::BlockComment)
.add(SyntaxKind::DecoratorMarker) .add(SyntaxKind::Decorator)
.add(SyntaxKind::Text) .add(SyntaxKind::Text)
.add(SyntaxKind::Linebreak) .add(SyntaxKind::Linebreak)
.add(SyntaxKind::Escape) .add(SyntaxKind::Escape)