use lexer subtrees to parse decorators

This commit is contained in:
PgBiel 2024-06-20 22:20:39 -03:00
parent 3e69560267
commit 9f9e8770b8
5 changed files with 44 additions and 59 deletions

View File

@ -286,7 +286,6 @@ pub fn highlight(node: &LinkedNode) -> Option<Tag> {
SyntaxKind::Destructuring => None,
SyntaxKind::DestructAssignment => None,
SyntaxKind::DecoratorMarker => Some(Tag::Comment),
SyntaxKind::Decorator => None,
SyntaxKind::LineComment => Some(Tag::Comment),

View File

@ -13,8 +13,8 @@ pub enum SyntaxKind {
LineComment,
/// A block comment: `/* ... */`.
BlockComment,
/// A decorator's marker: `/!`
DecoratorMarker,
/// A decorator: `/! allow("warning")`
Decorator,
/// The contents of a file or content block.
Markup,
@ -280,9 +280,6 @@ pub enum SyntaxKind {
Destructuring,
/// A destructuring assignment expression: `(x, y) = (1, 2)`.
DestructAssignment,
/// A decorator: `/! allow("amogus")`
Decorator,
}
impl SyntaxKind {
@ -360,7 +357,11 @@ impl SyntaxKind {
pub fn is_trivia(self) -> bool {
matches!(
self,
Self::LineComment | Self::BlockComment | Self::Space | Self::Parbreak
Self::LineComment
| Self::BlockComment
| Self::Space
| Self::Parbreak
| Self::Decorator
)
}
@ -376,7 +377,7 @@ impl SyntaxKind {
Self::Error => "syntax error",
Self::LineComment => "line comment",
Self::BlockComment => "block comment",
Self::DecoratorMarker => "decorator marker",
Self::Decorator => "decorator",
Self::Markup => "markup",
Self::Text => "text",
Self::Space => "space",
@ -504,7 +505,6 @@ impl SyntaxKind {
Self::FuncReturn => "`return` expression",
Self::Destructuring => "destructuring pattern",
Self::DestructAssignment => "destructuring assignment expression",
Self::Decorator => "decorator",
}
}
}

View File

@ -18,8 +18,10 @@ pub(super) struct Lexer<'s> {
newline: bool,
/// The state held by raw line lexing.
raw: Vec<(SyntaxKind, usize)>,
/// The state held by decorator lexing.
decorator: Vec<(SyntaxKind, usize)>,
/// The subtree of tokens associated with this token.
/// The parser is responsible for converting this subtree into syntax nodes
/// matching this structure.
subtree: Vec<(SyntaxKind, usize)>,
/// An error for the last token.
error: Option<SyntaxError>,
}
@ -35,8 +37,6 @@ pub(super) enum LexMode {
Code,
/// The contents of a raw block.
Raw,
/// The contents of a decorator.
Decorator,
}
impl<'s> Lexer<'s> {
@ -49,7 +49,7 @@ impl<'s> Lexer<'s> {
newline: false,
error: None,
raw: Vec::new(),
decorator: Vec::new(),
subtree: Vec::new(),
}
}
@ -113,16 +113,9 @@ impl Lexer<'_> {
return kind;
}
if self.mode == LexMode::Decorator {
let Some((kind, end)) = self.decorator.pop() else {
return SyntaxKind::End;
};
self.s.jump(end);
return kind;
}
self.newline = false;
self.error = None;
self.subtree.clear();
let start = self.s.cursor();
match self.s.eat() {
Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
@ -142,13 +135,17 @@ impl Lexer<'_> {
LexMode::Math => self.math(start, c),
LexMode::Code => self.code(start, c),
LexMode::Raw => unreachable!(),
LexMode::Decorator => unreachable!(),
},
None => SyntaxKind::End,
}
}
/// Hands the subtree recorded for the latest token over to the caller,
/// leaving an empty subtree behind in the lexer.
pub fn take_subtree(&mut self) -> Vec<(SyntaxKind, usize)> {
    let subtree = &mut self.subtree;
    std::mem::take(subtree)
}
/// Eat whitespace characters greedily.
fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
let more = self.s.eat_while(|c| is_space(c, self.mode));
@ -194,15 +191,8 @@ impl Lexer<'_> {
SyntaxKind::BlockComment
}
}
/// Decorators.
impl Lexer<'_> {
fn decorator(&mut self) -> SyntaxKind {
let start = self.s.cursor() - 2;
self.decorator.clear();
while !self.s.eat_newline() {
let start = self.s.cursor();
let token = match self.s.eat() {
@ -215,25 +205,20 @@ impl Lexer<'_> {
Some(c @ '0'..='9') => self.number(start, c),
Some(',') => SyntaxKind::Comma,
Some(c) if is_id_start(c) => self.ident(start),
Some(c) => {
return self.error(eco_format!(
"the character {c} is not valid in a decorator"
))
}
Some(c) => self
.error(eco_format!("the character {c} is not valid in a decorator")),
None => break,
};
let end = self.s.cursor();
self.decorator.push((token, end));
if token.is_error() {
return token;
}
// The saved tokens will be removed in reverse.
self.decorator.reverse();
let end = self.s.cursor();
self.subtree.push((token, end));
}
// Already collected all we need from the decorator.
self.s.jump(start + 2);
SyntaxKind::DecoratorMarker
SyntaxKind::Decorator
}
}

View File

@ -108,7 +108,8 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
SyntaxKind::Space
| SyntaxKind::Parbreak
| SyntaxKind::LineComment
| SyntaxKind::BlockComment => {
| SyntaxKind::BlockComment
| SyntaxKind::Decorator => {
p.eat();
return;
}
@ -121,7 +122,6 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::Link
| SyntaxKind::Label => p.eat(),
SyntaxKind::DecoratorMarker => decorator(p),
SyntaxKind::Hash => embedded_code_expr(p),
SyntaxKind::Star => strong(p),
SyntaxKind::Underscore => emph(p),
@ -147,19 +147,6 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
*at_start = false;
}
/// Parses a decorator.
///
/// Switches the parser into `LexMode::Decorator`, asserts the leading
/// `DecoratorMarker`, eats tokens until `p.end()` reports true, leaves the
/// mode again and finally calls `p.wrap` with the marker taken on entry to
/// produce a `Decorator` node.
fn decorator(p: &mut Parser) {
    let start = p.marker();
    p.enter(LexMode::Decorator);
    p.assert(SyntaxKind::DecoratorMarker);

    // Consume every remaining token of the decorator.
    loop {
        if p.end() {
            break;
        }
        p.eat();
    }

    p.exit();
    p.wrap(start, SyntaxKind::Decorator);
}
/// Parses strong content: `*Strong*`.
fn strong(p: &mut Parser) {
const END: SyntaxSet = SyntaxSet::new()
@ -1775,9 +1762,23 @@ impl<'s> Parser<'s> {
fn save(&mut self) {
let text = self.current_text();
let subtree = self.lexer.take_subtree();
if self.at(SyntaxKind::Error) {
let error = self.lexer.take_error().unwrap();
self.nodes.push(SyntaxNode::error(error, text));
} else if !subtree.is_empty() {
let mut text_cursor = self.current_start;
let mut children = Vec::with_capacity(subtree.len());
for (kind, end) in subtree {
// Ensure no errors in the subtree
assert!(!kind.is_error());
children.push(SyntaxNode::leaf(kind, &self.text[text_cursor..end]));
text_cursor = end;
}
self.nodes.push(SyntaxNode::inner(self.current, children));
} else {
self.nodes.push(SyntaxNode::leaf(self.current, text));
}

View File

@ -54,7 +54,7 @@ pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Parbreak)
.add(SyntaxKind::LineComment)
.add(SyntaxKind::BlockComment)
.add(SyntaxKind::DecoratorMarker)
.add(SyntaxKind::Decorator)
.add(SyntaxKind::Text)
.add(SyntaxKind::Linebreak)
.add(SyntaxKind::Escape)