proper decorator parsing

done directly in the lexer.
This commit is contained in:
PgBiel 2024-06-24 15:54:56 -03:00
parent 7f1e2fc513
commit 910af8322c
2 changed files with 191 additions and 16 deletions

View File

@ -138,7 +138,17 @@ impl Lexer<'_> {
self.emit_token(token, start)
}
/// Constructs an error node with the given message.
/// The node's text is taken from the given start position up to and
/// including the current cursor position.
fn emit_error(&self, message: impl Into<EcoString>, start: usize) -> SyntaxNode {
let text = self.s.from(start);
SyntaxNode::error(SyntaxError::new(message), text)
}
/// Converts a token into a syntax node based on its kind.
/// The node's text is taken from the given start position up to and
/// including the current cursor position.
/// Produces an error node if there are errors.
fn emit_token(&mut self, kind: SyntaxKind, start: usize) -> SyntaxNode {
let text = self.s.from(start);
@ -196,34 +206,132 @@ impl Lexer<'_> {
SyntaxKind::BlockComment
}
}
/// Decorator lexing and auxiliary methods.
impl Lexer<'_> {
/// Lexes a decorator into a `SyntaxKind::Decorator` inner node.
///
/// `start` is used as the start position of the first emitted token (the
/// optional leading `Space`). The children pushed, in order, are: optional
/// leading inline whitespace, the decorator name (an `Ident` when it is
/// exactly `allow`, otherwise an error node), a `LeftParen`, and then one
/// node per token read until a newline is peeked or the input ends.
///
/// Returns early, after pushing one error node that covers the rest of the
/// line, when the name does not start with an identifier character or when
/// no `(` follows the name.
///
/// NOTE(review): this listing appears to interleave the previous and the
/// current revision of the function (two consecutive `current_start`
/// bindings, a repeated `Some('/') if self.s.eat_if('/')` arm, and further
/// arms placed after a catch-all `Some(c)` arm). Confirm the arm order
/// against the real file before relying on what is shown here.
fn decorator(&mut self, start: usize) -> SyntaxNode {
// TODO: DecoratorMarker node
let mut current_start = start;
let current_start = start;
// Child nodes of the resulting `Decorator` node, in source order.
let mut subtree = vec![];
// Ignore initial non-newline whitespaces
if !self.s.eat_while(is_inline_whitespace).is_empty() {
subtree.push(self.emit_token(SyntaxKind::Space, current_start));
}
// Decorator's name
let current_start = self.s.cursor();
if !self.s.eat_if(is_id_start) {
// Consume the rest of the line so the error node spans all of it.
self.s.eat_until(is_newline);
subtree.push(self.emit_error("expected identifier", current_start));
// Return a single error node until the end of the decorator.
return SyntaxNode::inner(SyntaxKind::Decorator, subtree);
}
self.s.eat_while(is_id_continue);
let ident = self.s.from(current_start);
// Only the name `allow` is accepted; any other identifier becomes an
// error node, but lexing of the decorator continues afterwards.
subtree.push(if ident == "allow" {
self.emit_token(SyntaxKind::Ident, current_start)
} else {
self.emit_error(
eco_format!("expected decorator name 'allow', found '{ident}'"),
current_start,
)
});
// Left parenthesis before decorator arguments
let current_start = self.s.cursor();
if !self.s.eat_if('(') {
// Consume the rest of the line so the error node spans all of it.
self.s.eat_until(is_newline);
subtree.push(self.emit_error("expected left parenthesis", current_start));
// Return a single error node until the end of the decorator.
return SyntaxNode::inner(SyntaxKind::Decorator, subtree);
}
subtree.push(self.emit_token(SyntaxKind::LeftParen, current_start));
// Decorator arguments
// Keep reading until we find a right parenthesis or newline.
// We have to check the newline before eating (through '.peek()') to
// ensure it is not considered part of the decorator.
let mut current_start = self.s.cursor();
// Set after a string argument is read and cleared by the comma that
// follows it.
let mut expecting_comma = false;
// Set once a `)` has been read.
let mut finished = false;
while !self.s.peek().is_some_and(is_newline) {
let token = match self.s.eat() {
Some(c) if is_space(c, self.mode) => self.whitespace(current_start, c),
Some('/') if self.s.eat_if('/') => break,
Some(c) if c.is_whitespace() => {
self.s.eat_while(is_inline_whitespace);
SyntaxKind::Space
}
Some('/') if self.s.eat_if('/') => self.line_comment(),
Some('/') if self.s.eat_if('*') => self.block_comment(),
Some('(') => SyntaxKind::LeftParen,
Some(')') => SyntaxKind::RightParen,
Some('"') => self.string(),
Some(c @ '0'..='9') => self.number(current_start, c),
Some(',') => SyntaxKind::Comma,
Some(c) if is_id_start(c) => self.ident(current_start),
// NOTE(review): as listed, this catch-all precedes the arms below,
// which would make them unreachable — likely a diff artifact; confirm.
Some(c) => self
.error(eco_format!("the character {c} is not valid in a decorator")),
Some(_) if finished => {
// After we finished specifying arguments, there must only
// be whitespaces until the line ends.
self.s.eat_until(char::is_whitespace);
self.error("expected whitespace")
}
Some('"') if expecting_comma => {
// Skip ahead to the next comma (or the end of the line) so the
// error covers the whole misplaced argument.
self.s.eat_until(|c| c == ',' || is_newline(c));
self.error("expected comma")
}
Some('"') => {
expecting_comma = true;
self.decorator_string()
}
Some(',') if expecting_comma => {
expecting_comma = false;
SyntaxKind::Comma
}
Some(',') => self.error("unexpected comma"),
Some(')') => {
finished = true;
SyntaxKind::RightParen
}
Some(c) => self.error(eco_format!(
"the character '{c}' is not valid in a decorator"
)),
None => break,
};
let node = self.emit_token(token, current_start);
subtree.push(node);
// The next token starts where this one ended.
current_start = self.s.cursor();
}
// Right parenthesis (covered above)
if !finished {
// The start passed here is the cursor itself, so the error node
// presumably carries empty text — TODO confirm.
subtree.push(self.emit_error("expected right parenthesis", self.s.cursor()));
}
SyntaxNode::inner(SyntaxKind::Decorator, subtree)
}
fn decorator_string(&mut self) -> SyntaxKind {
// TODO: Allow more characters in decorators' strings, perhaps allowing
// newlines somehow.
// Could perhaps use one //! per line so we can break a decorator into
// multiple lines in a sensible way.
let start = self.s.cursor();
self.s.eat_while(|c| !is_newline(c) && c != '"');
let content = self.s.from(start);
if !self.s.eat_if('"') {
return self.error("unclosed string");
}
if let Some(c) = content.chars().find(|c| !is_valid_in_decorator_string(*c)) {
return self
.error(eco_format!("invalid character '{c}' in a decorator's string"));
}
SyntaxKind::Str
}
}
/// Markup.
@ -849,6 +957,13 @@ fn is_space(character: char, mode: LexMode) -> bool {
}
}
/// Whether a character counts as whitespace without being one of the
/// characters Typst interprets as a newline.
#[inline]
pub fn is_inline_whitespace(character: char) -> bool {
    if is_newline(character) {
        return false;
    }
    character.is_whitespace()
}
/// Whether a character is interpreted as a newline by Typst.
#[inline]
pub fn is_newline(character: char) -> bool {
@ -981,6 +1096,12 @@ fn is_valid_in_label_literal(c: char) -> bool {
is_id_continue(c) || matches!(c, ':' | '.')
}
/// Whether a character may appear inside a decorator's string argument:
/// any identifier-continue character, or one of `@`, `/` and `-`.
#[inline]
fn is_valid_in_decorator_string(c: char) -> bool {
    is_id_continue(c) || matches!(c, '@' | '/' | '-')
}
/// Returns true if this string is valid in a label literal.
pub fn is_valid_label_literal_id(id: &str) -> bool {
!id.is_empty() && id.chars().all(is_valid_in_label_literal)

View File

@ -1,10 +1,9 @@
// Test decorators.
--- decorators ---
--- basic-decorators ---
/! allow()
/! allow("A")
/! allow(5)
/! allow("the")
/! allow("unnecessary-stars")
@ -20,12 +19,67 @@ $
#[*a*]
$
--- decorator-comments ---
/! allow("abc") // this is ok
/! allow("abc") /* this is ok */
/! allow("abc" /* this is ok */, "abc")
/! allow("abc" /*
this is ok
*/, "abc")
--- decorator-strings ---
/! allow("@some/thing-there123")
--- unknown-decorator ---
// Error: 4-12 expected decorator name 'allow', found 'whatever'
/! whatever()
--- invalid-decorator ---
// Error: 1-13 the character * is not valid in a decorator
/! invalid(*)
--- invalid-decorator-syntax ---
// Error: 10-11 the character '*' is not valid in a decorator
/! allow(*)
// Error: 10-11 the character '5' is not valid in a decorator
/! allow(5)
// Error: 4-18 expected identifier
/! 555!**INVALID!
// Error: 9-12 expected left parenthesis
/! allow)")
// Error: 10-14 unclosed string
// Error: 14 expected right parenthesis
/! allow("abc
// Error: 17-20 expected whitespace
/! allow("abc") abc
// Error: 16-26 expected comma
// Error: 26 expected right parenthesis
/! allow("abc" "abc") abc
// Error: 16-21 expected comma
/! allow("abc" "abc", "abc")
// Error: 10-11 unexpected comma
/! allow(, "abc", "abc", "abc")
--- invalid-decorator-strings ---
// Error: 10-15 invalid character ' ' in a decorator's string
/! allow("a b")
// Error: 10-18 invalid character '|' in a decorator's string
/! allow("aaaaa|")
// TODO: Why does this print / instead of \?
// Error: 10-18 invalid character '/' in a decorator's string
/! allow("aaaaa\")
--- allow-suppresses-warns ---