mirror of
https://github.com/typst/typst
synced 2025-07-01 09:42:53 +08:00
proper decorator parsing
done directly in the lexer.
This commit is contained in:
parent
7f1e2fc513
commit
910af8322c
@ -138,7 +138,17 @@ impl Lexer<'_> {
|
||||
self.emit_token(token, start)
|
||||
}
|
||||
|
||||
/// Constructs an error node with the given message.
|
||||
/// The node's text is taken from the given start position up to and
|
||||
/// including the current cursor position.
|
||||
fn emit_error(&self, message: impl Into<EcoString>, start: usize) -> SyntaxNode {
|
||||
let text = self.s.from(start);
|
||||
SyntaxNode::error(SyntaxError::new(message), text)
|
||||
}
|
||||
|
||||
/// Converts a token into a syntax node based on its kind.
|
||||
/// The node's text is taken from the given start position up to and
|
||||
/// including the current cursor position.
|
||||
/// Produces an error node if there are errors.
|
||||
fn emit_token(&mut self, kind: SyntaxKind, start: usize) -> SyntaxNode {
|
||||
let text = self.s.from(start);
|
||||
@ -196,34 +206,132 @@ impl Lexer<'_> {
|
||||
|
||||
SyntaxKind::BlockComment
|
||||
}
|
||||
}
|
||||
|
||||
/// Decorator lexing and auxiliary methods.
|
||||
impl Lexer<'_> {
|
||||
fn decorator(&mut self, start: usize) -> SyntaxNode {
|
||||
// TODO: DecoratorMarker node
|
||||
let mut current_start = start;
|
||||
let current_start = start;
|
||||
let mut subtree = vec![];
|
||||
|
||||
// Ignore initial non-newline whitespaces
|
||||
if !self.s.eat_while(is_inline_whitespace).is_empty() {
|
||||
subtree.push(self.emit_token(SyntaxKind::Space, current_start));
|
||||
}
|
||||
|
||||
// Decorator's name
|
||||
let current_start = self.s.cursor();
|
||||
if !self.s.eat_if(is_id_start) {
|
||||
self.s.eat_until(is_newline);
|
||||
subtree.push(self.emit_error("expected identifier", current_start));
|
||||
|
||||
// Return a single error node until the end of the decorator.
|
||||
return SyntaxNode::inner(SyntaxKind::Decorator, subtree);
|
||||
}
|
||||
|
||||
self.s.eat_while(is_id_continue);
|
||||
let ident = self.s.from(current_start);
|
||||
|
||||
subtree.push(if ident == "allow" {
|
||||
self.emit_token(SyntaxKind::Ident, current_start)
|
||||
} else {
|
||||
self.emit_error(
|
||||
eco_format!("expected decorator name 'allow', found '{ident}'"),
|
||||
current_start,
|
||||
)
|
||||
});
|
||||
|
||||
// Left parenthesis before decorator arguments
|
||||
let current_start = self.s.cursor();
|
||||
if !self.s.eat_if('(') {
|
||||
self.s.eat_until(is_newline);
|
||||
subtree.push(self.emit_error("expected left parenthesis", current_start));
|
||||
|
||||
// Return a single error node until the end of the decorator.
|
||||
return SyntaxNode::inner(SyntaxKind::Decorator, subtree);
|
||||
}
|
||||
|
||||
subtree.push(self.emit_token(SyntaxKind::LeftParen, current_start));
|
||||
|
||||
// Decorator arguments
|
||||
// Keep reading until we find a right parenthesis or newline.
|
||||
// We have to check the newline before eating (through '.peek()') to
|
||||
// ensure it is not considered part of the decorator.
|
||||
let mut current_start = self.s.cursor();
|
||||
let mut expecting_comma = false;
|
||||
let mut finished = false;
|
||||
while !self.s.peek().is_some_and(is_newline) {
|
||||
let token = match self.s.eat() {
|
||||
Some(c) if is_space(c, self.mode) => self.whitespace(current_start, c),
|
||||
Some('/') if self.s.eat_if('/') => break,
|
||||
Some(c) if c.is_whitespace() => {
|
||||
self.s.eat_while(is_inline_whitespace);
|
||||
SyntaxKind::Space
|
||||
}
|
||||
Some('/') if self.s.eat_if('/') => self.line_comment(),
|
||||
Some('/') if self.s.eat_if('*') => self.block_comment(),
|
||||
Some('(') => SyntaxKind::LeftParen,
|
||||
Some(')') => SyntaxKind::RightParen,
|
||||
Some('"') => self.string(),
|
||||
Some(c @ '0'..='9') => self.number(current_start, c),
|
||||
Some(',') => SyntaxKind::Comma,
|
||||
Some(c) if is_id_start(c) => self.ident(current_start),
|
||||
Some(c) => self
|
||||
.error(eco_format!("the character {c} is not valid in a decorator")),
|
||||
Some(_) if finished => {
|
||||
// After we finished specifying arguments, there must only
|
||||
// be whitespaces until the line ends.
|
||||
self.s.eat_until(char::is_whitespace);
|
||||
self.error("expected whitespace")
|
||||
}
|
||||
Some('"') if expecting_comma => {
|
||||
self.s.eat_until(|c| c == ',' || is_newline(c));
|
||||
self.error("expected comma")
|
||||
}
|
||||
Some('"') => {
|
||||
expecting_comma = true;
|
||||
self.decorator_string()
|
||||
}
|
||||
Some(',') if expecting_comma => {
|
||||
expecting_comma = false;
|
||||
SyntaxKind::Comma
|
||||
}
|
||||
Some(',') => self.error("unexpected comma"),
|
||||
Some(')') => {
|
||||
finished = true;
|
||||
SyntaxKind::RightParen
|
||||
}
|
||||
Some(c) => self.error(eco_format!(
|
||||
"the character '{c}' is not valid in a decorator"
|
||||
)),
|
||||
None => break,
|
||||
};
|
||||
|
||||
let node = self.emit_token(token, current_start);
|
||||
subtree.push(node);
|
||||
|
||||
current_start = self.s.cursor();
|
||||
}
|
||||
|
||||
// Right parenthesis (covered above)
|
||||
if !finished {
|
||||
subtree.push(self.emit_error("expected right parenthesis", self.s.cursor()));
|
||||
}
|
||||
|
||||
SyntaxNode::inner(SyntaxKind::Decorator, subtree)
|
||||
}
|
||||
|
||||
fn decorator_string(&mut self) -> SyntaxKind {
|
||||
// TODO: Allow more characters in decorators' strings, perhaps allowing
|
||||
// newlines somehow.
|
||||
// Could perhaps use one //! per line so we can break a decorator into
|
||||
// multiple lines in a sensible way.
|
||||
let start = self.s.cursor();
|
||||
self.s.eat_while(|c| !is_newline(c) && c != '"');
|
||||
|
||||
let content = self.s.from(start);
|
||||
if !self.s.eat_if('"') {
|
||||
return self.error("unclosed string");
|
||||
}
|
||||
|
||||
if let Some(c) = content.chars().find(|c| !is_valid_in_decorator_string(*c)) {
|
||||
return self
|
||||
.error(eco_format!("invalid character '{c}' in a decorator's string"));
|
||||
}
|
||||
|
||||
SyntaxKind::Str
|
||||
}
|
||||
}
|
||||
|
||||
/// Markup.
|
||||
@ -849,6 +957,13 @@ fn is_space(character: char, mode: LexMode) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a character is a whitespace but not interpreted as a newline by
|
||||
/// Typst.
|
||||
#[inline]
|
||||
pub fn is_inline_whitespace(character: char) -> bool {
|
||||
character.is_whitespace() && !is_newline(character)
|
||||
}
|
||||
|
||||
/// Whether a character is interpreted as a newline by Typst.
|
||||
#[inline]
|
||||
pub fn is_newline(character: char) -> bool {
|
||||
@ -981,6 +1096,12 @@ fn is_valid_in_label_literal(c: char) -> bool {
|
||||
is_id_continue(c) || matches!(c, ':' | '.')
|
||||
}
|
||||
|
||||
/// Whether a character can be part of a string in a decorator.
|
||||
#[inline]
|
||||
fn is_valid_in_decorator_string(c: char) -> bool {
|
||||
is_id_continue(c) || c == '@' || c == '/' || c == '-'
|
||||
}
|
||||
|
||||
/// Returns true if this string is valid in a label literal.
|
||||
pub fn is_valid_label_literal_id(id: &str) -> bool {
|
||||
!id.is_empty() && id.chars().all(is_valid_in_label_literal)
|
||||
|
@ -1,10 +1,9 @@
|
||||
// Test decorators.
|
||||
|
||||
--- decorators ---
|
||||
--- basic-decorators ---
|
||||
|
||||
/! allow()
|
||||
/! allow("A")
|
||||
/! allow(5)
|
||||
/! allow("the")
|
||||
|
||||
/! allow("unnecessary-stars")
|
||||
@ -20,12 +19,67 @@ $
|
||||
#[*a*]
|
||||
$
|
||||
|
||||
--- decorator-comments ---
|
||||
|
||||
/! allow("abc") // this is ok
|
||||
|
||||
/! allow("abc") /* this is ok */
|
||||
|
||||
/! allow("abc" /* this is ok */, "abc")
|
||||
|
||||
/! allow("abc" /*
|
||||
this is ok
|
||||
*/, "abc")
|
||||
|
||||
--- decorator-strings ---
|
||||
|
||||
/! allow("@some/thing-there123")
|
||||
|
||||
--- unknown-decorator ---
|
||||
// Error: 4-12 expected decorator name 'allow', found 'whatever'
|
||||
/! whatever()
|
||||
|
||||
--- invalid-decorator ---
|
||||
// Error: 1-13 the character * is not valid in a decorator
|
||||
/! invalid(*)
|
||||
--- invalid-decorator-syntax ---
|
||||
// Error: 10-11 the character '*' is not valid in a decorator
|
||||
/! allow(*)
|
||||
|
||||
// Error: 10-11 the character '5' is not valid in a decorator
|
||||
/! allow(5)
|
||||
|
||||
// Error: 4-18 expected identifier
|
||||
/! 555!**INVALID!
|
||||
|
||||
// Error: 9-12 expected left parenthesis
|
||||
/! allow)")
|
||||
|
||||
// Error: 10-14 unclosed string
|
||||
// Error: 14 expected right parenthesis
|
||||
/! allow("abc
|
||||
|
||||
// Error: 17-20 expected whitespace
|
||||
/! allow("abc") abc
|
||||
|
||||
// Error: 16-26 expected comma
|
||||
// Error: 26 expected right parenthesis
|
||||
/! allow("abc" "abc") abc
|
||||
|
||||
// Error: 16-21 expected comma
|
||||
/! allow("abc" "abc", "abc")
|
||||
|
||||
// Error: 10-11 unexpected comma
|
||||
/! allow(, "abc", "abc", "abc")
|
||||
|
||||
--- invalid-decorator-strings ---
|
||||
|
||||
// Error: 10-15 invalid character ' ' in a decorator's string
|
||||
/! allow("a b")
|
||||
|
||||
// Error: 10-18 invalid character '|' in a decorator's string
|
||||
/! allow("aaaaa|")
|
||||
|
||||
// TODO: Why does this print / instead of \?
|
||||
// Error: 10-18 invalid character '/' in a decorator's string
|
||||
/! allow("aaaaa\")
|
||||
|
||||
--- allow-suppresses-warns ---
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user