mirror of
https://github.com/typst/typst
synced 2025-07-01 17:52:52 +08:00
proper decorator parsing
done directly in the lexer.
This commit is contained in:
parent
7f1e2fc513
commit
910af8322c
@ -138,7 +138,17 @@ impl Lexer<'_> {
|
|||||||
self.emit_token(token, start)
|
self.emit_token(token, start)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Constructs an error node with the given message.
|
||||||
|
/// The node's text is taken from the given start position up to and
|
||||||
|
/// including the current cursor position.
|
||||||
|
fn emit_error(&self, message: impl Into<EcoString>, start: usize) -> SyntaxNode {
|
||||||
|
let text = self.s.from(start);
|
||||||
|
SyntaxNode::error(SyntaxError::new(message), text)
|
||||||
|
}
|
||||||
|
|
||||||
/// Converts a token into a syntax node based on its kind.
|
/// Converts a token into a syntax node based on its kind.
|
||||||
|
/// The node's text is taken from the given start position up to and
|
||||||
|
/// including the current cursor position.
|
||||||
/// Produces an error node if there are errors.
|
/// Produces an error node if there are errors.
|
||||||
fn emit_token(&mut self, kind: SyntaxKind, start: usize) -> SyntaxNode {
|
fn emit_token(&mut self, kind: SyntaxKind, start: usize) -> SyntaxNode {
|
||||||
let text = self.s.from(start);
|
let text = self.s.from(start);
|
||||||
@ -196,34 +206,132 @@ impl Lexer<'_> {
|
|||||||
|
|
||||||
SyntaxKind::BlockComment
|
SyntaxKind::BlockComment
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decorator lexing and auxiliary methods.
|
||||||
|
impl Lexer<'_> {
|
||||||
fn decorator(&mut self, start: usize) -> SyntaxNode {
|
fn decorator(&mut self, start: usize) -> SyntaxNode {
|
||||||
// TODO: DecoratorMarker node
|
// TODO: DecoratorMarker node
|
||||||
let mut current_start = start;
|
let current_start = start;
|
||||||
let mut subtree = vec![];
|
let mut subtree = vec![];
|
||||||
|
|
||||||
|
// Ignore initial non-newline whitespaces
|
||||||
|
if !self.s.eat_while(is_inline_whitespace).is_empty() {
|
||||||
|
subtree.push(self.emit_token(SyntaxKind::Space, current_start));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decorator's name
|
||||||
|
let current_start = self.s.cursor();
|
||||||
|
if !self.s.eat_if(is_id_start) {
|
||||||
|
self.s.eat_until(is_newline);
|
||||||
|
subtree.push(self.emit_error("expected identifier", current_start));
|
||||||
|
|
||||||
|
// Return a single error node until the end of the decorator.
|
||||||
|
return SyntaxNode::inner(SyntaxKind::Decorator, subtree);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.s.eat_while(is_id_continue);
|
||||||
|
let ident = self.s.from(current_start);
|
||||||
|
|
||||||
|
subtree.push(if ident == "allow" {
|
||||||
|
self.emit_token(SyntaxKind::Ident, current_start)
|
||||||
|
} else {
|
||||||
|
self.emit_error(
|
||||||
|
eco_format!("expected decorator name 'allow', found '{ident}'"),
|
||||||
|
current_start,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
// Left parenthesis before decorator arguments
|
||||||
|
let current_start = self.s.cursor();
|
||||||
|
if !self.s.eat_if('(') {
|
||||||
|
self.s.eat_until(is_newline);
|
||||||
|
subtree.push(self.emit_error("expected left parenthesis", current_start));
|
||||||
|
|
||||||
|
// Return a single error node until the end of the decorator.
|
||||||
|
return SyntaxNode::inner(SyntaxKind::Decorator, subtree);
|
||||||
|
}
|
||||||
|
|
||||||
|
subtree.push(self.emit_token(SyntaxKind::LeftParen, current_start));
|
||||||
|
|
||||||
|
// Decorator arguments
|
||||||
|
// Keep reading until we find a right parenthesis or newline.
|
||||||
|
// We have to check the newline before eating (through '.peek()') to
|
||||||
|
// ensure it is not considered part of the decorator.
|
||||||
|
let mut current_start = self.s.cursor();
|
||||||
|
let mut expecting_comma = false;
|
||||||
|
let mut finished = false;
|
||||||
while !self.s.peek().is_some_and(is_newline) {
|
while !self.s.peek().is_some_and(is_newline) {
|
||||||
let token = match self.s.eat() {
|
let token = match self.s.eat() {
|
||||||
Some(c) if is_space(c, self.mode) => self.whitespace(current_start, c),
|
Some(c) if c.is_whitespace() => {
|
||||||
Some('/') if self.s.eat_if('/') => break,
|
self.s.eat_while(is_inline_whitespace);
|
||||||
|
SyntaxKind::Space
|
||||||
|
}
|
||||||
|
Some('/') if self.s.eat_if('/') => self.line_comment(),
|
||||||
Some('/') if self.s.eat_if('*') => self.block_comment(),
|
Some('/') if self.s.eat_if('*') => self.block_comment(),
|
||||||
Some('(') => SyntaxKind::LeftParen,
|
Some(_) if finished => {
|
||||||
Some(')') => SyntaxKind::RightParen,
|
// After we finished specifying arguments, there must only
|
||||||
Some('"') => self.string(),
|
// be whitespaces until the line ends.
|
||||||
Some(c @ '0'..='9') => self.number(current_start, c),
|
self.s.eat_until(char::is_whitespace);
|
||||||
Some(',') => SyntaxKind::Comma,
|
self.error("expected whitespace")
|
||||||
Some(c) if is_id_start(c) => self.ident(current_start),
|
}
|
||||||
Some(c) => self
|
Some('"') if expecting_comma => {
|
||||||
.error(eco_format!("the character {c} is not valid in a decorator")),
|
self.s.eat_until(|c| c == ',' || is_newline(c));
|
||||||
|
self.error("expected comma")
|
||||||
|
}
|
||||||
|
Some('"') => {
|
||||||
|
expecting_comma = true;
|
||||||
|
self.decorator_string()
|
||||||
|
}
|
||||||
|
Some(',') if expecting_comma => {
|
||||||
|
expecting_comma = false;
|
||||||
|
SyntaxKind::Comma
|
||||||
|
}
|
||||||
|
Some(',') => self.error("unexpected comma"),
|
||||||
|
Some(')') => {
|
||||||
|
finished = true;
|
||||||
|
SyntaxKind::RightParen
|
||||||
|
}
|
||||||
|
Some(c) => self.error(eco_format!(
|
||||||
|
"the character '{c}' is not valid in a decorator"
|
||||||
|
)),
|
||||||
None => break,
|
None => break,
|
||||||
};
|
};
|
||||||
|
|
||||||
let node = self.emit_token(token, current_start);
|
let node = self.emit_token(token, current_start);
|
||||||
subtree.push(node);
|
subtree.push(node);
|
||||||
|
|
||||||
current_start = self.s.cursor();
|
current_start = self.s.cursor();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Right parenthesis (covered above)
|
||||||
|
if !finished {
|
||||||
|
subtree.push(self.emit_error("expected right parenthesis", self.s.cursor()));
|
||||||
|
}
|
||||||
|
|
||||||
SyntaxNode::inner(SyntaxKind::Decorator, subtree)
|
SyntaxNode::inner(SyntaxKind::Decorator, subtree)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn decorator_string(&mut self) -> SyntaxKind {
|
||||||
|
// TODO: Allow more characters in decorators' strings, perhaps allowing
|
||||||
|
// newlines somehow.
|
||||||
|
// Could perhaps use one //! per line so we can break a decorator into
|
||||||
|
// multiple lines in a sensible way.
|
||||||
|
let start = self.s.cursor();
|
||||||
|
self.s.eat_while(|c| !is_newline(c) && c != '"');
|
||||||
|
|
||||||
|
let content = self.s.from(start);
|
||||||
|
if !self.s.eat_if('"') {
|
||||||
|
return self.error("unclosed string");
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(c) = content.chars().find(|c| !is_valid_in_decorator_string(*c)) {
|
||||||
|
return self
|
||||||
|
.error(eco_format!("invalid character '{c}' in a decorator's string"));
|
||||||
|
}
|
||||||
|
|
||||||
|
SyntaxKind::Str
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Markup.
|
/// Markup.
|
||||||
@ -849,6 +957,13 @@ fn is_space(character: char, mode: LexMode) -> bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether a character is a whitespace but not interpreted as a newline by
|
||||||
|
/// Typst.
|
||||||
|
#[inline]
|
||||||
|
pub fn is_inline_whitespace(character: char) -> bool {
|
||||||
|
character.is_whitespace() && !is_newline(character)
|
||||||
|
}
|
||||||
|
|
||||||
/// Whether a character is interpreted as a newline by Typst.
|
/// Whether a character is interpreted as a newline by Typst.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_newline(character: char) -> bool {
|
pub fn is_newline(character: char) -> bool {
|
||||||
@ -981,6 +1096,12 @@ fn is_valid_in_label_literal(c: char) -> bool {
|
|||||||
is_id_continue(c) || matches!(c, ':' | '.')
|
is_id_continue(c) || matches!(c, ':' | '.')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether a character can be part of a string in a decorator.
|
||||||
|
#[inline]
|
||||||
|
fn is_valid_in_decorator_string(c: char) -> bool {
|
||||||
|
is_id_continue(c) || c == '@' || c == '/' || c == '-'
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if this string is valid in a label literal.
|
/// Returns true if this string is valid in a label literal.
|
||||||
pub fn is_valid_label_literal_id(id: &str) -> bool {
|
pub fn is_valid_label_literal_id(id: &str) -> bool {
|
||||||
!id.is_empty() && id.chars().all(is_valid_in_label_literal)
|
!id.is_empty() && id.chars().all(is_valid_in_label_literal)
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
// Test decorators.
|
// Test decorators.
|
||||||
|
|
||||||
--- decorators ---
|
--- basic-decorators ---
|
||||||
|
|
||||||
/! allow()
|
/! allow()
|
||||||
/! allow("A")
|
/! allow("A")
|
||||||
/! allow(5)
|
|
||||||
/! allow("the")
|
/! allow("the")
|
||||||
|
|
||||||
/! allow("unnecessary-stars")
|
/! allow("unnecessary-stars")
|
||||||
@ -20,12 +19,67 @@ $
|
|||||||
#[*a*]
|
#[*a*]
|
||||||
$
|
$
|
||||||
|
|
||||||
|
--- decorator-comments ---
|
||||||
|
|
||||||
|
/! allow("abc") // this is ok
|
||||||
|
|
||||||
|
/! allow("abc") /* this is ok */
|
||||||
|
|
||||||
|
/! allow("abc" /* this is ok */, "abc")
|
||||||
|
|
||||||
|
/! allow("abc" /*
|
||||||
|
this is ok
|
||||||
|
*/, "abc")
|
||||||
|
|
||||||
|
--- decorator-strings ---
|
||||||
|
|
||||||
|
/! allow("@some/thing-there123")
|
||||||
|
|
||||||
--- unknown-decorator ---
|
--- unknown-decorator ---
|
||||||
|
// Error: 4-12 expected decorator name 'allow', found 'whatever'
|
||||||
/! whatever()
|
/! whatever()
|
||||||
|
|
||||||
--- invalid-decorator ---
|
--- invalid-decorator-syntax ---
|
||||||
// Error: 1-13 the character * is not valid in a decorator
|
// Error: 10-11 the character '*' is not valid in a decorator
|
||||||
/! invalid(*)
|
/! allow(*)
|
||||||
|
|
||||||
|
// Error: 10-11 the character '5' is not valid in a decorator
|
||||||
|
/! allow(5)
|
||||||
|
|
||||||
|
// Error: 4-18 expected identifier
|
||||||
|
/! 555!**INVALID!
|
||||||
|
|
||||||
|
// Error: 9-12 expected left parenthesis
|
||||||
|
/! allow)")
|
||||||
|
|
||||||
|
// Error: 10-14 unclosed string
|
||||||
|
// Error: 14 expected right parenthesis
|
||||||
|
/! allow("abc
|
||||||
|
|
||||||
|
// Error: 17-20 expected whitespace
|
||||||
|
/! allow("abc") abc
|
||||||
|
|
||||||
|
// Error: 16-26 expected comma
|
||||||
|
// Error: 26 expected right parenthesis
|
||||||
|
/! allow("abc" "abc") abc
|
||||||
|
|
||||||
|
// Error: 16-21 expected comma
|
||||||
|
/! allow("abc" "abc", "abc")
|
||||||
|
|
||||||
|
// Error: 10-11 unexpected comma
|
||||||
|
/! allow(, "abc", "abc", "abc")
|
||||||
|
|
||||||
|
--- invalid-decorator-strings ---
|
||||||
|
|
||||||
|
// Error: 10-15 invalid character ' ' in a decorator's string
|
||||||
|
/! allow("a b")
|
||||||
|
|
||||||
|
// Error: 10-18 invalid character '|' in a decorator's string
|
||||||
|
/! allow("aaaaa|")
|
||||||
|
|
||||||
|
// TODO: Why does this print / instead of \?
|
||||||
|
// Error: 10-18 invalid character '/' in a decorator's string
|
||||||
|
/! allow("aaaaa\")
|
||||||
|
|
||||||
--- allow-suppresses-warns ---
|
--- allow-suppresses-warns ---
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user