mirror of
https://github.com/typst/typst
synced 2025-05-16 18:15:29 +08:00
use lexer subtrees to parse decorators
This commit is contained in:
parent
3e69560267
commit
9f9e8770b8
@ -286,7 +286,6 @@ pub fn highlight(node: &LinkedNode) -> Option<Tag> {
|
|||||||
SyntaxKind::Destructuring => None,
|
SyntaxKind::Destructuring => None,
|
||||||
SyntaxKind::DestructAssignment => None,
|
SyntaxKind::DestructAssignment => None,
|
||||||
|
|
||||||
SyntaxKind::DecoratorMarker => Some(Tag::Comment),
|
|
||||||
SyntaxKind::Decorator => None,
|
SyntaxKind::Decorator => None,
|
||||||
|
|
||||||
SyntaxKind::LineComment => Some(Tag::Comment),
|
SyntaxKind::LineComment => Some(Tag::Comment),
|
||||||
|
@ -13,8 +13,8 @@ pub enum SyntaxKind {
|
|||||||
LineComment,
|
LineComment,
|
||||||
/// A block comment: `/* ... */`.
|
/// A block comment: `/* ... */`.
|
||||||
BlockComment,
|
BlockComment,
|
||||||
/// A decorator's marker: `/!`
|
/// A decorator: `/! allow("warning")`
|
||||||
DecoratorMarker,
|
Decorator,
|
||||||
|
|
||||||
/// The contents of a file or content block.
|
/// The contents of a file or content block.
|
||||||
Markup,
|
Markup,
|
||||||
@ -280,9 +280,6 @@ pub enum SyntaxKind {
|
|||||||
Destructuring,
|
Destructuring,
|
||||||
/// A destructuring assignment expression: `(x, y) = (1, 2)`.
|
/// A destructuring assignment expression: `(x, y) = (1, 2)`.
|
||||||
DestructAssignment,
|
DestructAssignment,
|
||||||
|
|
||||||
/// A decorator: `/! allow("amogus")`
|
|
||||||
Decorator,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SyntaxKind {
|
impl SyntaxKind {
|
||||||
@ -360,7 +357,11 @@ impl SyntaxKind {
|
|||||||
pub fn is_trivia(self) -> bool {
|
pub fn is_trivia(self) -> bool {
|
||||||
matches!(
|
matches!(
|
||||||
self,
|
self,
|
||||||
Self::LineComment | Self::BlockComment | Self::Space | Self::Parbreak
|
Self::LineComment
|
||||||
|
| Self::BlockComment
|
||||||
|
| Self::Space
|
||||||
|
| Self::Parbreak
|
||||||
|
| Self::Decorator
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -376,7 +377,7 @@ impl SyntaxKind {
|
|||||||
Self::Error => "syntax error",
|
Self::Error => "syntax error",
|
||||||
Self::LineComment => "line comment",
|
Self::LineComment => "line comment",
|
||||||
Self::BlockComment => "block comment",
|
Self::BlockComment => "block comment",
|
||||||
Self::DecoratorMarker => "decorator marker",
|
Self::Decorator => "decorator",
|
||||||
Self::Markup => "markup",
|
Self::Markup => "markup",
|
||||||
Self::Text => "text",
|
Self::Text => "text",
|
||||||
Self::Space => "space",
|
Self::Space => "space",
|
||||||
@ -504,7 +505,6 @@ impl SyntaxKind {
|
|||||||
Self::FuncReturn => "`return` expression",
|
Self::FuncReturn => "`return` expression",
|
||||||
Self::Destructuring => "destructuring pattern",
|
Self::Destructuring => "destructuring pattern",
|
||||||
Self::DestructAssignment => "destructuring assignment expression",
|
Self::DestructAssignment => "destructuring assignment expression",
|
||||||
Self::Decorator => "decorator",
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,8 +18,10 @@ pub(super) struct Lexer<'s> {
|
|||||||
newline: bool,
|
newline: bool,
|
||||||
/// The state held by raw line lexing.
|
/// The state held by raw line lexing.
|
||||||
raw: Vec<(SyntaxKind, usize)>,
|
raw: Vec<(SyntaxKind, usize)>,
|
||||||
/// The state held by decorator lexing.
|
/// The subtree of tokens associated with this token.
|
||||||
decorator: Vec<(SyntaxKind, usize)>,
|
/// The parser is responsible for converting this subtree into syntax nodes
|
||||||
|
/// matching this structure.
|
||||||
|
subtree: Vec<(SyntaxKind, usize)>,
|
||||||
/// An error for the last token.
|
/// An error for the last token.
|
||||||
error: Option<SyntaxError>,
|
error: Option<SyntaxError>,
|
||||||
}
|
}
|
||||||
@ -35,8 +37,6 @@ pub(super) enum LexMode {
|
|||||||
Code,
|
Code,
|
||||||
/// The contents of a raw block.
|
/// The contents of a raw block.
|
||||||
Raw,
|
Raw,
|
||||||
/// The contents of a decorator.
|
|
||||||
Decorator,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Lexer<'s> {
|
impl<'s> Lexer<'s> {
|
||||||
@ -49,7 +49,7 @@ impl<'s> Lexer<'s> {
|
|||||||
newline: false,
|
newline: false,
|
||||||
error: None,
|
error: None,
|
||||||
raw: Vec::new(),
|
raw: Vec::new(),
|
||||||
decorator: Vec::new(),
|
subtree: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,16 +113,9 @@ impl Lexer<'_> {
|
|||||||
return kind;
|
return kind;
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.mode == LexMode::Decorator {
|
|
||||||
let Some((kind, end)) = self.decorator.pop() else {
|
|
||||||
return SyntaxKind::End;
|
|
||||||
};
|
|
||||||
self.s.jump(end);
|
|
||||||
return kind;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.newline = false;
|
self.newline = false;
|
||||||
self.error = None;
|
self.error = None;
|
||||||
|
self.subtree.clear();
|
||||||
let start = self.s.cursor();
|
let start = self.s.cursor();
|
||||||
match self.s.eat() {
|
match self.s.eat() {
|
||||||
Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
|
Some(c) if is_space(c, self.mode) => self.whitespace(start, c),
|
||||||
@ -142,13 +135,17 @@ impl Lexer<'_> {
|
|||||||
LexMode::Math => self.math(start, c),
|
LexMode::Math => self.math(start, c),
|
||||||
LexMode::Code => self.code(start, c),
|
LexMode::Code => self.code(start, c),
|
||||||
LexMode::Raw => unreachable!(),
|
LexMode::Raw => unreachable!(),
|
||||||
LexMode::Decorator => unreachable!(),
|
|
||||||
},
|
},
|
||||||
|
|
||||||
None => SyntaxKind::End,
|
None => SyntaxKind::End,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Takes the subtree associated with the latest token.
|
||||||
|
pub fn take_subtree(&mut self) -> Vec<(SyntaxKind, usize)> {
|
||||||
|
std::mem::take(&mut self.subtree)
|
||||||
|
}
|
||||||
|
|
||||||
/// Eat whitespace characters greedily.
|
/// Eat whitespace characters greedily.
|
||||||
fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
|
fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind {
|
||||||
let more = self.s.eat_while(|c| is_space(c, self.mode));
|
let more = self.s.eat_while(|c| is_space(c, self.mode));
|
||||||
@ -194,15 +191,8 @@ impl Lexer<'_> {
|
|||||||
|
|
||||||
SyntaxKind::BlockComment
|
SyntaxKind::BlockComment
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/// Decorators.
|
|
||||||
impl Lexer<'_> {
|
|
||||||
fn decorator(&mut self) -> SyntaxKind {
|
fn decorator(&mut self) -> SyntaxKind {
|
||||||
let start = self.s.cursor() - 2;
|
|
||||||
|
|
||||||
self.decorator.clear();
|
|
||||||
|
|
||||||
while !self.s.eat_newline() {
|
while !self.s.eat_newline() {
|
||||||
let start = self.s.cursor();
|
let start = self.s.cursor();
|
||||||
let token = match self.s.eat() {
|
let token = match self.s.eat() {
|
||||||
@ -215,25 +205,20 @@ impl Lexer<'_> {
|
|||||||
Some(c @ '0'..='9') => self.number(start, c),
|
Some(c @ '0'..='9') => self.number(start, c),
|
||||||
Some(',') => SyntaxKind::Comma,
|
Some(',') => SyntaxKind::Comma,
|
||||||
Some(c) if is_id_start(c) => self.ident(start),
|
Some(c) if is_id_start(c) => self.ident(start),
|
||||||
Some(c) => {
|
Some(c) => self
|
||||||
return self.error(eco_format!(
|
.error(eco_format!("the character {c} is not valid in a decorator")),
|
||||||
"the character {c} is not valid in a decorator"
|
|
||||||
))
|
|
||||||
}
|
|
||||||
None => break,
|
None => break,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if token.is_error() {
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
let end = self.s.cursor();
|
let end = self.s.cursor();
|
||||||
self.decorator.push((token, end));
|
self.subtree.push((token, end));
|
||||||
}
|
}
|
||||||
|
|
||||||
// The saved tokens will be removed in reverse.
|
SyntaxKind::Decorator
|
||||||
self.decorator.reverse();
|
|
||||||
|
|
||||||
// Already collected all we need from the decorator.
|
|
||||||
self.s.jump(start + 2);
|
|
||||||
|
|
||||||
SyntaxKind::DecoratorMarker
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,7 +108,8 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
|
|||||||
SyntaxKind::Space
|
SyntaxKind::Space
|
||||||
| SyntaxKind::Parbreak
|
| SyntaxKind::Parbreak
|
||||||
| SyntaxKind::LineComment
|
| SyntaxKind::LineComment
|
||||||
| SyntaxKind::BlockComment => {
|
| SyntaxKind::BlockComment
|
||||||
|
| SyntaxKind::Decorator => {
|
||||||
p.eat();
|
p.eat();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -121,7 +122,6 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
|
|||||||
| SyntaxKind::Link
|
| SyntaxKind::Link
|
||||||
| SyntaxKind::Label => p.eat(),
|
| SyntaxKind::Label => p.eat(),
|
||||||
|
|
||||||
SyntaxKind::DecoratorMarker => decorator(p),
|
|
||||||
SyntaxKind::Hash => embedded_code_expr(p),
|
SyntaxKind::Hash => embedded_code_expr(p),
|
||||||
SyntaxKind::Star => strong(p),
|
SyntaxKind::Star => strong(p),
|
||||||
SyntaxKind::Underscore => emph(p),
|
SyntaxKind::Underscore => emph(p),
|
||||||
@ -147,19 +147,6 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
|
|||||||
*at_start = false;
|
*at_start = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn decorator(p: &mut Parser) {
|
|
||||||
let m = p.marker();
|
|
||||||
p.enter(LexMode::Decorator);
|
|
||||||
p.assert(SyntaxKind::DecoratorMarker);
|
|
||||||
|
|
||||||
while !p.end() {
|
|
||||||
p.eat();
|
|
||||||
}
|
|
||||||
|
|
||||||
p.exit();
|
|
||||||
p.wrap(m, SyntaxKind::Decorator);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses strong content: `*Strong*`.
|
/// Parses strong content: `*Strong*`.
|
||||||
fn strong(p: &mut Parser) {
|
fn strong(p: &mut Parser) {
|
||||||
const END: SyntaxSet = SyntaxSet::new()
|
const END: SyntaxSet = SyntaxSet::new()
|
||||||
@ -1775,9 +1762,23 @@ impl<'s> Parser<'s> {
|
|||||||
|
|
||||||
fn save(&mut self) {
|
fn save(&mut self) {
|
||||||
let text = self.current_text();
|
let text = self.current_text();
|
||||||
|
let subtree = self.lexer.take_subtree();
|
||||||
if self.at(SyntaxKind::Error) {
|
if self.at(SyntaxKind::Error) {
|
||||||
let error = self.lexer.take_error().unwrap();
|
let error = self.lexer.take_error().unwrap();
|
||||||
self.nodes.push(SyntaxNode::error(error, text));
|
self.nodes.push(SyntaxNode::error(error, text));
|
||||||
|
} else if !subtree.is_empty() {
|
||||||
|
let mut text_cursor = self.current_start;
|
||||||
|
let mut children = Vec::with_capacity(subtree.len());
|
||||||
|
|
||||||
|
for (kind, end) in subtree {
|
||||||
|
// Ensure no errors in the subtree
|
||||||
|
assert!(!kind.is_error());
|
||||||
|
|
||||||
|
children.push(SyntaxNode::leaf(kind, &self.text[text_cursor..end]));
|
||||||
|
text_cursor = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.nodes.push(SyntaxNode::inner(self.current, children));
|
||||||
} else {
|
} else {
|
||||||
self.nodes.push(SyntaxNode::leaf(self.current, text));
|
self.nodes.push(SyntaxNode::leaf(self.current, text));
|
||||||
}
|
}
|
||||||
|
@ -54,7 +54,7 @@ pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new()
|
|||||||
.add(SyntaxKind::Parbreak)
|
.add(SyntaxKind::Parbreak)
|
||||||
.add(SyntaxKind::LineComment)
|
.add(SyntaxKind::LineComment)
|
||||||
.add(SyntaxKind::BlockComment)
|
.add(SyntaxKind::BlockComment)
|
||||||
.add(SyntaxKind::DecoratorMarker)
|
.add(SyntaxKind::Decorator)
|
||||||
.add(SyntaxKind::Text)
|
.add(SyntaxKind::Text)
|
||||||
.add(SyntaxKind::Linebreak)
|
.add(SyntaxKind::Linebreak)
|
||||||
.add(SyntaxKind::Escape)
|
.add(SyntaxKind::Escape)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user