diff --git a/crates/typst-syntax/src/highlight.rs b/crates/typst-syntax/src/highlight.rs index 59cad25ea..83c15b14b 100644 --- a/crates/typst-syntax/src/highlight.rs +++ b/crates/typst-syntax/src/highlight.rs @@ -286,7 +286,6 @@ pub fn highlight(node: &LinkedNode) -> Option { SyntaxKind::Destructuring => None, SyntaxKind::DestructAssignment => None, - SyntaxKind::DecoratorMarker => Some(Tag::Comment), SyntaxKind::Decorator => None, SyntaxKind::LineComment => Some(Tag::Comment), diff --git a/crates/typst-syntax/src/kind.rs b/crates/typst-syntax/src/kind.rs index b9ccf45c4..50d40960a 100644 --- a/crates/typst-syntax/src/kind.rs +++ b/crates/typst-syntax/src/kind.rs @@ -13,8 +13,8 @@ pub enum SyntaxKind { LineComment, /// A block comment: `/* ... */`. BlockComment, - /// A decorator's marker: `/!` - DecoratorMarker, + /// A decorator: `/! allow("warning")` + Decorator, /// The contents of a file or content block. Markup, @@ -280,9 +280,6 @@ pub enum SyntaxKind { Destructuring, /// A destructuring assignment expression: `(x, y) = (1, 2)`. DestructAssignment, - - /// A decorator: `/! allow("amogus")` - Decorator, } impl SyntaxKind { @@ -360,7 +357,11 @@ impl SyntaxKind { pub fn is_trivia(self) -> bool { matches!( self, - Self::LineComment | Self::BlockComment | Self::Space | Self::Parbreak + Self::LineComment + | Self::BlockComment + | Self::Space + | Self::Parbreak + | Self::Decorator ) } @@ -376,7 +377,7 @@ impl SyntaxKind { Self::Error => "syntax error", Self::LineComment => "line comment", Self::BlockComment => "block comment", - Self::DecoratorMarker => "decorator marker", + Self::Decorator => "decorator", Self::Markup => "markup", Self::Text => "text", Self::Space => "space", @@ -504,7 +505,6 @@ impl SyntaxKind { Self::FuncReturn => "`return` expression", Self::Destructuring => "destructuring pattern", Self::DestructAssignment => "destructuring assignment expression", - Self::Decorator => "decorator", } } } diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs index 05b125709..341484c7f 100644 --- a/crates/typst-syntax/src/lexer.rs +++ b/crates/typst-syntax/src/lexer.rs @@ -18,8 +18,10 @@ pub(super) struct Lexer<'s> { newline: bool, /// The state held by raw line lexing. raw: Vec<(SyntaxKind, usize)>, - /// The state held by decorator lexing. - decorator: Vec<(SyntaxKind, usize)>, + /// The subtree of tokens associated with this token. + /// The parser is responsible for converting this subtree into syntax nodes + /// matching this structure. + subtree: Vec<(SyntaxKind, usize)>, /// An error for the last token. error: Option, } @@ -35,8 +37,6 @@ pub(super) enum LexMode { Code, /// The contents of a raw block. Raw, - /// The contents of a decorator. - Decorator, } impl<'s> Lexer<'s> { @@ -49,7 +49,7 @@ impl<'s> Lexer<'s> { newline: false, error: None, raw: Vec::new(), - decorator: Vec::new(), + subtree: Vec::new(), } } @@ -113,16 +113,9 @@ impl Lexer<'_> { return kind; } - if self.mode == LexMode::Decorator { - let Some((kind, end)) = self.decorator.pop() else { - return SyntaxKind::End; - }; - self.s.jump(end); - return kind; - } - self.newline = false; self.error = None; + self.subtree.clear(); let start = self.s.cursor(); match self.s.eat() { Some(c) if is_space(c, self.mode) => self.whitespace(start, c), @@ -142,13 +135,17 @@ impl Lexer<'_> { LexMode::Math => self.math(start, c), LexMode::Code => self.code(start, c), LexMode::Raw => unreachable!(), - LexMode::Decorator => unreachable!(), }, None => SyntaxKind::End, } } + /// Takes the subtree associated with the latest token. + pub fn take_subtree(&mut self) -> Vec<(SyntaxKind, usize)> { + std::mem::take(&mut self.subtree) + } + /// Eat whitespace characters greedily. fn whitespace(&mut self, start: usize, c: char) -> SyntaxKind { let more = self.s.eat_while(|c| is_space(c, self.mode)); @@ -194,15 +191,8 @@ impl Lexer<'_> { SyntaxKind::BlockComment } -} -/// Decorators. -impl Lexer<'_> { fn decorator(&mut self) -> SyntaxKind { - let start = self.s.cursor() - 2; - - self.decorator.clear(); - while !self.s.eat_newline() { let start = self.s.cursor(); let token = match self.s.eat() { @@ -215,25 +205,20 @@ impl Lexer<'_> { Some(c @ '0'..='9') => self.number(start, c), Some(',') => SyntaxKind::Comma, Some(c) if is_id_start(c) => self.ident(start), - Some(c) => { - return self.error(eco_format!( - "the character {c} is not valid in a decorator" - )) - } + Some(c) => self + .error(eco_format!("the character {c} is not valid in a decorator")), None => break, }; + if token.is_error() { + return token; + } + let end = self.s.cursor(); - self.decorator.push((token, end)); + self.subtree.push((token, end)); } - // The saved tokens will be removed in reverse. - self.decorator.reverse(); - - // Already collected all we need from the decorator. - self.s.jump(start + 2); - - SyntaxKind::DecoratorMarker + SyntaxKind::Decorator } } diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs index 54417e01f..e1d4bb951 100644 --- a/crates/typst-syntax/src/parser.rs +++ b/crates/typst-syntax/src/parser.rs @@ -108,7 +108,8 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) { SyntaxKind::Space | SyntaxKind::Parbreak | SyntaxKind::LineComment - | SyntaxKind::BlockComment => { + | SyntaxKind::BlockComment + | SyntaxKind::Decorator => { p.eat(); return; } @@ -121,7 +122,6 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) { | SyntaxKind::Link | SyntaxKind::Label => p.eat(), - SyntaxKind::DecoratorMarker => decorator(p), SyntaxKind::Hash => embedded_code_expr(p), SyntaxKind::Star => strong(p), SyntaxKind::Underscore => emph(p), @@ -147,19 +147,6 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) { *at_start = false; } -fn decorator(p: &mut Parser) { - let m = p.marker(); - p.enter(LexMode::Decorator); - p.assert(SyntaxKind::DecoratorMarker); - - while !p.end() { - p.eat(); - } - - p.exit(); - p.wrap(m, SyntaxKind::Decorator); -} - /// Parses strong content: `*Strong*`. fn strong(p: &mut Parser) { const END: SyntaxSet = SyntaxSet::new() @@ -1775,9 +1762,23 @@ impl<'s> Parser<'s> { fn save(&mut self) { let text = self.current_text(); + let subtree = self.lexer.take_subtree(); if self.at(SyntaxKind::Error) { let error = self.lexer.take_error().unwrap(); self.nodes.push(SyntaxNode::error(error, text)); + } else if !subtree.is_empty() { + let mut text_cursor = self.current_start; + let mut children = Vec::with_capacity(subtree.len()); + + for (kind, end) in subtree { + // Ensure no errors in the subtree + assert!(!kind.is_error()); + + children.push(SyntaxNode::leaf(kind, &self.text[text_cursor..end])); + text_cursor = end; + } + + self.nodes.push(SyntaxNode::inner(self.current, children)); } else { self.nodes.push(SyntaxNode::leaf(self.current, text)); } diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs index b5b19b466..3f5a26061 100644 --- a/crates/typst-syntax/src/set.rs +++ b/crates/typst-syntax/src/set.rs @@ -54,7 +54,7 @@ pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new() .add(SyntaxKind::Parbreak) .add(SyntaxKind::LineComment) .add(SyntaxKind::BlockComment) - .add(SyntaxKind::DecoratorMarker) + .add(SyntaxKind::Decorator) .add(SyntaxKind::Text) .add(SyntaxKind::Linebreak) .add(SyntaxKind::Escape)