From b8034a343831e8609aec2ec81eb7eeda57aa5d81 Mon Sep 17 00:00:00 2001 From: Ian Wrzesinski <133046678+wrzian@users.noreply.github.com> Date: Tue, 22 Oct 2024 09:25:52 -0400 Subject: [PATCH] Syntax set macro (#5138) Co-authored-by: Laurenz --- crates/typst-syntax/src/parser.rs | 52 +++------- crates/typst-syntax/src/set.rs | 153 +++++++++++------------------- tests/suite/model/heading.typ | 36 +++++++ 3 files changed, 106 insertions(+), 135 deletions(-) diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs index 20ff58f0b..dba6d69dd 100644 --- a/crates/typst-syntax/src/parser.rs +++ b/crates/typst-syntax/src/parser.rs @@ -5,7 +5,7 @@ use std::ops::{Index, IndexMut, Range}; use ecow::{eco_format, EcoString}; use unicode_math_class::MathClass; -use crate::set::SyntaxSet; +use crate::set::{syntax_set, SyntaxSet}; use crate::{ ast, is_ident, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode, }; @@ -60,11 +60,7 @@ fn markup( continue; } - if p.at_set(set::MARKUP_EXPR) { - markup_expr(p, &mut at_start); - } else { - p.unexpected(); - } + markup_expr(p, &mut at_start); } p.wrap(m, SyntaxKind::Markup); } @@ -92,11 +88,7 @@ pub(super) fn reparse_markup( continue; } - if p.at_set(set::MARKUP_EXPR) { - markup_expr(&mut p, at_start); - } else { - p.unexpected(); - } + markup_expr(&mut p, at_start); } (p.balanced && p.current_start() == range.end).then(|| p.finish()) } @@ -140,7 +132,10 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) { | SyntaxKind::TermMarker | SyntaxKind::Colon => p.convert(SyntaxKind::Text), - _ => {} + _ => { + p.unexpected(); + return; // Don't set `at_start` + } } *at_start = false; @@ -148,28 +143,18 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) { /// Parses strong content: `*Strong*`. fn strong(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new() - .add(SyntaxKind::Star) - .add(SyntaxKind::Parbreak) - .add(SyntaxKind::RightBracket); - let m = p.marker(); p.assert(SyntaxKind::Star); - markup(p, false, 0, |p| p.at_set(END)); + markup(p, false, 0, |p| p.at_set(syntax_set!(Star, Parbreak, RightBracket))); p.expect_closing_delimiter(m, SyntaxKind::Star); p.wrap(m, SyntaxKind::Strong); } /// Parses emphasized content: `_Emphasized_`. fn emph(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new() - .add(SyntaxKind::Underscore) - .add(SyntaxKind::Parbreak) - .add(SyntaxKind::RightBracket); - let m = p.marker(); p.assert(SyntaxKind::Underscore); - markup(p, false, 0, |p| p.at_set(END)); + markup(p, false, 0, |p| p.at_set(syntax_set!(Underscore, Parbreak, RightBracket))); p.expect_closing_delimiter(m, SyntaxKind::Underscore); p.wrap(m, SyntaxKind::Emph); } @@ -192,16 +177,11 @@ fn raw(p: &mut Parser) { /// Parses a section heading: `= Introduction`. fn heading(p: &mut Parser) { - const END: SyntaxSet = SyntaxSet::new() - .add(SyntaxKind::Label) - .add(SyntaxKind::RightBracket) - .add(SyntaxKind::Space); - let m = p.marker(); p.assert(SyntaxKind::HeadingMarker); whitespace_line(p); markup(p, false, usize::MAX, |p| { - p.at_set(END) + p.at_set(syntax_set!(Label, Space, RightBracket)) && (!p.at(SyntaxKind::Space) || p.lexer.clone().next() == SyntaxKind::Label) }); p.wrap(m, SyntaxKind::Heading); @@ -229,14 +209,11 @@ fn enum_item(p: &mut Parser) { /// Parses an item in a term list: `/ Term: Details`. fn term_item(p: &mut Parser) { - const TERM_END: SyntaxSet = - SyntaxSet::new().add(SyntaxKind::Colon).add(SyntaxKind::RightBracket); - let m = p.marker(); p.assert(SyntaxKind::TermMarker); let min_indent = p.column(p.prev_end()); whitespace_line(p); - markup(p, false, usize::MAX, |p| p.at_set(TERM_END)); + markup(p, false, usize::MAX, |p| p.at_set(syntax_set!(Colon, RightBracket))); p.expect(SyntaxKind::Colon); whitespace_line(p); markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket)); @@ -825,16 +802,11 @@ pub(super) fn reparse_block(text: &str, range: Range) -> Option u128 { 1 << (kind as usize) } -/// Syntax kinds that can start a statement. -pub const STMT: SyntaxSet = SyntaxSet::new() - .add(SyntaxKind::Let) - .add(SyntaxKind::Set) - .add(SyntaxKind::Show) - .add(SyntaxKind::Import) - .add(SyntaxKind::Include) - .add(SyntaxKind::Return); +/// Generate a compile-time constant `SyntaxSet` of the given kinds. +macro_rules! syntax_set { + ($($kind:ident),* $(,)?) => {{ + const SET: crate::set::SyntaxSet = crate::set::SyntaxSet::new() + $(.add(crate::SyntaxKind:: $kind))*; + SET + }} +} -/// Syntax kinds that can start a markup expression. -pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new() - .add(SyntaxKind::Space) - .add(SyntaxKind::Parbreak) - .add(SyntaxKind::LineComment) - .add(SyntaxKind::BlockComment) - .add(SyntaxKind::Text) - .add(SyntaxKind::Linebreak) - .add(SyntaxKind::Escape) - .add(SyntaxKind::Shorthand) - .add(SyntaxKind::SmartQuote) - .add(SyntaxKind::RawDelim) - .add(SyntaxKind::Link) - .add(SyntaxKind::Label) - .add(SyntaxKind::Hash) - .add(SyntaxKind::Star) - .add(SyntaxKind::Underscore) - .add(SyntaxKind::HeadingMarker) - .add(SyntaxKind::ListMarker) - .add(SyntaxKind::EnumMarker) - .add(SyntaxKind::TermMarker) - .add(SyntaxKind::RefMarker) - .add(SyntaxKind::Dollar) - .add(SyntaxKind::LeftBracket) - .add(SyntaxKind::RightBracket) - .add(SyntaxKind::Colon); +// Export so other modules can import as: `use set::syntax_set` +pub(crate) use syntax_set; + +/// Syntax kinds that can start a statement. +pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return); /// Syntax kinds that can start a math expression. -pub const MATH_EXPR: SyntaxSet = SyntaxSet::new() - .add(SyntaxKind::Hash) - .add(SyntaxKind::MathIdent) - .add(SyntaxKind::Text) - .add(SyntaxKind::MathShorthand) - .add(SyntaxKind::Linebreak) - .add(SyntaxKind::MathAlignPoint) - .add(SyntaxKind::Escape) - .add(SyntaxKind::Str) - .add(SyntaxKind::Root) - .add(SyntaxKind::Prime); +pub const MATH_EXPR: SyntaxSet = syntax_set!( + Hash, + MathIdent, + Text, + MathShorthand, + Linebreak, + MathAlignPoint, + Escape, + Str, + Root, + Prime, +); /// Syntax kinds that can start a code expression. pub const CODE_EXPR: SyntaxSet = CODE_PRIMARY.union(UNARY_OP); @@ -98,60 +78,43 @@ pub const ATOMIC_CODE_EXPR: SyntaxSet = ATOMIC_CODE_PRIMARY; pub const CODE_PRIMARY: SyntaxSet = ATOMIC_CODE_PRIMARY.add(SyntaxKind::Underscore); /// Syntax kinds that can start an atomic code primary. -pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new() - .add(SyntaxKind::Ident) - .add(SyntaxKind::LeftBrace) - .add(SyntaxKind::LeftBracket) - .add(SyntaxKind::LeftParen) - .add(SyntaxKind::Dollar) - .add(SyntaxKind::Let) - .add(SyntaxKind::Set) - .add(SyntaxKind::Show) - .add(SyntaxKind::Context) - .add(SyntaxKind::If) - .add(SyntaxKind::While) - .add(SyntaxKind::For) - .add(SyntaxKind::Import) - .add(SyntaxKind::Include) - .add(SyntaxKind::Break) - .add(SyntaxKind::Continue) - .add(SyntaxKind::Return) - .add(SyntaxKind::None) - .add(SyntaxKind::Auto) - .add(SyntaxKind::Int) - .add(SyntaxKind::Float) - .add(SyntaxKind::Bool) - .add(SyntaxKind::Numeric) - .add(SyntaxKind::Str) - .add(SyntaxKind::Label) - .add(SyntaxKind::RawDelim); +pub const ATOMIC_CODE_PRIMARY: SyntaxSet = syntax_set!( + Ident, + LeftBrace, + LeftBracket, + LeftParen, + Dollar, + Let, + Set, + Show, + Context, + If, + While, + For, + Import, + Include, + Break, + Continue, + Return, + None, + Auto, + Int, + Float, + Bool, + Numeric, + Str, + Label, + RawDelim, +); /// Syntax kinds that are unary operators. -pub const UNARY_OP: SyntaxSet = SyntaxSet::new() - .add(SyntaxKind::Plus) - .add(SyntaxKind::Minus) - .add(SyntaxKind::Not); +pub const UNARY_OP: SyntaxSet = syntax_set!(Plus, Minus, Not); /// Syntax kinds that are binary operators. -pub const BINARY_OP: SyntaxSet = SyntaxSet::new() - .add(SyntaxKind::Plus) - .add(SyntaxKind::Minus) - .add(SyntaxKind::Star) - .add(SyntaxKind::Slash) - .add(SyntaxKind::And) - .add(SyntaxKind::Or) - .add(SyntaxKind::EqEq) - .add(SyntaxKind::ExclEq) - .add(SyntaxKind::Lt) - .add(SyntaxKind::LtEq) - .add(SyntaxKind::Gt) - .add(SyntaxKind::GtEq) - .add(SyntaxKind::Eq) - .add(SyntaxKind::In) - .add(SyntaxKind::PlusEq) - .add(SyntaxKind::HyphEq) - .add(SyntaxKind::StarEq) - .add(SyntaxKind::SlashEq); +pub const BINARY_OP: SyntaxSet = syntax_set!( + Plus, Minus, Star, Slash, And, Or, EqEq, ExclEq, Lt, LtEq, Gt, GtEq, Eq, In, PlusEq, + HyphEq, StarEq, SlashEq, +); /// Syntax kinds that can start an argument in a function call. pub const ARRAY_OR_DICT_ITEM: SyntaxSet = CODE_EXPR.add(SyntaxKind::Dots); diff --git a/tests/suite/model/heading.typ b/tests/suite/model/heading.typ index 3be9e52bc..884f203d2 100644 --- a/tests/suite/model/heading.typ +++ b/tests/suite/model/heading.typ @@ -35,6 +35,42 @@ multiline. = This is not. +--- heading-trailing-whitespace --- +// Whether headings contain trailing whitespace with or without comments/labels. +// Labels are special cased to immediately end headings in the parser, but also +// have unique whitespace behavior. + +#let join(..xs) = xs.pos().join() +#let head(h) = heading(depth: 1, h) + +// No whitespace. +#test(head[h], [= h]) +#test(head[h], [= h/**/]) +#test(head[h], [= h]) +#test(head[h], [= h/**/]) + +// Label behaves differently than normal trailing space and comment. +#test(head(join[h][ ]), [= h ]) +#test(head(join[h][ ]), [= h /**/]) +#test(join(head[h])[ ], [= h ]) + +// Combinations. +#test(head(join[h][ ][ ]), [= h /**/ ]) +#test(join(head[h])[ ][ ], [= h ]) +#test(head(join[h][ ]), [= h /**/]) +#test(join(head[h])[ ], [= h/**/ ]) + +// The first space attaches, but not the second +#test(join(head(join[h][ ]))[ ], [= h /**/ ]) + +--- heading-leading-whitespace --- +// Test that leading whitespace and comments don't matter. +#test[= h][= h] +#test[= h][= /**/ /**/ h] +#test[= h][= /* +comment spans lines +*/ h] + --- heading-show-where --- // Test styling. #show heading.where(level: 5): it => block(