Syntax set macro (#5138)

Co-authored-by: Laurenz <laurmaedje@gmail.com>
This commit is contained in:
Ian Wrzesinski 2024-10-22 09:25:52 -04:00 committed by GitHub
parent 33b9d1c7db
commit b8034a3438
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 106 additions and 135 deletions

View File

@ -5,7 +5,7 @@ use std::ops::{Index, IndexMut, Range};
use ecow::{eco_format, EcoString}; use ecow::{eco_format, EcoString};
use unicode_math_class::MathClass; use unicode_math_class::MathClass;
use crate::set::SyntaxSet; use crate::set::{syntax_set, SyntaxSet};
use crate::{ use crate::{
ast, is_ident, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode, ast, is_ident, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode,
}; };
@ -60,11 +60,7 @@ fn markup(
continue; continue;
} }
if p.at_set(set::MARKUP_EXPR) {
markup_expr(p, &mut at_start); markup_expr(p, &mut at_start);
} else {
p.unexpected();
}
} }
p.wrap(m, SyntaxKind::Markup); p.wrap(m, SyntaxKind::Markup);
} }
@ -92,11 +88,7 @@ pub(super) fn reparse_markup(
continue; continue;
} }
if p.at_set(set::MARKUP_EXPR) {
markup_expr(&mut p, at_start); markup_expr(&mut p, at_start);
} else {
p.unexpected();
}
} }
(p.balanced && p.current_start() == range.end).then(|| p.finish()) (p.balanced && p.current_start() == range.end).then(|| p.finish())
} }
@ -140,7 +132,10 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::TermMarker | SyntaxKind::TermMarker
| SyntaxKind::Colon => p.convert(SyntaxKind::Text), | SyntaxKind::Colon => p.convert(SyntaxKind::Text),
_ => {} _ => {
p.unexpected();
return; // Don't set `at_start`
}
} }
*at_start = false; *at_start = false;
@ -148,28 +143,18 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
/// Parses strong content: `*Strong*`. /// Parses strong content: `*Strong*`.
fn strong(p: &mut Parser) { fn strong(p: &mut Parser) {
const END: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Star)
.add(SyntaxKind::Parbreak)
.add(SyntaxKind::RightBracket);
let m = p.marker(); let m = p.marker();
p.assert(SyntaxKind::Star); p.assert(SyntaxKind::Star);
markup(p, false, 0, |p| p.at_set(END)); markup(p, false, 0, |p| p.at_set(syntax_set!(Star, Parbreak, RightBracket)));
p.expect_closing_delimiter(m, SyntaxKind::Star); p.expect_closing_delimiter(m, SyntaxKind::Star);
p.wrap(m, SyntaxKind::Strong); p.wrap(m, SyntaxKind::Strong);
} }
/// Parses emphasized content: `_Emphasized_`. /// Parses emphasized content: `_Emphasized_`.
fn emph(p: &mut Parser) { fn emph(p: &mut Parser) {
const END: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Underscore)
.add(SyntaxKind::Parbreak)
.add(SyntaxKind::RightBracket);
let m = p.marker(); let m = p.marker();
p.assert(SyntaxKind::Underscore); p.assert(SyntaxKind::Underscore);
markup(p, false, 0, |p| p.at_set(END)); markup(p, false, 0, |p| p.at_set(syntax_set!(Underscore, Parbreak, RightBracket)));
p.expect_closing_delimiter(m, SyntaxKind::Underscore); p.expect_closing_delimiter(m, SyntaxKind::Underscore);
p.wrap(m, SyntaxKind::Emph); p.wrap(m, SyntaxKind::Emph);
} }
@ -192,16 +177,11 @@ fn raw(p: &mut Parser) {
/// Parses a section heading: `= Introduction`. /// Parses a section heading: `= Introduction`.
fn heading(p: &mut Parser) { fn heading(p: &mut Parser) {
const END: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Label)
.add(SyntaxKind::RightBracket)
.add(SyntaxKind::Space);
let m = p.marker(); let m = p.marker();
p.assert(SyntaxKind::HeadingMarker); p.assert(SyntaxKind::HeadingMarker);
whitespace_line(p); whitespace_line(p);
markup(p, false, usize::MAX, |p| { markup(p, false, usize::MAX, |p| {
p.at_set(END) p.at_set(syntax_set!(Label, Space, RightBracket))
&& (!p.at(SyntaxKind::Space) || p.lexer.clone().next() == SyntaxKind::Label) && (!p.at(SyntaxKind::Space) || p.lexer.clone().next() == SyntaxKind::Label)
}); });
p.wrap(m, SyntaxKind::Heading); p.wrap(m, SyntaxKind::Heading);
@ -229,14 +209,11 @@ fn enum_item(p: &mut Parser) {
/// Parses an item in a term list: `/ Term: Details`. /// Parses an item in a term list: `/ Term: Details`.
fn term_item(p: &mut Parser) { fn term_item(p: &mut Parser) {
const TERM_END: SyntaxSet =
SyntaxSet::new().add(SyntaxKind::Colon).add(SyntaxKind::RightBracket);
let m = p.marker(); let m = p.marker();
p.assert(SyntaxKind::TermMarker); p.assert(SyntaxKind::TermMarker);
let min_indent = p.column(p.prev_end()); let min_indent = p.column(p.prev_end());
whitespace_line(p); whitespace_line(p);
markup(p, false, usize::MAX, |p| p.at_set(TERM_END)); markup(p, false, usize::MAX, |p| p.at_set(syntax_set!(Colon, RightBracket)));
p.expect(SyntaxKind::Colon); p.expect(SyntaxKind::Colon);
whitespace_line(p); whitespace_line(p);
markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket)); markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket));
@ -825,16 +802,11 @@ pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNod
/// Parses a code block: `{ let x = 1; x + 2 }`. /// Parses a code block: `{ let x = 1; x + 2 }`.
fn code_block(p: &mut Parser) { fn code_block(p: &mut Parser) {
const END: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::RightBrace)
.add(SyntaxKind::RightBracket)
.add(SyntaxKind::RightParen);
let m = p.marker(); let m = p.marker();
p.enter(LexMode::Code); p.enter(LexMode::Code);
p.enter_newline_mode(NewlineMode::Continue); p.enter_newline_mode(NewlineMode::Continue);
p.assert(SyntaxKind::LeftBrace); p.assert(SyntaxKind::LeftBrace);
code(p, |p| p.at_set(END)); code(p, |p| p.at_set(syntax_set!(RightBrace, RightBracket, RightParen)));
p.expect_closing_delimiter(m, SyntaxKind::RightBrace); p.expect_closing_delimiter(m, SyntaxKind::RightBrace);
p.exit(); p.exit();
p.exit_newline_mode(); p.exit_newline_mode();

View File

@ -39,54 +39,34 @@ const fn bit(kind: SyntaxKind) -> u128 {
1 << (kind as usize) 1 << (kind as usize)
} }
/// Syntax kinds that can start a statement. /// Generate a compile-time constant `SyntaxSet` of the given kinds.
pub const STMT: SyntaxSet = SyntaxSet::new() macro_rules! syntax_set {
.add(SyntaxKind::Let) ($($kind:ident),* $(,)?) => {{
.add(SyntaxKind::Set) const SET: crate::set::SyntaxSet = crate::set::SyntaxSet::new()
.add(SyntaxKind::Show) $(.add(crate::SyntaxKind:: $kind))*;
.add(SyntaxKind::Import) SET
.add(SyntaxKind::Include) }}
.add(SyntaxKind::Return); }
/// Syntax kinds that can start a markup expression. // Export so other modules can import as: `use set::syntax_set`
pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new() pub(crate) use syntax_set;
.add(SyntaxKind::Space)
.add(SyntaxKind::Parbreak) /// Syntax kinds that can start a statement.
.add(SyntaxKind::LineComment) pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return);
.add(SyntaxKind::BlockComment)
.add(SyntaxKind::Text)
.add(SyntaxKind::Linebreak)
.add(SyntaxKind::Escape)
.add(SyntaxKind::Shorthand)
.add(SyntaxKind::SmartQuote)
.add(SyntaxKind::RawDelim)
.add(SyntaxKind::Link)
.add(SyntaxKind::Label)
.add(SyntaxKind::Hash)
.add(SyntaxKind::Star)
.add(SyntaxKind::Underscore)
.add(SyntaxKind::HeadingMarker)
.add(SyntaxKind::ListMarker)
.add(SyntaxKind::EnumMarker)
.add(SyntaxKind::TermMarker)
.add(SyntaxKind::RefMarker)
.add(SyntaxKind::Dollar)
.add(SyntaxKind::LeftBracket)
.add(SyntaxKind::RightBracket)
.add(SyntaxKind::Colon);
/// Syntax kinds that can start a math expression. /// Syntax kinds that can start a math expression.
pub const MATH_EXPR: SyntaxSet = SyntaxSet::new() pub const MATH_EXPR: SyntaxSet = syntax_set!(
.add(SyntaxKind::Hash) Hash,
.add(SyntaxKind::MathIdent) MathIdent,
.add(SyntaxKind::Text) Text,
.add(SyntaxKind::MathShorthand) MathShorthand,
.add(SyntaxKind::Linebreak) Linebreak,
.add(SyntaxKind::MathAlignPoint) MathAlignPoint,
.add(SyntaxKind::Escape) Escape,
.add(SyntaxKind::Str) Str,
.add(SyntaxKind::Root) Root,
.add(SyntaxKind::Prime); Prime,
);
/// Syntax kinds that can start a code expression. /// Syntax kinds that can start a code expression.
pub const CODE_EXPR: SyntaxSet = CODE_PRIMARY.union(UNARY_OP); pub const CODE_EXPR: SyntaxSet = CODE_PRIMARY.union(UNARY_OP);
@ -98,60 +78,43 @@ pub const ATOMIC_CODE_EXPR: SyntaxSet = ATOMIC_CODE_PRIMARY;
pub const CODE_PRIMARY: SyntaxSet = ATOMIC_CODE_PRIMARY.add(SyntaxKind::Underscore); pub const CODE_PRIMARY: SyntaxSet = ATOMIC_CODE_PRIMARY.add(SyntaxKind::Underscore);
/// Syntax kinds that can start an atomic code primary. /// Syntax kinds that can start an atomic code primary.
pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new() pub const ATOMIC_CODE_PRIMARY: SyntaxSet = syntax_set!(
.add(SyntaxKind::Ident) Ident,
.add(SyntaxKind::LeftBrace) LeftBrace,
.add(SyntaxKind::LeftBracket) LeftBracket,
.add(SyntaxKind::LeftParen) LeftParen,
.add(SyntaxKind::Dollar) Dollar,
.add(SyntaxKind::Let) Let,
.add(SyntaxKind::Set) Set,
.add(SyntaxKind::Show) Show,
.add(SyntaxKind::Context) Context,
.add(SyntaxKind::If) If,
.add(SyntaxKind::While) While,
.add(SyntaxKind::For) For,
.add(SyntaxKind::Import) Import,
.add(SyntaxKind::Include) Include,
.add(SyntaxKind::Break) Break,
.add(SyntaxKind::Continue) Continue,
.add(SyntaxKind::Return) Return,
.add(SyntaxKind::None) None,
.add(SyntaxKind::Auto) Auto,
.add(SyntaxKind::Int) Int,
.add(SyntaxKind::Float) Float,
.add(SyntaxKind::Bool) Bool,
.add(SyntaxKind::Numeric) Numeric,
.add(SyntaxKind::Str) Str,
.add(SyntaxKind::Label) Label,
.add(SyntaxKind::RawDelim); RawDelim,
);
/// Syntax kinds that are unary operators. /// Syntax kinds that are unary operators.
pub const UNARY_OP: SyntaxSet = SyntaxSet::new() pub const UNARY_OP: SyntaxSet = syntax_set!(Plus, Minus, Not);
.add(SyntaxKind::Plus)
.add(SyntaxKind::Minus)
.add(SyntaxKind::Not);
/// Syntax kinds that are binary operators. /// Syntax kinds that are binary operators.
pub const BINARY_OP: SyntaxSet = SyntaxSet::new() pub const BINARY_OP: SyntaxSet = syntax_set!(
.add(SyntaxKind::Plus) Plus, Minus, Star, Slash, And, Or, EqEq, ExclEq, Lt, LtEq, Gt, GtEq, Eq, In, PlusEq,
.add(SyntaxKind::Minus) HyphEq, StarEq, SlashEq,
.add(SyntaxKind::Star) );
.add(SyntaxKind::Slash)
.add(SyntaxKind::And)
.add(SyntaxKind::Or)
.add(SyntaxKind::EqEq)
.add(SyntaxKind::ExclEq)
.add(SyntaxKind::Lt)
.add(SyntaxKind::LtEq)
.add(SyntaxKind::Gt)
.add(SyntaxKind::GtEq)
.add(SyntaxKind::Eq)
.add(SyntaxKind::In)
.add(SyntaxKind::PlusEq)
.add(SyntaxKind::HyphEq)
.add(SyntaxKind::StarEq)
.add(SyntaxKind::SlashEq);
/// Syntax kinds that can start an argument in a function call. /// Syntax kinds that can start an argument in a function call.
pub const ARRAY_OR_DICT_ITEM: SyntaxSet = CODE_EXPR.add(SyntaxKind::Dots); pub const ARRAY_OR_DICT_ITEM: SyntaxSet = CODE_EXPR.add(SyntaxKind::Dots);

View File

@ -35,6 +35,42 @@ multiline.
= This = This
is not. is not.
--- heading-trailing-whitespace ---
// Test whether headings contain trailing whitespace, with or without comments/labels.
// Labels are special cased to immediately end headings in the parser, but also
// have unique whitespace behavior.
#let join(..xs) = xs.pos().join()
#let head(h) = heading(depth: 1, h)
// No whitespace.
#test(head[h], [= h])
#test(head[h], [= h/**/])
#test(head[h], [= h<a>])
#test(head[h], [= h/**/<b>])
// Label behaves differently than normal trailing space and comment.
#test(head(join[h][ ]), [= h ])
#test(head(join[h][ ]), [= h /**/])
#test(join(head[h])[ ], [= h <c>])
// Combinations.
#test(head(join[h][ ][ ]), [= h /**/ ])
#test(join(head[h])[ ][ ], [= h <d> ])
#test(head(join[h][ ]), [= h /**/<e>])
#test(join(head[h])[ ], [= h/**/ <f>])
// The first space attaches to the heading, but the second does not.
#test(join(head(join[h][ ]))[ ], [= h /**/ <g>])
--- heading-leading-whitespace ---
// Test that leading whitespace and comments don't matter.
#test[= h][= h]
#test[= h][= /**/ /**/ h]
#test[= h][= /*
comment spans lines
*/ h]
--- heading-show-where --- --- heading-show-where ---
// Test styling. // Test styling.
#show heading.where(level: 5): it => block( #show heading.where(level: 5): it => block(