Syntax set macro (#5138)

Co-authored-by: Laurenz <laurmaedje@gmail.com>
This commit is contained in:
Ian Wrzesinski 2024-10-22 09:25:52 -04:00 committed by GitHub
parent 33b9d1c7db
commit b8034a3438
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 106 additions and 135 deletions

View File

@ -5,7 +5,7 @@ use std::ops::{Index, IndexMut, Range};
use ecow::{eco_format, EcoString};
use unicode_math_class::MathClass;
use crate::set::SyntaxSet;
use crate::set::{syntax_set, SyntaxSet};
use crate::{
ast, is_ident, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode,
};
@ -60,11 +60,7 @@ fn markup(
continue;
}
if p.at_set(set::MARKUP_EXPR) {
markup_expr(p, &mut at_start);
} else {
p.unexpected();
}
markup_expr(p, &mut at_start);
}
p.wrap(m, SyntaxKind::Markup);
}
@ -92,11 +88,7 @@ pub(super) fn reparse_markup(
continue;
}
if p.at_set(set::MARKUP_EXPR) {
markup_expr(&mut p, at_start);
} else {
p.unexpected();
}
markup_expr(&mut p, at_start);
}
(p.balanced && p.current_start() == range.end).then(|| p.finish())
}
@ -140,7 +132,10 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
| SyntaxKind::TermMarker
| SyntaxKind::Colon => p.convert(SyntaxKind::Text),
_ => {}
_ => {
p.unexpected();
return; // Don't set `at_start`
}
}
*at_start = false;
@ -148,28 +143,18 @@ fn markup_expr(p: &mut Parser, at_start: &mut bool) {
/// Parses strong content: `*Strong*`.
///
/// Asserts the opening `Star`, runs `markup` with a closure that matches
/// `Star`, `Parbreak`, and `RightBracket`, then expects the closing `Star` and
/// wraps everything since the marker as `SyntaxKind::Strong`.
fn strong(p: &mut Parser) {
// The three kinds tested by the closure below; judging by the name, these
// presumably tell `markup` where the strong content ends.
const END: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Star)
.add(SyntaxKind::Parbreak)
.add(SyntaxKind::RightBracket);
// Remember where the node starts so it can be wrapped afterwards.
let m = p.marker();
p.assert(SyntaxKind::Star);
// NOTE(review): this listing shows both the `END`-based call and its
// `syntax_set!` form; the two closures test the same three kinds.
markup(p, false, 0, |p| p.at_set(END));
markup(p, false, 0, |p| p.at_set(syntax_set!(Star, Parbreak, RightBracket)));
p.expect_closing_delimiter(m, SyntaxKind::Star);
p.wrap(m, SyntaxKind::Strong);
}
/// Parses emphasized content: `_Emphasized_`.
///
/// Asserts the opening `Underscore`, runs `markup` with a closure that matches
/// `Underscore`, `Parbreak`, and `RightBracket`, then expects the closing
/// `Underscore` and wraps everything since the marker as `SyntaxKind::Emph`.
fn emph(p: &mut Parser) {
// The three kinds tested by the closure below; judging by the name, these
// presumably tell `markup` where the emphasized content ends.
const END: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Underscore)
.add(SyntaxKind::Parbreak)
.add(SyntaxKind::RightBracket);
// Remember where the node starts so it can be wrapped afterwards.
let m = p.marker();
p.assert(SyntaxKind::Underscore);
// NOTE(review): this listing shows both the `END`-based call and its
// `syntax_set!` form; the two closures test the same three kinds.
markup(p, false, 0, |p| p.at_set(END));
markup(p, false, 0, |p| p.at_set(syntax_set!(Underscore, Parbreak, RightBracket)));
p.expect_closing_delimiter(m, SyntaxKind::Underscore);
p.wrap(m, SyntaxKind::Emph);
}
@ -192,16 +177,11 @@ fn raw(p: &mut Parser) {
/// Parses a section heading: `= Introduction`.
fn heading(p: &mut Parser) {
const END: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Label)
.add(SyntaxKind::RightBracket)
.add(SyntaxKind::Space);
let m = p.marker();
p.assert(SyntaxKind::HeadingMarker);
whitespace_line(p);
markup(p, false, usize::MAX, |p| {
p.at_set(END)
p.at_set(syntax_set!(Label, Space, RightBracket))
&& (!p.at(SyntaxKind::Space) || p.lexer.clone().next() == SyntaxKind::Label)
});
p.wrap(m, SyntaxKind::Heading);
@ -229,14 +209,11 @@ fn enum_item(p: &mut Parser) {
/// Parses an item in a term list: `/ Term: Details`.
fn term_item(p: &mut Parser) {
const TERM_END: SyntaxSet =
SyntaxSet::new().add(SyntaxKind::Colon).add(SyntaxKind::RightBracket);
let m = p.marker();
p.assert(SyntaxKind::TermMarker);
let min_indent = p.column(p.prev_end());
whitespace_line(p);
markup(p, false, usize::MAX, |p| p.at_set(TERM_END));
markup(p, false, usize::MAX, |p| p.at_set(syntax_set!(Colon, RightBracket)));
p.expect(SyntaxKind::Colon);
whitespace_line(p);
markup(p, false, min_indent, |p| p.at(SyntaxKind::RightBracket));
@ -825,16 +802,11 @@ pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNod
/// Parses a code block: `{ let x = 1; x + 2 }`.
fn code_block(p: &mut Parser) {
const END: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::RightBrace)
.add(SyntaxKind::RightBracket)
.add(SyntaxKind::RightParen);
let m = p.marker();
p.enter(LexMode::Code);
p.enter_newline_mode(NewlineMode::Continue);
p.assert(SyntaxKind::LeftBrace);
code(p, |p| p.at_set(END));
code(p, |p| p.at_set(syntax_set!(RightBrace, RightBracket, RightParen)));
p.expect_closing_delimiter(m, SyntaxKind::RightBrace);
p.exit();
p.exit_newline_mode();

View File

@ -39,54 +39,34 @@ const fn bit(kind: SyntaxKind) -> u128 {
1 << (kind as usize)
}
/// Syntax kinds that can start a statement.
pub const STMT: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Let)
.add(SyntaxKind::Set)
.add(SyntaxKind::Show)
.add(SyntaxKind::Import)
.add(SyntaxKind::Include)
.add(SyntaxKind::Return);
/// Generate a compile-time constant `SyntaxSet` of the given kinds.
///
/// Takes a comma-separated list of bare `SyntaxKind` variant names (a trailing
/// comma is allowed, and the list may be empty) and expands to a block
/// expression whose value is the set containing exactly those kinds, e.g.
/// `syntax_set!(Star, Parbreak, RightBracket)`.
macro_rules! syntax_set {
($($kind:ident),* $(,)?) => {{
// Binding the set to a `const` item means it is built during constant
// evaluation, even when the macro is invoked inside an ordinary
// expression. The `crate::`-qualified paths keep call sites from needing
// `SyntaxSet` or `SyntaxKind` in scope.
const SET: crate::set::SyntaxSet = crate::set::SyntaxSet::new()
// One chained `.add(..)` call per variant name passed in.
$(.add(crate::SyntaxKind:: $kind))*;
SET
}}
}
/// Syntax kinds that can start a markup expression.
pub const MARKUP_EXPR: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Space)
.add(SyntaxKind::Parbreak)
.add(SyntaxKind::LineComment)
.add(SyntaxKind::BlockComment)
.add(SyntaxKind::Text)
.add(SyntaxKind::Linebreak)
.add(SyntaxKind::Escape)
.add(SyntaxKind::Shorthand)
.add(SyntaxKind::SmartQuote)
.add(SyntaxKind::RawDelim)
.add(SyntaxKind::Link)
.add(SyntaxKind::Label)
.add(SyntaxKind::Hash)
.add(SyntaxKind::Star)
.add(SyntaxKind::Underscore)
.add(SyntaxKind::HeadingMarker)
.add(SyntaxKind::ListMarker)
.add(SyntaxKind::EnumMarker)
.add(SyntaxKind::TermMarker)
.add(SyntaxKind::RefMarker)
.add(SyntaxKind::Dollar)
.add(SyntaxKind::LeftBracket)
.add(SyntaxKind::RightBracket)
.add(SyntaxKind::Colon);
// Re-export the macro as a path-addressable item so that other modules in this
// crate can import it by path, e.g. `use crate::set::syntax_set;`.
pub(crate) use syntax_set;
/// Syntax kinds that can start a statement.
pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return);
/// Syntax kinds that can start a math expression.
pub const MATH_EXPR: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Hash)
.add(SyntaxKind::MathIdent)
.add(SyntaxKind::Text)
.add(SyntaxKind::MathShorthand)
.add(SyntaxKind::Linebreak)
.add(SyntaxKind::MathAlignPoint)
.add(SyntaxKind::Escape)
.add(SyntaxKind::Str)
.add(SyntaxKind::Root)
.add(SyntaxKind::Prime);
pub const MATH_EXPR: SyntaxSet = syntax_set!(
Hash,
MathIdent,
Text,
MathShorthand,
Linebreak,
MathAlignPoint,
Escape,
Str,
Root,
Prime,
);
/// Syntax kinds that can start a code expression.
pub const CODE_EXPR: SyntaxSet = CODE_PRIMARY.union(UNARY_OP);
@ -98,60 +78,43 @@ pub const ATOMIC_CODE_EXPR: SyntaxSet = ATOMIC_CODE_PRIMARY;
pub const CODE_PRIMARY: SyntaxSet = ATOMIC_CODE_PRIMARY.add(SyntaxKind::Underscore);
/// Syntax kinds that can start an atomic code primary.
pub const ATOMIC_CODE_PRIMARY: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Ident)
.add(SyntaxKind::LeftBrace)
.add(SyntaxKind::LeftBracket)
.add(SyntaxKind::LeftParen)
.add(SyntaxKind::Dollar)
.add(SyntaxKind::Let)
.add(SyntaxKind::Set)
.add(SyntaxKind::Show)
.add(SyntaxKind::Context)
.add(SyntaxKind::If)
.add(SyntaxKind::While)
.add(SyntaxKind::For)
.add(SyntaxKind::Import)
.add(SyntaxKind::Include)
.add(SyntaxKind::Break)
.add(SyntaxKind::Continue)
.add(SyntaxKind::Return)
.add(SyntaxKind::None)
.add(SyntaxKind::Auto)
.add(SyntaxKind::Int)
.add(SyntaxKind::Float)
.add(SyntaxKind::Bool)
.add(SyntaxKind::Numeric)
.add(SyntaxKind::Str)
.add(SyntaxKind::Label)
.add(SyntaxKind::RawDelim);
pub const ATOMIC_CODE_PRIMARY: SyntaxSet = syntax_set!(
Ident,
LeftBrace,
LeftBracket,
LeftParen,
Dollar,
Let,
Set,
Show,
Context,
If,
While,
For,
Import,
Include,
Break,
Continue,
Return,
None,
Auto,
Int,
Float,
Bool,
Numeric,
Str,
Label,
RawDelim,
);
/// Syntax kinds that are unary operators.
pub const UNARY_OP: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Plus)
.add(SyntaxKind::Minus)
.add(SyntaxKind::Not);
pub const UNARY_OP: SyntaxSet = syntax_set!(Plus, Minus, Not);
/// Syntax kinds that are binary operators.
pub const BINARY_OP: SyntaxSet = SyntaxSet::new()
.add(SyntaxKind::Plus)
.add(SyntaxKind::Minus)
.add(SyntaxKind::Star)
.add(SyntaxKind::Slash)
.add(SyntaxKind::And)
.add(SyntaxKind::Or)
.add(SyntaxKind::EqEq)
.add(SyntaxKind::ExclEq)
.add(SyntaxKind::Lt)
.add(SyntaxKind::LtEq)
.add(SyntaxKind::Gt)
.add(SyntaxKind::GtEq)
.add(SyntaxKind::Eq)
.add(SyntaxKind::In)
.add(SyntaxKind::PlusEq)
.add(SyntaxKind::HyphEq)
.add(SyntaxKind::StarEq)
.add(SyntaxKind::SlashEq);
pub const BINARY_OP: SyntaxSet = syntax_set!(
Plus, Minus, Star, Slash, And, Or, EqEq, ExclEq, Lt, LtEq, Gt, GtEq, Eq, In, PlusEq,
HyphEq, StarEq, SlashEq,
);
/// Syntax kinds that can start an item in an array or dictionary: anything
/// that can start a code expression, plus `Dots`.
pub const ARRAY_OR_DICT_ITEM: SyntaxSet = CODE_EXPR.add(SyntaxKind::Dots);

View File

@ -35,6 +35,42 @@ multiline.
= This
is not.
--- heading-trailing-whitespace ---
// Whether headings contain trailing whitespace with or without comments/labels.
// Labels are special cased to immediately end headings in the parser, but also
// have unique whitespace behavior.
#let join(..xs) = xs.pos().join()
#let head(h) = heading(depth: 1, h)
// No whitespace.
#test(head[h], [= h])
#test(head[h], [= h/**/])
#test(head[h], [= h<a>])
#test(head[h], [= h/**/<b>])
// Label behaves differently than normal trailing space and comment.
#test(head(join[h][ ]), [= h ])
#test(head(join[h][ ]), [= h /**/])
#test(join(head[h])[ ], [= h <c>])
// Combinations.
#test(head(join[h][ ][ ]), [= h /**/ ])
#test(join(head[h])[ ][ ], [= h <d> ])
#test(head(join[h][ ]), [= h /**/<e>])
#test(join(head[h])[ ], [= h/**/ <f>])
// The first space attaches to the heading, but the second does not.
#test(join(head(join[h][ ]))[ ], [= h /**/ <g>])
--- heading-leading-whitespace ---
// Test that leading whitespace and comments don't matter.
#test[= h][= h]
#test[= h][= /**/ /**/ h]
#test[= h][= /*
comment spans lines
*/ h]
--- heading-show-where ---
// Test styling.
#show heading.where(level: 5): it => block(