9. Parse math field access in the lexer

This commit is contained in:
Ian Wrzesinski 2024-10-21 22:18:23 -04:00
parent 09975d1133
commit 88d86714a1
4 changed files with 71 additions and 20 deletions

View File

@ -109,7 +109,10 @@ impl Lexer<'_> {
Some('`') if self.mode != LexMode::Math => return self.raw(),
Some(c) => match self.mode {
LexMode::Markup => self.markup(start, c),
LexMode::Math => self.math(start, c),
LexMode::Math => match self.math(start, c) {
(kind, None) => kind,
(kind, Some(node)) => return (kind, node),
},
LexMode::Code => self.code(start, c),
},
@ -507,8 +510,8 @@ impl Lexer<'_> {
/// Math.
impl Lexer<'_> {
fn math(&mut self, start: usize, c: char) -> SyntaxKind {
match c {
fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
let kind = match c {
'\\' => self.backslash(),
'"' => self.string(),
@ -561,11 +564,41 @@ impl Lexer<'_> {
// Identifiers.
c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
self.s.eat_while(is_math_id_continue);
SyntaxKind::MathIdent
let (kind, node) = self.math_ident_or_field(start);
return (kind, Some(node));
}
// Other math atoms.
_ => self.math_text(start, c),
};
(kind, None)
}
/// Lex a math identifier together with any `.field` accesses that follow it.
///
/// The text from `start` up to the current scanner position becomes the
/// initial `MathIdent` leaf. Each dot-identifier pair reported by
/// `maybe_dot_ident` then wraps the node built so far in a new `FieldAccess`
/// node, so chained accesses such as `a.b.c` nest to the left.
///
/// Returns the kind of the outermost node along with the node itself:
/// `MathIdent` if no field access followed, `FieldAccess` otherwise.
fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
    let mut outer_kind = SyntaxKind::MathIdent;
    let mut outer = SyntaxNode::leaf(SyntaxKind::MathIdent, self.s.from(start));
    loop {
        let Some(field) = self.maybe_dot_ident() else {
            break;
        };
        // The previous result becomes the target (first child) of the access.
        let dot = SyntaxNode::leaf(SyntaxKind::Dot, '.');
        let name = SyntaxNode::leaf(SyntaxKind::Ident, field);
        outer_kind = SyntaxKind::FieldAccess;
        outer = SyntaxNode::inner(SyntaxKind::FieldAccess, vec![outer, dot, name]);
    }
    (outer_kind, outer)
}
/// Consume a `.` and the math identifier directly after it, if both are there.
///
/// Returns the identifier's text (without the dot). Returns `None` when the
/// character after the cursor's current character is not a math identifier
/// start, or when the cursor is not at a `.`; in both cases the scanner is
/// left where it was.
fn maybe_dot_ident(&mut self) -> Option<&str> {
    // Look past the dot *before* eating it, so a dot that is not followed by
    // an identifier stays in the input for the regular math lexing path.
    let ident_follows = self.s.scout(1).is_some_and(is_math_id_start);
    if !ident_follows || !self.s.eat_if('.') {
        return None;
    }

    let name_start = self.s.cursor();
    // The first character was already validated by the lookahead above.
    self.s.eat();
    self.s.eat_while(is_math_id_continue);
    Some(self.s.from(name_start))
}

View File

@ -6,9 +6,7 @@ use ecow::{eco_format, EcoString};
use unicode_math_class::MathClass;
use crate::set::{syntax_set, SyntaxSet};
use crate::{
ast, is_ident, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode,
};
use crate::{ast, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode};
/// Parses a source file as top-level markup.
pub fn parse(text: &str) -> SyntaxNode {
@ -261,21 +259,11 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
let mut continuable = false;
match p.current() {
SyntaxKind::Hash => embedded_code_expr(p),
SyntaxKind::MathIdent => {
// The lexer manages creating full FieldAccess nodes if needed.
SyntaxKind::MathIdent | SyntaxKind::FieldAccess => {
continuable = true;
p.eat();
while p.directly_at(SyntaxKind::Text) && p.current_text() == "." && {
let mut copy = p.lexer.clone();
let start = copy.cursor();
let next = copy.next().0;
let end = copy.cursor();
matches!(next, SyntaxKind::MathIdent | SyntaxKind::Text)
&& is_ident(&p.text[start..end])
} {
p.convert_and_eat(SyntaxKind::Dot);
p.convert_and_eat(SyntaxKind::Ident);
p.wrap(m, SyntaxKind::FieldAccess);
}
// Parse a function call for an identifier or field access.
if min_prec < 3 && p.directly_at(SyntaxKind::Text) && p.current_text() == "("
{
math_args(p);

View File

@ -58,6 +58,7 @@ pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return)
pub const MATH_EXPR: SyntaxSet = syntax_set!(
Hash,
MathIdent,
FieldAccess,
Text,
MathShorthand,
Linebreak,

View File

@ -0,0 +1,29 @@
// Test math symbol edge cases.
--- math-symbol-basic ---
#let sym = symbol("s", ("basic", "s"))
#test($sym.basic$, $#"s"$)
--- math-symbol-underscore ---
#let sym = symbol("s", ("test_underscore", "s"))
// Error: 6-10 unknown symbol modifier
$sym.test_underscore$
--- math-symbol-dash ---
#let sym = symbol("s", ("test-dash", "s"))
// Error: 6-10 unknown symbol modifier
$sym.test-dash$
--- math-symbol-double ---
#let sym = symbol("s", ("test.basic", "s"))
#test($sym.test.basic$, $#"s"$)
--- math-symbol-double-underscore ---
#let sym = symbol("s", ("one.test_underscore", "s"))
// Error: 10-14 unknown symbol modifier
$sym.one.test_underscore$
--- math-symbol-double-dash ---
#let sym = symbol("s", ("one.test-dash", "s"))
// Error: 10-14 unknown symbol modifier
$sym.one.test-dash$