From 88d86714a1e8c2f9ef8b77d4bcf7d44fa4e4dd26 Mon Sep 17 00:00:00 2001
From: Ian Wrzesinski
Date: Mon, 21 Oct 2024 22:18:23 -0400
Subject: [PATCH] 9. Parse math field access in the lexer

---
Notes (ignored by `git am`, not part of the commit message):
  - In math mode the lexer now returns a complete `FieldAccess` node for
    `ident.field` chains (nested left-to-right), so the parser no longer
    clones the lexer to look ahead past a `.`.
  - A field name must start with a char accepted by `is_math_id_start` and
    continues while `is_math_id_continue` holds.

 crates/typst-syntax/src/lexer.rs  | 41 ++++++++++++++++++++++++++++---
 crates/typst-syntax/src/parser.rs | 20 +++------------
 crates/typst-syntax/src/set.rs    |  1 +
 tests/suite/math/symbols.typ      | 29 ++++++++++++++++++++++
 4 files changed, 71 insertions(+), 20 deletions(-)
 create mode 100644 tests/suite/math/symbols.typ

diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index d2173f505..4a43c15ff 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -109,7 +109,10 @@ impl Lexer<'_> {
             Some('`') if self.mode != LexMode::Math => return self.raw(),
             Some(c) => match self.mode {
                 LexMode::Markup => self.markup(start, c),
-                LexMode::Math => self.math(start, c),
+                LexMode::Math => match self.math(start, c) {
+                    (kind, None) => kind,
+                    (kind, Some(node)) => return (kind, node),
+                },
                 LexMode::Code => self.code(start, c),
             },
 
@@ -507,8 +510,8 @@ impl Lexer<'_> {
 
 /// Math.
 impl Lexer<'_> {
-    fn math(&mut self, start: usize, c: char) -> SyntaxKind {
-        match c {
+    fn math(&mut self, start: usize, c: char) -> (SyntaxKind, Option<SyntaxNode>) {
+        let kind = match c {
             '\\' => self.backslash(),
             '"' => self.string(),
 
@@ -561,11 +564,41 @@ impl Lexer<'_> {
             // Identifiers.
             c if is_math_id_start(c) && self.s.at(is_math_id_continue) => {
                 self.s.eat_while(is_math_id_continue);
-                SyntaxKind::MathIdent
+                let (kind, node) = self.math_ident_or_field(start);
+                return (kind, Some(node));
             }
 
             // Other math atoms.
             _ => self.math_text(start, c),
+        };
+        (kind, None)
+    }
+
+    /// Parse a single `MathIdent` or an entire `FieldAccess`.
+    fn math_ident_or_field(&mut self, start: usize) -> (SyntaxKind, SyntaxNode) {
+        let mut kind = SyntaxKind::MathIdent;
+        let mut node = SyntaxNode::leaf(kind, self.s.from(start));
+        while let Some(ident) = self.maybe_dot_ident() {
+            kind = SyntaxKind::FieldAccess;
+            let field_children = vec![
+                node,
+                SyntaxNode::leaf(SyntaxKind::Dot, '.'),
+                SyntaxNode::leaf(SyntaxKind::Ident, ident),
+            ];
+            node = SyntaxNode::inner(kind, field_children);
+        }
+        (kind, node)
+    }
+
+    /// If at a dot and a math identifier, eat and return the identifier.
+    fn maybe_dot_ident(&mut self) -> Option<&str> {
+        if self.s.scout(1).is_some_and(is_math_id_start) && self.s.eat_if('.') {
+            let ident_start = self.s.cursor();
+            self.s.eat();
+            self.s.eat_while(is_math_id_continue);
+            Some(self.s.from(ident_start))
+        } else {
+            None
         }
     }
 
diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs
index 6fd0878df..be065ca60 100644
--- a/crates/typst-syntax/src/parser.rs
+++ b/crates/typst-syntax/src/parser.rs
@@ -6,9 +6,7 @@ use ecow::{eco_format, EcoString};
 use unicode_math_class::MathClass;
 
 use crate::set::{syntax_set, SyntaxSet};
-use crate::{
-    ast, is_ident, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode,
-};
+use crate::{ast, is_newline, set, LexMode, Lexer, SyntaxError, SyntaxKind, SyntaxNode};
 
 /// Parses a source file as top-level markup.
 pub fn parse(text: &str) -> SyntaxNode {
@@ -261,21 +259,11 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
     let mut continuable = false;
     match p.current() {
         SyntaxKind::Hash => embedded_code_expr(p),
-        SyntaxKind::MathIdent => {
+        // The lexer manages creating full FieldAccess nodes if needed.
+        SyntaxKind::MathIdent | SyntaxKind::FieldAccess => {
             continuable = true;
             p.eat();
-            while p.directly_at(SyntaxKind::Text) && p.current_text() == "." && {
-                let mut copy = p.lexer.clone();
-                let start = copy.cursor();
-                let next = copy.next().0;
-                let end = copy.cursor();
-                matches!(next, SyntaxKind::MathIdent | SyntaxKind::Text)
-                    && is_ident(&p.text[start..end])
-            } {
-                p.convert_and_eat(SyntaxKind::Dot);
-                p.convert_and_eat(SyntaxKind::Ident);
-                p.wrap(m, SyntaxKind::FieldAccess);
-            }
+            // Parse a function call for an identifier or field access.
             if min_prec < 3 && p.directly_at(SyntaxKind::Text) && p.current_text() == "("
             {
                 math_args(p);
diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs
index f3f1ba240..014aaf2f7 100644
--- a/crates/typst-syntax/src/set.rs
+++ b/crates/typst-syntax/src/set.rs
@@ -58,6 +58,7 @@ pub const STMT: SyntaxSet = syntax_set!(Let, Set, Show, Import, Include, Return)
 pub const MATH_EXPR: SyntaxSet = syntax_set!(
     Hash,
     MathIdent,
+    FieldAccess,
     Text,
     MathShorthand,
     Linebreak,
diff --git a/tests/suite/math/symbols.typ b/tests/suite/math/symbols.typ
new file mode 100644
index 000000000..65a483162
--- /dev/null
+++ b/tests/suite/math/symbols.typ
@@ -0,0 +1,29 @@
+// Test math symbol edge cases.
+
+--- math-symbol-basic ---
+#let sym = symbol("s", ("basic", "s"))
+#test($sym.basic$, $#"s"$)
+
+--- math-symbol-underscore ---
+#let sym = symbol("s", ("test_underscore", "s"))
+// Error: 6-10 unknown symbol modifier
+$sym.test_underscore$
+
+--- math-symbol-dash ---
+#let sym = symbol("s", ("test-dash", "s"))
+// Error: 6-10 unknown symbol modifier
+$sym.test-dash$
+
+--- math-symbol-double ---
+#let sym = symbol("s", ("test.basic", "s"))
+#test($sym.test.basic$, $#"s"$)
+
+--- math-symbol-double-underscore ---
+#let sym = symbol("s", ("one.test_underscore", "s"))
+// Error: 10-14 unknown symbol modifier
+$sym.one.test_underscore$
+
+--- math-symbol-double-dash ---
+#let sym = symbol("s", ("one.test-dash", "s"))
+// Error: 10-14 unknown symbol modifier
+$sym.one.test-dash$