From 0b8b7d0f233f748a5c12d1b8e31f657803122eba Mon Sep 17 00:00:00 2001
From: Ian Wrzesinski
Date: Sat, 20 Jul 2024 21:21:53 -0500
Subject: [PATCH] Just add MathText SyntaxKind

---
 crates/typst-eval/src/code.rs        |  1 +
 crates/typst-eval/src/math.rs        | 14 +++++++++++-
 crates/typst-syntax/src/ast.rs       | 32 ++++++++++++++++++++++++++++
 crates/typst-syntax/src/highlight.rs |  1 +
 crates/typst-syntax/src/kind.rs      |  3 +++
 crates/typst-syntax/src/lexer.rs     |  9 +++++++-
 crates/typst-syntax/src/parser.rs    | 14 ++++++------
 crates/typst-syntax/src/set.rs       |  1 +
 8 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/crates/typst-eval/src/code.rs b/crates/typst-eval/src/code.rs
index 34373fd4a..2baf4ea9e 100644
--- a/crates/typst-eval/src/code.rs
+++ b/crates/typst-eval/src/code.rs
@@ -99,6 +99,7 @@ impl Eval for ast::Expr<'_> {
             Self::Term(v) => v.eval(vm).map(Value::Content),
             Self::Equation(v) => v.eval(vm).map(Value::Content),
             Self::Math(v) => v.eval(vm).map(Value::Content),
+            Self::MathText(v) => v.eval(vm).map(Value::Content),
             Self::MathIdent(v) => v.eval(vm),
             Self::MathShorthand(v) => v.eval(vm),
             Self::MathAlignPoint(v) => v.eval(vm).map(Value::Content),
diff --git a/crates/typst-eval/src/math.rs b/crates/typst-eval/src/math.rs
index 51dc0a3d5..f93f147eb 100644
--- a/crates/typst-eval/src/math.rs
+++ b/crates/typst-eval/src/math.rs
@@ -5,7 +5,7 @@ use typst_library::math::{
     AlignPointElem, AttachElem, FracElem, LrElem, PrimesElem, RootElem,
 };
 use typst_library::text::TextElem;
-use typst_syntax::ast::{self, AstNode};
+use typst_syntax::ast::{self, AstNode, MathTextKind};
 
 use crate::{Eval, Vm};
 
@@ -20,6 +20,18 @@ impl Eval for ast::Math<'_> {
     }
 }
 
+impl Eval for ast::MathText<'_> {
+    type Output = Content;
+
+    fn eval(self, _: &mut Vm) -> SourceResult<Self::Output> {
+        match self.get() {
+            // TODO: change to `SymbolElem` when added
+            MathTextKind::Character(c) => Ok(Value::Symbol(Symbol::single(c)).display()),
+            MathTextKind::Number(text) => Ok(TextElem::packed(text.clone())),
+        }
+    }
+}
+
 impl Eval for ast::MathIdent<'_> {
     type Output = Value;
 
diff --git a/crates/typst-syntax/src/ast.rs b/crates/typst-syntax/src/ast.rs
index 19e123727..014e8392e 100644
--- a/crates/typst-syntax/src/ast.rs
+++ b/crates/typst-syntax/src/ast.rs
@@ -123,6 +123,8 @@ pub enum Expr<'a> {
     Equation(Equation<'a>),
     /// The contents of a mathematical equation: `x^2 + 1`.
     Math(Math<'a>),
+    /// A lone text fragment in math: `x`, `25`, `3.1415`, `=`, `|`, `[`.
+    MathText(MathText<'a>),
     /// An identifier in math: `pi`.
     MathIdent(MathIdent<'a>),
     /// A shorthand for a unicode codepoint in math: `a <= b`.
@@ -233,6 +235,7 @@ impl<'a> AstNode<'a> for Expr<'a> {
             SyntaxKind::TermItem => node.cast().map(Self::Term),
             SyntaxKind::Equation => node.cast().map(Self::Equation),
             SyntaxKind::Math => node.cast().map(Self::Math),
+            SyntaxKind::MathText => node.cast().map(Self::MathText),
             SyntaxKind::MathIdent => node.cast().map(Self::MathIdent),
             SyntaxKind::MathShorthand => node.cast().map(Self::MathShorthand),
             SyntaxKind::MathAlignPoint => node.cast().map(Self::MathAlignPoint),
@@ -297,6 +300,7 @@ impl<'a> AstNode<'a> for Expr<'a> {
             Self::Term(v) => v.to_untyped(),
             Self::Equation(v) => v.to_untyped(),
             Self::Math(v) => v.to_untyped(),
+            Self::MathText(v) => v.to_untyped(),
             Self::MathIdent(v) => v.to_untyped(),
             Self::MathShorthand(v) => v.to_untyped(),
             Self::MathAlignPoint(v) => v.to_untyped(),
@@ -706,6 +710,34 @@ impl<'a> Math<'a> {
     }
 }
 
+node! {
+    /// A lone text fragment in math: `x`, `25`, `3.1415`, `=`, `|`, `[`.
+    MathText
+}
+
+/// The kind of text held by a `MathText`: a single character or a number.
+pub enum MathTextKind<'a> {
+    Character(char),
+    Number(&'a EcoString),
+}
+
+impl<'a> MathText<'a> {
+    /// Return the underlying text, classified as a character or a number.
+    pub fn get(self) -> MathTextKind<'a> {
+        let text = self.0.text();
+        let mut chars = text.chars();
+        let c = chars.next().unwrap();
+        if c.is_numeric() {
+            // Numbers are potentially grouped as multiple characters. This is
+            // done in `Lexer::math_text()`.
+            MathTextKind::Number(text)
+        } else {
+            assert!(chars.next().is_none());
+            MathTextKind::Character(c)
+        }
+    }
+}
+
 node! {
     /// An identifier in math: `pi`.
     MathIdent
diff --git a/crates/typst-syntax/src/highlight.rs b/crates/typst-syntax/src/highlight.rs
index c59a03384..cd815694d 100644
--- a/crates/typst-syntax/src/highlight.rs
+++ b/crates/typst-syntax/src/highlight.rs
@@ -171,6 +171,7 @@ pub fn highlight(node: &LinkedNode) -> Option<Tag> {
 
         SyntaxKind::Equation => None,
         SyntaxKind::Math => None,
+        SyntaxKind::MathText => None,
         SyntaxKind::MathIdent => highlight_ident(node),
         SyntaxKind::MathShorthand => Some(Tag::Escape),
         SyntaxKind::MathAlignPoint => Some(Tag::MathOperator),
diff --git a/crates/typst-syntax/src/kind.rs b/crates/typst-syntax/src/kind.rs
index b4a97a3e0..c24b47fe7 100644
--- a/crates/typst-syntax/src/kind.rs
+++ b/crates/typst-syntax/src/kind.rs
@@ -75,6 +75,8 @@ pub enum SyntaxKind {
 
     /// The contents of a mathematical equation: `x^2 + 1`.
     Math,
+    /// A lone text fragment in math: `x`, `25`, `3.1415`, `=`, `|`, `[`.
+    MathText,
     /// An identifier in math: `pi`.
     MathIdent,
     /// A shorthand for a unicode codepoint in math: `a <= b`.
@@ -408,6 +410,7 @@ impl SyntaxKind {
             Self::TermMarker => "term marker",
             Self::Equation => "equation",
             Self::Math => "math",
+            Self::MathText => "math text",
             Self::MathIdent => "math identifier",
             Self::MathShorthand => "math shorthand",
             Self::MathAlignPoint => "math alignment point",
diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs
index 17401044f..b8f2bf25f 100644
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@@ -685,6 +685,7 @@ impl Lexer<'_> {
             if s.eat_if('.') && !s.eat_while(char::is_numeric).is_empty() {
                 self.s = s;
             }
+            SyntaxKind::MathText
         } else {
             let len = self
                 .s
@@ -693,8 +694,14 @@ impl Lexer<'_> {
                 .next()
                 .map_or(0, str::len);
             self.s.jump(start + len);
+            if len > c.len_utf8() {
+                // Grapheme clusters are treated as normal text and stay grouped.
+                // This may need to change in the future.
+                SyntaxKind::Text
+            } else {
+                SyntaxKind::MathText
+            }
         }
-        SyntaxKind::Text
     }
 
     /// Handle named arguments in math function call.
diff --git a/crates/typst-syntax/src/parser.rs b/crates/typst-syntax/src/parser.rs
index 5de71cafc..55d5550b6 100644
--- a/crates/typst-syntax/src/parser.rs
+++ b/crates/typst-syntax/src/parser.rs
@@ -252,7 +252,9 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
             continuable = true;
             p.eat();
             // Parse a function call for an identifier or field access.
-            if min_prec < 3 && p.directly_at(SyntaxKind::Text) && p.current_text() == "("
+            if min_prec < 3
+                && p.directly_at(SyntaxKind::MathText)
+                && p.current_text() == "("
             {
                 math_args(p);
                 p.wrap(m, SyntaxKind::FuncCall);
@@ -264,10 +266,10 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
         | SyntaxKind::Comma
         | SyntaxKind::Semicolon
         | SyntaxKind::RightParen => {
-            p.convert_and_eat(SyntaxKind::Text);
+            p.convert_and_eat(SyntaxKind::MathText);
         }
 
-        SyntaxKind::Text | SyntaxKind::MathShorthand => {
+        SyntaxKind::Text | SyntaxKind::MathText | SyntaxKind::MathShorthand => {
             continuable = matches!(
                 math_class(p.current_text()),
                 None | Some(MathClass::Alphabetic)
@@ -316,7 +318,7 @@ fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
     let mut primed = false;
 
     while !p.end() && !p.at(stop) {
-        if p.directly_at(SyntaxKind::Text) && p.current_text() == "!" {
+        if p.directly_at(SyntaxKind::MathText) && p.current_text() == "!" {
             p.eat();
             p.wrap(m, SyntaxKind::Math);
             continue;
@@ -414,7 +416,7 @@ fn math_delimited(p: &mut Parser) {
             // We could be at the shorthand `|]`, which shouldn't be converted
-            // to a `Text` kind.
+            // to a `MathText` kind.
             if p.at(SyntaxKind::RightParen) {
-                p.convert_and_eat(SyntaxKind::Text);
+                p.convert_and_eat(SyntaxKind::MathText);
             } else {
                 p.eat();
             }
@@ -535,7 +537,7 @@ fn math_arg<'s>(p: &mut Parser<'s>, seen: &mut HashSet<&'s str>) -> bool {
     }
 
     let mut positional = true;
-    if p.at_set(syntax_set!(Text, MathIdent, Underscore)) {
+    if p.at_set(syntax_set!(MathText, MathIdent, Underscore)) {
         // Parses a named argument: `thickness: #12pt`.
         if let Some(named) = p.lexer.maybe_math_named_arg(start) {
             p.token.node = named;
diff --git a/crates/typst-syntax/src/set.rs b/crates/typst-syntax/src/set.rs
index 9eb457b84..a7b9a594a 100644
--- a/crates/typst-syntax/src/set.rs
+++ b/crates/typst-syntax/src/set.rs
@@ -64,6 +64,7 @@ pub const MATH_EXPR: SyntaxSet = syntax_set!(
     Semicolon,
     RightParen,
     Text,
+    MathText,
     MathShorthand,
     Linebreak,
     MathAlignPoint,