From 4d8a9863d78a9151e744ed8468a0463234cf06b9 Mon Sep 17 00:00:00 2001 From: T0mstone Date: Mon, 23 Jun 2025 15:34:25 +0200 Subject: [PATCH] Allow multi-character symbols/variants --- Cargo.lock | 2 +- Cargo.toml | 2 +- crates/typst-eval/src/markup.rs | 4 +- crates/typst-eval/src/math.rs | 2 +- crates/typst-layout/src/math/text.rs | 43 +++++---- .../typst-library/src/foundations/symbol.rs | 90 ++++++++++--------- crates/typst-library/src/math/accent.rs | 4 +- crates/typst-library/src/math/matrix.rs | 2 +- crates/typst-library/src/symbols.rs | 2 +- crates/typst-realize/src/lib.rs | 8 +- 10 files changed, 85 insertions(+), 74 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 550c4141a..509cfd0da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -413,7 +413,7 @@ dependencies = [ [[package]] name = "codex" version = "0.1.1" -source = "git+https://github.com/typst/codex?rev=a5428cb#a5428cb9c81a41354d44b44dbd5a16a710bbd928" +source = "git+https://github.com/typst/codex?rev=2f7efc3#2f7efc3b824632bcc917cebf4ae91caeca224fbc" [[package]] name = "color-print" diff --git a/Cargo.toml b/Cargo.toml index 6cc59ee89..00f1220fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,7 +47,7 @@ clap = { version = "4.4", features = ["derive", "env", "wrap_help"] } clap_complete = "4.2.1" clap_mangen = "0.2.10" codespan-reporting = "0.11" -codex = { git = "https://github.com/typst/codex", rev = "a5428cb" } +codex = { git = "https://github.com/typst/codex", rev = "2f7efc3" } color-print = "0.3.6" comemo = "0.4" csv = "1" diff --git a/crates/typst-eval/src/markup.rs b/crates/typst-eval/src/markup.rs index 9118ded56..6b5daf5b6 100644 --- a/crates/typst-eval/src/markup.rs +++ b/crates/typst-eval/src/markup.rs @@ -123,7 +123,7 @@ impl Eval for ast::Escape<'_> { type Output = Value; fn eval(self, _: &mut Vm) -> SourceResult { - Ok(Value::Symbol(Symbol::single(self.get()))) + Ok(Value::Symbol(Symbol::runtime_char(self.get()))) } } @@ -131,7 +131,7 @@ impl Eval for ast::Shorthand<'_> { type Output = 
Value; fn eval(self, _: &mut Vm) -> SourceResult { - Ok(Value::Symbol(Symbol::single(self.get()))) + Ok(Value::Symbol(Symbol::runtime_char(self.get()))) } } diff --git a/crates/typst-eval/src/math.rs b/crates/typst-eval/src/math.rs index 0e271a089..50e396212 100644 --- a/crates/typst-eval/src/math.rs +++ b/crates/typst-eval/src/math.rs @@ -49,7 +49,7 @@ impl Eval for ast::MathShorthand<'_> { type Output = Value; fn eval(self, _: &mut Vm) -> SourceResult { - Ok(Value::Symbol(Symbol::single(self.get()))) + Ok(Value::Symbol(Symbol::runtime_char(self.get()))) } } diff --git a/crates/typst-layout/src/math/text.rs b/crates/typst-layout/src/math/text.rs index 67dc0a2c8..0db9f0f1d 100644 --- a/crates/typst-layout/src/math/text.rs +++ b/crates/typst-layout/src/math/text.rs @@ -120,25 +120,32 @@ pub fn layout_symbol( // Switch dotless char to normal when we have the dtls OpenType feature. // This should happen before the main styling pass. let dtls = style_dtls(); - let (unstyled_c, symbol_styles) = match try_dotless(elem.text) { - Some(c) if has_dtls_feat(ctx.font) => (c, styles.chain(&dtls)), - _ => (elem.text, styles), - }; - let c = styled_char(styles, unstyled_c, true); - let fragment: MathFragment = - match GlyphFragment::new_char(ctx.font, symbol_styles, c, elem.span()) { - Ok(mut glyph) => { - adjust_glyph_layout(&mut glyph, ctx, styles); - glyph.into() - } - Err(_) => { - // Not in the math font, fallback to normal inline text layout. - // TODO: Should replace this with proper fallback in [`GlyphFragment::new`]. - layout_inline_text(c.encode_utf8(&mut [0; 4]), elem.span(), ctx, styles)? 
- .into() - } + for c in elem.text.chars() { + let (unstyled_c, symbol_styles) = match try_dotless(c) { + Some(c) if has_dtls_feat(ctx.font) => (c, styles.chain(&dtls)), + _ => (c, styles), }; - ctx.push(fragment); + let c = styled_char(styles, unstyled_c, true); + let fragment: MathFragment = + match GlyphFragment::new_char(ctx.font, symbol_styles, c, elem.span()) { + Ok(mut glyph) => { + adjust_glyph_layout(&mut glyph, ctx, styles); + glyph.into() + } + Err(_) => { + // Not in the math font, fallback to normal inline text layout. + // TODO: Should replace this with proper fallback in [`GlyphFragment::new`]. + layout_inline_text( + c.encode_utf8(&mut [0; 4]), + elem.span(), + ctx, + styles, + )? + .into() + } + }; + ctx.push(fragment); + } Ok(()) } diff --git a/crates/typst-library/src/foundations/symbol.rs b/crates/typst-library/src/foundations/symbol.rs index f57bb0c2a..84682bfe7 100644 --- a/crates/typst-library/src/foundations/symbol.rs +++ b/crates/typst-library/src/foundations/symbol.rs @@ -1,5 +1,5 @@ use std::collections::{BTreeSet, HashMap}; -use std::fmt::{self, Debug, Display, Formatter, Write}; +use std::fmt::{self, Debug, Display, Formatter}; use std::sync::Arc; use codex::ModifierSet; @@ -52,7 +52,7 @@ pub struct Symbol(Repr); #[derive(Clone, Eq, PartialEq, Hash)] enum Repr { /// A native symbol that has no named variant. - Single(char), + Single(&'static str), /// A native symbol with multiple named variants. Complex(&'static [Variant<&'static str>]), /// A symbol with multiple named variants, where some modifiers may have @@ -61,9 +61,9 @@ enum Repr { Modified(Arc<(List, ModifierSet)>), } -/// A symbol variant, consisting of a set of modifiers, a character, and an +/// A symbol variant, consisting of a set of modifiers, the variant's value, and an /// optional deprecation message. -type Variant = (ModifierSet, char, Option); +type Variant = (ModifierSet, S, Option); /// A collection of symbols. 
#[derive(Clone, Eq, PartialEq, Hash)] @@ -73,9 +73,9 @@ enum List { } impl Symbol { - /// Create a new symbol from a single character. - pub const fn single(c: char) -> Self { - Self(Repr::Single(c)) + /// Create a new symbol from a single value. + pub const fn single(value: &'static str) -> Self { + Self(Repr::Single(value)) } /// Create a symbol with a static variant list. @@ -85,6 +85,11 @@ impl Symbol { Self(Repr::Complex(list)) } + /// Create a symbol from a runtime char. + pub fn runtime_char(c: char) -> Self { + Self::runtime(Box::new([(ModifierSet::default(), c.into(), None)])) + } + /// Create a symbol with a runtime variant list. #[track_caller] pub fn runtime(list: Box<[Variant]>) -> Self { @@ -92,10 +97,10 @@ impl Symbol { Self(Repr::Modified(Arc::new((List::Runtime(list), ModifierSet::default())))) } - /// Get the symbol's character. - pub fn get(&self) -> char { + /// Get the symbol's value. + pub fn get(&self) -> &str { match &self.0 { - Repr::Single(c) => *c, + Repr::Single(value) => value, Repr::Complex(_) => ModifierSet::<&'static str>::default() .best_match_in(self.variants().map(|(m, c, _)| (m, c))) .unwrap(), @@ -108,27 +113,27 @@ impl Symbol { /// Try to get the function associated with the symbol, if any. 
pub fn func(&self) -> StrResult { match self.get() { - '⌈' => Ok(crate::math::ceil::func()), - '⌊' => Ok(crate::math::floor::func()), - '–' => Ok(crate::math::accent::dash::func()), - '⋅' | '\u{0307}' => Ok(crate::math::accent::dot::func()), - '¨' => Ok(crate::math::accent::dot_double::func()), - '\u{20db}' => Ok(crate::math::accent::dot_triple::func()), - '\u{20dc}' => Ok(crate::math::accent::dot_quad::func()), - '∼' => Ok(crate::math::accent::tilde::func()), - '´' => Ok(crate::math::accent::acute::func()), - '˝' => Ok(crate::math::accent::acute_double::func()), - '˘' => Ok(crate::math::accent::breve::func()), - 'ˇ' => Ok(crate::math::accent::caron::func()), - '^' => Ok(crate::math::accent::hat::func()), - '`' => Ok(crate::math::accent::grave::func()), - '¯' => Ok(crate::math::accent::macron::func()), - '○' => Ok(crate::math::accent::circle::func()), - '→' => Ok(crate::math::accent::arrow::func()), - '←' => Ok(crate::math::accent::arrow_l::func()), - '↔' => Ok(crate::math::accent::arrow_l_r::func()), - '⇀' => Ok(crate::math::accent::harpoon::func()), - '↼' => Ok(crate::math::accent::harpoon_lt::func()), + "⌈" => Ok(crate::math::ceil::func()), + "⌊" => Ok(crate::math::floor::func()), + "–" => Ok(crate::math::accent::dash::func()), + "⋅" | "\u{0307}" => Ok(crate::math::accent::dot::func()), + "¨" => Ok(crate::math::accent::dot_double::func()), + "\u{20db}" => Ok(crate::math::accent::dot_triple::func()), + "\u{20dc}" => Ok(crate::math::accent::dot_quad::func()), + "∼" => Ok(crate::math::accent::tilde::func()), + "´" => Ok(crate::math::accent::acute::func()), + "˝" => Ok(crate::math::accent::acute_double::func()), + "˘" => Ok(crate::math::accent::breve::func()), + "ˇ" => Ok(crate::math::accent::caron::func()), + "^" => Ok(crate::math::accent::hat::func()), + "`" => Ok(crate::math::accent::grave::func()), + "¯" => Ok(crate::math::accent::macron::func()), + "○" => Ok(crate::math::accent::circle::func()), + "→" => Ok(crate::math::accent::arrow::func()), + "←" => 
Ok(crate::math::accent::arrow_l::func()), + "↔" => Ok(crate::math::accent::arrow_l_r::func()), + "⇀" => Ok(crate::math::accent::harpoon::func()), + "↼" => Ok(crate::math::accent::harpoon_lt::func()), _ => bail!("symbol {self} is not callable"), } } @@ -163,7 +168,7 @@ impl Symbol { /// The characters that are covered by this symbol. pub fn variants(&self) -> impl Iterator> { match &self.0 { - Repr::Single(c) => Variants::Single(Some(*c).into_iter()), + Repr::Single(value) => Variants::Single(Some(*value).into_iter()), Repr::Complex(list) => Variants::Static(list.iter()), Repr::Modified(arc) => arc.0.variants(), } @@ -279,7 +284,7 @@ impl Symbol { impl Display for Symbol { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.write_char(self.get()) + f.write_str(self.get()) } } @@ -362,7 +367,7 @@ impl Serialize for Symbol { where S: Serializer, { - serializer.serialize_char(self.get()) + serializer.serialize_str(self.get()) } } @@ -377,11 +382,12 @@ impl List { } /// A value that can be cast to a symbol. -pub struct SymbolVariant(EcoString, char); +pub struct SymbolVariant(EcoString, EcoString); cast! { SymbolVariant, - c: char => Self(EcoString::new(), c), + c: char => Self(EcoString::new(), c.into()), + s: EcoString => Self(EcoString::new(), s), array: Array => { let mut iter = array.into_iter(); match (iter.next(), iter.next(), iter.next()) { @@ -393,7 +399,7 @@ cast! { /// Iterator over variants. 
enum Variants<'a> { - Single(std::option::IntoIter), + Single(std::option::IntoIter<&'static str>), Static(std::slice::Iter<'static, Variant<&'static str>>), Runtime(std::slice::Iter<'a, Variant>), } @@ -406,7 +412,7 @@ impl<'a> Iterator for Variants<'a> { Self::Single(iter) => Some((ModifierSet::default(), iter.next()?, None)), Self::Static(list) => list.next().copied(), Self::Runtime(list) => { - list.next().map(|(m, c, d)| (m.as_deref(), *c, d.as_deref())) + list.next().map(|(m, s, d)| (m.as_deref(), s.as_str(), d.as_deref())) } } } @@ -415,21 +421,21 @@ impl<'a> Iterator for Variants<'a> { /// A single character. #[elem(Repr, PlainText)] pub struct SymbolElem { - /// The symbol's character. + /// The symbol's value. #[required] - pub text: char, // This is called `text` for consistency with `TextElem`. + pub text: EcoString, // This is called `text` for consistency with `TextElem`. } impl SymbolElem { /// Create a new packed symbol element. - pub fn packed(text: impl Into) -> Content { + pub fn packed(text: impl Into) -> Content { Self::new(text.into()).pack() } } impl PlainText for Packed { fn plain_text(&self, text: &mut EcoString) { - text.push(self.text); + text.push_str(&self.text); } } diff --git a/crates/typst-library/src/math/accent.rs b/crates/typst-library/src/math/accent.rs index c8569ea23..e22fddd93 100644 --- a/crates/typst-library/src/math/accent.rs +++ b/crates/typst-library/src/math/accent.rs @@ -189,7 +189,7 @@ cast! 
{ self => self.0.into_value(), v: char => Self::new(v), v: Content => match v.to_packed::<SymbolElem>() { - Some(elem) => Self::new(elem.text), - None => bail!("expected a symbol"), + Some(elem) if elem.text.chars().count() == 1 => Self::new(elem.text.chars().next().unwrap()), + _ => bail!("expected a single-character symbol"), }, } diff --git a/crates/typst-library/src/math/matrix.rs b/crates/typst-library/src/math/matrix.rs index b6c4654ed..823aa02c1 100644 --- a/crates/typst-library/src/math/matrix.rs +++ b/crates/typst-library/src/math/matrix.rs @@ -281,7 +281,7 @@ cast! { Delimiter, self => self.0.into_value(), _: NoneValue => Self::none(), - v: Symbol => Self::char(v.get())?, + v: Symbol => Self::char(v.get().parse::<char>().map_err(|_| "symbol value is longer than one character")?)?, v: char => Self::char(v)?, } diff --git a/crates/typst-library/src/symbols.rs b/crates/typst-library/src/symbols.rs index 0588ace95..92f847e0b 100644 --- a/crates/typst-library/src/symbols.rs +++ b/crates/typst-library/src/symbols.rs @@ -39,7 +39,7 @@ impl From for Scope { impl From<codex::Symbol> for Symbol { fn from(symbol: codex::Symbol) -> Self { match symbol { - codex::Symbol::Single(c) => Symbol::single(c), + codex::Symbol::Single(value) => Symbol::single(value), codex::Symbol::Multi(list) => Symbol::list(list), } } diff --git a/crates/typst-realize/src/lib.rs b/crates/typst-realize/src/lib.rs index 526f4631a..09090abc8 100644 --- a/crates/typst-realize/src/lib.rs +++ b/crates/typst-realize/src/lib.rs @@ -302,9 +302,7 @@ fn visit_kind_rules<'a>( // textual elements via `TEXTUAL` grouping. However, in math, this is // not desirable, so we just do it on a per-element basis. 
if let Some(elem) = content.to_packed::<SymbolElem>() { - if let Some(m) = - find_regex_match_in_str(elem.text.encode_utf8(&mut [0; 4]), styles) - { + if let Some(m) = find_regex_match_in_str(elem.text.as_str(), styles) { visit_regex_match(s, &[(content, styles)], m)?; return Ok(true); } @@ -325,7 +323,7 @@ fn visit_kind_rules<'a>( // Symbols in non-math content transparently convert to `TextElem` so we // don't have to handle them in non-math layout. if let Some(elem) = content.to_packed::<SymbolElem>() { - let mut text = TextElem::packed(elem.text).spanned(elem.span()); + let mut text = TextElem::packed(elem.text.clone()).spanned(elem.span()); if let Some(label) = elem.label() { text.set_label(label); } @@ -1240,7 +1238,7 @@ fn visit_regex_match<'a>( let len = if let Some(elem) = content.to_packed::<TextElem>() { elem.text.len() } else if let Some(elem) = content.to_packed::<SymbolElem>() { - elem.text.len_utf8() + elem.text.len() } else { 1 // The rest are Ascii, so just one byte. };