Allow multi-character symbols/variants

This commit is contained in:
T0mstone 2025-06-23 15:34:25 +02:00
parent 74b1b10986
commit 4d8a9863d7
10 changed files with 85 additions and 74 deletions

2
Cargo.lock generated
View File

@ -413,7 +413,7 @@ dependencies = [
[[package]]
name = "codex"
version = "0.1.1"
source = "git+https://github.com/typst/codex?rev=a5428cb#a5428cb9c81a41354d44b44dbd5a16a710bbd928"
source = "git+https://github.com/typst/codex?rev=2f7efc3#2f7efc3b824632bcc917cebf4ae91caeca224fbc"
[[package]]
name = "color-print"

View File

@ -47,7 +47,7 @@ clap = { version = "4.4", features = ["derive", "env", "wrap_help"] }
clap_complete = "4.2.1"
clap_mangen = "0.2.10"
codespan-reporting = "0.11"
codex = { git = "https://github.com/typst/codex", rev = "a5428cb" }
codex = { git = "https://github.com/typst/codex", rev = "2f7efc3" }
color-print = "0.3.6"
comemo = "0.4"
csv = "1"

View File

@ -123,7 +123,7 @@ impl Eval for ast::Escape<'_> {
type Output = Value;
fn eval(self, _: &mut Vm) -> SourceResult<Self::Output> {
Ok(Value::Symbol(Symbol::single(self.get())))
Ok(Value::Symbol(Symbol::runtime_char(self.get())))
}
}
@ -131,7 +131,7 @@ impl Eval for ast::Shorthand<'_> {
type Output = Value;
fn eval(self, _: &mut Vm) -> SourceResult<Self::Output> {
Ok(Value::Symbol(Symbol::single(self.get())))
Ok(Value::Symbol(Symbol::runtime_char(self.get())))
}
}

View File

@ -49,7 +49,7 @@ impl Eval for ast::MathShorthand<'_> {
type Output = Value;
fn eval(self, _: &mut Vm) -> SourceResult<Self::Output> {
Ok(Value::Symbol(Symbol::single(self.get())))
Ok(Value::Symbol(Symbol::runtime_char(self.get())))
}
}

View File

@ -120,25 +120,32 @@ pub fn layout_symbol(
// Switch dotless char to normal when we have the dtls OpenType feature.
// This should happen before the main styling pass.
let dtls = style_dtls();
let (unstyled_c, symbol_styles) = match try_dotless(elem.text) {
Some(c) if has_dtls_feat(ctx.font) => (c, styles.chain(&dtls)),
_ => (elem.text, styles),
};
let c = styled_char(styles, unstyled_c, true);
let fragment: MathFragment =
match GlyphFragment::new_char(ctx.font, symbol_styles, c, elem.span()) {
Ok(mut glyph) => {
adjust_glyph_layout(&mut glyph, ctx, styles);
glyph.into()
}
Err(_) => {
// Not in the math font, fallback to normal inline text layout.
// TODO: Should replace this with proper fallback in [`GlyphFragment::new`].
layout_inline_text(c.encode_utf8(&mut [0; 4]), elem.span(), ctx, styles)?
.into()
}
for c in elem.text.chars() {
let (unstyled_c, symbol_styles) = match try_dotless(c) {
Some(c) if has_dtls_feat(ctx.font) => (c, styles.chain(&dtls)),
_ => (c, styles),
};
ctx.push(fragment);
let c = styled_char(styles, unstyled_c, true);
let fragment: MathFragment =
match GlyphFragment::new_char(ctx.font, symbol_styles, c, elem.span()) {
Ok(mut glyph) => {
adjust_glyph_layout(&mut glyph, ctx, styles);
glyph.into()
}
Err(_) => {
// Not in the math font, fallback to normal inline text layout.
// TODO: Should replace this with proper fallback in [`GlyphFragment::new`].
layout_inline_text(
c.encode_utf8(&mut [0; 4]),
elem.span(),
ctx,
styles,
)?
.into()
}
};
ctx.push(fragment);
}
Ok(())
}

View File

@ -1,5 +1,5 @@
use std::collections::{BTreeSet, HashMap};
use std::fmt::{self, Debug, Display, Formatter, Write};
use std::fmt::{self, Debug, Display, Formatter};
use std::sync::Arc;
use codex::ModifierSet;
@ -52,7 +52,7 @@ pub struct Symbol(Repr);
#[derive(Clone, Eq, PartialEq, Hash)]
enum Repr {
/// A native symbol that has no named variant.
Single(char),
Single(&'static str),
/// A native symbol with multiple named variants.
Complex(&'static [Variant<&'static str>]),
/// A symbol with multiple named variants, where some modifiers may have
@ -61,9 +61,9 @@ enum Repr {
Modified(Arc<(List, ModifierSet<EcoString>)>),
}
/// A symbol variant, consisting of a set of modifiers, a character, and an
/// A symbol variant, consisting of a set of modifiers, the variant's value, and an
/// optional deprecation message.
type Variant<S> = (ModifierSet<S>, char, Option<S>);
type Variant<S> = (ModifierSet<S>, S, Option<S>);
/// A collection of symbols.
#[derive(Clone, Eq, PartialEq, Hash)]
@ -73,9 +73,9 @@ enum List {
}
impl Symbol {
/// Create a new symbol from a single character.
pub const fn single(c: char) -> Self {
Self(Repr::Single(c))
/// Create a new symbol from a single value.
pub const fn single(value: &'static str) -> Self {
Self(Repr::Single(value))
}
/// Create a symbol with a static variant list.
@ -85,6 +85,11 @@ impl Symbol {
Self(Repr::Complex(list))
}
/// Create a symbol from a runtime char.
pub fn runtime_char(c: char) -> Self {
Self::runtime(Box::new([(ModifierSet::default(), c.into(), None)]))
}
/// Create a symbol with a runtime variant list.
#[track_caller]
pub fn runtime(list: Box<[Variant<EcoString>]>) -> Self {
@ -92,10 +97,10 @@ impl Symbol {
Self(Repr::Modified(Arc::new((List::Runtime(list), ModifierSet::default()))))
}
/// Get the symbol's character.
pub fn get(&self) -> char {
/// Get the symbol's value.
pub fn get(&self) -> &str {
match &self.0 {
Repr::Single(c) => *c,
Repr::Single(value) => value,
Repr::Complex(_) => ModifierSet::<&'static str>::default()
.best_match_in(self.variants().map(|(m, c, _)| (m, c)))
.unwrap(),
@ -108,27 +113,27 @@ impl Symbol {
/// Try to get the function associated with the symbol, if any.
pub fn func(&self) -> StrResult<Func> {
match self.get() {
'⌈' => Ok(crate::math::ceil::func()),
'⌊' => Ok(crate::math::floor::func()),
'–' => Ok(crate::math::accent::dash::func()),
'⋅' | '\u{0307}' => Ok(crate::math::accent::dot::func()),
'¨' => Ok(crate::math::accent::dot_double::func()),
'\u{20db}' => Ok(crate::math::accent::dot_triple::func()),
'\u{20dc}' => Ok(crate::math::accent::dot_quad::func()),
'∼' => Ok(crate::math::accent::tilde::func()),
'´' => Ok(crate::math::accent::acute::func()),
'˝' => Ok(crate::math::accent::acute_double::func()),
'˘' => Ok(crate::math::accent::breve::func()),
'ˇ' => Ok(crate::math::accent::caron::func()),
'^' => Ok(crate::math::accent::hat::func()),
'`' => Ok(crate::math::accent::grave::func()),
'¯' => Ok(crate::math::accent::macron::func()),
'○' => Ok(crate::math::accent::circle::func()),
'→' => Ok(crate::math::accent::arrow::func()),
'←' => Ok(crate::math::accent::arrow_l::func()),
'↔' => Ok(crate::math::accent::arrow_l_r::func()),
'⇀' => Ok(crate::math::accent::harpoon::func()),
'↼' => Ok(crate::math::accent::harpoon_lt::func()),
"⌈" => Ok(crate::math::ceil::func()),
"⌊" => Ok(crate::math::floor::func()),
"–" => Ok(crate::math::accent::dash::func()),
"⋅" | "\u{0307}" => Ok(crate::math::accent::dot::func()),
"¨" => Ok(crate::math::accent::dot_double::func()),
"\u{20db}" => Ok(crate::math::accent::dot_triple::func()),
"\u{20dc}" => Ok(crate::math::accent::dot_quad::func()),
"∼" => Ok(crate::math::accent::tilde::func()),
"´" => Ok(crate::math::accent::acute::func()),
"˝" => Ok(crate::math::accent::acute_double::func()),
"˘" => Ok(crate::math::accent::breve::func()),
"ˇ" => Ok(crate::math::accent::caron::func()),
"^" => Ok(crate::math::accent::hat::func()),
"`" => Ok(crate::math::accent::grave::func()),
"¯" => Ok(crate::math::accent::macron::func()),
"○" => Ok(crate::math::accent::circle::func()),
"→" => Ok(crate::math::accent::arrow::func()),
"←" => Ok(crate::math::accent::arrow_l::func()),
"↔" => Ok(crate::math::accent::arrow_l_r::func()),
"⇀" => Ok(crate::math::accent::harpoon::func()),
"↼" => Ok(crate::math::accent::harpoon_lt::func()),
_ => bail!("symbol {self} is not callable"),
}
}
@ -163,7 +168,7 @@ impl Symbol {
/// The characters that are covered by this symbol.
pub fn variants(&self) -> impl Iterator<Item = Variant<&str>> {
match &self.0 {
Repr::Single(c) => Variants::Single(Some(*c).into_iter()),
Repr::Single(value) => Variants::Single(Some(*value).into_iter()),
Repr::Complex(list) => Variants::Static(list.iter()),
Repr::Modified(arc) => arc.0.variants(),
}
@ -279,7 +284,7 @@ impl Symbol {
impl Display for Symbol {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
f.write_char(self.get())
f.write_str(self.get())
}
}
@ -362,7 +367,7 @@ impl Serialize for Symbol {
where
S: Serializer,
{
serializer.serialize_char(self.get())
serializer.serialize_str(self.get())
}
}
@ -377,11 +382,12 @@ impl List {
}
/// A value that can be cast to a symbol.
pub struct SymbolVariant(EcoString, char);
pub struct SymbolVariant(EcoString, EcoString);
cast! {
SymbolVariant,
c: char => Self(EcoString::new(), c),
c: char => Self(EcoString::new(), c.into()),
s: EcoString => Self(EcoString::new(), s),
array: Array => {
let mut iter = array.into_iter();
match (iter.next(), iter.next(), iter.next()) {
@ -393,7 +399,7 @@ cast! {
/// Iterator over variants.
enum Variants<'a> {
Single(std::option::IntoIter<char>),
Single(std::option::IntoIter<&'static str>),
Static(std::slice::Iter<'static, Variant<&'static str>>),
Runtime(std::slice::Iter<'a, Variant<EcoString>>),
}
@ -406,7 +412,7 @@ impl<'a> Iterator for Variants<'a> {
Self::Single(iter) => Some((ModifierSet::default(), iter.next()?, None)),
Self::Static(list) => list.next().copied(),
Self::Runtime(list) => {
list.next().map(|(m, c, d)| (m.as_deref(), *c, d.as_deref()))
list.next().map(|(m, s, d)| (m.as_deref(), s.as_str(), d.as_deref()))
}
}
}
@ -415,21 +421,21 @@ impl<'a> Iterator for Variants<'a> {
/// A single character.
#[elem(Repr, PlainText)]
pub struct SymbolElem {
/// The symbol's character.
/// The symbol's value.
#[required]
pub text: char, // This is called `text` for consistency with `TextElem`.
pub text: EcoString, // This is called `text` for consistency with `TextElem`.
}
impl SymbolElem {
/// Create a new packed symbol element.
pub fn packed(text: impl Into<char>) -> Content {
pub fn packed(text: impl Into<EcoString>) -> Content {
Self::new(text.into()).pack()
}
}
impl PlainText for Packed<SymbolElem> {
fn plain_text(&self, text: &mut EcoString) {
text.push(self.text);
text.push_str(&self.text);
}
}

View File

@ -189,7 +189,7 @@ cast! {
self => self.0.into_value(),
v: char => Self::new(v),
v: Content => match v.to_packed::<SymbolElem>() {
Some(elem) => Self::new(elem.text),
None => bail!("expected a symbol"),
Some(elem) if elem.text.chars().count() == 1 => Self::new(elem.text.chars().next().unwrap()),
_ => bail!("expected a single-character symbol"),
},
}

View File

@ -281,7 +281,7 @@ cast! {
Delimiter,
self => self.0.into_value(),
_: NoneValue => Self::none(),
v: Symbol => Self::char(v.get())?,
v: Symbol => Self::char(v.get().parse::<char>().map_err(|_| "symbol value is longer than one character")?)?,
v: char => Self::char(v)?,
}

View File

@ -39,7 +39,7 @@ impl From<codex::Module> for Scope {
impl From<codex::Symbol> for Symbol {
fn from(symbol: codex::Symbol) -> Self {
match symbol {
codex::Symbol::Single(c) => Symbol::single(c),
codex::Symbol::Single(value) => Symbol::single(value),
codex::Symbol::Multi(list) => Symbol::list(list),
}
}

View File

@ -302,9 +302,7 @@ fn visit_kind_rules<'a>(
// textual elements via `TEXTUAL` grouping. However, in math, this is
// not desirable, so we just do it on a per-element basis.
if let Some(elem) = content.to_packed::<SymbolElem>() {
if let Some(m) =
find_regex_match_in_str(elem.text.encode_utf8(&mut [0; 4]), styles)
{
if let Some(m) = find_regex_match_in_str(elem.text.as_str(), styles) {
visit_regex_match(s, &[(content, styles)], m)?;
return Ok(true);
}
@ -325,7 +323,7 @@ fn visit_kind_rules<'a>(
// Symbols in non-math content transparently convert to `TextElem` so we
// don't have to handle them in non-math layout.
if let Some(elem) = content.to_packed::<SymbolElem>() {
let mut text = TextElem::packed(elem.text).spanned(elem.span());
let mut text = TextElem::packed(elem.text.clone()).spanned(elem.span());
if let Some(label) = elem.label() {
text.set_label(label);
}
@ -1240,7 +1238,7 @@ fn visit_regex_match<'a>(
let len = if let Some(elem) = content.to_packed::<TextElem>() {
elem.text.len()
} else if let Some(elem) = content.to_packed::<SymbolElem>() {
elem.text.len_utf8()
elem.text.len()
} else {
1 // The rest are Ascii, so just one byte.
};