Max 899de6d5d5
Use ICU data to check if accent is bottom (#6393)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
2025-06-24 10:03:10 +00:00

196 lines
6.3 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use std::sync::LazyLock;
use icu_properties::maps::CodePointMapData;
use icu_properties::CanonicalCombiningClass;
use icu_provider::AsDeserializingBufferProvider;
use icu_provider_blob::BlobDataProvider;
use crate::diag::bail;
use crate::foundations::{cast, elem, func, Content, NativeElement, SymbolElem};
use crate::layout::{Length, Rel};
use crate::math::Mathy;
/// Attaches an accent to a base.
///
/// # Example
/// ```example
/// $grave(a) = accent(a, `)$ \
/// $arrow(a) = accent(a, arrow)$ \
/// $tilde(a) = accent(a, \u{0303})$
/// ```
#[elem(Mathy)]
pub struct AccentElem {
/// The base to which the accent is applied. May consist of multiple
/// letters.
///
/// ```example
/// $arrow(A B C)$
/// ```
#[required]
pub base: Content,
/// The accent to apply to the base.
///
/// Supported accents include:
///
/// | Accent | Name | Codepoint |
/// | ------------- | --------------- | --------- |
/// | Grave | `grave` | <code>&DiacriticalGrave;</code> |
/// | Acute | `acute` | `´` |
/// | Circumflex | `hat` | `^` |
/// | Tilde | `tilde` | `~` |
/// | Macron | `macron` | `¯` |
/// | Dash | `dash` | `‾` |
/// | Breve | `breve` | `˘` |
/// | Dot | `dot` | `.` |
/// | Double dot, Diaeresis | `dot.double`, `diaer` | `¨` |
/// | Triple dot | `dot.triple` | <code>&tdot;</code> |
/// | Quadruple dot | `dot.quad` | <code>&DotDot;</code> |
/// | Circle | `circle` | `∘` |
/// | Double acute | `acute.double` | `˝` |
/// | Caron | `caron` | `ˇ` |
/// | Right arrow | `arrow`, `->` | `→` |
/// | Left arrow | `arrow.l`, `<-` | `←` |
/// | Left/Right arrow | `arrow.l.r` | `↔` |
/// | Right harpoon | `harpoon` | `⇀` |
/// | Left harpoon | `harpoon.lt` | `↼` |
#[required]
pub accent: Accent,
/// The size of the accent, relative to the width of the base.
///
/// ```example
/// $dash(A, size: #150%)$
/// ```
#[resolve]
#[default(Rel::one())]
pub size: Rel<Length>,
/// Whether to remove the dot on top of lowercase i and j when adding a top
/// accent.
///
/// This enables the `dtls` OpenType feature.
///
/// ```example
/// $hat(dotless: #false, i)$
/// ```
#[default(true)]
pub dotless: bool,
}
/// An accent character.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Accent(pub char);
impl Accent {
/// Normalize a character into an accent.
pub fn new(c: char) -> Self {
Self(Self::combine(c).unwrap_or(c))
}
/// Whether this accent is a bottom accent or not.
pub fn is_bottom(&self) -> bool {
static COMBINING_CLASS_DATA: LazyLock<CodePointMapData<CanonicalCombiningClass>> =
LazyLock::new(|| {
icu_properties::maps::load_canonical_combining_class(
&BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU)
.unwrap()
.as_deserializing(),
)
.unwrap()
});
matches!(
COMBINING_CLASS_DATA.as_borrowed().get(self.0),
CanonicalCombiningClass::Below
)
}
}
/// This macro generates accent-related functions.
///
/// ```ignore
/// accents! {
/// '\u{0300}' | '`' => grave,
/// // ^^^^^^^^^ ^^^ ^^^^^
/// // | | |
/// // | | +-- The name of the function.
/// // | +--------- The alternative characters that represent the accent.
/// // +---------------------- The primary character that represents the accent.
/// }
/// ```
///
/// When combined with the `Accent::combine` function, accent characters can be normalized
/// to the primary character.
macro_rules! accents {
($($primary:literal $(| $alt:literal)* => $name:ident),* $(,)?) => {
impl Accent {
/// Normalize an accent to a combining one.
pub fn combine(c: char) -> Option<char> {
Some(match c {
$($primary $(| $alt)* => $primary,)*
_ => return None,
})
}
}
$(
/// The accent function for callable symbol definitions.
#[func]
pub fn $name(
/// The base to which the accent is applied.
base: Content,
/// The size of the accent, relative to the width of the base.
#[named]
size: Option<Rel<Length>>,
/// Whether to remove the dot on top of lowercase i and j when
/// adding a top accent.
#[named]
dotless: Option<bool>,
) -> Content {
let mut accent = AccentElem::new(base, Accent::new($primary));
if let Some(size) = size {
accent = accent.with_size(size);
}
if let Some(dotless) = dotless {
accent = accent.with_dotless(dotless);
}
accent.pack()
}
)+
};
}
// Keep it synced with the documenting table above.
accents! {
'\u{0300}' | '`' => grave,
'\u{0301}' | '´' => acute,
'\u{0302}' | '^' | 'ˆ' => hat,
'\u{0303}' | '~' | '' | '˜' => tilde,
'\u{0304}' | '¯' => macron,
'\u{0305}' | '-' | '‾' | '' => dash,
'\u{0306}' | '˘' => breve,
'\u{0307}' | '.' | '˙' | '⋅' => dot,
'\u{0308}' | '¨' => dot_double,
'\u{20db}' => dot_triple,
'\u{20dc}' => dot_quad,
'\u{030a}' | '∘' | '○' => circle,
'\u{030b}' | '˝' => acute_double,
'\u{030c}' | 'ˇ' => caron,
'\u{20d6}' | '←' => arrow_l,
'\u{20d7}' | '→' | '⟶' => arrow,
'\u{20e1}' | '↔' | '⟷' => arrow_l_r,
'\u{20d0}' | '↼' => harpoon_lt,
'\u{20d1}' | '⇀' => harpoon,
}
cast! {
Accent,
self => self.0.into_value(),
v: char => Self::new(v),
v: Content => match v.to_packed::<SymbolElem>() {
Some(elem) => Self::new(elem.text),
None => bail!("expected a symbol"),
},
}