From 23313b0af0e9a70f313863db6bb1f5f5beca7de4 Mon Sep 17 00:00:00 2001 From: LU Jialin Date: Fri, 1 Nov 2024 02:20:10 -0700 Subject: [PATCH] Support Greek Numbering (#4273) Co-authored-by: Laurenz --- crates/typst-library/src/model/numbering.rs | 167 +++++++++++++++++++- tests/suite/model/numbering.typ | 42 +++++ 2 files changed, 202 insertions(+), 7 deletions(-) diff --git a/crates/typst-library/src/model/numbering.rs b/crates/typst-library/src/model/numbering.rs index f0aa06e5a..b7f27bb9b 100644 --- a/crates/typst-library/src/model/numbering.rs +++ b/crates/typst-library/src/model/numbering.rs @@ -59,9 +59,9 @@ pub fn numbering( context: Tracked, /// Defines how the numbering works. /// - /// **Counting symbols** are `1`, `a`, `A`, `i`, `I`, `一`, `壹`, `あ`, `い`, - /// `ア`, `イ`, `א`, `가`, `ㄱ`, `*`, `①`, and `⓵`. They are replaced by the - /// number in the sequence, preserving the original case. + /// **Counting symbols** are `1`, `a`, `A`, `i`, `I`, `α`, `Α`, `一`, `壹`, + /// `あ`, `い`, `ア`, `イ`, `א`, `가`, `ㄱ`, `*`, `①`, and `⓵`. They are + /// replaced by the number in the sequence, preserving the original case. /// /// The `*` character means that symbols should be used to count, in the /// order of `*`, `†`, `‡`, `§`, `¶`, `‖`. If there are more than six @@ -141,9 +141,8 @@ cast! { /// How to turn a number into text. /// -/// A pattern consists of a prefix, followed by one of `1`, `a`, `A`, `i`, `I`, -/// `一`, `壹`, `あ`, `い`, `ア`, `イ`, `א`, `가`, `ㄱ`, `*`, `①`, or `⓵`, and then a -/// suffix. +/// A pattern consists of a prefix, followed by one of the counting symbols (see +/// [`numbering()`] docs), and then a suffix. /// /// Examples of valid patterns: /// - `1)` @@ -263,7 +262,12 @@ pub enum NumberingKind { LowerRoman, /// Uppercase Roman numerals (I, II, III, etc.). UpperRoman, - /// Paragraph/note-like symbols: *, †, ‡, §, ¶, and ‖. Further items use repeated symbols. + /// Lowercase Greek numerals (α, β, γ, etc.). 
+ LowerGreek, + /// Uppercase Greek numerals (Α, Β, Γ, etc.). + UpperGreek, + /// Paragraph/note-like symbols: *, †, ‡, §, ¶, and ‖. Further items use + /// repeated symbols. Symbol, /// Hebrew numerals, including Geresh/Gershayim. Hebrew, @@ -322,6 +326,8 @@ impl NumberingKind { 'A' => NumberingKind::UpperLatin, 'i' => NumberingKind::LowerRoman, 'I' => NumberingKind::UpperRoman, + 'α' => NumberingKind::LowerGreek, + 'Α' => NumberingKind::UpperGreek, '*' => NumberingKind::Symbol, 'א' => NumberingKind::Hebrew, '一' => NumberingKind::LowerSimplifiedChinese, @@ -351,6 +357,8 @@ impl NumberingKind { Self::UpperLatin => 'A', Self::LowerRoman => 'i', Self::UpperRoman => 'I', + Self::LowerGreek => 'α', + Self::UpperGreek => 'Α', Self::Symbol => '*', Self::Hebrew => 'א', Self::LowerSimplifiedChinese | Self::LowerTraditionalChinese => '一', @@ -377,6 +385,8 @@ impl NumberingKind { Self::Arabic => eco_format!("{n}"), Self::LowerRoman => roman_numeral(n, Case::Lower), Self::UpperRoman => roman_numeral(n, Case::Upper), + Self::LowerGreek => greek_numeral(n, Case::Lower), + Self::UpperGreek => greek_numeral(n, Case::Upper), Self::Symbol => { if n == 0 { return '-'.into(); @@ -502,6 +512,7 @@ impl NumberingKind { } } +/// Stringify an integer to a Hebrew number. fn hebrew_numeral(mut n: usize) -> EcoString { if n == 0 { return '-'.into(); @@ -555,6 +566,7 @@ fn hebrew_numeral(mut n: usize) -> EcoString { fmt } +/// Stringify an integer to a Roman numeral. fn roman_numeral(mut n: usize, case: Case) -> EcoString { if n == 0 { return match case { @@ -602,6 +614,147 @@ fn roman_numeral(mut n: usize, case: Case) -> EcoString { fmt } +/// Stringify an integer to Greek numbers. +/// +/// Greek numbers use the Greek Alphabet to represent numbers; it is based on 10 +/// (decimal). Here we implement the single digit M power representation from +/// [The Greek Number Converter][convert] and also described in +/// [Greek Numbers][numbers]. 
+/// +/// [convert]: https://www.russellcottrell.com/greek/utilities/GreekNumberConverter.htm +/// [numbers]: https://mathshistory.st-andrews.ac.uk/HistTopics/Greek_numbers/ +fn greek_numeral(n: usize, case: Case) -> EcoString { + let thousands = [ + ["͵α", "͵Α"], + ["͵β", "͵Β"], + ["͵γ", "͵Γ"], + ["͵δ", "͵Δ"], + ["͵ε", "͵Ε"], + ["͵ϛ", "͵Ϛ"], + ["͵ζ", "͵Ζ"], + ["͵η", "͵Η"], + ["͵θ", "͵Θ"], + ]; + let hundreds = [ + ["ρ", "Ρ"], + ["σ", "Σ"], + ["τ", "Τ"], + ["υ", "Υ"], + ["φ", "Φ"], + ["χ", "Χ"], + ["ψ", "Ψ"], + ["ω", "Ω"], + ["ϡ", "Ϡ"], + ]; + let tens = [ + ["ι", "Ι"], + ["κ", "Κ"], + ["λ", "Λ"], + ["μ", "Μ"], + ["ν", "Ν"], + ["ξ", "Ξ"], + ["ο", "Ο"], + ["π", "Π"], + ["ϙ", "Ϟ"], + ]; + let ones = [ + ["α", "Α"], + ["β", "Β"], + ["γ", "Γ"], + ["δ", "Δ"], + ["ε", "Ε"], + ["ϛ", "Ϛ"], + ["ζ", "Ζ"], + ["η", "Η"], + ["θ", "Θ"], + ]; + + if n == 0 { + // Greek Zero Sign + return '𐆊'.into(); + } + + let mut fmt = EcoString::new(); + let case = match case { + Case::Lower => 0, + Case::Upper => 1, + }; + + // Extract a list of decimal digits from the number + let mut decimal_digits: Vec<usize> = Vec::new(); + let mut n = n; + while n > 0 { + decimal_digits.push(n % 10); + n /= 10; + } + + // Pad the digits with leading zeros to ensure we can form groups of 4 + while decimal_digits.len() % 4 != 0 { + decimal_digits.push(0); + } + decimal_digits.reverse(); + + let mut m_power = decimal_digits.len() / 4; + + // M is used to represent 10000; m_power = 2 means 10000^2 = 1 0000 0000. + // The prefix of M is also a Greek numeral, but it can only be a single digit, so it is 9 at most. 
This enables us + // to represent up to (10000)^(9 + 1) - 1 = 10^40 - 1 (9,999,999,999,999,999,999,999,999,999,999,999,999,999) + let get_m_prefix = |m_power: usize| { + if m_power == 0 { + None + } else { + assert!(m_power <= 9); + // the prefix of M is always a lowercase single digit + Some(ones[m_power - 1][0]) + } + }; + + let mut previous_has_number = false; + for chunk in decimal_digits.chunks_exact(4) { + // each chunk holds exactly 4 digits + assert_eq!(chunk.len(), 4); + + m_power = m_power.saturating_sub(1); + + // `th`ousand, `h`undred, `t`en and `o`ne + let (th, h, t, o) = (chunk[0], chunk[1], chunk[2], chunk[3]); + if th + h + t + o == 0 { + continue; + } + + if previous_has_number { + fmt.push_str(", "); + } + + if let Some(m_prefix) = get_m_prefix(m_power) { + fmt.push_str(m_prefix); + fmt.push_str("Μ"); + } + if th != 0 { + let thousand_digit = thousands[th - 1][case]; + fmt.push_str(thousand_digit); + } + if h != 0 { + let hundred_digit = hundreds[h - 1][case]; + fmt.push_str(hundred_digit); + } + if t != 0 { + let ten_digit = tens[t - 1][case]; + fmt.push_str(ten_digit); + } + if o != 0 { + let one_digit = ones[o - 1][case]; + fmt.push_str(one_digit); + } + // if there is no thousands digit, we need to append 'ʹ' at the end. + if th == 0 { + fmt.push_str("ʹ"); + } + previous_has_number = true; + } + fmt +} + /// Stringify a number using a base-N counting system with no zero digit. /// /// This is best explained by example. Suppose our digits are 'A', 'B', and 'C'. diff --git a/tests/suite/model/numbering.typ b/tests/suite/model/numbering.typ index 241ef3ea7..6af989ff1 100644 --- a/tests/suite/model/numbering.typ +++ b/tests/suite/model/numbering.typ @@ -16,6 +16,48 @@ // Arabic. #t(pat: "1", "0", "1", "2", "3", "4", "5", "6", 107, "107", "108") +// Greek. 
+#t( + pat: "α", + "𐆊", "αʹ", "βʹ", "γʹ", "δʹ", "εʹ", "ϛʹ", "ζʹ", "ηʹ", "θʹ", "ιʹ", + "ιαʹ", "ιβʹ", "ιγʹ", "ιδʹ", "ιεʹ", "ιϛʹ", "ιζʹ", "ιηʹ", "ιθʹ", "κʹ", + 241, "σμαʹ", + 999, "ϡϙθʹ", + 1005, "͵αε", + 1999, "͵αϡϙθ", + 2999, "͵βϡϙθ", + 3000, "͵γ", + 3398, "͵γτϙη", + 4444, "͵δυμδ", + 5683, "͵εχπγ", + 9184, "͵θρπδ", + 9999, "͵θϡϙθ", + 20000, "αΜβʹ", + 20001, "αΜβʹ, αʹ", + 97554, "αΜθʹ, ͵ζφνδ", + 99999, "αΜθʹ, ͵θϡϙθ", + 1000000, "αΜρʹ", + 1000001, "αΜρʹ, αʹ", + 1999999, "αΜρϙθʹ, ͵θϡϙθ", + 2345678, "αΜσλδʹ, ͵εχοη", + 9999999, "αΜϡϙθʹ, ͵θϡϙθ", + 10000000, "αΜ͵α", + 90000001, "αΜ͵θ, αʹ", + 100000000, "βΜαʹ", + 1000000000, "βΜιʹ", + 2000000000, "βΜκʹ", + 2000000001, "βΜκʹ, αʹ", + 2000010001, "βΜκʹ, αΜαʹ, αʹ", + 2056839184, "βΜκʹ, αΜ͵εχπγ, ͵θρπδ", + 12312398676, "βΜρκγʹ, αΜ͵ασλθ, ͵ηχοϛ", +) +#t( + pat: sym.Alpha, + "𐆊", "Αʹ", "Βʹ", "Γʹ", "Δʹ", "Εʹ", "Ϛʹ", "Ζʹ", "Ηʹ", "Θʹ", "Ιʹ", + "ΙΑʹ", "ΙΒʹ", "ΙΓʹ", "ΙΔʹ", "ΙΕʹ", "ΙϚʹ", "ΙΖʹ", "ΙΗʹ", "ΙΘʹ", "Κʹ", + 241, "ΣΜΑʹ", +) + // Symbols. #t(pat: "*", "-", "*", "†", "‡", "§", "¶", "‖", "**")