initial refactor to use standard numbering fns

This commit is contained in:
Samuel Ireson 2025-03-31 15:23:00 +01:00
parent 012e14d40c
commit 29e1d7fd5d

View File

@ -9,7 +9,6 @@ use ecow::{eco_format, EcoString, EcoVec};
use crate::diag::SourceResult; use crate::diag::SourceResult;
use crate::engine::Engine; use crate::engine::Engine;
use crate::foundations::{cast, func, Context, Func, Str, Value}; use crate::foundations::{cast, func, Context, Func, Str, Value};
use crate::text::Case;
/// Applies a numbering to a sequence of numbers. /// Applies a numbering to a sequence of numbers.
/// ///
@ -382,38 +381,180 @@ impl NumberingKind {
pub fn apply(self, n: u64) -> EcoString { pub fn apply(self, n: u64) -> EcoString {
match self { match self {
Self::Arabic => eco_format!("{n}"), Self::Arabic => eco_format!("{n}"),
Self::LowerRoman => roman_numeral(n, Case::Lower), Self::LowerRoman => additive(
Self::UpperRoman => roman_numeral(n, Case::Upper), [
Self::LowerGreek => greek_numeral(n, Case::Lower), ("m̅", 1000000),
Self::UpperGreek => greek_numeral(n, Case::Upper), ("d̅", 500000),
Self::Symbol => { ("c̅", 100000),
if n == 0 { ("l̅", 50000),
return '-'.into(); ("x̅", 10000),
} ("v̅", 5000),
("i̅v̅", 4000),
const SYMBOLS: &[char] = &['*', '†', '‡', '§', '¶', '‖']; ("m", 1000),
let n_symbols = SYMBOLS.len() as u64; ("cm", 900),
let symbol = SYMBOLS[((n - 1) % n_symbols) as usize]; ("d", 500),
let amount = ((n - 1) / n_symbols) + 1; ("cd", 400),
std::iter::repeat_n(symbol, amount.try_into().unwrap()).collect() ("c", 100),
} ("xc", 90),
Self::Hebrew => hebrew_numeral(n), ("l", 50),
("xl", 40),
Self::LowerLatin => zeroless( ("x", 10),
("ix", 9),
("v", 5),
("iv", 4),
("i", 1),
],
n,
),
Self::UpperRoman => additive(
[
("M̅", 1000000),
("D̅", 500000),
("C̅", 100000),
("L̅", 50000),
("X̅", 10000),
("V̅", 5000),
("I̅V̅", 4000),
("M", 1000),
("CM", 900),
("D", 500),
("CD", 400),
("C", 100),
("XC", 90),
("L", 50),
("XL", 40),
("X", 10),
("IX", 9),
("V", 5),
("IV", 4),
("I", 1),
],
n,
),
Self::LowerGreek => additive(
[
("ϡ", 900),
("ω", 800),
("ψ", 700),
("χ", 600),
("φ", 500),
("υ", 400),
("τ", 300),
("σ", 200),
("ρ", 100),
("ϟ", 90),
("π", 80),
("ο", 70),
("ξ", 60),
("ν", 50),
("μ", 40),
("λ", 30),
("κ", 20),
("ι", 10),
("θ", 9),
("η", 8),
("ζ", 7),
("ϛ", 6),
("ε", 5),
("δ", 4),
("γ", 3),
("β", 2),
("α", 1),
("𐆊", 0),
],
n,
),
Self::UpperGreek => additive(
[
("Ϡ", 900),
("Ω", 800),
("Ψ", 700),
("Χ", 600),
("Φ", 500),
("Υ", 400),
("Τ", 300),
("Σ", 200),
("Ρ", 100),
("Ϟ", 90),
("Π", 80),
("Ο", 70),
("Ξ", 60),
("Ν", 50),
("Μ", 40),
("Λ", 30),
("Κ", 20),
("Ι", 10),
("Θ", 9),
("Η", 8),
("Ζ", 7),
("Ϛ", 6),
("Ε", 5),
("Δ", 4),
("Γ", 3),
("Β", 2),
("Α", 1),
("𐆊", 0),
],
n,
),
Self::Symbol => symbolic(['*', '†', '‡', '§', '¶', '‖'], n),
Self::Hebrew => additive(
[
("י׳", 10000),
("ט׳", 9000),
("ח׳", 8000),
("ז׳", 7000),
("ו׳", 6000),
("ה׳", 5000),
("ד׳", 4000),
("ג׳", 3000),
("ב׳", 2000),
("א׳", 1000),
("ת", 400),
("ש", 300),
("ר", 200),
("ק", 100),
("צ", 90),
("פ", 80),
("ע", 70),
("ס", 60),
("נ", 50),
("מ", 40),
("ל", 30),
("כ", 20),
("יט", 19),
("יח", 18),
("יז", 17),
("טז", 16),
("טו", 15),
("י", 10),
("ט", 9),
("ח", 8),
("ז", 7),
("ו", 6),
("ה", 5),
("ד", 4),
("ג", 3),
("ב", 2),
("א", 1),
],
n,
),
Self::LowerLatin => alphabetic(
[ [
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
], ],
n, n,
), ),
Self::UpperLatin => zeroless( Self::UpperLatin => alphabetic(
[ [
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
], ],
n, n,
), ),
Self::HiraganaAiueo => zeroless( Self::HiraganaAiueo => alphabetic(
[ [
'あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ',
'し', 'す', 'せ', 'そ', 'た', 'ち', 'つ', 'て', 'と', 'な', 'に', 'し', 'す', 'せ', 'そ', 'た', 'ち', 'つ', 'て', 'と', 'な', 'に',
@ -423,7 +564,7 @@ impl NumberingKind {
], ],
n, n,
), ),
Self::HiraganaIroha => zeroless( Self::HiraganaIroha => alphabetic(
[ [
'い', 'ろ', 'は', 'に', 'ほ', 'へ', 'と', 'ち', 'り', 'ぬ', 'る', 'い', 'ろ', 'は', 'に', 'ほ', 'へ', 'と', 'ち', 'り', 'ぬ', 'る',
'を', 'わ', 'か', 'よ', 'た', 'れ', 'そ', 'つ', 'ね', 'な', 'ら', 'を', 'わ', 'か', 'よ', 'た', 'れ', 'そ', 'つ', 'ね', 'な', 'ら',
@ -433,7 +574,7 @@ impl NumberingKind {
], ],
n, n,
), ),
Self::KatakanaAiueo => zeroless( Self::KatakanaAiueo => alphabetic(
[ [
'ア', 'イ', 'ウ', 'エ', 'オ', 'カ', 'キ', 'ク', 'ケ', 'コ', 'サ', 'ア', 'イ', 'ウ', 'エ', 'オ', 'カ', 'キ', 'ク', 'ケ', 'コ', 'サ',
'シ', 'ス', 'セ', 'ソ', 'タ', 'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'シ', 'ス', 'セ', 'ソ', 'タ', 'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ',
@ -443,7 +584,7 @@ impl NumberingKind {
], ],
n, n,
), ),
Self::KatakanaIroha => zeroless( Self::KatakanaIroha => alphabetic(
[ [
'イ', 'ロ', 'ハ', 'ニ', 'ホ', 'ヘ', 'ト', 'チ', 'リ', 'ヌ', 'ル', 'イ', 'ロ', 'ハ', 'ニ', 'ホ', 'ヘ', 'ト', 'チ', 'リ', 'ヌ', 'ル',
'ヲ', 'ワ', 'カ', 'ヨ', 'タ', 'レ', 'ソ', 'ツ', 'ネ', 'ナ', 'ラ', 'ヲ', 'ワ', 'カ', 'ヨ', 'タ', 'レ', 'ソ', 'ツ', 'ネ', 'ナ', 'ラ',
@ -453,21 +594,21 @@ impl NumberingKind {
], ],
n, n,
), ),
Self::KoreanJamo => zeroless( Self::KoreanJamo => alphabetic(
[ [
'ㄱ', 'ㄴ', 'ㄷ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅅ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㄱ', 'ㄴ', 'ㄷ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅅ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ',
'ㅌ', 'ㅍ', 'ㅎ', 'ㅌ', 'ㅍ', 'ㅎ',
], ],
n, n,
), ),
Self::KoreanSyllable => zeroless( Self::KoreanSyllable => alphabetic(
[ [
'가', '나', '다', '라', '마', '바', '사', '아', '자', '차', '카', '가', '나', '다', '라', '마', '바', '사', '아', '자', '차', '카',
'타', '파', '하', '타', '파', '하',
], ],
n, n,
), ),
Self::BengaliLetter => zeroless( Self::BengaliLetter => alphabetic(
[ [
'ক', 'খ', 'গ', 'ঘ', 'ঙ', 'চ', 'ছ', 'জ', 'ঝ', 'ঞ', 'ট', 'ঠ', 'ড', 'ঢ', 'ক', 'খ', 'গ', 'ঘ', 'ঙ', 'চ', 'ছ', 'জ', 'ঝ', 'ঞ', 'ট', 'ঠ', 'ড', 'ঢ',
'ণ', 'ত', 'থ', 'দ', 'ধ', 'ন', 'প', 'ফ', 'ব', 'ভ', 'ম', 'য', 'র', 'ল', 'ণ', 'ত', 'থ', 'দ', 'ধ', 'ন', 'প', 'ফ', 'ব', 'ভ', 'ম', 'য', 'র', 'ল',
@ -475,7 +616,7 @@ impl NumberingKind {
], ],
n, n,
), ),
Self::CircledNumber => zeroless( Self::CircledNumber => fixed(
[ [
'①', '②', '③', '④', '⑤', '⑥', '⑦', '⑧', '⑨', '⑩', '⑪', '⑫', '⑬', '⑭', '①', '②', '③', '④', '⑤', '⑥', '⑦', '⑧', '⑨', '⑩', '⑪', '⑫', '⑬', '⑭',
'⑮', '⑯', '⑰', '⑱', '⑲', '⑳', '㉑', '㉒', '㉓', '㉔', '㉕', '㉖', '⑮', '⑯', '⑰', '⑱', '⑲', '⑳', '㉑', '㉒', '㉓', '㉔', '㉕', '㉖',
@ -486,7 +627,7 @@ impl NumberingKind {
n, n,
), ),
Self::DoubleCircledNumber => { Self::DoubleCircledNumber => {
zeroless(['⓵', '⓶', '⓷', '⓸', '⓹', '⓺', '⓻', '⓼', '⓽', '⓾'], n) fixed(['⓵', '⓶', '⓷', '⓸', '⓹', '⓺', '⓻', '⓼', '⓽', '⓾'], n)
} }
Self::LowerSimplifiedChinese => { Self::LowerSimplifiedChinese => {
@ -502,306 +643,93 @@ impl NumberingKind {
u64_to_chinese(ChineseVariant::Traditional, ChineseCase::Upper, n).into() u64_to_chinese(ChineseVariant::Traditional, ChineseCase::Upper, n).into()
} }
Self::EasternArabic => decimal('\u{0660}', n), Self::EasternArabic => {
Self::EasternArabicPersian => decimal('\u{06F0}', n), numeric(['٠', '١', '٢', '٣', '٤', '٥', '٦', '٧', '٨', '٩'], n)
Self::DevanagariNumber => decimal('\u{0966}', n),
Self::BengaliNumber => decimal('\u{09E6}', n),
}
}
}
/// Stringify an integer to a Hebrew number.
fn hebrew_numeral(mut n: u64) -> EcoString {
if n == 0 {
return '-'.into();
}
let mut fmt = EcoString::new();
'outer: for (name, value) in [
('ת', 400),
('ש', 300),
('ר', 200),
('ק', 100),
('צ', 90),
('פ', 80),
('ע', 70),
('ס', 60),
('נ', 50),
('מ', 40),
('ל', 30),
('כ', 20),
('י', 10),
('ט', 9),
('ח', 8),
('ז', 7),
('ו', 6),
('ה', 5),
('ד', 4),
('ג', 3),
('ב', 2),
('א', 1),
] {
while n >= value {
match n {
15 => fmt.push_str("ט״ו"),
16 => fmt.push_str("ט״ז"),
_ => {
let append_geresh = n == value && fmt.is_empty();
if n == value && !fmt.is_empty() {
fmt.push('״');
}
fmt.push(name);
if append_geresh {
fmt.push('׳');
}
n -= value;
continue;
}
} }
break 'outer; Self::EasternArabicPersian => {
} numeric(['۰', '۱', '۲', '۳', '۴', '۵', '۶', '۷', '۸', '۹'], n)
} }
fmt Self::DevanagariNumber => {
} numeric(['०', '१', '२', '३', '४', '५', '६', '७', '८', '९'], n)
}
/// Stringify an integer to a Roman numeral. Self::BengaliNumber => {
fn roman_numeral(mut n: u64, case: Case) -> EcoString { numeric(['০', '১', '২', '৩', '৪', '৫', '৬', '৭', '৮', '৯'], n)
if n == 0 {
return match case {
Case::Lower => 'n'.into(),
Case::Upper => 'N'.into(),
};
}
// Adapted from Yann Villessuzanne's roman.rs under the
// Unlicense, at https://github.com/linfir/roman.rs/
let mut fmt = EcoString::new();
for &(name, value) in &[
("M̅", 1000000),
("D̅", 500000),
("C̅", 100000),
("L̅", 50000),
("X̅", 10000),
("V̅", 5000),
("I̅V̅", 4000),
("M", 1000),
("CM", 900),
("D", 500),
("CD", 400),
("C", 100),
("XC", 90),
("L", 50),
("XL", 40),
("X", 10),
("IX", 9),
("V", 5),
("IV", 4),
("I", 1),
] {
while n >= value {
n -= value;
for c in name.chars() {
match case {
Case::Lower => fmt.extend(c.to_lowercase()),
Case::Upper => fmt.push(c),
}
} }
} }
} }
fmt
} }
/// Stringify an integer to Greek numbers. fn additive<const N_DIGITS: usize>(
/// symbols: [(&str, u64); N_DIGITS],
/// Greek numbers use the Greek Alphabet to represent numbers; it is based on 10 mut n: u64,
/// (decimal). Here we implement the single digit M power representation from ) -> EcoString {
/// [The Greek Number Converter][convert] and also described in
/// [Greek Numbers][numbers].
///
/// [converter]: https://www.russellcottrell.com/greek/utilities/GreekNumberConverter.htm
/// [numbers]: https://mathshistory.st-andrews.ac.uk/HistTopics/Greek_numbers/
fn greek_numeral(n: u64, case: Case) -> EcoString {
let thousands = [
["͵α", "͵Α"],
["͵β", "͵Β"],
["͵γ", "͵Γ"],
["͵δ", "͵Δ"],
["͵ε", "͵Ε"],
["͵ϛ", "͵Ϛ"],
["͵ζ", "͵Ζ"],
["͵η", "͵Η"],
["͵θ", "͵Θ"],
];
let hundreds = [
["ρ", "Ρ"],
["σ", "Σ"],
["τ", "Τ"],
["υ", "Υ"],
["φ", "Φ"],
["χ", "Χ"],
["ψ", "Ψ"],
["ω", "Ω"],
["ϡ", "Ϡ"],
];
let tens = [
["ι", "Ι"],
["κ", "Κ"],
["λ", "Λ"],
["μ", "Μ"],
["ν", "Ν"],
["ξ", "Ξ"],
["ο", "Ο"],
["π", "Π"],
["ϙ", "Ϟ"],
];
let ones = [
["α", "Α"],
["β", "Β"],
["γ", "Γ"],
["δ", "Δ"],
["ε", "Ε"],
["ϛ", "Ϛ"],
["ζ", "Ζ"],
["η", "Η"],
["θ", "Θ"],
];
if n == 0 { if n == 0 {
// Greek Zero Sign for (symbol, weight) in symbols {
return '𐆊'.into(); if weight == 0 {
} return (*symbol).into();
}
let mut fmt = EcoString::new();
let case = match case {
Case::Lower => 0,
Case::Upper => 1,
};
// Extract a list of decimal digits from the number
let mut decimal_digits: Vec<usize> = Vec::new();
let mut n = n;
while n > 0 {
decimal_digits.push((n % 10) as usize);
n /= 10;
}
// Pad the digits with leading zeros to ensure we can form groups of 4
while decimal_digits.len() % 4 != 0 {
decimal_digits.push(0);
}
decimal_digits.reverse();
let mut m_power = decimal_digits.len() / 4;
// M are used to represent 10000, M_power = 2 means 10000^2 = 10000 0000
// The prefix of M is also made of Greek numerals but only be single digits, so it is 9 at max. This enables us
// to represent up to (10000)^(9 + 1) - 1 = 10^40 -1 (9,999,999,999,999,999,999,999,999,999,999,999,999,999)
let get_m_prefix = |m_power: usize| {
if m_power == 0 {
None
} else {
assert!(m_power <= 9);
// the prefix of M is a single digit lowercase
Some(ones[m_power - 1][0])
} }
}; return '0'.into();
}
let mut previous_has_number = false; let mut s = EcoString::new();
for chunk in decimal_digits.chunks_exact(4) { for (symbol, weight) in symbols {
// chunk must be exact 4 item if weight == 0 || weight > n {
assert_eq!(chunk.len(), 4);
m_power = m_power.saturating_sub(1);
// `th`ousan, `h`undred, `t`en and `o`ne
let (th, h, t, o) = (chunk[0], chunk[1], chunk[2], chunk[3]);
if th + h + t + o == 0 {
continue; continue;
} }
let reps = n / weight;
if previous_has_number { for _ in 0..reps {
fmt.push_str(", "); s.push_str(symbol);
} }
if let Some(m_prefix) = get_m_prefix(m_power) { n -= weight * reps;
fmt.push_str(m_prefix); if n == 0 {
fmt.push_str("Μ"); return s;
} }
if th != 0 {
let thousand_digit = thousands[th - 1][case];
fmt.push_str(thousand_digit);
}
if h != 0 {
let hundred_digit = hundreds[h - 1][case];
fmt.push_str(hundred_digit);
}
if t != 0 {
let ten_digit = tens[t - 1][case];
fmt.push_str(ten_digit);
}
if o != 0 {
let one_digit = ones[o - 1][case];
fmt.push_str(one_digit);
}
// if we do not have thousan, we need to append 'ʹ' at the end.
if th == 0 {
fmt.push_str("ʹ");
}
previous_has_number = true;
} }
fmt s
} }
/// Stringify a number using a base-N counting system with no zero digit. fn alphabetic<const N_DIGITS: usize>(symbols: [char; N_DIGITS], mut n: u64) -> EcoString {
/// let n_digits = N_DIGITS as u64;
/// This is best explained by example. Suppose our digits are 'A', 'B', and 'C'.
/// We would get the following:
///
/// ```text
/// 1 => "A"
/// 2 => "B"
/// 3 => "C"
/// 4 => "AA"
/// 5 => "AB"
/// 6 => "AC"
/// 7 => "BA"
/// 8 => "BB"
/// 9 => "BC"
/// 10 => "CA"
/// 11 => "CB"
/// 12 => "CC"
/// 13 => "AAA"
/// etc.
/// ```
///
/// You might be familiar with this scheme from the way spreadsheet software
/// tends to label its columns.
fn zeroless<const N_DIGITS: usize>(alphabet: [char; N_DIGITS], mut n: u64) -> EcoString {
if n == 0 { if n == 0 {
return '-'.into(); return '-'.into();
} }
let n_digits = N_DIGITS as u64; let mut s = EcoString::new();
let mut cs = EcoString::new(); while n != 0 {
while n > 0 {
n -= 1; n -= 1;
cs.push(alphabet[(n % n_digits) as usize]); s.push(symbols[(n % n_digits) as usize]);
n /= n_digits; n /= n_digits;
} }
cs.chars().rev().collect() s.chars().rev().collect()
} }
/// Stringify a number using a base-10 counting system with a zero digit. fn fixed<const N_DIGITS: usize>(symbols: [char; N_DIGITS], n: u64) -> EcoString {
/// let n_digits = N_DIGITS as u64;
/// This function assumes that the digits occupy contiguous codepoints. if n - 1 < n_digits {
fn decimal(start: char, mut n: u64) -> EcoString { return symbols[(n - 1) as usize].into();
if n == 0 {
return start.into();
} }
let mut cs = EcoString::new(); eco_format!("{n}")
while n > 0 { }
cs.push(char::from_u32((start as u32) + ((n % 10) as u32)).unwrap());
n /= 10; fn numeric<const N_DIGITS: usize>(symbols: [char; N_DIGITS], mut n: u64) -> EcoString {
} let n_digits = N_DIGITS as u64;
cs.chars().rev().collect() if n == 0 {
return symbols[0].into();
}
let mut s = EcoString::new();
while n != 0 {
s.push(symbols[(n % n_digits) as usize]);
n /= n_digits;
}
s.chars().rev().collect()
}
fn symbolic<const N_DIGITS: usize>(symbols: [char; N_DIGITS], n: u64) -> EcoString {
let n_digits = N_DIGITS as u64;
if n == 0 {
return '-'.into();
}
EcoString::from(symbols[((n - 1) % n_digits) as usize])
.repeat((n.div_ceil(n_digits)) as usize)
} }