Add support for Eastern Arabic numerals (#4036)

This commit is contained in:
bluebear94 2024-05-01 04:07:45 -04:00 committed by GitHub
parent 2c678e8228
commit d27414937a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 44 additions and 1 deletions

View File

@ -3,6 +3,7 @@ use std::str::FromStr;
use chinese_number::{ChineseCase, ChineseCountMethod, ChineseVariant, NumberToChinese}; use chinese_number::{ChineseCase, ChineseCountMethod, ChineseVariant, NumberToChinese};
use comemo::Tracked; use comemo::Tracked;
use ecow::{eco_format, EcoString, EcoVec}; use ecow::{eco_format, EcoString, EcoVec};
use smallvec::{smallvec, SmallVec};
use crate::diag::SourceResult; use crate::diag::SourceResult;
use crate::engine::Engine; use crate::engine::Engine;
@ -243,11 +244,17 @@ cast! {
/// Different kinds of numberings. /// Different kinds of numberings.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum NumberingKind { pub enum NumberingKind {
/// Arabic numerals (1, 2, 3, etc.).
Arabic, Arabic,
/// Latin letters (A, B, C, etc.). Items beyond Z use multiple symbols. Uses both cases.
Letter, Letter,
/// Roman numerals (I, II, III, etc.). Uses both cases.
Roman, Roman,
/// The symbols *, †, ‡, §, ¶, and ‖. Further items use multiple symbols.
Symbol, Symbol,
/// Hebrew numerals.
Hebrew, Hebrew,
/// Simplified Chinese numerals. Uses standard numerals for lowercase and “banknote” numerals for uppercase.
SimplifiedChinese, SimplifiedChinese,
// TODO: Pick the numbering pattern based on languages choice. // TODO: Pick the numbering pattern based on languages choice.
// As the `1st` numbering character of Chinese (Simplified) and // As the `1st` numbering character of Chinese (Simplified) and
@ -255,13 +262,24 @@ pub enum NumberingKind {
// if the context is Simplified or Traditional by only this // if the context is Simplified or Traditional by only this
// character. // character.
#[allow(unused)] #[allow(unused)]
/// Traditional Chinese numerals. Uses standard numerals for lowercase and “banknote” numerals for uppercase.
TraditionalChinese, TraditionalChinese,
/// Hiragana in the gojūon order. Includes n but excludes wi and we.
HiraganaAiueo, HiraganaAiueo,
/// Hiragana in the iroha order. Includes wi and we but excludes n.
HiraganaIroha, HiraganaIroha,
/// Katakana in the gojūon order. Includes n but excludes wi and we.
KatakanaAiueo, KatakanaAiueo,
/// Katakana in the iroha order. Includes wi and we but excludes n.
KatakanaIroha, KatakanaIroha,
/// Korean jamo (ㄱ, ㄴ, ㄷ, etc.).
KoreanJamo, KoreanJamo,
/// Korean syllables (가, 나, 다, etc.).
KoreanSyllable, KoreanSyllable,
/// Eastern Arabic numerals, used in some Arabic-speaking countries.
EasternArabic,
/// The variant of Eastern Arabic numerals used in Persian and Urdu.
EasternArabicPersian,
} }
impl NumberingKind { impl NumberingKind {
@ -280,6 +298,8 @@ impl NumberingKind {
'イ' => NumberingKind::KatakanaIroha, 'イ' => NumberingKind::KatakanaIroha,
'ㄱ' => NumberingKind::KoreanJamo, 'ㄱ' => NumberingKind::KoreanJamo,
'가' => NumberingKind::KoreanSyllable, '가' => NumberingKind::KoreanSyllable,
'\u{0661}' => NumberingKind::EasternArabic,
'\u{06F1}' => NumberingKind::EasternArabicPersian,
_ => return None, _ => return None,
}) })
} }
@ -300,6 +320,8 @@ impl NumberingKind {
Self::KatakanaIroha => 'イ', Self::KatakanaIroha => 'イ',
Self::KoreanJamo => 'ㄱ', Self::KoreanJamo => 'ㄱ',
Self::KoreanSyllable => '가', Self::KoreanSyllable => '가',
Self::EasternArabic => '\u{0661}',
Self::EasternArabicPersian => '\u{06F1}',
} }
} }
@ -507,6 +529,8 @@ impl NumberingKind {
}, },
n, n,
), ),
Self::EasternArabic => decimal('\u{0660}', n),
Self::EasternArabicPersian => decimal('\u{06F0}', n),
} }
} }
} }
@ -542,7 +566,7 @@ fn zeroless<const N_DIGITS: usize>(
if n == 0 { if n == 0 {
return '-'.into(); return '-'.into();
} }
let mut cs = vec![]; let mut cs: SmallVec<[char; 8]> = smallvec![];
while n > 0 { while n > 0 {
n -= 1; n -= 1;
cs.push(mk_digit(n % N_DIGITS)); cs.push(mk_digit(n % N_DIGITS));
@ -550,3 +574,18 @@ fn zeroless<const N_DIGITS: usize>(
} }
cs.into_iter().rev().collect() cs.into_iter().rev().collect()
} }
/// Stringify a number using a base-10 counting system with a zero digit.
///
/// This function assumes that the digits occupy contiguous codepoints.
fn decimal(start: char, mut n: usize) -> EcoString {
if n == 0 {
return start.into();
}
let mut cs: SmallVec<[char; 8]> = smallvec![];
while n > 0 {
cs.push(char::from_u32((start as u32) + ((n % 10) as u32)).unwrap());
n /= 10;
}
cs.into_iter().rev().collect()
}

View File

@ -98,6 +98,10 @@
[ for #i \ ] [ for #i \ ]
} }
--- numbering-arabic-indic ---
#assert.eq(numbering("\u{0661}", 1475), "١٤٧٥")
#assert.eq(numbering("\u{06F1}", 1475), "۱۴۷۵")
--- numbering-negative --- --- numbering-negative ---
// Error: 17-19 number must be at least zero // Error: 17-19 number must be at least zero
#numbering("1", -1) #numbering("1", -1)