From d27414937a3f9cebe1df6c9b8454dfbf63cf4cdb Mon Sep 17 00:00:00 2001 From: bluebear94 Date: Wed, 1 May 2024 04:07:45 -0400 Subject: [PATCH] Add support for Eastern Arabic numerals (#4036) --- crates/typst/src/model/numbering.rs | 41 ++++++++++++++++++++++++++++- tests/suite/model/numbering.typ | 4 +++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/crates/typst/src/model/numbering.rs b/crates/typst/src/model/numbering.rs index 10946ccc0..7cfdf98b6 100644 --- a/crates/typst/src/model/numbering.rs +++ b/crates/typst/src/model/numbering.rs @@ -3,6 +3,7 @@ use std::str::FromStr; use chinese_number::{ChineseCase, ChineseCountMethod, ChineseVariant, NumberToChinese}; use comemo::Tracked; use ecow::{eco_format, EcoString, EcoVec}; +use smallvec::{smallvec, SmallVec}; use crate::diag::SourceResult; use crate::engine::Engine; @@ -243,11 +244,17 @@ cast! { /// Different kinds of numberings. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] pub enum NumberingKind { + /// Arabic numerals (1, 2, 3, etc.). Arabic, + /// Latin letters (A, B, C, etc.). Items beyond Z use multiple symbols. Uses both cases. Letter, + /// Roman numerals (I, II, III, etc.). Uses both cases. Roman, + /// The symbols *, †, ‡, §, ¶, and ‖. Further items use multiple symbols. Symbol, + /// Hebrew numerals. Hebrew, + /// Simplified Chinese numerals. Uses standard numerals for lowercase and “banknote” numerals for uppercase. SimplifiedChinese, // TODO: Pick the numbering pattern based on languages choice. // As the `1st` numbering character of Chinese (Simplified) and @@ -255,13 +262,24 @@ pub enum NumberingKind { // if the context is Simplified or Traditional by only this // character. #[allow(unused)] + /// Traditional Chinese numerals. Uses standard numerals for lowercase and “banknote” numerals for uppercase. TraditionalChinese, + /// Hiragana in the gojūon order. Includes n but excludes wi and we. HiraganaAiueo, + /// Hiragana in the iroha order. Includes wi and we but excludes n. HiraganaIroha, + /// Katakana in the gojūon order. Includes n but excludes wi and we. KatakanaAiueo, + /// Katakana in the iroha order. Includes wi and we but excludes n. KatakanaIroha, + /// Korean jamo (ㄱ, ㄴ, ㄷ, etc.). KoreanJamo, + /// Korean syllables (가, 나, 다, etc.). KoreanSyllable, + /// Eastern Arabic numerals, used in some Arabic-speaking countries. + EasternArabic, + /// The variant of Eastern Arabic numerals used in Persian and Urdu. + EasternArabicPersian, } impl NumberingKind { @@ -280,6 +298,8 @@ impl NumberingKind { 'イ' => NumberingKind::KatakanaIroha, 'ㄱ' => NumberingKind::KoreanJamo, '가' => NumberingKind::KoreanSyllable, + '\u{0661}' => NumberingKind::EasternArabic, + '\u{06F1}' => NumberingKind::EasternArabicPersian, _ => return None, }) } @@ -300,6 +320,8 @@ impl NumberingKind { Self::KatakanaIroha => 'イ', Self::KoreanJamo => 'ㄱ', Self::KoreanSyllable => '가', + Self::EasternArabic => '\u{0661}', + Self::EasternArabicPersian => '\u{06F1}', } } @@ -507,6 +529,8 @@ impl NumberingKind { }, n, ), + Self::EasternArabic => decimal('\u{0660}', n), + Self::EasternArabicPersian => decimal('\u{06F0}', n), } } } @@ -542,7 +566,7 @@ fn zeroless( if n == 0 { return '-'.into(); } - let mut cs = vec![]; + let mut cs: SmallVec<[char; 8]> = smallvec![]; while n > 0 { n -= 1; cs.push(mk_digit(n % N_DIGITS)); @@ -550,3 +574,18 @@ fn zeroless( } cs.into_iter().rev().collect() } + +/// Stringify a number using a base-10 counting system with a zero digit. +/// +/// This function assumes that the digits occupy contiguous codepoints. +fn decimal(start: char, mut n: usize) -> EcoString { + if n == 0 { + return start.into(); + } + let mut cs: SmallVec<[char; 8]> = smallvec![]; + while n > 0 { + cs.push(char::from_u32((start as u32) + ((n % 10) as u32)).unwrap()); + n /= 10; + } + cs.into_iter().rev().collect() +} diff --git a/tests/suite/model/numbering.typ b/tests/suite/model/numbering.typ index c2de1e05e..124173e7c 100644 --- a/tests/suite/model/numbering.typ +++ b/tests/suite/model/numbering.typ @@ -98,6 +98,10 @@ [ for #i \ ] } +--- numbering-arabic-indic --- +#assert.eq(numbering("\u{0661}", 1475), "١٤٧٥") +#assert.eq(numbering("\u{06F1}", 1475), "۱۴۷۵") + --- numbering-negative --- // Error: 17-19 number must be at least zero #numbering("1", -1)