From b805c5f10080912cbfb1fd2fd2733c48a92ce4f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20B=C3=BCnnig?= Date: Tue, 30 May 2023 18:11:30 +0200 Subject: [PATCH] Unicode code point conversion (#1068) (#1132) --- library/src/compute/construct.rs | 57 ++++++++++++++++++++++++++++++++ tests/typ/compute/construct.typ | 21 ++++++++++++ 2 files changed, 78 insertions(+) diff --git a/library/src/compute/construct.rs b/library/src/compute/construct.rs index 169c53284..d11eb3980 100644 --- a/library/src/compute/construct.rs +++ b/library/src/compute/construct.rs @@ -461,6 +461,10 @@ cast_from_value! { /// - Floats are formatted in base 10 and never in exponential notation. /// - From labels the name is extracted. /// +/// If you wish to convert from and to Unicode code points, see +/// [`str.to-unicode`]($func/str.to-unicode) and +/// [`str.from-unicode`]($func/str.from-unicode). +/// /// ## Example { #example } /// ```example /// #str(10) \ @@ -474,6 +478,11 @@ cast_from_value! { /// Category: construct /// Returns: string #[func] +#[scope( + scope.define("to-unicode", to_unicode); + scope.define("from-unicode", from_unicode); + scope +)] pub fn str( /// The value that should be converted to a string. value: ToStr, @@ -553,6 +562,54 @@ fn int_to_base(mut n: i64, base: i64) -> EcoString { std::str::from_utf8(&digits[i..]).unwrap_or_default().into() } +/// Converts a character into its corresponding code point. +/// +/// ## Example +/// ```example +/// #str.to-unicode("a") \ +/// #"a\u{0300}".codepoints().map(str.to-unicode) +/// ``` +/// +/// Display: String To Unicode +/// Category: construct +/// Returns: int +#[func] +pub fn to_unicode( + /// The character that should be converted. + value: char, +) -> Value { + Value::Int(From::<u32>::from(value.into())) +} + +#[func] +/// Converts a unicode code point into its corresponding string. 
+/// +/// ```example +/// #str.from-unicode(97) +/// ``` +/// +/// Display: String From Unicode +/// Category: construct +/// Returns: string +pub fn from_unicode( + /// The code point that should be converted. + value: CodePoint, +) -> Value { + Value::Str(format_str!("{}", value.0)) +} + +/// The numeric representation of a single unicode code point. +struct CodePoint(char); + +cast_from_value! { + CodePoint, + v: i64 => { + Self(v.try_into().ok().and_then(|v: u32| v.try_into().ok()).ok_or_else( + || eco_format!("{:#x} is not a valid codepoint", v), + )?) + }, +} + /// Create a label from a string. /// /// Inserting a label into content attaches it to the closest previous element diff --git a/tests/typ/compute/construct.typ b/tests/typ/compute/construct.typ index aea15b53e..f094b6b2f 100644 --- a/tests/typ/compute/construct.typ +++ b/tests/typ/compute/construct.typ @@ -82,6 +82,27 @@ // Error: 18-19 base is only supported for integers #str(1.23, base: 2) +--- +// Test the unicode function. +#test(str.from-unicode(97), "a") +#test(str.to-unicode("a"), 97) + +--- +// Error: 19-22 expected integer, found content +#str.from-unicode([a]) + +--- +// Error: 17-21 expected exactly one character +#str.to-unicode("ab") + +--- +// Error: 19-21 0xffffffffffffffff is not a valid codepoint +#str.from-unicode(-1) // negative values are not valid + +--- +// Error: 19-27 0x110000 is not a valid codepoint +#str.from-unicode(0x110000) // 0x10ffff is the highest valid code point + --- #assert(range(2, 5) == (2, 3, 4))