mirror of
https://github.com/typst/typst
synced 2025-05-13 20:46:23 +08:00
Add #str.normalize(form)
(#5631)
Co-authored-by: +merlan #flirora <uruwi@protonmail.com> Co-authored-by: Laurenz <laurmaedje@gmail.com>
This commit is contained in:
parent
bad343748b
commit
d11ad80dee
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -2995,6 +2995,7 @@ dependencies = [
|
|||||||
"typst-timing",
|
"typst-timing",
|
||||||
"typst-utils",
|
"typst-utils",
|
||||||
"unicode-math-class",
|
"unicode-math-class",
|
||||||
|
"unicode-normalization",
|
||||||
"unicode-segmentation",
|
"unicode-segmentation",
|
||||||
"unscanny",
|
"unscanny",
|
||||||
"usvg",
|
"usvg",
|
||||||
|
@ -129,6 +129,7 @@ unicode-bidi = "0.3.18"
|
|||||||
unicode-ident = "1.0"
|
unicode-ident = "1.0"
|
||||||
unicode-math-class = "0.1"
|
unicode-math-class = "0.1"
|
||||||
unicode-script = "0.5"
|
unicode-script = "0.5"
|
||||||
|
unicode-normalization = "0.1.24"
|
||||||
unicode-segmentation = "1"
|
unicode-segmentation = "1"
|
||||||
unscanny = "0.1"
|
unscanny = "0.1"
|
||||||
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
|
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
|
||||||
|
@ -61,6 +61,7 @@ ttf-parser = { workspace = true }
|
|||||||
two-face = { workspace = true }
|
two-face = { workspace = true }
|
||||||
typed-arena = { workspace = true }
|
typed-arena = { workspace = true }
|
||||||
unicode-math-class = { workspace = true }
|
unicode-math-class = { workspace = true }
|
||||||
|
unicode-normalization = { workspace = true }
|
||||||
unicode-segmentation = { workspace = true }
|
unicode-segmentation = { workspace = true }
|
||||||
unscanny = { workspace = true }
|
unscanny = { workspace = true }
|
||||||
usvg = { workspace = true }
|
usvg = { workspace = true }
|
||||||
|
@ -7,12 +7,13 @@ use comemo::Tracked;
|
|||||||
use ecow::EcoString;
|
use ecow::EcoString;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use typst_syntax::{Span, Spanned};
|
use typst_syntax::{Span, Spanned};
|
||||||
|
use unicode_normalization::UnicodeNormalization;
|
||||||
use unicode_segmentation::UnicodeSegmentation;
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
|
|
||||||
use crate::diag::{bail, At, SourceResult, StrResult};
|
use crate::diag::{bail, At, SourceResult, StrResult};
|
||||||
use crate::engine::Engine;
|
use crate::engine::Engine;
|
||||||
use crate::foundations::{
|
use crate::foundations::{
|
||||||
cast, dict, func, repr, scope, ty, Array, Bytes, Context, Decimal, Dict, Func,
|
cast, dict, func, repr, scope, ty, Array, Bytes, Cast, Context, Decimal, Dict, Func,
|
||||||
IntoValue, Label, Repr, Type, Value, Version,
|
IntoValue, Label, Repr, Type, Value, Version,
|
||||||
};
|
};
|
||||||
use crate::layout::Alignment;
|
use crate::layout::Alignment;
|
||||||
@ -286,6 +287,30 @@ impl Str {
|
|||||||
Ok(c.into())
|
Ok(c.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Normalizes the string to the given Unicode normal form.
|
||||||
|
///
|
||||||
|
/// This is useful when manipulating strings containing Unicode combining
|
||||||
|
/// characters.
|
||||||
|
///
|
||||||
|
/// ```typ
|
||||||
|
/// #assert.eq("é".normalize(form: "nfd"), "e\u{0301}")
|
||||||
|
/// #assert.eq("ſ́".normalize(form: "nfkc"), "ś")
|
||||||
|
/// ```
|
||||||
|
#[func]
|
||||||
|
pub fn normalize(
|
||||||
|
&self,
|
||||||
|
#[named]
|
||||||
|
#[default(UnicodeNormalForm::Nfc)]
|
||||||
|
form: UnicodeNormalForm,
|
||||||
|
) -> Str {
|
||||||
|
match form {
|
||||||
|
UnicodeNormalForm::Nfc => self.nfc().collect(),
|
||||||
|
UnicodeNormalForm::Nfd => self.nfd().collect(),
|
||||||
|
UnicodeNormalForm::Nfkc => self.nfkc().collect(),
|
||||||
|
UnicodeNormalForm::Nfkd => self.nfkd().collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Whether the string contains the specified pattern.
|
/// Whether the string contains the specified pattern.
|
||||||
///
|
///
|
||||||
/// This method also has dedicated syntax: You can write `{"bc" in "abcd"}`
|
/// This method also has dedicated syntax: You can write `{"bc" in "abcd"}`
|
||||||
@ -788,6 +813,25 @@ cast! {
|
|||||||
v: Str => Self::Str(v),
|
v: Str => Self::Str(v),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A Unicode normalization form.
|
||||||
|
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)]
|
||||||
|
pub enum UnicodeNormalForm {
|
||||||
|
/// Canonical composition where e.g. accented letters are turned into a
|
||||||
|
/// single Unicode codepoint.
|
||||||
|
#[string("nfc")]
|
||||||
|
Nfc,
|
||||||
|
/// Canonical decomposition where e.g. accented letters are split into a
|
||||||
|
/// separate base and diacritic.
|
||||||
|
#[string("nfd")]
|
||||||
|
Nfd,
|
||||||
|
/// Like NFC, but using the Unicode compatibility decompositions.
|
||||||
|
#[string("nfkc")]
|
||||||
|
Nfkc,
|
||||||
|
/// Like NFD, but using the Unicode compatibility decompositions.
|
||||||
|
#[string("nfkd")]
|
||||||
|
Nfkd,
|
||||||
|
}
|
||||||
|
|
||||||
/// Convert an item of std's `match_indices` to a dictionary.
|
/// Convert an item of std's `match_indices` to a dictionary.
|
||||||
fn match_to_dict((start, text): (usize, &str)) -> Dict {
|
fn match_to_dict((start, text): (usize, &str)) -> Dict {
|
||||||
dict! {
|
dict! {
|
||||||
|
@ -86,6 +86,13 @@
|
|||||||
// Error: 2-28 0x110000 is not a valid codepoint
|
// Error: 2-28 0x110000 is not a valid codepoint
|
||||||
#str.from-unicode(0x110000) // 0x10ffff is the highest valid code point
|
#str.from-unicode(0x110000) // 0x10ffff is the highest valid code point
|
||||||
|
|
||||||
|
--- str-normalize ---
// Test the `normalize` method with every explicit normal form.
#test("e\u{0301}".normalize(form: "nfc"), "é")
#test("é".normalize(form: "nfd"), "e\u{0301}")
#test("ſ\u{0301}".normalize(form: "nfkc"), "ś")
#test("ſ\u{0301}".normalize(form: "nfkd"), "s\u{0301}")
// When `form` is omitted, the default (NFC) applies.
#test("e\u{0301}".normalize(), "\u{00e9}")
|
||||||
|
|
||||||
--- string-len ---
|
--- string-len ---
|
||||||
// Test the `len` method.
|
// Test the `len` method.
|
||||||
#test("Hello World!".len(), 12)
|
#test("Hello World!".len(), 12)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user