mirror of
https://github.com/typst/typst
synced 2025-05-13 12:36:23 +08:00
Add clusters
and codepoints
methods
This commit is contained in:
parent
585f656487
commit
dd5f07eb91
@ -197,18 +197,18 @@ $arrow.t.quad$
|
||||
# String
|
||||
A sequence of Unicode codepoints.
|
||||
|
||||
You can iterate over the characters (or rather, grapheme clusters) of the string
|
||||
using a [for loop]($scripting/#loops). Strings can be added with
|
||||
the `+` operator, [joined together]($scripting/#blocks) and
|
||||
multiplied with integers.
|
||||
You can iterate over the grapheme clusters of the string using a
|
||||
[for loop]($scripting/#loops). Grapheme clusters are essentially characters, but they
|
||||
keep together things that belong together, e.g. multiple codepoints that
|
||||
together form a flag emoji. Strings can be added with the `+` operator,
|
||||
[joined together]($scripting/#blocks) and multiplied with integers.
|
||||
|
||||
Typst provides utility methods for string manipulation. Many of these methods
|
||||
(e.g., `split`, `trim` and `replace`) operate on _patterns:_ A pattern can be
|
||||
either a string or a [regular expression]($func/regex). This makes the methods
|
||||
quite versatile.
|
||||
|
||||
_Note:_ Currently all lengths and indices are expressed in terms of UTF-8 bytes.
|
||||
This _might_ change to grapheme clusters in the future.
|
||||
All lengths and indices are expressed in terms of UTF-8 bytes.
|
||||
|
||||
### Example
|
||||
```example
|
||||
@ -236,20 +236,20 @@ The length of the string in UTF-8 encoded bytes.
|
||||
- returns: integer
|
||||
|
||||
### first()
|
||||
Extract the first character (or rather, grapheme cluster) of the string.
|
||||
Extract the first grapheme cluster of the string.
|
||||
Fails with an error if the string is empty.
|
||||
|
||||
- returns: any
|
||||
|
||||
### last()
|
||||
Extract the last character (or rather, grapheme cluster) of the string.
|
||||
Extract the last grapheme cluster of the string.
|
||||
Fails with an error if the string is empty.
|
||||
|
||||
- returns: any
|
||||
|
||||
### at()
|
||||
Extract the first character (or rather, grapheme cluster) after the specified
|
||||
index. Fails with an error if the index is out of bounds.
|
||||
Extract the first grapheme cluster after the specified index. Fails with an
|
||||
error if the index is out of bounds.
|
||||
|
||||
- index: integer (positional, required)
|
||||
The byte index.
|
||||
@ -269,6 +269,16 @@ Fails with an error if the start or end index is out of bounds.
|
||||
as the `end` position. Mutually exclusive with `end`.
|
||||
- returns: string
|
||||
|
||||
### clusters()
|
||||
Returns the grapheme clusters of the string as an array of substrings.
|
||||
|
||||
- returns: array
|
||||
|
||||
### codepoints()
|
||||
Returns the Unicode codepoints of the string as an array of substrings.
|
||||
|
||||
- returns: array
|
||||
|
||||
### contains()
|
||||
Whether the string contains the specified pattern.
|
||||
|
||||
|
@ -37,6 +37,8 @@ pub fn call(
|
||||
}
|
||||
Value::Str(string.slice(start, end).at(span)?)
|
||||
}
|
||||
"clusters" => Value::Array(string.clusters()),
|
||||
"codepoints" => Value::Array(string.codepoints()),
|
||||
"contains" => Value::Bool(string.contains(args.expect("pattern")?)),
|
||||
"starts-with" => Value::Bool(string.starts_with(args.expect("pattern")?)),
|
||||
"ends-with" => Value::Bool(string.ends_with(args.expect("pattern")?)),
|
||||
@ -218,6 +220,8 @@ pub fn methods_on(type_name: &str) -> &[(&'static str, bool)] {
|
||||
"string" => &[
|
||||
("len", false),
|
||||
("at", true),
|
||||
("clusters", false),
|
||||
("codepoints", false),
|
||||
("contains", true),
|
||||
("ends-with", true),
|
||||
("find", true),
|
||||
|
@ -42,11 +42,6 @@ impl Str {
|
||||
self
|
||||
}
|
||||
|
||||
/// The grapheme clusters the string consists of.
///
/// Every segment yielded by `graphemes(true)` is wrapped in its own
/// `Value::Str` and the results are collected into an `Array`.
|
||||
pub fn graphemes(&self) -> Array {
|
||||
// NOTE(review): `true` presumably selects extended grapheme clusters —
// confirm against the unicode-segmentation documentation.
self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
|
||||
}
|
||||
|
||||
/// Extract the first grapheme cluster.
|
||||
pub fn first(&self) -> StrResult<Self> {
|
||||
self.0
|
||||
@ -82,6 +77,16 @@ impl Str {
|
||||
Ok(self.0[start..end].into())
|
||||
}
|
||||
|
||||
/// The grapheme clusters the string consists of.
///
/// Every segment yielded by `graphemes(true)` is wrapped in its own
/// `Value::Str` and the results are collected into an `Array`.
|
||||
pub fn clusters(&self) -> Array {
|
||||
// NOTE(review): `true` presumably selects extended grapheme clusters —
// confirm against the unicode-segmentation documentation.
self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
|
||||
}
|
||||
|
||||
/// The codepoints the string consists of.
///
/// Each `char` of the string is converted into its own `Value::Str`, so the
/// resulting `Array` holds one single-codepoint string per element.
|
||||
pub fn codepoints(&self) -> Array {
|
||||
self.chars().map(|c| Value::Str(c.into())).collect()
|
||||
}
|
||||
|
||||
/// Whether the given pattern exists in this string.
|
||||
pub fn contains(&self, pattern: StrPattern) -> bool {
|
||||
match pattern {
|
||||
@ -350,12 +355,10 @@ impl Debug for Str {
|
||||
f.write_char('"')?;
|
||||
for c in self.chars() {
|
||||
match c {
|
||||
'\\' => f.write_str(r"\\")?,
|
||||
'\0' => f.write_str("\\u{0}")?,
|
||||
'\'' => f.write_str("'")?,
|
||||
'"' => f.write_str(r#"\""#)?,
|
||||
'\n' => f.write_str(r"\n")?,
|
||||
'\r' => f.write_str(r"\r")?,
|
||||
'\t' => f.write_str(r"\t")?,
|
||||
_ => f.write_char(c)?,
|
||||
_ => Display::fmt(&c.escape_debug(), f)?,
|
||||
}
|
||||
}
|
||||
f.write_char('"')
|
||||
|
@ -45,6 +45,13 @@
|
||||
// Error: 2-21 string index -1 is not a character boundary
|
||||
#"🏳️🌈".slice(0, -1)
|
||||
|
||||
---
|
||||
// Test the `clusters` and `codepoints` methods.
|
||||
// For plain ASCII input, clusters and codepoints yield the same pieces.
#test("abc".clusters(), ("a", "b", "c"))
|
||||
#test("abc".codepoints(), ("a", "b", "c"))
|
||||
#test("🏳️🌈!".clusters(), ("🏳️🌈", "!"))
|
||||
#test("🏳️🌈!".codepoints(), ("🏳", "\u{fe0f}", "\u{200d}", "🌈", "!"))
|
||||
|
||||
---
|
||||
// Test the `contains` method.
|
||||
#test("abc".contains("b"), true)
|
||||
|
Loading…
x
Reference in New Issue
Block a user