mirror of
https://github.com/typst/typst
synced 2025-05-13 20:46:23 +08:00
Add clusters
and codepoints
methods
This commit is contained in:
parent
585f656487
commit
dd5f07eb91
@ -197,18 +197,18 @@ $arrow.t.quad$
|
|||||||
# String
|
# String
|
||||||
A sequence of Unicode codepoints.
|
A sequence of Unicode codepoints.
|
||||||
|
|
||||||
You can iterate over the characters (or rather, grapheme clusters) of the string
|
You can iterate over the grapheme clusters of the string using a
|
||||||
using a [for loop]($scripting/#loops). Strings can be added with
|
[for loop]($scripting/#loops). Grapheme clusters are basically characters but
|
||||||
the `+` operator, [joined together]($scripting/#blocks) and
|
keep together codepoints that form a single unit, e.g. multiple codepoints that
|
||||||
multiplied with integers.
|
together form a flag emoji. Strings can be added with the `+` operator,
|
||||||
|
[joined together]($scripting/#blocks) and multiplied with integers.
|
||||||
|
|
||||||
Typst provides utility methods for string manipulation. Many of these methods
|
Typst provides utility methods for string manipulation. Many of these methods
|
||||||
(e.g., `split`, `trim` and `replace`) operate on _patterns:_ A pattern can be
|
(e.g., `split`, `trim` and `replace`) operate on _patterns:_ A pattern can be
|
||||||
either a string or a [regular expression]($func/regex). This makes the methods
|
either a string or a [regular expression]($func/regex). This makes the methods
|
||||||
quite versatile.
|
quite versatile.
|
||||||
|
|
||||||
_Note:_ Currently all lengths and indices are expressed in terms of UTF-8 bytes.
|
All lengths and indices are expressed in terms of UTF-8 bytes.
|
||||||
This _might_ change to grapheme clusters in the future.
|
|
||||||
|
|
||||||
### Example
|
### Example
|
||||||
```example
|
```example
|
||||||
@ -236,20 +236,20 @@ The length of the string in UTF-8 encoded bytes.
|
|||||||
- returns: integer
|
- returns: integer
|
||||||
|
|
||||||
### first()
|
### first()
|
||||||
Extract the first character (or rather, grapheme cluster) of the string.
|
Extract the first grapheme cluster of the string.
|
||||||
Fails with an error if the string is empty.
|
Fails with an error if the string is empty.
|
||||||
|
|
||||||
- returns: any
|
- returns: any
|
||||||
|
|
||||||
### last()
|
### last()
|
||||||
Extract the last character (or rather, grapheme cluster) of the string.
|
Extract the last grapheme cluster of the string.
|
||||||
Fails with an error if the string is empty.
|
Fails with an error if the string is empty.
|
||||||
|
|
||||||
- returns: any
|
- returns: any
|
||||||
|
|
||||||
### at()
|
### at()
|
||||||
Extract the first character (or rather, grapheme cluster) after the specified
|
Extract the first grapheme cluster after the specified index. Fails with an
|
||||||
index. Fails with an error if the index is out of bounds.
|
error if the index is out of bounds.
|
||||||
|
|
||||||
- index: integer (positional, required)
|
- index: integer (positional, required)
|
||||||
The byte index.
|
The byte index.
|
||||||
@ -269,6 +269,16 @@ Fails with an error if the start or end index is out of bounds.
|
|||||||
as the `end` position. Mutually exclusive with `end`.
|
as the `end` position. Mutually exclusive with `end`.
|
||||||
- returns: string
|
- returns: string
|
||||||
|
|
||||||
|
### clusters()
|
||||||
|
Returns the grapheme clusters of the string as an array of substrings.
|
||||||
|
|
||||||
|
- returns: array
|
||||||
|
|
||||||
|
### codepoints()
|
||||||
|
Returns the Unicode codepoints of the string as an array of substrings.
|
||||||
|
|
||||||
|
- returns: array
|
||||||
|
|
||||||
### contains()
|
### contains()
|
||||||
Whether the string contains the specified pattern.
|
Whether the string contains the specified pattern.
|
||||||
|
|
||||||
|
@ -37,6 +37,8 @@ pub fn call(
|
|||||||
}
|
}
|
||||||
Value::Str(string.slice(start, end).at(span)?)
|
Value::Str(string.slice(start, end).at(span)?)
|
||||||
}
|
}
|
||||||
|
"clusters" => Value::Array(string.clusters()),
|
||||||
|
"codepoints" => Value::Array(string.codepoints()),
|
||||||
"contains" => Value::Bool(string.contains(args.expect("pattern")?)),
|
"contains" => Value::Bool(string.contains(args.expect("pattern")?)),
|
||||||
"starts-with" => Value::Bool(string.starts_with(args.expect("pattern")?)),
|
"starts-with" => Value::Bool(string.starts_with(args.expect("pattern")?)),
|
||||||
"ends-with" => Value::Bool(string.ends_with(args.expect("pattern")?)),
|
"ends-with" => Value::Bool(string.ends_with(args.expect("pattern")?)),
|
||||||
@ -218,6 +220,8 @@ pub fn methods_on(type_name: &str) -> &[(&'static str, bool)] {
|
|||||||
"string" => &[
|
"string" => &[
|
||||||
("len", false),
|
("len", false),
|
||||||
("at", true),
|
("at", true),
|
||||||
|
("clusters", false),
|
||||||
|
("codepoints", false),
|
||||||
("contains", true),
|
("contains", true),
|
||||||
("ends-with", true),
|
("ends-with", true),
|
||||||
("find", true),
|
("find", true),
|
||||||
|
@ -42,11 +42,6 @@ impl Str {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The grapheme clusters the string consists of.
|
|
||||||
pub fn graphemes(&self) -> Array {
|
|
||||||
self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract the first grapheme cluster.
|
/// Extract the first grapheme cluster.
|
||||||
pub fn first(&self) -> StrResult<Self> {
|
pub fn first(&self) -> StrResult<Self> {
|
||||||
self.0
|
self.0
|
||||||
@ -82,6 +77,16 @@ impl Str {
|
|||||||
Ok(self.0[start..end].into())
|
Ok(self.0[start..end].into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The grapheme clusters the string consists of.
|
||||||
|
pub fn clusters(&self) -> Array {
|
||||||
|
self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The codepoints the string consists of.
|
||||||
|
pub fn codepoints(&self) -> Array {
|
||||||
|
self.chars().map(|c| Value::Str(c.into())).collect()
|
||||||
|
}
|
||||||
|
|
||||||
/// Whether the given pattern exists in this string.
|
/// Whether the given pattern exists in this string.
|
||||||
pub fn contains(&self, pattern: StrPattern) -> bool {
|
pub fn contains(&self, pattern: StrPattern) -> bool {
|
||||||
match pattern {
|
match pattern {
|
||||||
@ -350,12 +355,10 @@ impl Debug for Str {
|
|||||||
f.write_char('"')?;
|
f.write_char('"')?;
|
||||||
for c in self.chars() {
|
for c in self.chars() {
|
||||||
match c {
|
match c {
|
||||||
'\\' => f.write_str(r"\\")?,
|
'\0' => f.write_str("\\u{0}")?,
|
||||||
|
'\'' => f.write_str("'")?,
|
||||||
'"' => f.write_str(r#"\""#)?,
|
'"' => f.write_str(r#"\""#)?,
|
||||||
'\n' => f.write_str(r"\n")?,
|
_ => Display::fmt(&c.escape_debug(), f)?,
|
||||||
'\r' => f.write_str(r"\r")?,
|
|
||||||
'\t' => f.write_str(r"\t")?,
|
|
||||||
_ => f.write_char(c)?,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
f.write_char('"')
|
f.write_char('"')
|
||||||
|
@ -45,6 +45,13 @@
|
|||||||
// Error: 2-21 string index -1 is not a character boundary
|
// Error: 2-21 string index -1 is not a character boundary
|
||||||
#"🏳️🌈".slice(0, -1)
|
#"🏳️🌈".slice(0, -1)
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test the `clusters` and `codepoints` methods.
|
||||||
|
#test("abc".clusters(), ("a", "b", "c"))
|
||||||
|
#test("abc".codepoints(), ("a", "b", "c"))
|
||||||
|
#test("🏳️🌈!".clusters(), ("🏳️🌈", "!"))
|
||||||
|
#test("🏳️🌈!".codepoints(), ("🏳", "\u{fe0f}", "\u{200d}", "🌈", "!"))
|
||||||
|
|
||||||
---
|
---
|
||||||
// Test the `contains` method.
|
// Test the `contains` method.
|
||||||
#test("abc".contains("b"), true)
|
#test("abc".contains("b"), true)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user