diff --git a/docs/src/reference/types.md b/docs/src/reference/types.md index 0714d1f7f..9448feed1 100644 --- a/docs/src/reference/types.md +++ b/docs/src/reference/types.md @@ -361,8 +361,8 @@ string and returns the resulting string. - pattern: string or regex (positional, required) The pattern to search for. -- replacement: string (positional, required) - The string to replace the matches with. +- replacement: string or function (positional, required) + The string to replace the matches with, or a function that is called with a dictionary describing each match and returns the replacement string. - count: integer (named) If given, only the first `count` matches of the pattern are placed. - returns: string diff --git a/src/eval/methods.rs b/src/eval/methods.rs index 3ee4599c2..6cbb846ba 100644 --- a/src/eval/methods.rs +++ b/src/eval/methods.rs @@ -56,9 +56,9 @@ pub fn call( "matches" => Value::Array(string.matches(args.expect("pattern")?)), "replace" => { let pattern = args.expect("pattern")?; - let with = args.expect("replacement string")?; + let with = args.expect("string or function")?; let count = args.named("count")?; - Value::Str(string.replace(pattern, with, count)) + Value::Str(string.replace(vm, pattern, with, count)?) } "trim" => { let pattern = args.eat()?; diff --git a/src/eval/str.rs b/src/eval/str.rs index 22b5498b4..2988074b7 100644 --- a/src/eval/str.rs +++ b/src/eval/str.rs @@ -6,8 +6,9 @@ use std::ops::{Add, AddAssign, Deref}; use ecow::EcoString; use unicode_segmentation::UnicodeSegmentation; -use super::{cast_from_value, dict, Array, Dict, Value}; -use crate::diag::StrResult; +use super::{cast_from_value, dict, Array, Dict, Func, Value, Vm}; +use crate::diag::{At, SourceResult, StrResult}; +use crate::eval::Args; use crate::geom::GenAlign; /// Create a new [`Str`] from a format string. @@ -257,16 +258,71 @@ impl Str { } /// Replace at most `count` occurrences of the given pattern with a - /// replacement string (beginning from the start). 
- pub fn replace(&self, pattern: StrPattern, with: Self, count: Option<usize>) -> Self { - match pattern { - StrPattern::Str(pat) => match count { - Some(n) => self.0.replacen(pat.as_str(), &with, n).into(), - None => self.0.replace(pat.as_str(), &with).into(), - }, - StrPattern::Regex(re) => match count { - Some(n) => re.replacen(self, n, with.as_str()).into(), - None => re.replace(self, with.as_str()).into(), + /// replacement string or function (beginning from the start). If no count is given, + /// all occurrences are replaced. A replacement function is called once per match with a dictionary describing it and must return a string. + pub fn replace( + &self, + vm: &mut Vm, + pattern: StrPattern, + with: Replacement, + count: Option<usize>, + ) -> SourceResult<Self> { + match with { + Replacement::Func(func) => { + // Heuristic: assume the new string is about the same length as the current string. + let mut new = String::with_capacity(self.as_str().len()); + let mut last_match = 0; + match &pattern { + StrPattern::Str(pat) => { + let matches = self + .0 + .match_indices(pat.as_str()) + .map(|(start, s)| (start, start + s.len(), s)) + .take(count.unwrap_or(usize::MAX)); + for (start, end, text) in matches { + // Copy the unmatched text between the previous match and this one. + new.push_str(&self.as_str()[last_match..start]); + let args = Args::new( + func.span(), + [match_to_dict((start, text)).into()], + ); + let res = + func.call_vm(vm, args)?.cast::<Str>().at(func.span())?; + new.push_str(res.as_str()); + last_match = end; + } + } + StrPattern::Regex(re) => { + let all_captures = + re.captures_iter(self).take(count.unwrap_or(usize::MAX)); + for caps in all_captures { + // `caps.get(0)` is the entire match over all capture groups and is always present. + let (start, end) = + caps.get(0).map(|c| (c.start(), c.end())).unwrap(); + // Copy the unmatched text between the previous match and this one. + new.push_str(&self.as_str()[last_match..start]); + let args = + Args::new(func.span(), [captures_to_dict(caps).into()]); + let res = + func.call_vm(vm, args)?.cast::<Str>().at(func.span())?; + new.push_str(res.as_str()); + last_match = end; + } + } + } + // Copy whatever follows the last match. + 
new.push_str(&self.as_str()[last_match..]); + Ok(new.into()) + } + Replacement::Str(s) => match pattern { + StrPattern::Str(pat) => match count { + Some(n) => Ok(self.0.replacen(pat.as_str(), &s, n).into()), + None => Ok(self.0.replace(pat.as_str(), &s).into()), + }, + StrPattern::Regex(re) => match count { + Some(n) => Ok(re.replacen(self, n, s.as_str()).into()), // NOTE(review): presumably the regex crate's `replacen`, which treats a limit of 0 as "replace all" (unlike `str::replacen`) - confirm `count: 0` behaves as intended. + None => Ok(re.replace_all(self, s.as_str()).into()), + }, }, } } @@ -521,3 +577,18 @@ cast_from_value! { _ => Err("expected either `start` or `end`")?, }, } + +/// A replacement for a matched [`Str`]. +pub enum Replacement { + /// A string a match is replaced with. + Str(Str), + /// A function of type Dict -> Str (see `captures_to_dict` or `match_to_dict`) + /// whose output is inserted for the match. + Func(Func), +} + +cast_from_value! { + Replacement, + text: Str => Self::Str(text), + func: Func => Self::Func(func) +} diff --git a/tests/typ/compiler/string.typ b/tests/typ/compiler/string.typ index 2f7ba9ec7..cba478f7b 100644 --- a/tests/typ/compiler/string.typ +++ b/tests/typ/compiler/string.typ @@ -111,7 +111,7 @@ #test(timesum("1:20, 2:10, 0:40"), "4:10") --- -// Test the `replace` method. +// Test the `replace` method with `Str` replacements. #test("ABC".replace("", "-"), "-A-B-C-") #test("Ok".replace("Ok", "Nope", count: 0), "Ok") #test("to add?".replace("", "How ", count: 1), "How to add?") @@ -126,6 +126,47 @@ #test("123".replace(regex("\d$"), "_"), "12_") #test("123".replace(regex("\d{1,2}$"), "__"), "1__") +--- +// Test the `replace` method with `Func` replacements. 
+ +#test("abc".replace(regex("[a-z]"), m => { + str(m.start) + m.text + str(m.end) +}), "0a11b22c3") +#test("abcd, efgh".replace(regex("\w+"), m => { + upper(m.text) +}), "ABCD, EFGH") +#test("hello : world".replace(regex("^(.+)\s*(:)\s*(.+)$"), m => { + upper(m.captures.at(0)) + m.captures.at(1) + " " + upper(m.captures.at(2)) +}), "HELLO : WORLD") +#test("hello world, lorem ipsum".replace(regex("(\w+) (\w+)"), m => { + m.captures.at(1) + " " + m.captures.at(0) +}), "world hello, ipsum lorem") +#test("hello world, lorem ipsum".replace(regex("(\w+) (\w+)"), count: 1, m => { + m.captures.at(1) + " " + m.captures.at(0) +}), "world hello, lorem ipsum") +#test("123 456".replace(regex("[a-z]+"), "a"), "123 456") + +#test("abc".replace("", m => "-"), "-a-b-c-") +#test("abc".replace("", m => "-", count: 1), "-abc") +#test("123".replace("abc", m => ""), "123") +#test("123".replace("abc", m => "", count: 2), "123") +#test("a123b123c".replace("123", m => { + str(m.start) + "-" + str(m.end) +}), "a1-4b5-8c") +#test("halla warld".replace("a", m => { + if m.start == 1 { "e" } + else if m.start == 4 or m.start == 7 { "o" } +}), "hello world") +#test("aaa".replace("a", m => str(m.captures.len())), "000") + +--- +// Error: 23-24 expected string, found integer +#"123".replace("123", m => 1) + +--- +// Error: 23-32 expected string or function, found array +#"123".replace("123", (1, 2, 3)) + --- // Test the `trim` method. #let str = "Typst, LaTeX, Word, InDesign"