From fc574b39454aec77cf2c33270566225917c7c823 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Tue, 26 Jul 2022 23:24:50 +0200 Subject: [PATCH] New `Str` type with methods --- src/eval/args.rs | 7 +- src/eval/array.rs | 2 +- src/eval/cast.rs | 7 + src/eval/dict.rs | 34 +-- src/eval/func.rs | 4 +- src/eval/methods.rs | 90 ++++--- src/eval/mod.rs | 18 +- src/eval/ops.rs | 27 +- src/eval/str.rs | 451 ++++++++++++++++++++++++++++++++-- src/eval/value.rs | 24 +- src/library/layout/page.rs | 2 +- src/library/math/mod.rs | 2 +- src/library/text/link.rs | 4 +- src/library/text/raw.rs | 4 +- src/library/utility/string.rs | 6 +- src/util/eco.rs | 50 ++-- tests/typ/utility/regex.typ | 10 - tests/typ/utility/string.typ | 140 +++++++++-- 18 files changed, 705 insertions(+), 177 deletions(-) delete mode 100644 tests/typ/utility/regex.typ diff --git a/src/eval/args.rs b/src/eval/args.rs index 4d280ff7f..8d62b675a 100644 --- a/src/eval/args.rs +++ b/src/eval/args.rs @@ -1,9 +1,8 @@ use std::fmt::{self, Debug, Formatter, Write}; -use super::{Array, Cast, Dict, Value}; +use super::{Array, Cast, Dict, Str, Value}; use crate::diag::{At, TypResult}; use crate::syntax::{Span, Spanned}; -use crate::util::EcoString; /// Evaluated arguments to a function. #[derive(Clone, PartialEq, Hash)] @@ -20,7 +19,7 @@ pub struct Arg { /// The span of the whole argument. pub span: Span, /// The name of the argument (`None` for positional arguments). - pub name: Option, + pub name: Option, /// The value of the argument. pub value: Spanned, } @@ -177,7 +176,7 @@ impl Args { } /// Reinterpret these arguments as actually being a dictionary key. - pub fn into_key(self) -> TypResult { + pub fn into_key(self) -> TypResult { self.into_castable("key") } diff --git a/src/eval/array.rs b/src/eval/array.rs index e54e7e19d..43261a22e 100644 --- a/src/eval/array.rs +++ b/src/eval/array.rs @@ -20,7 +20,7 @@ macro_rules! array { }; } -/// An array of values with clone-on-write value semantics. 
+/// A reference counted array with value semantics. #[derive(Default, Clone, PartialEq, Hash)] pub struct Array(Arc>); diff --git a/src/eval/cast.rs b/src/eval/cast.rs index 063d378a4..b27e3edc1 100644 --- a/src/eval/cast.rs +++ b/src/eval/cast.rs @@ -5,6 +5,7 @@ use crate::diag::{with_alternative, StrResult}; use crate::geom::{Corners, Dir, Paint, Sides}; use crate::model::{Content, Group, Layout, LayoutNode, Pattern}; use crate::syntax::Spanned; +use crate::util::EcoString; /// Cast from a value to a specific type. pub trait Cast: Sized { @@ -162,6 +163,12 @@ castable! { Value::Color(color) => Paint::Solid(color), } +castable! { + EcoString, + Expected: "string", + Value::Str(str) => str.into(), +} + castable! { String, Expected: "string", diff --git a/src/eval/dict.rs b/src/eval/dict.rs index 654c90eb5..837933b1a 100644 --- a/src/eval/dict.rs +++ b/src/eval/dict.rs @@ -3,11 +3,11 @@ use std::fmt::{self, Debug, Formatter, Write}; use std::ops::{Add, AddAssign}; use std::sync::Arc; -use super::{Args, Array, Func, Machine, Value}; +use super::{Args, Array, Func, Machine, Str, Value}; use crate::diag::{StrResult, TypResult}; use crate::parse::is_ident; use crate::syntax::Spanned; -use crate::util::{ArcExt, EcoString}; +use crate::util::ArcExt; /// Create a new [`Dict`] from key-value pairs. #[allow(unused_macros)] @@ -20,9 +20,9 @@ macro_rules! dict { }}; } -/// A dictionary from strings to values with clone-on-write value semantics. +/// A reference-counted dictionary with value semantics. #[derive(Default, Clone, PartialEq, Hash)] -pub struct Dict(Arc>); +pub struct Dict(Arc>); impl Dict { /// Create a new, empty dictionary. @@ -31,7 +31,7 @@ impl Dict { } /// Create a new dictionary from a mapping of strings to values. - pub fn from_map(map: BTreeMap) -> Self { + pub fn from_map(map: BTreeMap) -> Self { Self(Arc::new(map)) } @@ -54,7 +54,7 @@ impl Dict { /// /// This inserts the key with [`None`](Value::None) as the value if not /// present so far. 
- pub fn get_mut(&mut self, key: EcoString) -> &mut Value { + pub fn get_mut(&mut self, key: Str) -> &mut Value { Arc::make_mut(&mut self.0).entry(key).or_default() } @@ -64,7 +64,7 @@ impl Dict { } /// Insert a mapping from the given `key` to the given `value`. - pub fn insert(&mut self, key: EcoString, value: Value) { + pub fn insert(&mut self, key: Str, value: Value) { Arc::make_mut(&mut self.0).insert(key, value); } @@ -112,7 +112,7 @@ impl Dict { } /// Iterate over pairs of references to the contained keys and values. - pub fn iter(&self) -> std::collections::btree_map::Iter { + pub fn iter(&self) -> std::collections::btree_map::Iter { self.0.iter() } } @@ -120,7 +120,7 @@ impl Dict { /// The missing key access error message. #[cold] fn missing_key(key: &str) -> String { - format!("dictionary does not contain key {:?}", EcoString::from(key)) + format!("dictionary does not contain key {:?}", Str::from(key)) } impl Debug for Dict { @@ -163,21 +163,21 @@ impl AddAssign for Dict { } } -impl Extend<(EcoString, Value)> for Dict { - fn extend>(&mut self, iter: T) { +impl Extend<(Str, Value)> for Dict { + fn extend>(&mut self, iter: T) { Arc::make_mut(&mut self.0).extend(iter); } } -impl FromIterator<(EcoString, Value)> for Dict { - fn from_iter>(iter: T) -> Self { +impl FromIterator<(Str, Value)> for Dict { + fn from_iter>(iter: T) -> Self { Self(Arc::new(iter.into_iter().collect())) } } impl IntoIterator for Dict { - type Item = (EcoString, Value); - type IntoIter = std::collections::btree_map::IntoIter; + type Item = (Str, Value); + type IntoIter = std::collections::btree_map::IntoIter; fn into_iter(self) -> Self::IntoIter { Arc::take(self.0).into_iter() @@ -185,8 +185,8 @@ impl IntoIterator for Dict { } impl<'a> IntoIterator for &'a Dict { - type Item = (&'a EcoString, &'a Value); - type IntoIter = std::collections::btree_map::Iter<'a, EcoString, Value>; + type Item = (&'a Str, &'a Value); + type IntoIter = std::collections::btree_map::Iter<'a, Str, Value>; fn 
into_iter(self) -> Self::IntoIter { self.iter() diff --git a/src/eval/func.rs b/src/eval/func.rs index 7ab03b6a4..bd312d664 100644 --- a/src/eval/func.rs +++ b/src/eval/func.rs @@ -206,7 +206,7 @@ impl Closure { // Parse the arguments according to the parameter list. for (param, default) in &self.params { - scopes.top.define(param, match default { + scopes.top.define(param.clone(), match default { None => args.expect::(param)?, Some(default) => { args.named::(param)?.unwrap_or_else(|| default.clone()) @@ -216,7 +216,7 @@ impl Closure { // Put the remaining arguments into the sink. if let Some(sink) = &self.sink { - scopes.top.define(sink, args.take()); + scopes.top.define(sink.clone(), args.take()); } // Determine the route inside the closure. diff --git a/src/eval/methods.rs b/src/eval/methods.rs index 0e6f5af45..aeb84c5a4 100644 --- a/src/eval/methods.rs +++ b/src/eval/methods.rs @@ -1,6 +1,6 @@ //! Methods on values. -use super::{Args, Machine, Regex, StrExt, Value}; +use super::{Args, Machine, Value}; use crate::diag::{At, TypResult}; use crate::model::{Content, Group}; use crate::syntax::Span; @@ -20,9 +20,42 @@ pub fn call( let output = match value { Value::Str(string) => match method { "len" => Value::Int(string.len() as i64), - "trim" => Value::Str(string.trim().into()), + "slice" => { + let start = args.expect("start")?; + let mut end = args.eat()?; + if end.is_none() { + end = args.named("count")?.map(|c: i64| start + c); + } + Value::Str(string.slice(start, end).at(span)?) + } + "contains" => Value::Bool(string.contains(args.expect("pattern")?)), + "starts-with" => Value::Bool(string.starts_with(args.expect("pattern")?)), + "ends-with" => Value::Bool(string.ends_with(args.expect("pattern")?)), + "find" => { + string.find(args.expect("pattern")?).map_or(Value::None, Value::Str) + } + "position" => string + .position(args.expect("pattern")?) + .map_or(Value::None, Value::Int), + + "match" => string + .match_(args.expect("pattern")?) 
+ .map_or(Value::None, Value::Dict), + "matches" => Value::Array(string.matches(args.expect("pattern")?)), + "replace" => { + let pattern = args.expect("pattern")?; + let with = args.expect("replacement string")?; + let count = args.named("count")?; + Value::Str(string.replace(pattern, with, count)) + } + "trim" => { + let pattern = args.eat()?; + let at = args.named("at")?; + let repeat = args.named("repeat")?.unwrap_or(true); + Value::Str(string.trim(pattern, at, repeat)) + } "split" => Value::Array(string.split(args.eat()?)), - _ => missing()?, + _ => return missing(), }, Value::Array(array) => match method { @@ -54,7 +87,7 @@ pub fn call( array.join(sep, last).at(span)? } "sorted" => Value::Array(array.sorted().at(span)?), - _ => missing()?, + _ => return missing(), }, Value::Dict(dict) => match method { @@ -62,48 +95,37 @@ pub fn call( "keys" => Value::Array(dict.keys()), "values" => Value::Array(dict.values()), "pairs" => Value::Array(dict.map(vm, args.expect("function")?)?), - _ => missing()?, + _ => return missing(), }, Value::Func(func) => match method { "with" => Value::Func(func.clone().with(args.take())), - _ => missing()?, + _ => return missing(), }, Value::Args(args) => match method { "positional" => Value::Array(args.to_positional()), "named" => Value::Dict(args.to_named()), - _ => missing()?, + _ => return missing(), }, - Value::Dyn(dynamic) => match method { - "matches" => { - if let Some(regex) = dynamic.downcast::() { - Value::Bool(regex.is_match(&args.expect::("text")?)) - } else { - missing()? - } - } - "entry" => { - if let Some(group) = dynamic.downcast::() { - Value::Content(Content::Locate( + Value::Dyn(dynamic) => { + if let Some(group) = dynamic.downcast::() { + match method { + "entry" => Value::Content(Content::Locate( group.entry(args.expect("recipe")?, args.named("value")?), - )) - } else { - missing()? 
+ )), + "all" => { + Value::Content(Content::Locate(group.all(args.expect("recipe")?))) + } + _ => return missing(), } + } else { + return missing(); } - "all" => { - if let Some(group) = dynamic.downcast::() { - Value::Content(Content::Locate(group.all(args.expect("recipe")?))) - } else { - missing()? - } - } - _ => missing()?, - }, + } - _ => missing()?, + _ => return missing(), }; args.finish()?; @@ -128,15 +150,15 @@ pub fn call_mut( array.insert(args.expect("index")?, args.expect("value")?).at(span)? } "remove" => array.remove(args.expect("index")?).at(span)?, - _ => missing()?, + _ => return missing(), }, Value::Dict(dict) => match method { "remove" => dict.remove(&args.expect::("key")?).at(span)?, - _ => missing()?, + _ => return missing(), }, - _ => missing()?, + _ => return missing(), } args.finish()?; diff --git a/src/eval/mod.rs b/src/eval/mod.rs index ab92c8f37..94d9ef405 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -7,8 +7,9 @@ mod array; #[macro_use] mod dict; #[macro_use] +mod str; +#[macro_use] mod value; - mod args; mod capture; mod func; @@ -17,7 +18,6 @@ pub mod methods; pub mod ops; mod raw; mod scope; -mod str; pub use self::str::*; pub use args::*; @@ -347,7 +347,7 @@ impl Eval for Lit { Unit::Fr => Fraction::new(v).into(), Unit::Percent => Ratio::new(v / 100.0).into(), }, - LitKind::Str(ref v) => Value::Str(v.clone()), + LitKind::Str(v) => Value::Str(v.into()), }) } } @@ -474,10 +474,10 @@ impl Eval for DictExpr { for item in self.items() { match item { DictItem::Named(named) => { - map.insert(named.name().take(), named.expr().eval(vm)?); + map.insert(named.name().take().into(), named.expr().eval(vm)?); } DictItem::Keyed(keyed) => { - map.insert(keyed.key(), keyed.expr().eval(vm)?); + map.insert(keyed.key().into(), keyed.expr().eval(vm)?); } DictItem::Spread(expr) => match expr.eval(vm)? 
{ Value::None => {} @@ -662,7 +662,7 @@ impl Eval for CallArgs { CallArg::Named(named) => { items.push(Arg { span, - name: Some(named.name().take()), + name: Some(named.name().take().into()), value: Spanned::new(named.expr().eval(vm)?, named.expr().span()), }); } @@ -859,7 +859,7 @@ impl Eval for ForExpr { (for ($($binding:ident => $value:ident),*) in $iter:expr) => {{ #[allow(unused_parens)] for ($($value),*) in $iter { - $(vm.scopes.top.define(&$binding, $value);)* + $(vm.scopes.top.define($binding.clone(), $value);)* let body = self.body(); let value = body.eval(vm)?; @@ -886,7 +886,7 @@ impl Eval for ForExpr { match (key, value, iter) { (None, v, Value::Str(string)) => { - iter!(for (v => value) in string.graphemes(true)); + iter!(for (v => value) in string.as_str().graphemes(true)); } (None, v, Value::Array(array)) => { iter!(for (v => value) in array.into_iter()); @@ -1047,7 +1047,7 @@ impl Access for Ident { impl Access for FieldAccess { fn access<'a>(&self, vm: &'a mut Machine) -> TypResult<&'a mut Value> { Ok(match self.object().access(vm)? 
{ - Value::Dict(dict) => dict.get_mut(self.field().take()), + Value::Dict(dict) => dict.get_mut(self.field().take().into()), v => bail!( self.object().span(), "expected dictionary, found {}", diff --git a/src/eval/ops.rs b/src/eval/ops.rs index 95c3c9ebf..7e4653200 100644 --- a/src/eval/ops.rs +++ b/src/eval/ops.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; -use super::{RawAlign, RawLength, RawStroke, Smart, StrExt, Value}; +use super::{RawAlign, RawLength, RawStroke, Regex, Smart, Value}; use crate::diag::StrResult; use crate::geom::{Numeric, Relative, Spec, SpecAxis}; use crate::model; @@ -21,8 +21,8 @@ pub fn join(lhs: Value, rhs: Value) -> StrResult { (a, None) => a, (None, b) => b, (Str(a), Str(b)) => Str(a + b), - (Str(a), Content(b)) => Content(model::Content::Text(a) + b), - (Content(a), Str(b)) => Content(a + model::Content::Text(b)), + (Str(a), Content(b)) => Content(model::Content::Text(a.into()) + b), + (Content(a), Str(b)) => Content(a + model::Content::Text(b.into())), (Content(a), Content(b)) => Content(a + b), (Array(a), Array(b)) => Array(a + b), (Dict(a), Dict(b)) => Dict(a + b), @@ -87,8 +87,8 @@ pub fn add(lhs: Value, rhs: Value) -> StrResult { (Str(a), Str(b)) => Str(a + b), (Content(a), Content(b)) => Content(a + b), - (Content(a), Str(b)) => Content(a + model::Content::Text(b)), - (Str(a), Content(b)) => Content(model::Content::Text(a) + b), + (Content(a), Str(b)) => Content(a + model::Content::Text(b.into())), + (Str(a), Content(b)) => Content(model::Content::Text(a.into()) + b), (Array(a), Array(b)) => Array(a + b), (Dict(a), Dict(b)) => Dict(a + b), @@ -183,8 +183,8 @@ pub fn mul(lhs: Value, rhs: Value) -> StrResult { (Fraction(a), Float(b)) => Fraction(a * b), (Int(a), Fraction(b)) => Fraction(a as f64 * b), - (Str(a), Int(b)) => Str(StrExt::repeat(&a, b)?), - (Int(a), Str(b)) => Str(StrExt::repeat(&b, a)?), + (Str(a), Int(b)) => Str(a.repeat(b)?), + (Int(a), Str(b)) => Str(b.repeat(a)?), (Array(a), Int(b)) => Array(a.repeat(b)?), (Int(a), 
Array(b)) => Array(b.repeat(a)?), (Content(a), Int(b)) => Content(a.repeat(b)?), @@ -384,9 +384,16 @@ pub fn not_in(lhs: Value, rhs: Value) -> StrResult { /// Test for containment. pub fn contains(lhs: &Value, rhs: &Value) -> Option { Some(match (lhs, rhs) { - (Value::Str(a), Value::Str(b)) => b.contains(a.as_str()), - (Value::Str(a), Value::Dict(b)) => b.contains(a), - (a, Value::Array(b)) => b.contains(a), + (Str(a), Str(b)) => b.as_str().contains(a.as_str()), + (Dyn(a), Str(b)) => { + if let Some(regex) = a.downcast::() { + regex.is_match(b) + } else { + return Option::None; + } + } + (Str(a), Dict(b)) => b.contains(a), + (a, Array(b)) => b.contains(a), _ => return Option::None, }) } diff --git a/src/eval/str.rs b/src/eval/str.rs index a03453125..9d2375d31 100644 --- a/src/eval/str.rs +++ b/src/eval/str.rs @@ -1,43 +1,404 @@ -use std::fmt::{self, Debug, Formatter}; +use std::borrow::{Borrow, Cow}; +use std::fmt::{self, Debug, Formatter, Write}; use std::hash::{Hash, Hasher}; -use std::ops::Deref; +use std::ops::{Add, AddAssign, Deref}; -use super::{Array, Value}; +use unicode_segmentation::UnicodeSegmentation; + +use super::{Array, Dict, RawAlign, Value}; use crate::diag::StrResult; use crate::util::EcoString; -/// Extra methods on strings. -pub trait StrExt { - /// Repeat a string a number of times. - fn repeat(&self, n: i64) -> StrResult; - - /// Split this string at whitespace or a specific pattern. - fn split(&self, at: Option) -> Array; +/// Create a new [`Str`] from a format string. +#[allow(unused_macros)] +macro_rules! format_str { + ($($tts:tt)*) => {{ + $crate::eval::Str::from(format_eco!($($tts)*)) + }}; } -impl StrExt for EcoString { - fn repeat(&self, n: i64) -> StrResult { - let n = usize::try_from(n) - .ok() - .and_then(|n| self.len().checked_mul(n).map(|_| n)) - .ok_or_else(|| format!("cannot repeat this string {} times", n))?; +/// An immutable reference counted string. 
+#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Str(EcoString); - Ok(self.repeat(n)) +impl Str { + /// Create a new, empty string. + pub fn new() -> Self { + Self(EcoString::new()) } - fn split(&self, at: Option) -> Array { - if let Some(pat) = at { - self.as_str() - .split(pat.as_str()) - .map(|s| Value::Str(s.into())) - .collect() + /// The length of the string in bytes. + pub fn len(&self) -> i64 { + self.0.len() as i64 + } + + /// A string slice containing the entire string. + pub fn as_str(&self) -> &str { + self + } + + /// The codepoints the string consists of. + pub fn codepoints(&self) -> Array { + self.as_str().chars().map(|c| Value::Str(c.into())).collect() + } + + /// The grapheme clusters the string consists of. + pub fn graphemes(&self) -> Array { + self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect() + } + + /// Extract a contiguous substring. + pub fn slice(&self, start: i64, end: Option) -> StrResult { + let len = self.len(); + let start = self + .locate(start) + .filter(|&start| start <= self.0.len()) + .ok_or_else(|| out_of_bounds(start, len))?; + + let end = end.unwrap_or(self.len()); + let end = self + .locate(end) + .filter(|&end| end <= self.0.len()) + .ok_or_else(|| out_of_bounds(end, len))? + .max(start); + + Ok(self.0[start .. end].into()) + } + + /// Resolve an index. + fn locate(&self, index: i64) -> Option { + usize::try_from(if index >= 0 { + index } else { - self.as_str() - .split_whitespace() - .map(|s| Value::Str(s.into())) - .collect() + self.len().checked_add(index)? + }) + .ok() + } + + /// Whether the given pattern exists in this string. + pub fn contains(&self, pattern: TextPattern) -> bool { + match pattern { + TextPattern::Str(pat) => self.0.contains(pat.as_str()), + TextPattern::Regex(re) => re.is_match(self), } } + + /// Whether this string begins with the given pattern. 
+ pub fn starts_with(&self, pattern: TextPattern) -> bool { + match pattern { + TextPattern::Str(pat) => self.0.starts_with(pat.as_str()), + TextPattern::Regex(re) => re.find(self).map_or(false, |m| m.start() == 0), + } + } + + /// Whether this string ends with the given pattern. + pub fn ends_with(&self, pattern: TextPattern) -> bool { + match pattern { + TextPattern::Str(pat) => self.0.ends_with(pat.as_str()), + TextPattern::Regex(re) => { + re.find_iter(self).last().map_or(false, |m| m.end() == self.0.len()) + } + } + } + + /// The text of the pattern's first match in this string. + pub fn find(&self, pattern: TextPattern) -> Option { + match pattern { + TextPattern::Str(pat) => self.0.contains(pat.as_str()).then(|| pat), + TextPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()), + } + } + + /// The position of the pattern's first match in this string. + pub fn position(&self, pattern: TextPattern) -> Option { + match pattern { + TextPattern::Str(pat) => self.0.find(pat.as_str()).map(|i| i as i64), + TextPattern::Regex(re) => re.find(self).map(|m| m.start() as i64), + } + } + + /// The start, end, text and capture groups (if any) of the first match of + /// the pattern in this string. + pub fn match_(&self, pattern: TextPattern) -> Option { + match pattern { + TextPattern::Str(pat) => { + self.0.match_indices(pat.as_str()).next().map(match_to_dict) + } + TextPattern::Regex(re) => re.captures(self).map(captures_to_dict), + } + } + + /// The start, end, text and capture groups (if any) of all matches of the + /// pattern in this string. + pub fn matches(&self, pattern: TextPattern) -> Array { + match pattern { + TextPattern::Str(pat) => self + .0 + .match_indices(pat.as_str()) + .map(match_to_dict) + .map(Value::Dict) + .collect(), + TextPattern::Regex(re) => re + .captures_iter(self) + .map(captures_to_dict) + .map(Value::Dict) + .collect(), + } + } + + /// Split this string at whitespace or a specific pattern. 
+ pub fn split(&self, pattern: Option) -> Array { + let s = self.as_str(); + match pattern { + None => s.split_whitespace().map(|v| Value::Str(v.into())).collect(), + Some(TextPattern::Str(pat)) => { + s.split(pat.as_str()).map(|v| Value::Str(v.into())).collect() + } + Some(TextPattern::Regex(re)) => { + re.split(s).map(|v| Value::Str(v.into())).collect() + } + } + } + + /// Trim either whitespace or the given pattern at both or just one side of + /// the string. If `repeat` is true, the pattern is trimmed repeatedly + /// instead of just once. Repeat must only be given in combination with a + /// pattern. + pub fn trim( + &self, + pattern: Option, + at: Option, + repeat: bool, + ) -> Self { + let mut start = matches!(at, Some(TextSide::Start) | None); + let end = matches!(at, Some(TextSide::End) | None); + + let trimmed = match pattern { + None => match at { + None => self.0.trim(), + Some(TextSide::Start) => self.0.trim_start(), + Some(TextSide::End) => self.0.trim_end(), + }, + Some(TextPattern::Str(pat)) => { + let pat = pat.as_str(); + let mut s = self.as_str(); + if repeat { + if start { + s = s.trim_start_matches(pat); + } + if end { + s = s.trim_end_matches(pat); + } + } else { + if start { + s = s.strip_prefix(pat).unwrap_or(s); + } + if end { + s = s.strip_suffix(pat).unwrap_or(s); + } + } + s + } + Some(TextPattern::Regex(re)) => { + let s = self.as_str(); + let mut last = 0; + let mut range = 0 .. s.len(); + + for m in re.find_iter(s) { + // Does this match follow directly after the last one? + let consecutive = last == m.start(); + + // As long as we're consecutive and still trimming at the + // start, trim. + start &= consecutive; + if start { + range.start = m.end(); + start &= repeat; + } + + // Reset end trim if we aren't consecutive anymore or aren't + // repeating. + if end && (!consecutive || !repeat) { + range.end = m.start(); + } + + last = m.end(); + } + + // Is the last match directly at the end? 
+ if last < s.len() { + range.end = s.len(); + } + + &s[range.start .. range.start.max(range.end)] + } + }; + + trimmed.into() + } + + /// Replace at most `count` occurrences of the given pattern with a + /// replacement string (beginning from the start). + pub fn replace( + &self, + pattern: TextPattern, + with: Self, + count: Option, + ) -> Self { + match pattern { + TextPattern::Str(pat) => match count { + Some(n) => self.0.replacen(pat.as_str(), &with, n).into(), + None => self.0.replace(pat.as_str(), &with).into(), + }, + TextPattern::Regex(re) => match count { + Some(n) => re.replacen(self, n, with.as_str()).into(), + None => re.replace(self, with.as_str()).into(), + }, + } + } + + /// Repeat the string a number of times. + pub fn repeat(&self, n: i64) -> StrResult { + let n = usize::try_from(n) + .ok() + .and_then(|n| self.0.len().checked_mul(n).map(|_| n)) + .ok_or_else(|| format!("cannot repeat this string {} times", n))?; + + Ok(Self(self.0.repeat(n))) + } +} + +/// The out of bounds access error message. +#[cold] +fn out_of_bounds(index: i64, len: i64) -> String { + format!( + "string index out of bounds (index: {}, len: {})", + index, len + ) +} + +/// Convert an item of std's `match_indices` to a dictionary. +fn match_to_dict((start, text): (usize, &str)) -> Dict { + dict! { + "start" => Value::Int(start as i64), + "end" => Value::Int((start + text.len()) as i64), + "text" => Value::Str(text.into()), + "captures" => Value::Array(Array::new()), + } +} + +/// Convert regex captures to a dictionary. +fn captures_to_dict(cap: regex::Captures) -> Dict { + let m = cap.get(0).expect("missing first match"); + dict! 
{ + "start" => Value::Int(m.start() as i64), + "end" => Value::Int(m.end() as i64), + "text" => Value::Str(m.as_str().into()), + "captures" => Value::Array( + cap.iter() + .skip(1) + .map(|opt| opt.map_or(Value::None, |m| m.as_str().into())) + .collect(), + ), + } +} + +impl Deref for Str { + type Target = str; + + fn deref(&self) -> &str { + &self.0 + } +} + +impl Debug for Str { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.write_char('"')?; + for c in self.chars() { + match c { + '\\' => f.write_str(r"\\")?, + '"' => f.write_str(r#"\""#)?, + '\n' => f.write_str(r"\n")?, + '\r' => f.write_str(r"\r")?, + '\t' => f.write_str(r"\t")?, + _ => f.write_char(c)?, + } + } + f.write_char('"') + } +} + +impl Add for Str { + type Output = Self; + + fn add(mut self, rhs: Self) -> Self::Output { + self += rhs; + self + } +} + +impl AddAssign for Str { + fn add_assign(&mut self, rhs: Self) { + self.0.push_str(rhs.as_str()); + } +} + +impl AsRef for Str { + fn as_ref(&self) -> &str { + self + } +} + +impl Borrow for Str { + fn borrow(&self) -> &str { + self + } +} + +impl From for Str { + fn from(c: char) -> Self { + Self(c.into()) + } +} + +impl From<&str> for Str { + fn from(s: &str) -> Self { + Self(s.into()) + } +} + +impl From for Str { + fn from(s: EcoString) -> Self { + Self(s) + } +} + +impl From for Str { + fn from(s: String) -> Self { + Self(s.into()) + } +} +impl From> for Str { + fn from(s: Cow) -> Self { + Self(s.into()) + } +} +impl FromIterator for Str { + fn from_iter>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +impl From for EcoString { + fn from(str: Str) -> Self { + str.0 + } +} + +impl From for String { + fn from(s: Str) -> Self { + s.0.into() + } } /// A regular expression. @@ -76,3 +437,39 @@ impl Hash for Regex { self.0.as_str().hash(state); } } + +/// A pattern which can be searched for in a string. +#[derive(Debug, Clone)] +pub enum TextPattern { + /// Just a string. + Str(Str), + /// A regular expression. 
+ Regex(Regex), +} + +castable! { + TextPattern, + Expected: "string or regular expression", + Value::Str(text) => Self::Str(text), + @regex: Regex => Self::Regex(regex.clone()), +} + +/// A side of a string. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)] +pub enum TextSide { + /// The logical start of the string, may be left or right depending on the + /// language. + Start, + /// The logical end of the string. + End, +} + +castable! { + TextSide, + Expected: "start or end", + @align: RawAlign => match align { + RawAlign::Start => Self::Start, + RawAlign::End => Self::End, + _ => Err("expected either `start` or `end`")?, + }, +} diff --git a/src/eval/value.rs b/src/eval/value.rs index cdd403a77..b7bd6d3c6 100644 --- a/src/eval/value.rs +++ b/src/eval/value.rs @@ -4,7 +4,7 @@ use std::fmt::{self, Debug, Formatter}; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use super::{ops, Args, Array, Cast, Dict, Func, RawLength}; +use super::{ops, Args, Array, Cast, Dict, Func, RawLength, Str}; use crate::diag::StrResult; use crate::geom::{Angle, Color, Em, Fraction, Length, Ratio, Relative, RgbaColor}; use crate::library::text::RawNode; @@ -37,7 +37,7 @@ pub enum Value { /// A color value: `#f79143ff`. Color(Color), /// A string: `"string"`. - Str(EcoString), + Str(Str), /// A content value: `[*Hi* there]`. Content(Content), /// An array of values: `(1, "hi", 12cm)`. @@ -91,7 +91,7 @@ impl Value { Self::Relative(_) => Relative::::TYPE_NAME, Self::Fraction(_) => Fraction::TYPE_NAME, Self::Color(_) => Color::TYPE_NAME, - Self::Str(_) => EcoString::TYPE_NAME, + Self::Str(_) => Str::TYPE_NAME, Self::Content(_) => Content::TYPE_NAME, Self::Array(_) => Array::TYPE_NAME, Self::Dict(_) => Dict::TYPE_NAME, @@ -107,8 +107,8 @@ impl Value { } /// Return the debug representation of the value. - pub fn repr(&self) -> EcoString { - format_eco!("{:?}", self) + pub fn repr(&self) -> Str { + format_str!("{:?}", self) } /// Return the display representation of the value. 
@@ -117,12 +117,12 @@ impl Value { Value::None => Content::new(), Value::Int(v) => Content::Text(format_eco!("{}", v)), Value::Float(v) => Content::Text(format_eco!("{}", v)), - Value::Str(v) => Content::Text(v), + Value::Str(v) => Content::Text(v.into()), Value::Content(v) => v, // For values which can't be shown "naturally", we return the raw // representation with typst code syntax highlighting. - v => Content::show(RawNode { text: v.repr(), block: false }) + v => Content::show(RawNode { text: v.repr().into(), block: false }) .styled(RawNode::LANG, Some("typc".into())), } } @@ -233,6 +233,12 @@ impl From<&str> for Value { } } +impl From for Value { + fn from(v: EcoString) -> Self { + Self::Str(v.into()) + } +} + impl From for Value { fn from(v: String) -> Self { Self::Str(v.into()) @@ -388,11 +394,11 @@ primitive! { Relative: "relative length", } primitive! { Fraction: "fraction", Fraction } primitive! { Color: "color", Color } -primitive! { EcoString: "string", Str } +primitive! { Str: "string", Str } primitive! { Content: "content", Content, None => Content::new(), - Str(text) => Content::Text(text) + Str(text) => Content::Text(text.into()) } primitive! { Array: "array", Array } primitive! 
{ Dict: "dictionary", Dict } diff --git a/src/library/layout/page.rs b/src/library/layout/page.rs index 0a7c75791..afcc48552 100644 --- a/src/library/layout/page.rs +++ b/src/library/layout/page.rs @@ -198,7 +198,7 @@ impl Cast> for Marginal { fn cast(value: Spanned) -> StrResult { match value.v { Value::None => Ok(Self::None), - Value::Str(v) => Ok(Self::Content(Content::Text(v))), + Value::Str(v) => Ok(Self::Content(Content::Text(v.into()))), Value::Content(v) => Ok(Self::Content(v)), Value::Func(v) => Ok(Self::Func(v, value.span)), v => Err(format!( diff --git a/src/library/math/mod.rs b/src/library/math/mod.rs index 1eed89b12..81593c4fc 100644 --- a/src/library/math/mod.rs +++ b/src/library/math/mod.rs @@ -43,7 +43,7 @@ impl Show for MathNode { fn encode(&self, _: StyleChain) -> Dict { dict! { - "formula" => Value::Str(self.formula.v.clone()), + "formula" => Value::Str(self.formula.v.clone().into()), "display" => Value::Bool(self.display) } } diff --git a/src/library/text/link.rs b/src/library/text/link.rs index 740426a3e..c4898eb05 100644 --- a/src/library/text/link.rs +++ b/src/library/text/link.rs @@ -33,7 +33,7 @@ impl LinkNode { castable! { Destination, Expected: "string or dictionary with `page`, `x`, and `y` keys", - Value::Str(string) => Self::Url(string), + Value::Str(string) => Self::Url(string.into()), Value::Dict(dict) => { let page = dict.get("page")?.clone().cast()?; let x: RawLength = dict.get("x")?.clone().cast()?; @@ -54,7 +54,7 @@ impl Show for LinkNode { fn encode(&self, _: StyleChain) -> Dict { dict! 
{ "url" => match &self.dest { - Destination::Url(url) => Value::Str(url.clone()), + Destination::Url(url) => Value::Str(url.clone().into()), Destination::Internal(loc) => Value::Dict(loc.encode()), }, "body" => match &self.body { diff --git a/src/library/text/raw.rs b/src/library/text/raw.rs index 8db4cf3e6..e64636f81 100644 --- a/src/library/text/raw.rs +++ b/src/library/text/raw.rs @@ -50,10 +50,10 @@ impl Show for RawNode { fn encode(&self, styles: StyleChain) -> Dict { dict! { - "text" => Value::Str(self.text.clone()), + "text" => Value::Str(self.text.clone().into()), "block" => Value::Bool(self.block), "lang" => match styles.get(Self::LANG) { - Some(lang) => Value::Str(lang.clone()), + Some(lang) => Value::Str(lang.clone().into()), None => Value::None, }, } diff --git a/src/library/utility/string.rs b/src/library/utility/string.rs index 4739dbf41..972b44d70 100644 --- a/src/library/utility/string.rs +++ b/src/library/utility/string.rs @@ -10,8 +10,8 @@ pub fn repr(_: &mut Machine, args: &mut Args) -> TypResult { pub fn str(_: &mut Machine, args: &mut Args) -> TypResult { let Spanned { v, span } = args.expect("value")?; Ok(Value::Str(match v { - Value::Int(v) => format_eco!("{}", v), - Value::Float(v) => format_eco!("{}", v), + Value::Int(v) => format_str!("{}", v), + Value::Float(v) => format_str!("{}", v), Value::Str(v) => v, v => bail!(span, "cannot convert {} to string", v.type_name()), })) @@ -46,7 +46,7 @@ pub fn symbol(_: &mut Machine, args: &mut Args) -> TypResult { fn numbered(numbering: Numbering, args: &mut Args) -> TypResult { let n = args.expect::("non-negative integer")?; - Ok(Value::Str(numbering.apply(n))) + Ok(Value::Str(numbering.apply(n).into())) } /// Allows to convert a number into letters, roman numerals and symbols. 
diff --git a/src/util/eco.rs b/src/util/eco.rs index 63abe9e74..10a1f2ed3 100644 --- a/src/util/eco.rs +++ b/src/util/eco.rs @@ -1,4 +1,4 @@ -use std::borrow::Borrow; +use std::borrow::{Borrow, Cow}; use std::cmp::Ordering; use std::fmt::{self, Debug, Display, Formatter, Write}; use std::hash::{Hash, Hasher}; @@ -227,18 +227,7 @@ impl Default for EcoString { impl Debug for EcoString { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.write_char('"')?; - for c in self.chars() { - match c { - '\\' => f.write_str(r"\\")?, - '"' => f.write_str(r#"\""#)?, - '\n' => f.write_str(r"\n")?, - '\r' => f.write_str(r"\r")?, - '\t' => f.write_str(r"\t")?, - _ => f.write_char(c)?, - } - } - f.write_char('"') + Debug::fmt(self.as_str(), f) } } @@ -325,12 +314,6 @@ impl Borrow for EcoString { } } -impl From<&Self> for EcoString { - fn from(s: &Self) -> Self { - s.clone() - } -} - impl From for EcoString { fn from(c: char) -> Self { let mut buf = [0; LIMIT]; @@ -351,9 +334,22 @@ impl From for EcoString { } } -impl From<&EcoString> for String { - fn from(s: &EcoString) -> Self { - s.as_str().to_owned() +impl From> for EcoString { + fn from(s: Cow) -> Self { + match s { + Cow::Borrowed(s) => s.into(), + Cow::Owned(s) => s.into(), + } + } +} + +impl FromIterator for EcoString { + fn from_iter>(iter: T) -> Self { + let mut s = Self::new(); + for c in iter { + s.push(c); + } + s } } @@ -366,13 +362,9 @@ impl From for String { } } -impl FromIterator for EcoString { - fn from_iter>(iter: T) -> Self { - let mut s = Self::new(); - for c in iter { - s.push(c); - } - s +impl From<&EcoString> for String { + fn from(s: &EcoString) -> Self { + s.as_str().to_owned() } } diff --git a/tests/typ/utility/regex.typ b/tests/typ/utility/regex.typ deleted file mode 100644 index 4cc7d1ea3..000000000 --- a/tests/typ/utility/regex.typ +++ /dev/null @@ -1,10 +0,0 @@ -// Test regexes. 
-// Ref: false - ---- -{ - let re = regex("(La)?TeX") - test(re.matches("La"), false) - test(re.matches("TeX"), true) - test(re.matches("LaTeX"), true) -} diff --git a/tests/typ/utility/string.typ b/tests/typ/utility/string.typ index e002b2070..3104a3eac 100644 --- a/tests/typ/utility/string.typ +++ b/tests/typ/utility/string.typ @@ -14,12 +14,120 @@ #str([]) --- -// Test the `split` and `trim` methods. +// Test the `slice` method. +#test("abc".slice(1, 2), "b") +#test("abc🏡def".slice(2, 7), "c🏡") +#test("abc🏡def".slice(2, -2), "c🏡d") +#test("abc🏡def".slice(-3, -1), "de") + +--- +// Test the `contains` method. +#test("abc".contains("b"), true) +#test("b" in "abc", true) +#test("1234f".contains(regex("\d")), true) +#test(regex("\d") in "1234f", true) +#test("abc".contains("d"), false) +#test("1234g" in "1234f", false) +#test("abc".contains(regex("^[abc]$")), false) +#test("abc".contains(regex("^[abc]+$")), true) + +--- +// Test the `starts-with` and `ends-with` methods. +#test("Typst".starts-with("Ty"), true) +#test("Typst".starts-with(regex("[Tt]ys")), false) +#test("Typst".starts-with("st"), false) +#test("Typst".ends-with("st"), true) +#test("Typst".ends-with(regex("\d*")), true) +#test("Typst".ends-with(regex("\d+")), false) +#test("Typ12".ends-with(regex("\d+")), true) + +--- +// Test the `find` and `position` methods. +#let date = regex("\d{2}:\d{2}") +#test("Hello World".find("World"), "World") +#test("Hello World".position("World"), 6) +#test("It's 12:13 now".find(date), "12:13") +#test("It's 12:13 now".position(date), 5) + +--- +// Test the `match` method. +#test("Is there a".match("for this?"), none) #test( - "Typst, LaTeX, Word, InDesign".split(",").map(s => s.trim()), - ("Typst", "LaTeX", "Word", "InDesign"), + "The time of my life.".match(regex("[mit]+e")), + (start: 4, end: 8, text: "time", captures: ()), ) +// Test the `matches` method. 
+#test("Hello there".matches("\d"), ()) +#test("Day by Day.".matches("Day"), ( + (start: 0, end: 3, text: "Day", captures: ()), + (start: 7, end: 10, text: "Day", captures: ()), +)) + +// Compute the sum of all timestamps in the text. +#let timesum(text) = { + let time = 0 + for match in text.matches(regex("(\d+):(\d+)")) { + let caps = match.captures + time += 60 * int(caps(0)) + int(caps(1)) + } + str(int(time / 60)) + ":" + str(mod(time, 60)) +} + +#test(timesum(""), "0:0") +#test(timesum("2:70"), "3:10") +#test(timesum("1:20, 2:10, 0:40"), "4:10") + +--- +// Test the `replace` method. +#test("ABC".replace("", "-"), "-A-B-C-") +#test("Ok".replace("Ok", "Nope", count: 0), "Ok") +#test("to add?".replace("", "How ", count: 1), "How to add?") +#test("AB C DEF GH J".replace(" ", ",", count: 2), "AB,C,DEF GH J") +#test("Walcemo" + .replace("o", "k") + .replace("e", "o") + .replace("k", "e") + .replace("a", "e"), + "Welcome" +) +#test("123".replace(regex("\d$"), "_"), "12_") +#test("123".replace(regex("\d{1,2}$"), "__"), "1__") + +--- +// Test the `trim` method. 
+#let str = "Typst, LaTeX, Word, InDesign" +#let array = ("Typst", "LaTeX", "Word", "InDesign") +#test(str.split(",").map(s => s.trim()), array) +#test("".trim(), "") +#test(" abc ".trim(at: start), "abc ") +#test(" abc ".trim(at: end, repeat: true), " abc") +#test(" abc".trim(at: start, repeat: false), "abc") +#test("aabcaa".trim("a", repeat: false), "abca") +#test("aabca".trim("a", at: start), "bca") +#test("aabcaa".trim("a", at: end, repeat: false), "aabca") +#test("".trim(regex(".")), "") +#test("123abc456".trim(regex("\d")), "abc") +#test("123abc456".trim(regex("\d"), repeat: false), "23abc45") +#test("123a4b5c678".trim(regex("\d"), repeat: true), "a4b5c") +#test("123a4b5c678".trim(regex("\d"), repeat: false), "23a4b5c67") +#test("123abc456".trim(regex("\d"), at: start), "abc456") +#test("123abc456".trim(regex("\d"), at: end), "123abc") +#test("123abc456".trim(regex("\d+"), at: end, repeat: false), "123abc") +#test("123abc456".trim(regex("\d{1,2}$"), repeat: false), "123abc4") +#test("hello world".trim(regex(".")), "") + +--- +// Error: 17-21 expected either `start` or `end` +{"abc".trim(at: left)} + +--- +// Test the `split` method. +#test("abc".split(""), ("", "a", "b", "c", "")) +#test("abc".split("b"), ("a", "c")) +#test("a123c".split(regex("\d")), ("a", "", "", "c")) +#test("a123c".split(regex("\d+")), ("a", "c")) + --- // Test the `upper` and `lower` functions. #let memes = "ArE mEmEs gReAt?"; @@ -27,14 +135,6 @@ #test(upper(memes), "ARE MEMES GREAT?") #test(upper("Ελλάδα"), "ΕΛΛΆΔΑ") ---- -// Error: 8-9 expected string or content, found integer -#upper(1) - ---- -// Error: 9-11 must be at least zero -#symbol(-1) - --- // Test integrated lower, upper and symbols. 
// Ref: true @@ -45,9 +145,17 @@ #lower("SCREAMING MUST BE SILENCED in " + roman(1672) + " years") #for i in range(9) { - symbol(i) - [ and ] - roman(i) - [ for #i] - parbreak() + symbol(i) + [ and ] + roman(i) + [ for #i] + parbreak() } + +--- +// Error: 8-9 expected string or content, found integer +#upper(1) + +--- +// Error: 9-11 must be at least zero +#symbol(-1)