use std::borrow::{Borrow, Cow}; use std::fmt::{self, Debug, Display, Formatter, Write}; use std::hash::{Hash, Hasher}; use std::ops::{Add, AddAssign, Deref}; use ecow::EcoString; use unicode_segmentation::UnicodeSegmentation; use super::{castable, dict, Array, Dict, Value}; use crate::diag::StrResult; use crate::geom::GenAlign; /// Create a new [`Str`] from a format string. #[macro_export] #[doc(hidden)] macro_rules! __format_str { ($($tts:tt)*) => {{ $crate::model::Str::from($crate::model::eco_format!($($tts)*)) }}; } #[doc(inline)] pub use crate::__format_str as format_str; #[doc(hidden)] pub use ecow::eco_format; /// An immutable reference counted string. #[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct Str(EcoString); impl Str { /// Create a new, empty string. pub fn new() -> Self { Self(EcoString::new()) } /// The length of the string in bytes. pub fn len(&self) -> i64 { self.0.len() as i64 } /// A string slice containing the entire string. pub fn as_str(&self) -> &str { self } /// Extract the first grapheme cluster. pub fn first(&self) -> StrResult { self.0 .graphemes(true) .next() .map(Into::into) .ok_or_else(string_is_empty) } /// Extract the last grapheme cluster. pub fn last(&self) -> StrResult { self.0 .graphemes(true) .next_back() .map(Into::into) .ok_or_else(string_is_empty) } /// Extract the grapheme cluster at the given index. pub fn at(&self, index: i64) -> StrResult { let len = self.len(); let grapheme = self.0[self.locate(index)?..] .graphemes(true) .next() .ok_or_else(|| out_of_bounds(index, len))?; Ok(grapheme.into()) } /// Extract a contigous substring. pub fn slice(&self, start: i64, end: Option) -> StrResult { let start = self.locate(start)?; let end = self.locate(end.unwrap_or(self.len()))?.max(start); Ok(self.0[start..end].into()) } /// The grapheme clusters the string consists of. pub fn clusters(&self) -> Array { self.as_str().graphemes(true).map(|s| Value::Str(s.into())).collect() } /// The codepoints the string consists of. pub fn codepoints(&self) -> Array { self.chars().map(|c| Value::Str(c.into())).collect() } /// Whether the given pattern exists in this string. pub fn contains(&self, pattern: StrPattern) -> bool { match pattern { StrPattern::Str(pat) => self.0.contains(pat.as_str()), StrPattern::Regex(re) => re.is_match(self), } } /// Whether this string begins with the given pattern. pub fn starts_with(&self, pattern: StrPattern) -> bool { match pattern { StrPattern::Str(pat) => self.0.starts_with(pat.as_str()), StrPattern::Regex(re) => re.find(self).map_or(false, |m| m.start() == 0), } } /// Whether this string ends with the given pattern. pub fn ends_with(&self, pattern: StrPattern) -> bool { match pattern { StrPattern::Str(pat) => self.0.ends_with(pat.as_str()), StrPattern::Regex(re) => { re.find_iter(self).last().map_or(false, |m| m.end() == self.0.len()) } } } /// The text of the pattern's first match in this string. pub fn find(&self, pattern: StrPattern) -> Option { match pattern { StrPattern::Str(pat) => self.0.contains(pat.as_str()).then(|| pat), StrPattern::Regex(re) => re.find(self).map(|m| m.as_str().into()), } } /// The position of the pattern's first match in this string. pub fn position(&self, pattern: StrPattern) -> Option { match pattern { StrPattern::Str(pat) => self.0.find(pat.as_str()).map(|i| i as i64), StrPattern::Regex(re) => re.find(self).map(|m| m.start() as i64), } } /// The start and, text and capture groups (if any) of the first match of /// the pattern in this string. pub fn match_(&self, pattern: StrPattern) -> Option { match pattern { StrPattern::Str(pat) => { self.0.match_indices(pat.as_str()).next().map(match_to_dict) } StrPattern::Regex(re) => re.captures(self).map(captures_to_dict), } } /// The start, end, text and capture groups (if any) of all matches of the /// pattern in this string. pub fn matches(&self, pattern: StrPattern) -> Array { match pattern { StrPattern::Str(pat) => self .0 .match_indices(pat.as_str()) .map(match_to_dict) .map(Value::Dict) .collect(), StrPattern::Regex(re) => re .captures_iter(self) .map(captures_to_dict) .map(Value::Dict) .collect(), } } /// Split this string at whitespace or a specific pattern. pub fn split(&self, pattern: Option) -> Array { let s = self.as_str(); match pattern { None => s.split_whitespace().map(|v| Value::Str(v.into())).collect(), Some(StrPattern::Str(pat)) => { s.split(pat.as_str()).map(|v| Value::Str(v.into())).collect() } Some(StrPattern::Regex(re)) => { re.split(s).map(|v| Value::Str(v.into())).collect() } } } /// Trim either whitespace or the given pattern at both or just one side of /// the string. If `repeat` is true, the pattern is trimmed repeatedly /// instead of just once. Repeat must only be given in combination with a /// pattern. pub fn trim( &self, pattern: Option, at: Option, repeat: bool, ) -> Self { let mut start = matches!(at, Some(StrSide::Start) | None); let end = matches!(at, Some(StrSide::End) | None); let trimmed = match pattern { None => match at { None => self.0.trim(), Some(StrSide::Start) => self.0.trim_start(), Some(StrSide::End) => self.0.trim_end(), }, Some(StrPattern::Str(pat)) => { let pat = pat.as_str(); let mut s = self.as_str(); if repeat { if start { s = s.trim_start_matches(pat); } if end { s = s.trim_end_matches(pat); } } else { if start { s = s.strip_prefix(pat).unwrap_or(s); } if end { s = s.strip_suffix(pat).unwrap_or(s); } } s } Some(StrPattern::Regex(re)) => { let s = self.as_str(); let mut last = 0; let mut range = 0..s.len(); for m in re.find_iter(s) { // Does this match follow directly after the last one? let consecutive = last == m.start(); // As long as we're consecutive and still trimming at the // start, trim. start &= consecutive; if start { range.start = m.end(); start &= repeat; } // Reset end trim if we aren't consecutive anymore or aren't // repeating. if end && (!consecutive || !repeat) { range.end = m.start(); } last = m.end(); } // Is the last match directly at the end? if last < s.len() { range.end = s.len(); } &s[range.start..range.start.max(range.end)] } }; trimmed.into() } /// Replace at most `count` occurances of the given pattern with a /// replacement string (beginning from the start). pub fn replace(&self, pattern: StrPattern, with: Self, count: Option) -> Self { match pattern { StrPattern::Str(pat) => match count { Some(n) => self.0.replacen(pat.as_str(), &with, n).into(), None => self.0.replace(pat.as_str(), &with).into(), }, StrPattern::Regex(re) => match count { Some(n) => re.replacen(self, n, with.as_str()).into(), None => re.replace(self, with.as_str()).into(), }, } } /// Repeat the string a number of times. pub fn repeat(&self, n: i64) -> StrResult { let n = usize::try_from(n) .ok() .and_then(|n| self.0.len().checked_mul(n).map(|_| n)) .ok_or_else(|| format!("cannot repeat this string {} times", n))?; Ok(Self(self.0.repeat(n))) } /// Resolve an index. fn locate(&self, index: i64) -> StrResult { let wrapped = if index >= 0 { Some(index) } else { self.len().checked_add(index) }; let resolved = wrapped .and_then(|v| usize::try_from(v).ok()) .filter(|&v| v <= self.0.len()) .ok_or_else(|| out_of_bounds(index, self.len()))?; if !self.0.is_char_boundary(resolved) { return Err(not_a_char_boundary(index)); } Ok(resolved) } } /// The out of bounds access error message. #[cold] fn out_of_bounds(index: i64, len: i64) -> EcoString { eco_format!("string index out of bounds (index: {}, len: {})", index, len) } /// The char boundary access error message. #[cold] fn not_a_char_boundary(index: i64) -> EcoString { eco_format!("string index {} is not a character boundary", index) } /// The error message when the string is empty. #[cold] fn string_is_empty() -> EcoString { "string is empty".into() } /// Convert an item of std's `match_indices` to a dictionary. fn match_to_dict((start, text): (usize, &str)) -> Dict { dict! { "start" => Value::Int(start as i64), "end" => Value::Int((start + text.len()) as i64), "text" => Value::Str(text.into()), "captures" => Value::Array(Array::new()), } } /// Convert regex captures to a dictionary. fn captures_to_dict(cap: regex::Captures) -> Dict { let m = cap.get(0).expect("missing first match"); dict! { "start" => Value::Int(m.start() as i64), "end" => Value::Int(m.end() as i64), "text" => Value::Str(m.as_str().into()), "captures" => Value::Array( cap.iter() .skip(1) .map(|opt| opt.map_or(Value::None, |m| m.as_str().into())) .collect(), ), } } impl Deref for Str { type Target = str; fn deref(&self) -> &str { &self.0 } } impl Display for Str { fn fmt(&self, f: &mut Formatter) -> fmt::Result { f.pad(self) } } impl Debug for Str { fn fmt(&self, f: &mut Formatter) -> fmt::Result { f.write_char('"')?; for c in self.chars() { match c { '\0' => f.write_str("\\u{0}")?, '\'' => f.write_str("'")?, '"' => f.write_str(r#"\""#)?, _ => Display::fmt(&c.escape_debug(), f)?, } } f.write_char('"') } } impl Add for Str { type Output = Self; fn add(mut self, rhs: Self) -> Self::Output { self += rhs; self } } impl AddAssign for Str { fn add_assign(&mut self, rhs: Self) { self.0.push_str(rhs.as_str()); } } impl AsRef for Str { fn as_ref(&self) -> &str { self } } impl Borrow for Str { fn borrow(&self) -> &str { self } } impl From for Str { fn from(c: char) -> Self { Self(c.into()) } } impl From<&str> for Str { fn from(s: &str) -> Self { Self(s.into()) } } impl From for Str { fn from(s: EcoString) -> Self { Self(s) } } impl From for Str { fn from(s: String) -> Self { Self(s.into()) } } impl From> for Str { fn from(s: Cow) -> Self { Self(s.into()) } } impl FromIterator for Str { fn from_iter>(iter: T) -> Self { Self(iter.into_iter().collect()) } } impl From for EcoString { fn from(str: Str) -> Self { str.0 } } impl From for String { fn from(s: Str) -> Self { s.0.into() } } /// A regular expression. #[derive(Clone)] pub struct Regex(regex::Regex); impl Regex { /// Create a new regular expression. pub fn new(re: &str) -> StrResult { regex::Regex::new(re).map(Self).map_err(|err| eco_format!("{err}")) } } impl Deref for Regex { type Target = regex::Regex; fn deref(&self) -> &Self::Target { &self.0 } } impl Debug for Regex { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "regex({:?})", self.0.as_str()) } } impl PartialEq for Regex { fn eq(&self, other: &Self) -> bool { self.0.as_str() == other.0.as_str() } } impl Hash for Regex { fn hash(&self, state: &mut H) { self.0.as_str().hash(state); } } /// A pattern which can be searched for in a string. #[derive(Debug, Clone)] pub enum StrPattern { /// Just a string. Str(Str), /// A regular expression. Regex(Regex), } castable! { StrPattern, text: Str => Self::Str(text), regex: Regex => Self::Regex(regex), } /// A side of a string. #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)] pub enum StrSide { /// The logical start of the string, may be left or right depending on the /// language. Start, /// The logical end of the string. End, } castable! { StrSide, align: GenAlign => match align { GenAlign::Start => Self::Start, GenAlign::End => Self::End, _ => Err("expected either `start` or `end`")?, }, }