From 585f6564874d16a8f81a6c29e73091a008ccd484 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Fri, 17 Feb 2023 10:20:52 +0100 Subject: [PATCH] Check char boundaries in string methods --- src/model/str.rs | 55 +++++++++++++++++++---------------- tests/typ/compiler/string.typ | 12 ++++++-- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/src/model/str.rs b/src/model/str.rs index 6bfbcebdd..ae0ef8994 100644 --- a/src/model/str.rs +++ b/src/model/str.rs @@ -68,38 +68,20 @@ impl Str { /// Extract the grapheme cluster at the given index. pub fn at(&self, index: i64) -> StrResult<Self> { let len = self.len(); - let grapheme = self - .locate(index) - .filter(|&index| index <= self.0.len()) - .and_then(|index| self.0[index..].graphemes(true).next()) + let grapheme = self.0[self.locate(index)?..] + .graphemes(true) + .next() .ok_or_else(|| out_of_bounds(index, len))?; Ok(grapheme.into()) } /// Extract a contigous substring. pub fn slice(&self, start: i64, end: Option<i64>) -> StrResult<Self> { - let len = self.len(); - let start = self - .locate(start) - .filter(|&start| start <= self.0.len()) - .ok_or_else(|| out_of_bounds(start, len))?; - - let end = end.unwrap_or(self.len()); - let end = self - .locate(end) - .filter(|&end| end <= self.0.len()) - .ok_or_else(|| out_of_bounds(end, len))? - .max(start); - + let start = self.locate(start)?; + let end = self.locate(end.unwrap_or(self.len()))?.max(start); Ok(self.0[start..end].into()) } - /// Resolve an index. - fn locate(&self, index: i64) -> Option<usize> { - usize::try_from(if index >= 0 { index } else { self.len().checked_add(index)? }) - .ok() - } - /// Whether the given pattern exists in this string. pub fn contains(&self, pattern: StrPattern) -> bool { match pattern { @@ -286,12 +268,35 @@ impl Str { Ok(Self(self.0.repeat(n))) } + + /// Resolve an index. 
+ fn locate(&self, index: i64) -> StrResult<usize> { + let wrapped = + if index >= 0 { Some(index) } else { self.len().checked_add(index) }; + + let resolved = wrapped + .and_then(|v| usize::try_from(v).ok()) + .filter(|&v| v <= self.0.len()) + .ok_or_else(|| out_of_bounds(index, self.len()))?; + + if !self.0.is_char_boundary(resolved) { + return Err(not_a_char_boundary(index)); + } + + Ok(resolved) + } } /// The out of bounds access error message. #[cold] -fn out_of_bounds(index: i64, len: i64) -> String { - format!("string index out of bounds (index: {}, len: {})", index, len) +fn out_of_bounds(index: i64, len: i64) -> EcoString { + format_eco!("string index out of bounds (index: {}, len: {})", index, len) +} + +/// The char boundary access error message. +#[cold] +fn not_a_char_boundary(index: i64) -> EcoString { + format_eco!("string index {} is not a character boundary", index) } /// The error message when the string is empty. diff --git a/tests/typ/compiler/string.typ b/tests/typ/compiler/string.typ index d96213b63..017e1cdd9 100644 --- a/tests/typ/compiler/string.typ +++ b/tests/typ/compiler/string.typ @@ -27,8 +27,12 @@ #test("Hey: 🏳️‍🌈 there!".at(5), "🏳️‍🌈") --- -// Error: 4-17 string index out of bounds (index: 5, len: 5) -#{ "Hello".at(5) } +// Error: 2-14 string index 2 is not a character boundary +#"🏳️‍🌈".at(2) + +--- +// Error: 2-15 string index out of bounds (index: 5, len: 5) +#"Hello".at(5) --- // Test the `slice` method. @@ -37,6 +41,10 @@ #test("abc🏡def".slice(2, -2), "c🏡d") #test("abc🏡def".slice(-3, -1), "de") +--- +// Error: 2-21 string index -1 is not a character boundary +#"🏳️‍🌈".slice(0, -1) + --- // Test the `contains` method. #test("abc".contains("b"), true)