diff --git a/crates/typst-cli/src/world.rs b/crates/typst-cli/src/world.rs index 09ae1b428..f63d34b63 100644 --- a/crates/typst-cli/src/world.rs +++ b/crates/typst-cli/src/world.rs @@ -190,7 +190,7 @@ impl SystemWorld { source.lines() } else if let Some(bytes) = slot.file.get() { let bytes = bytes.as_ref().expect("file is not valid"); - Lines::from_bytes(bytes).expect("file is not valid utf-8") + Lines::try_from(bytes).expect("file is not valid utf-8") } else { panic!("file id does not point to any source file"); } diff --git a/crates/typst-library/src/diag.rs b/crates/typst-library/src/diag.rs index 90caabcc3..b84b80679 100644 --- a/crates/typst-library/src/diag.rs +++ b/crates/typst-library/src/diag.rs @@ -599,6 +599,18 @@ impl LoadError { } } +impl From for LoadError { + fn from(err: Utf8Error) -> Self { + let start = err.valid_up_to(); + let end = start + err.error_len().unwrap_or(0); + LoadError::new( + start..end, + "failed to convert to string", + "file is not valid utf-8", + ) + } +} + /// Convert a [`LoadResult`] to a [`SourceResult`] by adding the [`Loaded`] context. pub trait LoadedWithin { /// Report an error, possibly in an external file. @@ -622,7 +634,7 @@ fn load_err_in_text( // This also does utf-8 validation. Only report an error in an external // file if it is human readable (valid utf-8), otherwise fall back to // `load_err_in_invalid_text`. - let lines = Lines::from_bytes(&loaded.data); + let lines = Lines::try_from(&loaded.data); match (loaded.source.v, lines) { (LoadSource::Path(file_id), Ok(lines)) => { if let Some(range) = pos.range(&lines) { @@ -784,6 +796,7 @@ impl LineCol { pub fn try_from_byte_pos(pos: usize, bytes: &[u8]) -> Option { let bytes = &bytes[..pos]; let mut line = 0; + #[allow(clippy::double_ended_iterator_last)] let line_start = memchr::memchr_iter(b'\n', bytes) .inspect(|_| line += 1) .last() diff --git a/crates/typst-library/src/foundations/bytes.rs b/crates/typst-library/src/foundations/bytes.rs index 6bbf6bb58..180dcdad5 100644 --- a/crates/typst-library/src/foundations/bytes.rs +++ b/crates/typst-library/src/foundations/bytes.rs @@ -7,9 +7,10 @@ use std::sync::Arc; use ecow::{eco_format, EcoString}; use serde::{Serialize, Serializer}; +use typst_syntax::Lines; use typst_utils::LazyHash; -use crate::diag::{bail, LoadError, LoadResult, StrResult}; +use crate::diag::{bail, StrResult}; use crate::foundations::{cast, func, scope, ty, Array, Reflect, Repr, Str, Value}; /// A sequence of bytes. @@ -112,21 +113,6 @@ impl Bytes { } } - pub fn load_str(&self) -> LoadResult<&str> { - match self.inner().as_any().downcast_ref::() { - Some(string) => Ok(string.as_str()), - None => self.as_str().map_err(|err| { - let start = err.valid_up_to(); - let end = start + err.error_len().unwrap_or(0); - LoadError::new( - start..end, - "failed to convert to string", - "file is not valid utf-8", - ) - }), - } - } - /// Resolve an index or throw an out of bounds error. fn locate(&self, index: i64) -> StrResult { self.locate_opt(index).ok_or_else(|| out_of_bounds(index, self.len())) @@ -301,6 +287,16 @@ impl Serialize for Bytes { } } +impl TryFrom<&Bytes> for Lines { + type Error = Utf8Error; + + #[comemo::memoize] + fn try_from(value: &Bytes) -> Result, Utf8Error> { + let text = value.as_str()?; + Ok(Lines::new(text.to_string())) + } +} + /// Any type that can back a byte buffer. trait Bytelike: Send + Sync { fn as_bytes(&self) -> &[u8]; diff --git a/crates/typst-library/src/loading/mod.rs b/crates/typst-library/src/loading/mod.rs index ec307d946..c1cf8aef7 100644 --- a/crates/typst-library/src/loading/mod.rs +++ b/crates/typst-library/src/loading/mod.rs @@ -133,7 +133,7 @@ impl Loaded { } pub fn load_str(&self) -> SourceResult<&str> { - self.data.load_str().within(self) + self.data.as_str().map_err(Into::into).within(self) } } diff --git a/crates/typst-library/src/model/bibliography.rs b/crates/typst-library/src/model/bibliography.rs index 912ce07e6..db76d33d7 100644 --- a/crates/typst-library/src/model/bibliography.rs +++ b/crates/typst-library/src/model/bibliography.rs @@ -413,9 +413,9 @@ fn decode_library(loaded: &Loaded) -> SourceResult { match bib_errs { Some(bib_errs) if biblatex >= yaml => { - Err(format_biblatex_error(bib_errs)).within(&loaded) + Err(format_biblatex_error(bib_errs)).within(loaded) } - _ => Err(format_yaml_error(haya_err)).within(&loaded), + _ => Err(format_yaml_error(haya_err)).within(loaded), } } } @@ -472,7 +472,7 @@ impl CslStyle { /// Load a CSL style from file contents. #[comemo::memoize] pub fn from_data(bytes: &Bytes) -> LoadResult { - let text = bytes.load_str()?; + let text = bytes.as_str()?; citationberg::IndependentStyle::from_xml(text) .map(|style| { Self(Arc::new(ManuallyHash::new( diff --git a/crates/typst-library/src/text/raw.rs b/crates/typst-library/src/text/raw.rs index fbfd9594b..f2485e16b 100644 --- a/crates/typst-library/src/text/raw.rs +++ b/crates/typst-library/src/text/raw.rs @@ -553,7 +553,7 @@ impl RawSyntax { #[comemo::memoize] #[typst_macros::time(name = "load syntaxes")] fn decode(bytes: &Bytes) -> LoadResult { - let str = bytes.load_str()?; + let str = bytes.as_str()?; let syntax = SyntaxDefinition::load_from_str(str, false, None) .map_err(format_syntax_error)?; diff --git a/crates/typst-syntax/src/lines.rs b/crates/typst-syntax/src/lines.rs index 2496258b2..fa1e77563 100644 --- a/crates/typst-syntax/src/lines.rs +++ b/crates/typst-syntax/src/lines.rs @@ -1,7 +1,6 @@ use std::hash::{Hash, Hasher}; use std::iter::zip; use std::ops::Range; -use std::str::Utf8Error; use std::sync::Arc; use crate::is_newline; @@ -11,9 +10,9 @@ use crate::is_newline; pub struct Lines(Arc>); #[derive(Clone)] -struct Repr { +struct Repr { lines: Vec, - text: S, + text: T, } /// Metadata about a line. @@ -25,12 +24,14 @@ pub struct Line { utf16_idx: usize, } -impl> Lines { - pub fn new(text: S) -> Self { +impl> Lines { + /// Create from the text buffer and compute the line metadata. + pub fn new(text: T) -> Self { let lines = lines(text.as_ref()); Lines(Arc::new(Repr { lines, text })) } + /// The text as a string slice. pub fn text(&self) -> &str { self.0.text.as_ref() } @@ -142,13 +143,6 @@ impl> Lines { } impl Lines { - /// Tries to convert the bytes - #[comemo::memoize] - pub fn from_bytes(bytes: &[u8]) -> Result, Utf8Error> { - let text = std::str::from_utf8(bytes)?; - Ok(Lines::new(text.to_string())) - } - /// Fully replace the source text. /// /// This performs a naive (suffix/prefix-based) diff of the old and new text diff --git a/tests/src/collect.rs b/tests/src/collect.rs index 97f168b34..456db028d 100644 --- a/tests/src/collect.rs +++ b/tests/src/collect.rs @@ -397,6 +397,8 @@ impl<'a> Parser<'a> { /// if the range is empty. #[cfg(feature = "default")] fn parse_range_external(&mut self, file: FileId) -> Option> { + use typst::foundations::Bytes; + use crate::world::{read, system_path}; let path = match system_path(file) { @@ -407,8 +409,8 @@ impl<'a> Parser<'a> { } }; - let text = match read(&path) { - Ok(text) => text, + let bytes = match read(&path) { + Ok(data) => Bytes::new(data), Err(err) => { self.error(err.to_string()); return None; @@ -416,7 +418,7 @@ impl<'a> Parser<'a> { }; let start = self.parse_line_col()?; - let lines = Lines::from_bytes(text.as_ref()).expect("Errors shouldn't be annotated for files that aren't human readable (not valid utf-8)"); + let lines = Lines::try_from(&bytes).expect("Errors shouldn't be annotated for files that aren't human readable (not valid utf-8)"); let range = if self.s.eat_if('-') { let (line, col) = start; let start = lines.line_column_to_byte(line, col); diff --git a/tests/src/world.rs b/tests/src/world.rs index c37d21dee..bc3e690b2 100644 --- a/tests/src/world.rs +++ b/tests/src/world.rs @@ -95,7 +95,7 @@ impl TestWorld { source.lines() } else if let Some(bytes) = slot.file.get() { let bytes = bytes.as_ref().expect("file is not valid"); - Lines::from_bytes(bytes.as_slice()).expect("file is not valid utf-8") + Lines::try_from(bytes).expect("file is not valid utf-8") } else { panic!("file id does not point to any source file"); }