feat: use LoadError to avoid polluting memoization with callsite spans and file ids

This commit is contained in:
Tobias Schmitz 2025-05-20 14:22:51 +02:00
parent 7933ccb961
commit 1c4eea8353
No known key found for this signature in database
12 changed files with 157 additions and 125 deletions

1
Cargo.lock generated
View File

@ -3039,6 +3039,7 @@ dependencies = [
"icu_provider_blob",
"icu_segmenter",
"kurbo",
"memchr",
"rustybuzz",
"smallvec",
"ttf-parser",

View File

@ -30,6 +30,7 @@ icu_provider_adapters = { workspace = true }
icu_provider_blob = { workspace = true }
icu_segmenter = { workspace = true }
kurbo = { workspace = true }
memchr = { workspace = true }
rustybuzz = { workspace = true }
smallvec = { workspace = true }
ttf-parser = { workspace = true }

View File

@ -1,6 +1,6 @@
use std::ffi::OsStr;
use typst_library::diag::{warning, At, SourceResult, StrResult};
use typst_library::diag::{warning, At, LoadedAt, SourceResult, StrResult};
use typst_library::engine::Engine;
use typst_library::foundations::{Bytes, Derived, Packed, Smart, StyleChain};
use typst_library::introspection::Locator;
@ -30,14 +30,14 @@ pub fn layout_image(
let Derived { source, derived: data } = &elem.source;
let format = match elem.format(styles) {
Smart::Custom(v) => v,
Smart::Auto => determine_format(source, data).at(span)?,
Smart::Auto => determine_format(source, &data.bytes).at(span)?,
};
// Warn the user if the image contains a foreign object. Not perfect
// because the svg could also be encoded, but that's an edge case.
if format == ImageFormat::Vector(VectorFormat::Svg) {
let has_foreign_object =
data.as_str().is_ok_and(|s| s.contains("<foreignObject"));
memchr::memmem::find(&data.bytes, b"<foreignObject").is_some();
if has_foreign_object {
engine.sink.warn(warning!(
@ -53,7 +53,7 @@ pub fn layout_image(
let kind = match format {
ImageFormat::Raster(format) => ImageKind::Raster(
RasterImage::new(
data.clone(),
data.bytes.clone(),
format,
elem.icc(styles).as_ref().map(|icc| icc.derived.clone()),
)
@ -61,11 +61,11 @@ pub fn layout_image(
),
ImageFormat::Vector(VectorFormat::Svg) => ImageKind::Svg(
SvgImage::with_fonts(
data.clone(),
data.bytes.clone(),
engine.world,
&families(styles).map(|f| f.as_str()).collect::<Vec<_>>(),
)
.at(span)?,
.in_text(&data)?,
),
};

View File

@ -570,8 +570,54 @@ impl From<PackageError> for EcoString {
}
}
impl Loaded {
pub type LoadResult<T> = Result<T, LoadError>;
/// A callsite independent error that occurred during data loading.
/// Can be turned into a [`SourceDiagnostic`] using the [`LoadedAt::in_text`]
/// or [`LoadedAt::in_invalid_text`] methods available on [`LoadResult`].
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct LoadError {
pub pos: ReportPos,
pub message: EcoString,
pub error: EcoString,
}
impl LoadError {
pub fn new(
pos: impl Into<ReportPos>,
message: impl std::fmt::Display,
error: impl std::fmt::Display,
) -> Self {
Self {
pos: pos.into(),
message: eco_format!("{message}"),
error: eco_format!("{error}"),
}
}
}
/// Convert a [`LoadResult`] to a [`SourceResult`] by adding the [`Loaded`] context.
pub trait LoadedAt<T> {
/// Add the span information.
fn in_text(self, data: &Loaded) -> SourceResult<T>;
/// Add the span information.
fn in_invalid_text(self, data: &Loaded) -> SourceResult<T>;
}
impl<T> LoadedAt<T> for Result<T, LoadError> {
/// Report an error, possibly in an external file.
fn in_text(self, data: &Loaded) -> SourceResult<T> {
self.map_err(|err| data.err_in_text(err.pos, err.message, err.error))
}
/// Report an error in invalid text.
fn in_invalid_text(self, data: &Loaded) -> SourceResult<T> {
self.map_err(|err| data.err_in_invalid_text(err.pos, err.message, err.error))
}
}
impl Loaded {
pub fn err_in_text(
&self,
pos: impl Into<ReportPos>,
@ -632,7 +678,7 @@ impl Loaded {
}
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub enum ReportPos {
/// Contains the range, and the 0-based line/column.
Full(std::ops::Range<usize>, LineCol),
@ -694,7 +740,7 @@ impl ReportPos {
}
/// A line/column pair.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineCol {
/// The 0-based line.
line: usize,
@ -732,38 +778,34 @@ impl LineCol {
/// Returns the 0-based line/column indices.
pub fn indices(&self) -> (usize, usize) {
(self.line, self.col)
(self.line as usize, self.col as usize)
}
/// Returns the 1-based line/column numbers.
pub fn numbers(&self) -> (usize, usize) {
(self.line + 1, self.col + 1)
(self.line as usize + 1, self.col as usize + 1)
}
}
/// Format a user-facing error message for an XML-like file format.
pub fn format_xml_like_error(
format: &str,
data: &Loaded,
error: roxmltree::Error,
) -> EcoVec<SourceDiagnostic> {
pub fn format_xml_like_error(format: &str, error: roxmltree::Error) -> LoadError {
let pos = LineCol::one_based(error.pos().row as usize, error.pos().col as usize);
let msg = format!("failed to parse {format}");
let msg = eco_format!("failed to parse {format}");
let err = match error {
roxmltree::Error::UnexpectedCloseTag(expected, actual, _) => {
format!("found closing tag '{actual}' instead of '{expected}'")
eco_format!("found closing tag '{actual}' instead of '{expected}'")
}
roxmltree::Error::UnknownEntityReference(entity, _) => {
format!("unknown entity '{entity}'")
eco_format!("unknown entity '{entity}'")
}
roxmltree::Error::DuplicatedAttribute(attr, _) => {
format!("duplicate attribute '{attr}'")
eco_format!("duplicate attribute '{attr}'")
}
roxmltree::Error::NoRootNode => {
format!("missing root node")
eco_format!("missing root node")
}
err => err.to_string(),
err => eco_format!("{err}"),
};
data.err_in_text(pos, msg, err)
LoadError::new(pos, msg, err)
}

View File

@ -17,7 +17,7 @@ mod yaml_;
use comemo::Tracked;
use ecow::EcoString;
use typst_syntax::{FileId, Span, Spanned};
use typst_syntax::{FileId, Spanned};
pub use self::cbor_::*;
pub use self::csv_::*;
@ -27,7 +27,7 @@ pub use self::toml_::*;
pub use self::xml_::*;
pub use self::yaml_::*;
use crate::diag::{At, FileError, SourceResult};
use crate::diag::{At, LoadError, LoadResult, LoadedAt, SourceResult};
use crate::foundations::OneOrMultiple;
use crate::foundations::{cast, Bytes, Scope, Str};
use crate::World;
@ -121,46 +121,47 @@ impl Load for Spanned<&OneOrMultiple<DataSource>> {
}
/// Data loaded from a [`DataSource`].
#[derive(Clone, Hash)]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Loaded {
pub source: Spanned<LoadSource>,
pub bytes: Bytes,
}
impl Loaded {
/// FIXME: remove this?
pub fn dummy() -> Self {
Loaded::new(
typst_syntax::Spanned::new(LoadSource::Bytes, Span::detached()),
Bytes::new([]),
)
}
pub fn new(source: Spanned<LoadSource>, bytes: Bytes) -> Self {
Self { source, bytes }
}
pub fn as_str(&self) -> SourceResult<&str> {
self.bytes.as_str().map_err(|err| {
let start = err.valid_up_to();
let end = start + err.error_len().unwrap_or(0);
// always report this error in the source file.
self.err_in_invalid_text(
start..end,
"failed to convert to string",
FileError::from(err),
)
})
pub fn load_str(&self) -> SourceResult<&str> {
self.bytes.load_str().in_invalid_text(self)
}
}
/// A loaded [`DataSource`].
#[derive(Clone, Copy, Hash)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LoadSource {
Path(FileId),
Bytes,
}
pub trait LoadStr {
fn load_str(&self) -> LoadResult<&str>;
}
impl<T: AsRef<[u8]>> LoadStr for T {
fn load_str(&self) -> LoadResult<&str> {
std::str::from_utf8(self.as_ref()).map_err(|err| {
let start = err.valid_up_to();
let end = start + err.error_len().unwrap_or(0);
LoadError::new(
start..end,
"failed to convert to string",
"file is not valid utf-8",
)
})
}
}
/// A value that can be read from a file.
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum Readable {

View File

@ -38,7 +38,7 @@ pub fn read(
let data = path.map(DataSource::Path).load(engine.world)?;
Ok(match encoding {
None => Readable::Bytes(data.bytes),
Some(Encoding::Utf8) => Readable::Str(data.as_str()?.into()),
Some(Encoding::Utf8) => Readable::Str(data.load_str()?.into()),
})
}

View File

@ -1,10 +1,10 @@
use ecow::{eco_format, EcoVec};
use ecow::eco_format;
use typst_syntax::Spanned;
use crate::diag::{At, ReportPos, SourceDiagnostic, SourceResult};
use crate::diag::{At, LoadError, LoadedAt, ReportPos, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
use crate::loading::{DataSource, Load, Loaded, Readable};
use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from a TOML file.
///
@ -33,8 +33,8 @@ pub fn toml(
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let data = source.load(engine.world)?;
let raw = data.as_str()?;
::toml::from_str(raw).map_err(|err| format_toml_error(&data, err))
let raw = data.load_str()?;
::toml::from_str(raw).map_err(format_toml_error).in_text(&data)
}
#[scope]
@ -69,10 +69,7 @@ impl toml {
}
/// Format the user-facing TOML error message.
fn format_toml_error(
data: &Loaded,
error: ::toml::de::Error,
) -> EcoVec<SourceDiagnostic> {
let pos = error.span().map(ReportPos::Range).unwrap_or_default();
data.err_in_text(pos, "failed to parse TOML", error.message())
fn format_toml_error(error: ::toml::de::Error) -> LoadError {
let pos = error.span().map(ReportPos::from).unwrap_or_default();
LoadError::new(pos, "failed to parse TOML", error.message())
}

View File

@ -1,11 +1,10 @@
use ecow::EcoVec;
use roxmltree::ParsingOptions;
use typst_syntax::Spanned;
use crate::diag::{format_xml_like_error, SourceDiagnostic, SourceResult};
use crate::diag::{format_xml_like_error, LoadError, LoadedAt, SourceResult};
use crate::engine::Engine;
use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
use crate::loading::{DataSource, Load, Loaded, Readable};
use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from an XML file.
///
@ -62,12 +61,13 @@ pub fn xml(
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let data = source.load(engine.world)?;
let text = data.as_str()?;
let text = data.load_str()?;
let document = roxmltree::Document::parse_with_options(
text,
ParsingOptions { allow_dtd: true, ..Default::default() },
)
.map_err(|err| format_xml_error(&data, err))?;
.map_err(format_xml_error)
.in_text(&data)?;
Ok(convert_xml(document.root()))
}
@ -110,6 +110,6 @@ fn convert_xml(node: roxmltree::Node) -> Value {
}
/// Format the user-facing XML error message.
fn format_xml_error(data: &Loaded, error: roxmltree::Error) -> EcoVec<SourceDiagnostic> {
format_xml_like_error("XML", data, error)
fn format_xml_error(error: roxmltree::Error) -> LoadError {
format_xml_like_error("XML", error)
}

View File

@ -20,8 +20,8 @@ use typst_syntax::{Span, Spanned};
use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr};
use crate::diag::{
bail, error, At, HintedStrResult, ReportPos, SourceDiagnostic, SourceResult,
StrResult,
bail, error, At, HintedStrResult, LoadError, LoadResult, LoadedAt, ReportPos,
SourceDiagnostic, SourceResult, StrResult,
};
use crate::engine::{Engine, Sink};
use crate::foundations::{
@ -34,7 +34,7 @@ use crate::layout::{
BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem,
Sides, Sizing, TrackSizings,
};
use crate::loading::{format_yaml_error, DataSource, Load, LoadSource, Loaded};
use crate::loading::{format_yaml_error, DataSource, Load, LoadSource, LoadStr, Loaded};
use crate::model::{
CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem,
Url,
@ -356,7 +356,7 @@ impl Debug for Bibliography {
/// Decode on library from one data source.
fn decode_library(data: &Loaded) -> SourceResult<Library> {
let str = data.as_str()?;
let str = data.load_str()?;
if let LoadSource::Path(file_id) = data.source.v {
// If we got a path, use the extension to determine whether it is
@ -451,7 +451,7 @@ impl CslStyle {
CslSource::Named(style) => Self::from_archived(*style),
CslSource::Normal(source) => {
let data = Spanned::new(source, span).load(world)?;
Self::from_data(&data)?
Self::from_data(&data.bytes).in_text(&data)?
}
};
Ok(Derived::new(source, style))
@ -472,17 +472,17 @@ impl CslStyle {
/// Load a CSL style from file contents.
#[comemo::memoize]
pub fn from_data(data: &Loaded) -> SourceResult<CslStyle> {
let text = data.as_str()?;
pub fn from_data(bytes: &Bytes) -> LoadResult<CslStyle> {
let text = bytes.load_str()?;
citationberg::IndependentStyle::from_xml(text)
.map(|style| {
Self(Arc::new(ManuallyHash::new(
style,
typst_utils::hash128(&(TypeId::of::<Bytes>(), data)),
typst_utils::hash128(&(TypeId::of::<Bytes>(), bytes)),
)))
})
.map_err(|err| {
data.err_in_text(ReportPos::None, "failed to load CSL style", err)
LoadError::new(ReportPos::None, "failed to load CSL style", err)
})
}

View File

@ -11,15 +11,15 @@ use typst_utils::ManuallyHash;
use unicode_segmentation::UnicodeSegmentation;
use super::Lang;
use crate::diag::{LineCol, ReportPos, SourceDiagnostic, SourceResult};
use crate::diag::{LineCol, LoadError, LoadResult, LoadedAt, ReportPos, SourceResult};
use crate::engine::Engine;
use crate::foundations::{
cast, elem, scope, Content, Derived, NativeElement, OneOrMultiple, Packed, PlainText,
Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem,
cast, elem, scope, Bytes, Content, Derived, NativeElement, OneOrMultiple, Packed,
PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem,
};
use crate::html::{tag, HtmlElem};
use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
use crate::loading::{DataSource, Load, Loaded};
use crate::loading::{DataSource, Load, LoadStr};
use crate::model::{Figurable, ParElem};
use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize};
use crate::visualize::Color;
@ -540,25 +540,28 @@ impl RawSyntax {
sources: Spanned<OneOrMultiple<DataSource>>,
) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> {
let data = sources.load(world)?;
let list = data.iter().map(Self::decode).collect::<SourceResult<_>>()?;
let list = data
.iter()
.map(|data| Self::decode(&data.bytes).in_text(data))
.collect::<SourceResult<_>>()?;
Ok(Derived::new(sources.v, list))
}
/// Decode a syntax from a loaded source.
#[comemo::memoize]
#[typst_macros::time(name = "load syntaxes")]
fn decode(data: &Loaded) -> SourceResult<RawSyntax> {
let str = data.as_str()?;
fn decode(bytes: &Bytes) -> LoadResult<RawSyntax> {
let str = bytes.load_str()?;
let syntax = SyntaxDefinition::load_from_str(str, false, None)
.map_err(|err| format_syntax_error(data, err))?;
.map_err(format_syntax_error)?;
let mut builder = SyntaxSetBuilder::new();
builder.add(syntax);
Ok(RawSyntax(Arc::new(ManuallyHash::new(
builder.build(),
typst_utils::hash128(data),
typst_utils::hash128(bytes),
))))
}
@ -568,12 +571,9 @@ impl RawSyntax {
}
}
fn format_syntax_error(
data: &Loaded,
error: ParseSyntaxError,
) -> EcoVec<SourceDiagnostic> {
fn format_syntax_error(error: ParseSyntaxError) -> LoadError {
let pos = syntax_error_pos(&error);
data.err_in_text(pos, "failed to parse syntax", error)
LoadError::new(pos, "failed to parse syntax", error)
}
fn syntax_error_pos(error: &ParseSyntaxError) -> ReportPos {
@ -600,17 +600,17 @@ impl RawTheme {
source: Spanned<DataSource>,
) -> SourceResult<Derived<DataSource, Self>> {
let data = source.load(world)?;
let theme = Self::decode(&data)?;
let theme = Self::decode(&data.bytes).in_text(&data)?;
Ok(Derived::new(source.v, theme))
}
/// Decode a theme from bytes.
#[comemo::memoize]
fn decode(data: &Loaded) -> SourceResult<RawTheme> {
let mut cursor = std::io::Cursor::new(data.bytes.as_slice());
let theme = synt::ThemeSet::load_from_reader(&mut cursor)
.map_err(|err| format_theme_error(data, err))?;
Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(data)))))
fn decode(bytes: &Bytes) -> LoadResult<RawTheme> {
let mut cursor = std::io::Cursor::new(bytes.as_slice());
let theme =
synt::ThemeSet::load_from_reader(&mut cursor).map_err(format_theme_error)?;
Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(bytes)))))
}
/// Get the underlying syntect theme.
@ -619,15 +619,12 @@ impl RawTheme {
}
}
fn format_theme_error(
data: &Loaded,
error: syntect::LoadingError,
) -> EcoVec<SourceDiagnostic> {
fn format_theme_error(error: syntect::LoadingError) -> LoadError {
let pos = match &error {
syntect::LoadingError::ParseSyntax(err, _) => syntax_error_pos(err),
_ => ReportPos::None,
};
data.err_in_text(pos, "failed to parse theme", error)
LoadError::new(pos, "failed to parse theme", error)
}
/// A highlighted line of raw text.

View File

@ -22,7 +22,7 @@ use crate::foundations::{
Smart, StyleChain,
};
use crate::layout::{BlockElem, Length, Rel, Sizing};
use crate::loading::{DataSource, Load, Readable};
use crate::loading::{DataSource, Load, LoadSource, Loaded, Readable};
use crate::model::Figurable;
use crate::text::LocalName;
@ -66,9 +66,9 @@ pub struct ImageElem {
#[parse(
let source = args.expect::<Spanned<DataSource>>("source")?;
let data = source.load(engine.world)?;
Derived::new(source.v, data.bytes)
Derived::new(source.v, data)
)]
pub source: Derived<DataSource, Bytes>,
pub source: Derived<DataSource, Loaded>,
/// The image's format.
///
@ -173,7 +173,7 @@ impl ImageElem {
pub fn decode(
span: Span,
/// The data to decode as an image. Can be a string for SVGs.
data: Readable,
data: Spanned<Readable>,
/// The image's format. Detected automatically by default.
#[named]
format: Option<Smart<ImageFormat>>,
@ -193,8 +193,9 @@ impl ImageElem {
#[named]
scaling: Option<Smart<ImageScaling>>,
) -> StrResult<Content> {
let bytes = data.into_bytes();
let source = Derived::new(DataSource::Bytes(bytes.clone()), bytes);
let bytes = data.v.into_bytes();
let data = Loaded::new(Spanned::new(LoadSource::Bytes, data.span), bytes.clone());
let source = Derived::new(DataSource::Bytes(bytes), data);
let mut elem = ImageElem::new(source);
if let Some(format) = format {
elem.push_format(format);

View File

@ -3,13 +3,11 @@ use std::hash::{Hash, Hasher};
use std::sync::{Arc, Mutex};
use comemo::Tracked;
use ecow::EcoString;
use siphasher::sip128::{Hasher128, SipHasher13};
use crate::diag::{format_xml_like_error, StrResult};
use crate::diag::{format_xml_like_error, LoadError, LoadResult, ReportPos};
use crate::foundations::Bytes;
use crate::layout::Axes;
use crate::loading::Loaded;
use crate::text::{
Font, FontBook, FontFlags, FontStretch, FontStyle, FontVariant, FontWeight,
};
@ -31,7 +29,7 @@ impl SvgImage {
/// Decode an SVG image without fonts.
#[comemo::memoize]
#[typst_macros::time(name = "load svg")]
pub fn new(data: Bytes) -> StrResult<SvgImage> {
pub fn new(data: Bytes) -> LoadResult<SvgImage> {
let tree =
usvg::Tree::from_data(&data, &base_options()).map_err(format_usvg_error)?;
Ok(Self(Arc::new(Repr { data, size: tree_size(&tree), font_hash: 0, tree })))
@ -44,7 +42,7 @@ impl SvgImage {
data: Bytes,
world: Tracked<dyn World + '_>,
families: &[&str],
) -> StrResult<SvgImage> {
) -> LoadResult<SvgImage> {
let book = world.book();
let resolver = Mutex::new(FontResolver::new(world, book, families));
let tree = usvg::Tree::from_data(
@ -126,21 +124,15 @@ fn tree_size(tree: &usvg::Tree) -> Axes<f64> {
}
/// Format the user-facing SVG decoding error message.
fn format_usvg_error(error: usvg::Error) -> EcoString {
match error {
usvg::Error::NotAnUtf8Str => "file is not valid utf-8".into(),
usvg::Error::MalformedGZip => "file is not compressed correctly".into(),
usvg::Error::ElementsLimitReached => "file is too large".into(),
usvg::Error::InvalidSize => {
"failed to parse SVG (width, height, or viewbox is invalid)".into()
}
usvg::Error::ParsingFailed(error) => {
format_xml_like_error("SVG", &Loaded::dummy(), error)
.pop()
.expect("at least one error")
.message
}
}
fn format_usvg_error(error: usvg::Error) -> LoadError {
let error = match error {
usvg::Error::NotAnUtf8Str => "file is not valid utf-8",
usvg::Error::MalformedGZip => "file is not compressed correctly",
usvg::Error::ElementsLimitReached => "file is too large",
usvg::Error::InvalidSize => "width, height, or viewbox is invalid",
usvg::Error::ParsingFailed(error) => return format_xml_like_error("SVG", error),
};
LoadError::new(ReportPos::None, "failed to parse SVG", error)
}
/// Provides Typst's fonts to usvg.