From 7e6c3b41595a33e47d1c671ecb125203d35aa7b6 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 16 May 2025 12:12:09 +0200 Subject: [PATCH] feat: show external error messages in csv, json, toml, xml, yaml, and syntax/theme files --- crates/typst-library/src/diag.rs | 41 ++- .../typst-library/src/foundations/plugin.rs | 2 +- crates/typst-library/src/loading/cbor.rs | 2 +- crates/typst-library/src/loading/csv.rs | 39 +-- crates/typst-library/src/loading/json.rs | 9 +- crates/typst-library/src/loading/mod.rs | 247 +++++++++++++++++- crates/typst-library/src/loading/read.rs | 15 +- crates/typst-library/src/loading/toml.rs | 28 +- crates/typst-library/src/loading/xml.rs | 15 +- crates/typst-library/src/loading/yaml.rs | 26 +- .../typst-library/src/model/bibliography.rs | 124 +++------ crates/typst-library/src/text/raw.rs | 77 +++--- .../typst-library/src/visualize/image/mod.rs | 4 +- .../typst-library/src/visualize/image/svg.rs | 8 +- 14 files changed, 419 insertions(+), 218 deletions(-) diff --git a/crates/typst-library/src/diag.rs b/crates/typst-library/src/diag.rs index 49cbd02c6..75ae1b45d 100644 --- a/crates/typst-library/src/diag.rs +++ b/crates/typst-library/src/diag.rs @@ -12,6 +12,7 @@ use typst_syntax::package::{PackageSpec, PackageVersion}; use typst_syntax::{Span, Spanned, SyntaxError}; use crate::engine::Engine; +use crate::loading::{Data, LineCol}; use crate::{World, WorldExt}; /// Early-return with a [`StrResult`] or [`SourceResult`]. @@ -569,30 +570,28 @@ impl From for EcoString { } /// Format a user-facing error message for an XML-like file format. 
-pub fn format_xml_like_error(format: &str, error: roxmltree::Error) -> EcoString { - match error { - roxmltree::Error::UnexpectedCloseTag(expected, actual, pos) => { - eco_format!( - "failed to parse {format} (found closing tag '{actual}' \ - instead of '{expected}' in line {})", - pos.row - ) +pub fn format_xml_like_error( + format: &str, + data: &Data, + error: roxmltree::Error, +) -> EcoVec { + let pos = LineCol::one_based(error.pos().row as usize, error.pos().col as usize); + let msg = format!("failed to parse {format}"); + let err = match error { + roxmltree::Error::UnexpectedCloseTag(expected, actual, _) => { + format!("found closing tag '{actual}' instead of '{expected}'") } - roxmltree::Error::UnknownEntityReference(entity, pos) => { - eco_format!( - "failed to parse {format} (unknown entity '{entity}' in line {})", - pos.row - ) + roxmltree::Error::UnknownEntityReference(entity, _) => { + format!("unknown entity '{entity}'") } - roxmltree::Error::DuplicatedAttribute(attr, pos) => { - eco_format!( - "failed to parse {format} (duplicate attribute '{attr}' in line {})", - pos.row - ) + roxmltree::Error::DuplicatedAttribute(attr, _) => { + format!("duplicate attribute '{attr}'") } roxmltree::Error::NoRootNode => { - eco_format!("failed to parse {format} (missing root node)") + format!("missing root node") } - err => eco_format!("failed to parse {format} ({err})"), - } + err => err.to_string(), + }; + + data.err_at(pos, msg, err) } diff --git a/crates/typst-library/src/foundations/plugin.rs b/crates/typst-library/src/foundations/plugin.rs index 31f8cd732..e713fa688 100644 --- a/crates/typst-library/src/foundations/plugin.rs +++ b/crates/typst-library/src/foundations/plugin.rs @@ -152,7 +152,7 @@ pub fn plugin( source: Spanned, ) -> SourceResult { let data = source.load(engine.world)?; - Plugin::module(data).at(source.span) + Plugin::module(data.bytes).at(source.span) } #[scope] diff --git a/crates/typst-library/src/loading/cbor.rs 
b/crates/typst-library/src/loading/cbor.rs index aa14c5c77..20837a9d9 100644 --- a/crates/typst-library/src/loading/cbor.rs +++ b/crates/typst-library/src/loading/cbor.rs @@ -24,7 +24,7 @@ pub fn cbor( source: Spanned, ) -> SourceResult { let data = source.load(engine.world)?; - ciborium::from_reader(data.as_slice()) + ciborium::from_reader(data.bytes.as_slice()) .map_err(|err| eco_format!("failed to parse CBOR ({err})")) .at(source.span) } diff --git a/crates/typst-library/src/loading/csv.rs b/crates/typst-library/src/loading/csv.rs index 6afb5baeb..d2892fb63 100644 --- a/crates/typst-library/src/loading/csv.rs +++ b/crates/typst-library/src/loading/csv.rs @@ -1,10 +1,10 @@ -use ecow::{eco_format, EcoString}; +use ecow::EcoVec; use typst_syntax::Spanned; -use crate::diag::{bail, At, SourceResult}; +use crate::diag::{bail, SourceDiagnostic, SourceResult}; use crate::engine::Engine; use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value}; -use crate::loading::{DataSource, Load, Readable}; +use crate::loading::{Data, DataSource, LineCol, Load, Readable, ReportPos}; /// Reads structured data from a CSV file. /// @@ -53,7 +53,7 @@ pub fn csv( // Counting lines from 1 by default. let mut line_offset: usize = 1; - let mut reader = builder.from_reader(data.as_slice()); + let mut reader = builder.from_reader(data.bytes.as_slice()); let mut headers: Option<::csv::StringRecord> = None; if has_headers { @@ -62,9 +62,8 @@ pub fn csv( headers = Some( reader .headers() - .map_err(|err| format_csv_error(err, 1)) - .at(source.span)? - .clone(), + .cloned() + .map_err(|err| format_csv_error(&data, err, 1))?, ); } @@ -74,7 +73,7 @@ pub fn csv( // incorrect with `has_headers` set to `false`. 
See issue: // https://github.com/BurntSushi/rust-csv/issues/184 let line = line + line_offset; - let row = result.map_err(|err| format_csv_error(err, line)).at(source.span)?; + let row = result.map_err(|err| format_csv_error(&data, err, line))?; let item = if let Some(headers) = &headers { let mut dict = Dict::new(); for (field, value) in headers.iter().zip(&row) { @@ -164,15 +163,25 @@ cast! { } /// Format the user-facing CSV error message. -fn format_csv_error(err: ::csv::Error, line: usize) -> EcoString { +fn format_csv_error( + data: &Data, + err: ::csv::Error, + line: usize, +) -> EcoVec { + let msg = "failed to parse CSV"; + let pos = (err.kind().position()) + .map(|pos| { + let start = pos.byte() as usize; + ReportPos::Range(start..start) + }) + .unwrap_or(LineCol::one_based(line, 1).into()); match err.kind() { - ::csv::ErrorKind::Utf8 { .. } => "file is not valid utf-8".into(), + ::csv::ErrorKind::Utf8 { .. } => data.err_at(pos, msg, "file is not valid utf-8"), ::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => { - eco_format!( - "failed to parse CSV (found {len} instead of \ - {expected_len} fields in line {line})" - ) + let err = + format!("found {len} instead of {expected_len} fields in line {line}"); + data.err_at(pos, msg, err) } - _ => eco_format!("failed to parse CSV ({err})"), + _ => data.err_at(pos, "failed to parse CSV", err), } } diff --git a/crates/typst-library/src/loading/json.rs b/crates/typst-library/src/loading/json.rs index aa908cca4..f9b1682da 100644 --- a/crates/typst-library/src/loading/json.rs +++ b/crates/typst-library/src/loading/json.rs @@ -4,7 +4,7 @@ use typst_syntax::Spanned; use crate::diag::{At, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::{DataSource, Load, Readable}; +use crate::loading::{DataSource, LineCol, Load, Readable}; /// Reads structured data from a JSON file. 
/// @@ -55,9 +55,10 @@ pub fn json( source: Spanned, ) -> SourceResult { let data = source.load(engine.world)?; - serde_json::from_slice(data.as_slice()) - .map_err(|err| eco_format!("failed to parse JSON ({err})")) - .at(source.span) + serde_json::from_slice(data.bytes.as_slice()).map_err(|err| { + let pos = LineCol::one_based(err.line(), err.column()); + data.err_at(pos, "failed to parse JSON", err) + }) } #[scope] diff --git a/crates/typst-library/src/loading/mod.rs b/crates/typst-library/src/loading/mod.rs index c57e02888..28e2fb45f 100644 --- a/crates/typst-library/src/loading/mod.rs +++ b/crates/typst-library/src/loading/mod.rs @@ -16,8 +16,8 @@ mod xml_; mod yaml_; use comemo::Tracked; -use ecow::EcoString; -use typst_syntax::Spanned; +use ecow::{eco_vec, EcoString, EcoVec}; +use typst_syntax::{FileId, Span, Spanned}; pub use self::cbor_::*; pub use self::csv_::*; @@ -27,7 +27,7 @@ pub use self::toml_::*; pub use self::xml_::*; pub use self::yaml_::*; -use crate::diag::{At, SourceResult}; +use crate::diag::{error, At, FileError, SourceDiagnostic, SourceResult}; use crate::foundations::OneOrMultiple; use crate::foundations::{cast, Bytes, Scope, Str}; use crate::World; @@ -74,39 +74,44 @@ pub trait Load { } impl Load for Spanned { - type Output = Bytes; + type Output = Data; - fn load(&self, world: Tracked) -> SourceResult { + fn load(&self, world: Tracked) -> SourceResult { self.as_ref().load(world) } } impl Load for Spanned<&DataSource> { - type Output = Bytes; + type Output = Data; - fn load(&self, world: Tracked) -> SourceResult { + fn load(&self, world: Tracked) -> SourceResult { match &self.v { DataSource::Path(path) => { let file_id = self.span.resolve_path(path).at(self.span)?; - world.file(file_id).at(self.span) + let bytes = world.file(file_id).at(self.span)?; + let source = Spanned::new(LoadSource::Path(file_id), self.span); + Ok(Data::new(source, bytes)) + } + DataSource::Bytes(bytes) => { + let source = Spanned::new(LoadSource::Bytes, self.span); 
+ Ok(Data::new(source, bytes.clone())) } - DataSource::Bytes(bytes) => Ok(bytes.clone()), } } } impl Load for Spanned> { - type Output = Vec; + type Output = Vec; - fn load(&self, world: Tracked) -> SourceResult> { + fn load(&self, world: Tracked) -> SourceResult { self.as_ref().load(world) } } impl Load for Spanned<&OneOrMultiple> { - type Output = Vec; + type Output = Vec; - fn load(&self, world: Tracked) -> SourceResult> { + fn load(&self, world: Tracked) -> SourceResult { self.v .0 .iter() @@ -115,6 +120,222 @@ impl Load for Spanned<&OneOrMultiple> { } } +/// Data loaded from a [`DataSource`]. +#[derive(Clone, Hash)] +pub struct Data { + pub source: Spanned, + pub bytes: Bytes, +} + +impl Data { + pub fn dummy() -> Self { + Data::new( + typst_syntax::Spanned::new(LoadSource::Bytes, Span::detached()), + Bytes::new([]), + ) + } + + pub fn new(source: Spanned, bytes: Bytes) -> Self { + Self { source, bytes } + } + + pub fn as_str(&self) -> SourceResult<&str> { + self.bytes.as_str().map_err(|err| { + // TODO: should the error even be reported in the file if it's possibly binary? + let start = err.valid_up_to(); + let end = start + err.error_len().unwrap_or(0); + self.err_at(start..end, "failed to convert to string", FileError::from(err)) + }) + } + + /// Report an error, possibly in an external file. + pub fn err_at( + &self, + pos: impl Into, + msg: impl std::fmt::Display, + error: impl std::fmt::Display, + ) -> EcoVec { + let pos = pos.into(); + let error = match self.source.v { + LoadSource::Path(file_id) => { + if let Some(range) = pos.range(self.bytes.as_slice()) { + let span = Span::from_range(file_id, range); + return eco_vec!(error!(span, "{msg} ({error})")); + } + + // Either there was no range provided, or resolving the range + // from the line/column failed. If present report the possibly + // wrong line/column anyway. 
+ let span = Span::from_range(file_id, 0..self.bytes.len()); + if let Some(pair) = pos.line_col(self.bytes.as_slice()) { + let (line, col) = pair.numbers(); + error!(span, "{msg} ({error} at {line}:{col})") + } else { + error!(span, "{msg} ({error})") + } + } + LoadSource::Bytes => { + if let Some(pair) = pos.line_col(self.bytes.as_slice()) { + let (line, col) = pair.numbers(); + error!(self.source.span, "{msg} ({error} at {line}:{col})") + } else { + error!(self.source.span, "{msg} ({error})") + } + } + }; + eco_vec![error] + } +} + +#[derive(Debug, Default)] +pub enum ReportPos { + /// Contains the range, and the 0-based line/column. + Full(std::ops::Range, LineCol), + /// Contains the range. + Range(std::ops::Range), + /// Contains the 0-based line/column. + LineCol(LineCol), + #[default] + None, +} + +impl From> for ReportPos { + fn from(value: std::ops::Range) -> Self { + Self::Range(value) + } +} + +impl From for ReportPos { + fn from(value: LineCol) -> Self { + Self::LineCol(value) + } +} + +impl ReportPos { + fn range(&self, bytes: &[u8]) -> Option> { + match self { + ReportPos::Full(range, _) => Some(range.clone()), + ReportPos::Range(range) => Some(range.clone()), + &ReportPos::LineCol(pair) => pair.byte_pos(bytes).map(|i| i..i), + ReportPos::None => None, + } + } + + fn line_col(&self, bytes: &[u8]) -> Option { + match self { + &ReportPos::Full(_, pair) => Some(pair), + ReportPos::Range(range) => LineCol::from_byte_pos(range.start, bytes), + &ReportPos::LineCol(pair) => Some(pair), + ReportPos::None => None, + } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct LineCol { + /// The 0-based line. + line: usize, + /// The 0-based column. + col: usize, +} + +impl LineCol { + /// Constructs the line/column pair from 0-based indices. + pub fn zero_based(line: usize, col: usize) -> Self { + Self { line, col } + } + + /// Constructs the line/column pair from 1-based numbers. 
+ pub fn one_based(line: usize, col: usize) -> Self { + Self { + line: line.saturating_sub(1), + col: col.saturating_sub(1), + } + } + + // TODO: this function should only return None if the position is out of + // bounds not if there is invalid utf-8 + pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option { + let bytes = &bytes[..pos]; + let mut line = 0; + let line_start = memchr::memchr_iter(b'\n', bytes) + .inspect(|_| line += 1) + .last() + .map(|i| i + 1) + .unwrap_or(bytes.len()); + // TODO: streaming-utf8 decoding ignore invalid characters + // might need to update error reporting too (use utf8_iter) + let str = std::str::from_utf8(&bytes[line_start..]).ok()?; + let col = str.chars().count(); + Some(LineCol::zero_based(line, col)) + } + + pub fn byte_pos(&self, bytes: &[u8]) -> Option { + let line_offset = if let Some(idx) = self.line.checked_sub(1) { + memchr::memchr_iter(b'\n', bytes).nth(idx).map(|i| i + 1)? + } else { + 0 + }; + + let col_offset = col_offset(line_offset, self.col, bytes)?; + let pos = line_offset + col_offset; + Some(pos) + } + + pub fn byte_range( + range: std::ops::Range, + bytes: &[u8], + ) -> Option> { + let mut line_iter = memchr::memchr_iter(b'\n', bytes); + let start_line_offset = if let Some(idx) = range.start.line.checked_sub(1) { + line_iter.nth(idx).map(|i| i + 1)? + } else { + 0 + }; + let line_delta = range.end.line - range.start.line; + let end_line_offset = if let Some(idx) = line_delta.checked_sub(1) { + line_iter.nth(idx).map(|i| i + 1)? 
+ } else { + start_line_offset + }; + + let start_col_offset = col_offset(start_line_offset, range.start.col, bytes)?; + let end_col_offset = col_offset(end_line_offset, range.end.col, bytes)?; + + let start = start_line_offset + start_col_offset; + let end = end_line_offset + end_col_offset; + Some(start..end) + } + + pub fn numbers(&self) -> (usize, usize) { + (self.line + 1, self.col + 1) + } +} + +// TODO: this function should only return None if the position is out of +// bounds not if there is invalid utf-8 +fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option { + let line = &bytes[line_offset..]; + // TODO: streaming-utf8 decoding ignore invalid characters + // might need to update error reporting too (use utf8_iter) + + // validate the whole line, so it can be displayed + let len = memchr::memchr(b'\n', line).unwrap_or(line.len()); + let str = std::str::from_utf8(&line[..len]).ok()?; + if let Some(idx) = col.checked_sub(1) { + str.char_indices().nth(idx).map(|(i, c)| i + c.len_utf8()) + } else { + Some(0) + } +} + +/// A loaded [`DataSource`]. +#[derive(Clone, Copy, Hash)] +pub enum LoadSource { + Path(FileId), + Bytes, +} + /// A value that can be read from a file. #[derive(Debug, Clone, PartialEq, Hash)] pub enum Readable { diff --git a/crates/typst-library/src/loading/read.rs b/crates/typst-library/src/loading/read.rs index 32dadc799..118ed681a 100644 --- a/crates/typst-library/src/loading/read.rs +++ b/crates/typst-library/src/loading/read.rs @@ -1,11 +1,10 @@ use ecow::EcoString; use typst_syntax::Spanned; -use crate::diag::{At, FileError, SourceResult}; +use crate::diag::SourceResult; use crate::engine::Engine; use crate::foundations::{func, Cast}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads plain text or data from a file. 
/// @@ -36,14 +35,10 @@ pub fn read( #[default(Some(Encoding::Utf8))] encoding: Option, ) -> SourceResult { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; + let data = path.map(DataSource::Path).load(engine.world)?; Ok(match encoding { - None => Readable::Bytes(data), - Some(Encoding::Utf8) => { - Readable::Str(data.to_str().map_err(FileError::from).at(span)?) - } + None => Readable::Bytes(data.bytes), + Some(Encoding::Utf8) => Readable::Str(data.as_str()?.into()), }) } diff --git a/crates/typst-library/src/loading/toml.rs b/crates/typst-library/src/loading/toml.rs index f04b2e746..0bca7cc3d 100644 --- a/crates/typst-library/src/loading/toml.rs +++ b/crates/typst-library/src/loading/toml.rs @@ -1,10 +1,10 @@ -use ecow::{eco_format, EcoString}; -use typst_syntax::{is_newline, Spanned}; +use ecow::{eco_format, EcoVec}; +use typst_syntax::Spanned; -use crate::diag::{At, FileError, SourceResult}; +use crate::diag::{At, SourceDiagnostic, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::{DataSource, Load, Readable}; +use crate::loading::{Data, DataSource, Load, Readable, ReportPos}; /// Reads structured data from a TOML file. /// @@ -33,10 +33,8 @@ pub fn toml( source: Spanned, ) -> SourceResult { let data = source.load(engine.world)?; - let raw = data.as_str().map_err(FileError::from).at(source.span)?; - ::toml::from_str(raw) - .map_err(|err| format_toml_error(err, raw)) - .at(source.span) + let raw = data.as_str()?; + ::toml::from_str(raw).map_err(|err| format_toml_error(&data, err)) } #[scope] @@ -71,15 +69,7 @@ impl toml { } /// Format the user-facing TOML error message. 
-fn format_toml_error(error: ::toml::de::Error, raw: &str) -> EcoString { - if let Some(head) = error.span().and_then(|range| raw.get(..range.start)) { - let line = head.lines().count(); - let column = 1 + head.chars().rev().take_while(|&c| !is_newline(c)).count(); - eco_format!( - "failed to parse TOML ({} at line {line} column {column})", - error.message(), - ) - } else { - eco_format!("failed to parse TOML ({})", error.message()) - } +fn format_toml_error(data: &Data, error: ::toml::de::Error) -> EcoVec { + let pos = error.span().map(ReportPos::Range).unwrap_or_default(); + data.err_at(pos, "failed to parse TOML", error.message()) } diff --git a/crates/typst-library/src/loading/xml.rs b/crates/typst-library/src/loading/xml.rs index e76c4e9cf..d2aa97dbc 100644 --- a/crates/typst-library/src/loading/xml.rs +++ b/crates/typst-library/src/loading/xml.rs @@ -1,11 +1,11 @@ -use ecow::EcoString; +use ecow::EcoVec; use roxmltree::ParsingOptions; use typst_syntax::Spanned; -use crate::diag::{format_xml_like_error, At, FileError, SourceResult}; +use crate::diag::{format_xml_like_error, SourceDiagnostic, SourceResult}; use crate::engine::Engine; use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value}; -use crate::loading::{DataSource, Load, Readable}; +use crate::loading::{Data, DataSource, Load, Readable}; /// Reads structured data from an XML file. /// @@ -62,13 +62,12 @@ pub fn xml( source: Spanned, ) -> SourceResult { let data = source.load(engine.world)?; - let text = data.as_str().map_err(FileError::from).at(source.span)?; + let text = data.as_str()?; let document = roxmltree::Document::parse_with_options( text, ParsingOptions { allow_dtd: true, ..Default::default() }, ) - .map_err(format_xml_error) - .at(source.span)?; + .map_err(|err| format_xml_error(&data, err))?; Ok(convert_xml(document.root())) } @@ -111,6 +110,6 @@ fn convert_xml(node: roxmltree::Node) -> Value { } /// Format the user-facing XML error message. 
-fn format_xml_error(error: roxmltree::Error) -> EcoString { - format_xml_like_error("XML", error) +fn format_xml_error(data: &Data, error: roxmltree::Error) -> EcoVec { + format_xml_like_error("XML", data, error) } diff --git a/crates/typst-library/src/loading/yaml.rs b/crates/typst-library/src/loading/yaml.rs index 3f48113e8..8e0a266d5 100644 --- a/crates/typst-library/src/loading/yaml.rs +++ b/crates/typst-library/src/loading/yaml.rs @@ -1,10 +1,10 @@ -use ecow::eco_format; +use ecow::{eco_format, EcoVec}; use typst_syntax::Spanned; -use crate::diag::{At, SourceResult}; +use crate::diag::{At, SourceDiagnostic, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::{DataSource, Load, Readable}; +use crate::loading::{Data, DataSource, LineCol, Load, Readable, ReportPos}; /// Reads structured data from a YAML file. /// @@ -45,9 +45,8 @@ pub fn yaml( source: Spanned, ) -> SourceResult { let data = source.load(engine.world)?; - serde_yaml::from_slice(data.as_slice()) - .map_err(|err| eco_format!("failed to parse YAML ({err})")) - .at(source.span) + serde_yaml::from_slice(data.bytes.as_slice()) + .map_err(|err| format_yaml_error(&data, err)) } #[scope] @@ -76,3 +75,18 @@ impl yaml { .at(span) } } + +pub fn format_yaml_error( + data: &Data, + error: serde_yaml::Error, +) -> EcoVec { + let pos = error + .location() + .map(|loc| { + let line_col = LineCol::one_based(loc.line(), loc.column()); + let range = loc.index()..loc.index(); + ReportPos::Full(range, line_col) + }) + .unwrap_or_default(); + data.err_at(pos, "failed to parse YAML", error) +} diff --git a/crates/typst-library/src/model/bibliography.rs b/crates/typst-library/src/model/bibliography.rs index 122faff6f..e1b9af77a 100644 --- a/crates/typst-library/src/model/bibliography.rs +++ b/crates/typst-library/src/model/bibliography.rs @@ -7,7 +7,7 @@ use std::path::Path; use std::sync::{Arc, LazyLock}; use comemo::{Track, Tracked}; -use ecow::{eco_format, 
eco_vec, EcoString, EcoVec}; +use ecow::{eco_format, EcoString, EcoVec}; use hayagriva::archive::ArchivedStyle; use hayagriva::io::BibLaTeXError; use hayagriva::{ @@ -16,12 +16,11 @@ use hayagriva::{ }; use indexmap::IndexMap; use smallvec::{smallvec, SmallVec}; -use typst_syntax::{FileId, Span, Spanned}; +use typst_syntax::{Span, Spanned}; use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr}; use crate::diag::{ - bail, error, At, FileError, HintedStrResult, SourceDiagnostic, SourceResult, - StrResult, + bail, error, At, HintedStrResult, SourceDiagnostic, SourceResult, StrResult, }; use crate::engine::{Engine, Sink}; use crate::foundations::{ @@ -34,7 +33,7 @@ use crate::layout::{ BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem, Sides, Sizing, TrackSizings, }; -use crate::loading::{DataSource, Load}; +use crate::loading::{format_yaml_error, Data, DataSource, Load, LoadSource, ReportPos}; use crate::model::{ CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem, Url, @@ -291,46 +290,28 @@ impl LocalName for Packed { #[derive(Clone, PartialEq, Hash)] pub struct Bibliography(Arc>>); -#[derive(Clone, Copy, Hash)] -enum LibSource { - Path(FileId), - Bytes, -} - impl Bibliography { /// Load a bibliography from data sources. fn load( world: Tracked, sources: Spanned>, ) -> SourceResult, Self>> { - let data = (sources.v.0.iter()) - .map(|source| match source { - DataSource::Path(path) => { - let file_id = sources.span.resolve_path(path).at(sources.span)?; - let bytes = world.file(file_id).at(sources.span)?; - Ok((LibSource::Path(file_id), bytes)) - } - DataSource::Bytes(bytes) => Ok((LibSource::Bytes, bytes.clone())), - }) - .collect::>>()?; - let bibliography = Self::decode(sources.span, &data)?; + let data = sources.load(world)?; + let bibliography = Self::decode(&data)?; Ok(Derived::new(sources.v, bibliography)) } /// Decode a bibliography from loaded data sources. 
#[comemo::memoize] #[typst_macros::time(name = "load bibliography")] - fn decode( - source_span: Span, - data: &[(LibSource, Bytes)], - ) -> SourceResult { + fn decode(data: &[Data]) -> SourceResult { let mut map = IndexMap::new(); // TODO: store spans of entries for duplicate key error messages let mut duplicates = Vec::::new(); // We might have multiple bib/yaml files - for (source, bytes) in data.iter() { - let library = decode_library(source_span, *source, bytes)?; + for d in data.iter() { + let library = decode_library(d)?; for entry in library { match map.entry(Label::new(PicoStr::intern(entry.key()))) { indexmap::map::Entry::Vacant(vacant) => { @@ -344,8 +325,10 @@ impl Bibliography { } if !duplicates.is_empty() { - // TODO: errors with spans of source files - bail!(source_span, "duplicate bibliography keys: {}", duplicates.join(", ")); + // TODO: errors with spans of source files, + // requires hayagriva entries to store the range + let span = data.first().unwrap().source.span; + bail!(span, "duplicate bibliography keys: {}", duplicates.join(", ")); } Ok(Bibliography(Arc::new(ManuallyHash::new(map, typst_utils::hash128(data))))) @@ -371,14 +354,10 @@ impl Debug for Bibliography { } /// Decode on library from one data source. -fn decode_library( - source_span: Span, - source: LibSource, - data: &Bytes, -) -> SourceResult { - let data = data.as_str().map_err(FileError::from).at(source_span)?; +fn decode_library(data: &Data) -> SourceResult { + let str = data.as_str()?; - if let LibSource::Path(file_id) = source { + if let LoadSource::Path(file_id) = data.source.v { // If we got a path, use the extension to determine whether it is // YAML or BibLaTeX. 
let ext = file_id @@ -389,29 +368,25 @@ fn decode_library( .unwrap_or_default(); match ext.to_lowercase().as_str() { - "yml" | "yaml" => hayagriva::io::from_yaml_str(data).map_err(|err| { - let start = err.location().map(|loc| loc.index()).unwrap_or(0); - let span = Span::from_range(file_id, start..start); - eco_vec![error!(span, "failed to parse YAML {err}")] - }), - "bib" => hayagriva::io::from_biblatex_str(data).map_err(|errors| { - eco_vec![format_biblatex_error(source_span, source, data, errors)] - }), + "yml" | "yaml" => hayagriva::io::from_yaml_str(str) + .map_err(|err| format_yaml_error(data, err)), + "bib" => hayagriva::io::from_biblatex_str(str) + .map_err(|errors| format_biblatex_error(data, errors)), _ => bail!( - source_span, + data.source.span, "unknown bibliography format (must be .yml/.yaml or .bib)" ), } } else { // If we just got bytes, we need to guess. If it can be decoded as // hayagriva YAML, we'll use that. - let haya_err = match hayagriva::io::from_yaml_str(data) { + let haya_err = match hayagriva::io::from_yaml_str(str) { Ok(library) => return Ok(library), Err(err) => err, }; // If it can be decoded as BibLaTeX, we use that isntead. - let bib_errs = match hayagriva::io::from_biblatex_str(data) { + let bib_errs = match hayagriva::io::from_biblatex_str(str) { // If the file is almost valid yaml, but contains no `@` character // it will be successfully parsed as an empty BibLaTeX library, // since BibLaTeX does support arbitrary text outside of entries. @@ -425,7 +400,7 @@ fn decode_library( // and emit the more appropriate error. 
let mut yaml = 0; let mut biblatex = 0; - for c in data.chars() { + for c in str.chars() { match c { ':' => yaml += 1, '{' => biblatex += 1, @@ -435,53 +410,30 @@ fn decode_library( match bib_errs { Some(bib_errs) if biblatex >= yaml => { - bail!(format_biblatex_error(source_span, source, data, bib_errs)) - } - _ => { - if let Some(loc) = haya_err.location() { - let line = loc.line(); - bail!(source_span, "failed to parse YAML ({line}: {haya_err})") - } else { - bail!(source_span, "failed to parse YAML ({haya_err})") - } + Err(format_biblatex_error(data, bib_errs)) } + _ => Err(format_yaml_error(data, haya_err)), } } } /// Format a BibLaTeX loading error. fn format_biblatex_error( - source_span: Span, - source: LibSource, - data: &str, + data: &Data, errors: Vec, -) -> SourceDiagnostic { +) -> EcoVec { // TODO: return multiple errors? - let Some(error) = errors.first() else { - return match source { - LibSource::Path(file_id) => { - let span = Span::from_range(file_id, 0..0); - error!(span, "failed to parse BibLaTeX file") - } - LibSource::Bytes => error!(source_span, "failed to parse BibLaTeX"), - }; + let Some(error) = errors.into_iter().next() else { + // TODO: can this even happen, should we just unwrap? 
+ return data.err_at(ReportPos::None, "failed to parse BibLaTeX", "???"); }; let (range, msg) = match error { - BibLaTeXError::Parse(error) => (&error.span, error.kind.to_string()), - BibLaTeXError::Type(error) => (&error.span, error.kind.to_string()), + BibLaTeXError::Parse(error) => (error.span, error.kind.to_string()), + BibLaTeXError::Type(error) => (error.span, error.kind.to_string()), }; - match source { - LibSource::Path(file_id) => { - let span = Span::from_range(file_id, range.clone()); - error!(span, "failed to parse BibLaTeX file ({msg})") - } - LibSource::Bytes => { - let line = data.get(..range.start).unwrap_or_default().lines().count(); - error!(source_span, "failed to parse BibLaTeX ({line}: {msg})") - } - } + data.err_at(range, "failed to parse BibLaTeX", msg) } /// A loaded CSL style. @@ -498,7 +450,7 @@ impl CslStyle { CslSource::Named(style) => Self::from_archived(*style), CslSource::Normal(source) => { let data = Spanned::new(source, span).load(world)?; - Self::from_data(data).at(span)? + Self::from_data(&data)? } }; Ok(Derived::new(source, style)) @@ -519,8 +471,8 @@ impl CslStyle { /// Load a CSL style from file contents. #[comemo::memoize] - pub fn from_data(data: Bytes) -> StrResult { - let text = data.as_str().map_err(FileError::from)?; + pub fn from_data(data: &Data) -> SourceResult { + let text = data.as_str()?; citationberg::IndependentStyle::from_xml(text) .map(|style| { Self(Arc::new(ManuallyHash::new( @@ -528,7 +480,7 @@ impl CslStyle { typst_utils::hash128(&(TypeId::of::(), data)), ))) }) - .map_err(|err| eco_format!("failed to load CSL style ({err})")) + .map_err(|err| data.err_at(ReportPos::None, "failed to load CSL style", err)) } /// Get the underlying independent style. 
diff --git a/crates/typst-library/src/text/raw.rs b/crates/typst-library/src/text/raw.rs index d5c07424d..43a069605 100644 --- a/crates/typst-library/src/text/raw.rs +++ b/crates/typst-library/src/text/raw.rs @@ -3,23 +3,23 @@ use std::ops::Range; use std::sync::{Arc, LazyLock}; use comemo::Tracked; -use ecow::{eco_format, EcoString, EcoVec}; -use syntect::highlighting as synt; -use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder}; +use ecow::{EcoString, EcoVec}; +use syntect::highlighting::{self as synt}; +use syntect::parsing::{ParseSyntaxError, SyntaxDefinition, SyntaxSet, SyntaxSetBuilder}; use typst_syntax::{split_newlines, LinkedNode, Span, Spanned}; use typst_utils::ManuallyHash; use unicode_segmentation::UnicodeSegmentation; use super::Lang; -use crate::diag::{At, FileError, SourceResult, StrResult}; +use crate::diag::{SourceDiagnostic, SourceResult}; use crate::engine::Engine; use crate::foundations::{ - cast, elem, scope, Bytes, Content, Derived, NativeElement, OneOrMultiple, Packed, - PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem, + cast, elem, scope, Content, Derived, NativeElement, OneOrMultiple, Packed, PlainText, + Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem, }; use crate::html::{tag, HtmlElem}; use crate::layout::{BlockBody, BlockElem, Em, HAlignment}; -use crate::loading::{DataSource, Load}; +use crate::loading::{Data, DataSource, LineCol, Load, ReportPos}; use crate::model::{Figurable, ParElem}; use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize}; use crate::visualize::Color; @@ -540,32 +540,18 @@ impl RawSyntax { sources: Spanned>, ) -> SourceResult, Vec>> { let data = sources.load(world)?; - let list = sources - .v - .0 - .iter() - .zip(&data) - .map(|(source, data)| Self::decode(source, data)) - .collect::>() - .at(sources.span)?; + let list = data.iter().map(Self::decode).collect::>()?; Ok(Derived::new(sources.v, list)) } /// Decode a syntax 
from a loaded source. #[comemo::memoize] #[typst_macros::time(name = "load syntaxes")] - fn decode(source: &DataSource, data: &Bytes) -> StrResult { - let src = data.as_str().map_err(FileError::from)?; - let syntax = SyntaxDefinition::load_from_str(src, false, None).map_err( - |err| match source { - DataSource::Path(path) => { - eco_format!("failed to parse syntax file `{path}` ({err})") - } - DataSource::Bytes(_) => { - eco_format!("failed to parse syntax ({err})") - } - }, - )?; + fn decode(data: &Data) -> SourceResult { + let str = data.as_str()?; + + let syntax = SyntaxDefinition::load_from_str(str, false, None) + .map_err(|err| format_syntax_error(data, err))?; let mut builder = SyntaxSetBuilder::new(); builder.add(syntax); @@ -582,6 +568,24 @@ impl RawSyntax { } } +fn format_syntax_error(data: &Data, error: ParseSyntaxError) -> EcoVec { + let pos = syntax_error_pos(&error); + data.err_at(pos, "failed to parse syntax", error) +} + +fn syntax_error_pos(error: &ParseSyntaxError) -> ReportPos { + match error { + ParseSyntaxError::InvalidYaml(scan_error) => { + let m = scan_error.marker(); + ReportPos::Full( + m.index()..m.index(), + LineCol::one_based(m.line(), m.col() + 1), + ) + } + _ => ReportPos::None, + } +} + /// A loaded syntect theme. #[derive(Debug, Clone, PartialEq, Hash)] pub struct RawTheme(Arc>); @@ -593,16 +597,16 @@ impl RawTheme { source: Spanned, ) -> SourceResult> { let data = source.load(world)?; - let theme = Self::decode(&data).at(source.span)?; + let theme = Self::decode(&data)?; Ok(Derived::new(source.v, theme)) } /// Decode a theme from bytes. 
#[comemo::memoize] - fn decode(data: &Bytes) -> StrResult { - let mut cursor = std::io::Cursor::new(data.as_slice()); + fn decode(data: &Data) -> SourceResult { + let mut cursor = std::io::Cursor::new(data.bytes.as_slice()); let theme = synt::ThemeSet::load_from_reader(&mut cursor) - .map_err(|err| eco_format!("failed to parse theme ({err})"))?; + .map_err(|err| format_theme_error(data, err))?; Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(data))))) } @@ -612,6 +616,17 @@ impl RawTheme { } } +fn format_theme_error( + data: &Data, + error: syntect::LoadingError, +) -> EcoVec { + let pos = match &error { + syntect::LoadingError::ParseSyntax(err, _) => syntax_error_pos(err), + _ => ReportPos::None, + }; + data.err_at(pos, "failed to parse theme", error) +} + /// A highlighted line of raw text. /// /// This is a helper element that is synthesized by [`raw`] elements. diff --git a/crates/typst-library/src/visualize/image/mod.rs b/crates/typst-library/src/visualize/image/mod.rs index f9e345e70..387c43b4a 100644 --- a/crates/typst-library/src/visualize/image/mod.rs +++ b/crates/typst-library/src/visualize/image/mod.rs @@ -66,7 +66,7 @@ pub struct ImageElem { #[parse( let source = args.expect::>("source")?; let data = source.load(engine.world)?; - Derived::new(source.v, data) + Derived::new(source.v, data.bytes) )] pub source: Derived, @@ -155,7 +155,7 @@ pub struct ImageElem { #[parse(match args.named::>>("icc")? { Some(Spanned { v: Smart::Custom(source), span }) => Some(Smart::Custom({ let data = Spanned::new(&source, span).load(engine.world)?; - Derived::new(source, data) + Derived::new(source, data.bytes) })), Some(Spanned { v: Smart::Auto, .. 
}) => Some(Smart::Auto), None => None, diff --git a/crates/typst-library/src/visualize/image/svg.rs b/crates/typst-library/src/visualize/image/svg.rs index 9bf1ead0d..8683dc37e 100644 --- a/crates/typst-library/src/visualize/image/svg.rs +++ b/crates/typst-library/src/visualize/image/svg.rs @@ -9,6 +9,7 @@ use siphasher::sip128::{Hasher128, SipHasher13}; use crate::diag::{format_xml_like_error, StrResult}; use crate::foundations::Bytes; use crate::layout::Axes; +use crate::loading::Data; use crate::text::{ Font, FontBook, FontFlags, FontStretch, FontStyle, FontVariant, FontWeight, }; @@ -133,7 +134,12 @@ fn format_usvg_error(error: usvg::Error) -> EcoString { usvg::Error::InvalidSize => { "failed to parse SVG (width, height, or viewbox is invalid)".into() } - usvg::Error::ParsingFailed(error) => format_xml_like_error("SVG", error), + usvg::Error::ParsingFailed(error) => { + format_xml_like_error("SVG", &Data::dummy(), error) + .pop() + .unwrap() + .message + } } }