feat: show external error messages in csv, json, toml, xml, yaml, and syntax/theme files

This commit is contained in:
Tobias Schmitz 2025-05-16 12:12:09 +02:00
parent 9e820f883e
commit 7e6c3b4159
No known key found for this signature in database
14 changed files with 419 additions and 218 deletions

View File

@ -12,6 +12,7 @@ use typst_syntax::package::{PackageSpec, PackageVersion};
use typst_syntax::{Span, Spanned, SyntaxError}; use typst_syntax::{Span, Spanned, SyntaxError};
use crate::engine::Engine; use crate::engine::Engine;
use crate::loading::{Data, LineCol};
use crate::{World, WorldExt}; use crate::{World, WorldExt};
/// Early-return with a [`StrResult`] or [`SourceResult`]. /// Early-return with a [`StrResult`] or [`SourceResult`].
@ -569,30 +570,28 @@ impl From<PackageError> for EcoString {
} }
/// Format a user-facing error message for an XML-like file format. /// Format a user-facing error message for an XML-like file format.
pub fn format_xml_like_error(format: &str, error: roxmltree::Error) -> EcoString { pub fn format_xml_like_error(
match error { format: &str,
roxmltree::Error::UnexpectedCloseTag(expected, actual, pos) => { data: &Data,
eco_format!( error: roxmltree::Error,
"failed to parse {format} (found closing tag '{actual}' \ ) -> EcoVec<SourceDiagnostic> {
instead of '{expected}' in line {})", let pos = LineCol::one_based(error.pos().row as usize, error.pos().col as usize);
pos.row let msg = format!("failed to parse {format}");
) let err = match error {
roxmltree::Error::UnexpectedCloseTag(expected, actual, _) => {
format!("found closing tag '{actual}' instead of '{expected}'")
} }
roxmltree::Error::UnknownEntityReference(entity, pos) => { roxmltree::Error::UnknownEntityReference(entity, _) => {
eco_format!( format!("unknown entity '{entity}'")
"failed to parse {format} (unknown entity '{entity}' in line {})",
pos.row
)
} }
roxmltree::Error::DuplicatedAttribute(attr, pos) => { roxmltree::Error::DuplicatedAttribute(attr, _) => {
eco_format!( format!("duplicate attribute '{attr}'")
"failed to parse {format} (duplicate attribute '{attr}' in line {})",
pos.row
)
} }
roxmltree::Error::NoRootNode => { roxmltree::Error::NoRootNode => {
eco_format!("failed to parse {format} (missing root node)") format!("missing root node")
} }
err => eco_format!("failed to parse {format} ({err})"), err => err.to_string(),
} };
data.err_at(pos, msg, err)
} }

View File

@ -152,7 +152,7 @@ pub fn plugin(
source: Spanned<DataSource>, source: Spanned<DataSource>,
) -> SourceResult<Module> { ) -> SourceResult<Module> {
let data = source.load(engine.world)?; let data = source.load(engine.world)?;
Plugin::module(data).at(source.span) Plugin::module(data.bytes).at(source.span)
} }
#[scope] #[scope]

View File

@ -24,7 +24,7 @@ pub fn cbor(
source: Spanned<DataSource>, source: Spanned<DataSource>,
) -> SourceResult<Value> { ) -> SourceResult<Value> {
let data = source.load(engine.world)?; let data = source.load(engine.world)?;
ciborium::from_reader(data.as_slice()) ciborium::from_reader(data.bytes.as_slice())
.map_err(|err| eco_format!("failed to parse CBOR ({err})")) .map_err(|err| eco_format!("failed to parse CBOR ({err})"))
.at(source.span) .at(source.span)
} }

View File

@ -1,10 +1,10 @@
use ecow::{eco_format, EcoString}; use ecow::EcoVec;
use typst_syntax::Spanned; use typst_syntax::Spanned;
use crate::diag::{bail, At, SourceResult}; use crate::diag::{bail, SourceDiagnostic, SourceResult};
use crate::engine::Engine; use crate::engine::Engine;
use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value}; use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
use crate::loading::{DataSource, Load, Readable}; use crate::loading::{Data, DataSource, LineCol, Load, Readable, ReportPos};
/// Reads structured data from a CSV file. /// Reads structured data from a CSV file.
/// ///
@ -53,7 +53,7 @@ pub fn csv(
// Counting lines from 1 by default. // Counting lines from 1 by default.
let mut line_offset: usize = 1; let mut line_offset: usize = 1;
let mut reader = builder.from_reader(data.as_slice()); let mut reader = builder.from_reader(data.bytes.as_slice());
let mut headers: Option<::csv::StringRecord> = None; let mut headers: Option<::csv::StringRecord> = None;
if has_headers { if has_headers {
@ -62,9 +62,8 @@ pub fn csv(
headers = Some( headers = Some(
reader reader
.headers() .headers()
.map_err(|err| format_csv_error(err, 1)) .cloned()
.at(source.span)? .map_err(|err| format_csv_error(&data, err, 1))?,
.clone(),
); );
} }
@ -74,7 +73,7 @@ pub fn csv(
// incorrect with `has_headers` set to `false`. See issue: // incorrect with `has_headers` set to `false`. See issue:
// https://github.com/BurntSushi/rust-csv/issues/184 // https://github.com/BurntSushi/rust-csv/issues/184
let line = line + line_offset; let line = line + line_offset;
let row = result.map_err(|err| format_csv_error(err, line)).at(source.span)?; let row = result.map_err(|err| format_csv_error(&data, err, line))?;
let item = if let Some(headers) = &headers { let item = if let Some(headers) = &headers {
let mut dict = Dict::new(); let mut dict = Dict::new();
for (field, value) in headers.iter().zip(&row) { for (field, value) in headers.iter().zip(&row) {
@ -164,15 +163,25 @@ cast! {
} }
/// Format the user-facing CSV error message. /// Format the user-facing CSV error message.
fn format_csv_error(err: ::csv::Error, line: usize) -> EcoString { fn format_csv_error(
data: &Data,
err: ::csv::Error,
line: usize,
) -> EcoVec<SourceDiagnostic> {
let msg = "failed to parse CSV";
let pos = (err.kind().position())
.map(|pos| {
let start = pos.byte() as usize;
ReportPos::Range(start..start)
})
.unwrap_or(LineCol::one_based(line, 1).into());
match err.kind() { match err.kind() {
::csv::ErrorKind::Utf8 { .. } => "file is not valid utf-8".into(), ::csv::ErrorKind::Utf8 { .. } => data.err_at(pos, msg, "file is not valid utf-8"),
::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => { ::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
eco_format!( let err =
"failed to parse CSV (found {len} instead of \ format!("found {len} instead of {expected_len} fields in line {line}");
{expected_len} fields in line {line})" data.err_at(pos, msg, err)
)
} }
_ => eco_format!("failed to parse CSV ({err})"), _ => data.err_at(pos, "failed to parse CSV", err),
} }
} }

View File

@ -4,7 +4,7 @@ use typst_syntax::Spanned;
use crate::diag::{At, SourceResult}; use crate::diag::{At, SourceResult};
use crate::engine::Engine; use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value}; use crate::foundations::{func, scope, Str, Value};
use crate::loading::{DataSource, Load, Readable}; use crate::loading::{DataSource, LineCol, Load, Readable};
/// Reads structured data from a JSON file. /// Reads structured data from a JSON file.
/// ///
@ -55,9 +55,10 @@ pub fn json(
source: Spanned<DataSource>, source: Spanned<DataSource>,
) -> SourceResult<Value> { ) -> SourceResult<Value> {
let data = source.load(engine.world)?; let data = source.load(engine.world)?;
serde_json::from_slice(data.as_slice()) serde_json::from_slice(data.bytes.as_slice()).map_err(|err| {
.map_err(|err| eco_format!("failed to parse JSON ({err})")) let pos = LineCol::one_based(err.line(), err.column());
.at(source.span) data.err_at(pos, "failed to parse JSON", err)
})
} }
#[scope] #[scope]

View File

@ -16,8 +16,8 @@ mod xml_;
mod yaml_; mod yaml_;
use comemo::Tracked; use comemo::Tracked;
use ecow::EcoString; use ecow::{eco_vec, EcoString, EcoVec};
use typst_syntax::Spanned; use typst_syntax::{FileId, Span, Spanned};
pub use self::cbor_::*; pub use self::cbor_::*;
pub use self::csv_::*; pub use self::csv_::*;
@ -27,7 +27,7 @@ pub use self::toml_::*;
pub use self::xml_::*; pub use self::xml_::*;
pub use self::yaml_::*; pub use self::yaml_::*;
use crate::diag::{At, SourceResult}; use crate::diag::{error, At, FileError, SourceDiagnostic, SourceResult};
use crate::foundations::OneOrMultiple; use crate::foundations::OneOrMultiple;
use crate::foundations::{cast, Bytes, Scope, Str}; use crate::foundations::{cast, Bytes, Scope, Str};
use crate::World; use crate::World;
@ -74,39 +74,44 @@ pub trait Load {
} }
impl Load for Spanned<DataSource> { impl Load for Spanned<DataSource> {
type Output = Bytes; type Output = Data;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Bytes> { fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output> {
self.as_ref().load(world) self.as_ref().load(world)
} }
} }
impl Load for Spanned<&DataSource> { impl Load for Spanned<&DataSource> {
type Output = Bytes; type Output = Data;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Bytes> { fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output> {
match &self.v { match &self.v {
DataSource::Path(path) => { DataSource::Path(path) => {
let file_id = self.span.resolve_path(path).at(self.span)?; let file_id = self.span.resolve_path(path).at(self.span)?;
world.file(file_id).at(self.span) let bytes = world.file(file_id).at(self.span)?;
let source = Spanned::new(LoadSource::Path(file_id), self.span);
Ok(Data::new(source, bytes))
}
DataSource::Bytes(bytes) => {
let source = Spanned::new(LoadSource::Bytes, self.span);
Ok(Data::new(source, bytes.clone()))
} }
DataSource::Bytes(bytes) => Ok(bytes.clone()),
} }
} }
} }
impl Load for Spanned<OneOrMultiple<DataSource>> { impl Load for Spanned<OneOrMultiple<DataSource>> {
type Output = Vec<Bytes>; type Output = Vec<Data>;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Vec<Bytes>> { fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output> {
self.as_ref().load(world) self.as_ref().load(world)
} }
} }
impl Load for Spanned<&OneOrMultiple<DataSource>> { impl Load for Spanned<&OneOrMultiple<DataSource>> {
type Output = Vec<Bytes>; type Output = Vec<Data>;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Vec<Bytes>> { fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output> {
self.v self.v
.0 .0
.iter() .iter()
@ -115,6 +120,222 @@ impl Load for Spanned<&OneOrMultiple<DataSource>> {
} }
} }
/// Data loaded from a [`DataSource`].
#[derive(Clone, Hash)]
pub struct Data {
pub source: Spanned<LoadSource>,
pub bytes: Bytes,
}
impl Data {
pub fn dummy() -> Self {
Data::new(
typst_syntax::Spanned::new(LoadSource::Bytes, Span::detached()),
Bytes::new([]),
)
}
pub fn new(source: Spanned<LoadSource>, bytes: Bytes) -> Self {
Self { source, bytes }
}
pub fn as_str(&self) -> SourceResult<&str> {
self.bytes.as_str().map_err(|err| {
// TODO: should the error even be reported in the file if it's possibly binary?
let start = err.valid_up_to();
let end = start + err.error_len().unwrap_or(0);
self.err_at(start..end, "failed to convert to string", FileError::from(err))
})
}
/// Report an error, possibly in an external file.
pub fn err_at(
&self,
pos: impl Into<ReportPos>,
msg: impl std::fmt::Display,
error: impl std::fmt::Display,
) -> EcoVec<SourceDiagnostic> {
let pos = pos.into();
let error = match self.source.v {
LoadSource::Path(file_id) => {
if let Some(range) = pos.range(self.bytes.as_slice()) {
let span = Span::from_range(file_id, range);
return eco_vec!(error!(span, "{msg} ({error})"));
}
// Either there was no range provided, or resolving the range
// from the line/column failed. If present report the possibly
// wrong line/column anyway.
let span = Span::from_range(file_id, 0..self.bytes.len());
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
let (line, col) = pair.numbers();
error!(span, "{msg} ({error} at {line}:{col})")
} else {
error!(span, "{msg} ({error})")
}
}
LoadSource::Bytes => {
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
let (line, col) = pair.numbers();
error!(self.source.span, "{msg} ({error} at {line}:{col})")
} else {
error!(self.source.span, "{msg} ({error})")
}
}
};
eco_vec![error]
}
}
#[derive(Debug, Default)]
pub enum ReportPos {
/// Contains the range, and the 0-based line/column.
Full(std::ops::Range<usize>, LineCol),
/// Contains the range.
Range(std::ops::Range<usize>),
/// Contains the 0-based line/column.
LineCol(LineCol),
#[default]
None,
}
impl From<std::ops::Range<usize>> for ReportPos {
fn from(value: std::ops::Range<usize>) -> Self {
Self::Range(value)
}
}
impl From<LineCol> for ReportPos {
fn from(value: LineCol) -> Self {
Self::LineCol(value)
}
}
impl ReportPos {
fn range(&self, bytes: &[u8]) -> Option<std::ops::Range<usize>> {
match self {
ReportPos::Full(range, _) => Some(range.clone()),
ReportPos::Range(range) => Some(range.clone()),
&ReportPos::LineCol(pair) => pair.byte_pos(bytes).map(|i| i..i),
ReportPos::None => None,
}
}
fn line_col(&self, bytes: &[u8]) -> Option<LineCol> {
match self {
&ReportPos::Full(_, pair) => Some(pair),
ReportPos::Range(range) => LineCol::from_byte_pos(range.start, bytes),
&ReportPos::LineCol(pair) => Some(pair),
ReportPos::None => None,
}
}
}
#[derive(Clone, Copy, Debug)]
pub struct LineCol {
/// The 0-based line.
line: usize,
/// The 0-based column.
col: usize,
}
impl LineCol {
/// Constructs the line/column pair from 0-based indices.
pub fn zero_based(line: usize, col: usize) -> Self {
Self { line, col }
}
/// Constructs the line/column pair from 1-based numbers.
pub fn one_based(line: usize, col: usize) -> Self {
Self {
line: line.saturating_sub(1),
col: col.saturating_sub(1),
}
}
// TODO: this function should only return None if the position is out of
// bounds not if there is invalid utf-8
pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
let bytes = &bytes[..pos];
let mut line = 0;
let line_start = memchr::memchr_iter(b'\n', bytes)
.inspect(|_| line += 1)
.last()
.map(|i| i + 1)
.unwrap_or(bytes.len());
// TODO: streaming-utf8 decoding, ignoring invalid characters
// might need to update error reporting too (use utf8_iter)
let str = std::str::from_utf8(&bytes[line_start..]).ok()?;
let col = str.chars().count();
Some(LineCol::zero_based(line, col))
}
pub fn byte_pos(&self, bytes: &[u8]) -> Option<usize> {
let line_offset = if let Some(idx) = self.line.checked_sub(1) {
memchr::memchr_iter(b'\n', bytes).nth(idx).map(|i| i + 1)?
} else {
0
};
let col_offset = col_offset(line_offset, self.col, bytes)?;
let pos = line_offset + col_offset;
Some(pos)
}
pub fn byte_range(
range: std::ops::Range<Self>,
bytes: &[u8],
) -> Option<std::ops::Range<usize>> {
let mut line_iter = memchr::memchr_iter(b'\n', bytes);
let start_line_offset = if let Some(idx) = range.start.line.checked_sub(1) {
line_iter.nth(idx).map(|i| i + 1)?
} else {
0
};
let line_delta = range.end.line - range.start.line;
let end_line_offset = if let Some(idx) = line_delta.checked_sub(1) {
line_iter.nth(idx).map(|i| i + 1)?
} else {
start_line_offset
};
let start_col_offset = col_offset(start_line_offset, range.start.col, bytes)?;
let end_col_offset = col_offset(end_line_offset, range.end.col, bytes)?;
let start = start_line_offset + start_col_offset;
let end = end_line_offset + end_col_offset;
Some(start..end)
}
pub fn numbers(&self) -> (usize, usize) {
(self.line + 1, self.col + 1)
}
}
// TODO: this function should only return None if the position is out of
// bounds not if there is invalid utf-8
fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option<usize> {
let line = &bytes[line_offset..];
// TODO: streaming-utf8 decoding, ignoring invalid characters
// might need to update error reporting too (use utf8_iter)
// validate the whole line, so it can be displayed
let len = memchr::memchr(b'\n', line).unwrap_or(line.len());
let str = std::str::from_utf8(&line[..len]).ok()?;
if let Some(idx) = col.checked_sub(1) {
str.char_indices().nth(idx).map(|(i, c)| i + c.len_utf8())
} else {
Some(0)
}
}
/// A loaded [`DataSource`].
#[derive(Clone, Copy, Hash)]
pub enum LoadSource {
Path(FileId),
Bytes,
}
/// A value that can be read from a file. /// A value that can be read from a file.
#[derive(Debug, Clone, PartialEq, Hash)] #[derive(Debug, Clone, PartialEq, Hash)]
pub enum Readable { pub enum Readable {

View File

@ -1,11 +1,10 @@
use ecow::EcoString; use ecow::EcoString;
use typst_syntax::Spanned; use typst_syntax::Spanned;
use crate::diag::{At, FileError, SourceResult}; use crate::diag::SourceResult;
use crate::engine::Engine; use crate::engine::Engine;
use crate::foundations::{func, Cast}; use crate::foundations::{func, Cast};
use crate::loading::Readable; use crate::loading::{DataSource, Load, Readable};
use crate::World;
/// Reads plain text or data from a file. /// Reads plain text or data from a file.
/// ///
@ -36,14 +35,10 @@ pub fn read(
#[default(Some(Encoding::Utf8))] #[default(Some(Encoding::Utf8))]
encoding: Option<Encoding>, encoding: Option<Encoding>,
) -> SourceResult<Readable> { ) -> SourceResult<Readable> {
let Spanned { v: path, span } = path; let data = path.map(DataSource::Path).load(engine.world)?;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
Ok(match encoding { Ok(match encoding {
None => Readable::Bytes(data), None => Readable::Bytes(data.bytes),
Some(Encoding::Utf8) => { Some(Encoding::Utf8) => Readable::Str(data.as_str()?.into()),
Readable::Str(data.to_str().map_err(FileError::from).at(span)?)
}
}) })
} }

View File

@ -1,10 +1,10 @@
use ecow::{eco_format, EcoString}; use ecow::{eco_format, EcoVec};
use typst_syntax::{is_newline, Spanned}; use typst_syntax::Spanned;
use crate::diag::{At, FileError, SourceResult}; use crate::diag::{At, SourceDiagnostic, SourceResult};
use crate::engine::Engine; use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value}; use crate::foundations::{func, scope, Str, Value};
use crate::loading::{DataSource, Load, Readable}; use crate::loading::{Data, DataSource, Load, Readable, ReportPos};
/// Reads structured data from a TOML file. /// Reads structured data from a TOML file.
/// ///
@ -33,10 +33,8 @@ pub fn toml(
source: Spanned<DataSource>, source: Spanned<DataSource>,
) -> SourceResult<Value> { ) -> SourceResult<Value> {
let data = source.load(engine.world)?; let data = source.load(engine.world)?;
let raw = data.as_str().map_err(FileError::from).at(source.span)?; let raw = data.as_str()?;
::toml::from_str(raw) ::toml::from_str(raw).map_err(|err| format_toml_error(&data, err))
.map_err(|err| format_toml_error(err, raw))
.at(source.span)
} }
#[scope] #[scope]
@ -71,15 +69,7 @@ impl toml {
} }
/// Format the user-facing TOML error message. /// Format the user-facing TOML error message.
fn format_toml_error(error: ::toml::de::Error, raw: &str) -> EcoString { fn format_toml_error(data: &Data, error: ::toml::de::Error) -> EcoVec<SourceDiagnostic> {
if let Some(head) = error.span().and_then(|range| raw.get(..range.start)) { let pos = error.span().map(ReportPos::Range).unwrap_or_default();
let line = head.lines().count(); data.err_at(pos, "failed to parse TOML", error.message())
let column = 1 + head.chars().rev().take_while(|&c| !is_newline(c)).count();
eco_format!(
"failed to parse TOML ({} at line {line} column {column})",
error.message(),
)
} else {
eco_format!("failed to parse TOML ({})", error.message())
}
} }

View File

@ -1,11 +1,11 @@
use ecow::EcoString; use ecow::EcoVec;
use roxmltree::ParsingOptions; use roxmltree::ParsingOptions;
use typst_syntax::Spanned; use typst_syntax::Spanned;
use crate::diag::{format_xml_like_error, At, FileError, SourceResult}; use crate::diag::{format_xml_like_error, SourceDiagnostic, SourceResult};
use crate::engine::Engine; use crate::engine::Engine;
use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value}; use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
use crate::loading::{DataSource, Load, Readable}; use crate::loading::{Data, DataSource, Load, Readable};
/// Reads structured data from an XML file. /// Reads structured data from an XML file.
/// ///
@ -62,13 +62,12 @@ pub fn xml(
source: Spanned<DataSource>, source: Spanned<DataSource>,
) -> SourceResult<Value> { ) -> SourceResult<Value> {
let data = source.load(engine.world)?; let data = source.load(engine.world)?;
let text = data.as_str().map_err(FileError::from).at(source.span)?; let text = data.as_str()?;
let document = roxmltree::Document::parse_with_options( let document = roxmltree::Document::parse_with_options(
text, text,
ParsingOptions { allow_dtd: true, ..Default::default() }, ParsingOptions { allow_dtd: true, ..Default::default() },
) )
.map_err(format_xml_error) .map_err(|err| format_xml_error(&data, err))?;
.at(source.span)?;
Ok(convert_xml(document.root())) Ok(convert_xml(document.root()))
} }
@ -111,6 +110,6 @@ fn convert_xml(node: roxmltree::Node) -> Value {
} }
/// Format the user-facing XML error message. /// Format the user-facing XML error message.
fn format_xml_error(error: roxmltree::Error) -> EcoString { fn format_xml_error(data: &Data, error: roxmltree::Error) -> EcoVec<SourceDiagnostic> {
format_xml_like_error("XML", error) format_xml_like_error("XML", data, error)
} }

View File

@ -1,10 +1,10 @@
use ecow::eco_format; use ecow::{eco_format, EcoVec};
use typst_syntax::Spanned; use typst_syntax::Spanned;
use crate::diag::{At, SourceResult}; use crate::diag::{At, SourceDiagnostic, SourceResult};
use crate::engine::Engine; use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value}; use crate::foundations::{func, scope, Str, Value};
use crate::loading::{DataSource, Load, Readable}; use crate::loading::{Data, DataSource, LineCol, Load, Readable, ReportPos};
/// Reads structured data from a YAML file. /// Reads structured data from a YAML file.
/// ///
@ -45,9 +45,8 @@ pub fn yaml(
source: Spanned<DataSource>, source: Spanned<DataSource>,
) -> SourceResult<Value> { ) -> SourceResult<Value> {
let data = source.load(engine.world)?; let data = source.load(engine.world)?;
serde_yaml::from_slice(data.as_slice()) serde_yaml::from_slice(data.bytes.as_slice())
.map_err(|err| eco_format!("failed to parse YAML ({err})")) .map_err(|err| format_yaml_error(&data, err))
.at(source.span)
} }
#[scope] #[scope]
@ -76,3 +75,18 @@ impl yaml {
.at(span) .at(span)
} }
} }
pub fn format_yaml_error(
data: &Data,
error: serde_yaml::Error,
) -> EcoVec<SourceDiagnostic> {
let pos = error
.location()
.map(|loc| {
let line_col = LineCol::one_based(loc.line(), loc.column());
let range = loc.index()..loc.index();
ReportPos::Full(range, line_col)
})
.unwrap_or_default();
data.err_at(pos, "failed to parse YAML", error)
}

View File

@ -7,7 +7,7 @@ use std::path::Path;
use std::sync::{Arc, LazyLock}; use std::sync::{Arc, LazyLock};
use comemo::{Track, Tracked}; use comemo::{Track, Tracked};
use ecow::{eco_format, eco_vec, EcoString, EcoVec}; use ecow::{eco_format, EcoString, EcoVec};
use hayagriva::archive::ArchivedStyle; use hayagriva::archive::ArchivedStyle;
use hayagriva::io::BibLaTeXError; use hayagriva::io::BibLaTeXError;
use hayagriva::{ use hayagriva::{
@ -16,12 +16,11 @@ use hayagriva::{
}; };
use indexmap::IndexMap; use indexmap::IndexMap;
use smallvec::{smallvec, SmallVec}; use smallvec::{smallvec, SmallVec};
use typst_syntax::{FileId, Span, Spanned}; use typst_syntax::{Span, Spanned};
use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr}; use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr};
use crate::diag::{ use crate::diag::{
bail, error, At, FileError, HintedStrResult, SourceDiagnostic, SourceResult, bail, error, At, HintedStrResult, SourceDiagnostic, SourceResult, StrResult,
StrResult,
}; };
use crate::engine::{Engine, Sink}; use crate::engine::{Engine, Sink};
use crate::foundations::{ use crate::foundations::{
@ -34,7 +33,7 @@ use crate::layout::{
BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem, BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem,
Sides, Sizing, TrackSizings, Sides, Sizing, TrackSizings,
}; };
use crate::loading::{DataSource, Load}; use crate::loading::{format_yaml_error, Data, DataSource, Load, LoadSource, ReportPos};
use crate::model::{ use crate::model::{
CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem, CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem,
Url, Url,
@ -291,46 +290,28 @@ impl LocalName for Packed<BibliographyElem> {
#[derive(Clone, PartialEq, Hash)] #[derive(Clone, PartialEq, Hash)]
pub struct Bibliography(Arc<ManuallyHash<IndexMap<Label, hayagriva::Entry>>>); pub struct Bibliography(Arc<ManuallyHash<IndexMap<Label, hayagriva::Entry>>>);
#[derive(Clone, Copy, Hash)]
enum LibSource {
Path(FileId),
Bytes,
}
impl Bibliography { impl Bibliography {
/// Load a bibliography from data sources. /// Load a bibliography from data sources.
fn load( fn load(
world: Tracked<dyn World + '_>, world: Tracked<dyn World + '_>,
sources: Spanned<OneOrMultiple<DataSource>>, sources: Spanned<OneOrMultiple<DataSource>>,
) -> SourceResult<Derived<OneOrMultiple<DataSource>, Self>> { ) -> SourceResult<Derived<OneOrMultiple<DataSource>, Self>> {
let data = (sources.v.0.iter()) let data = sources.load(world)?;
.map(|source| match source { let bibliography = Self::decode(&data)?;
DataSource::Path(path) => {
let file_id = sources.span.resolve_path(path).at(sources.span)?;
let bytes = world.file(file_id).at(sources.span)?;
Ok((LibSource::Path(file_id), bytes))
}
DataSource::Bytes(bytes) => Ok((LibSource::Bytes, bytes.clone())),
})
.collect::<SourceResult<Vec<_>>>()?;
let bibliography = Self::decode(sources.span, &data)?;
Ok(Derived::new(sources.v, bibliography)) Ok(Derived::new(sources.v, bibliography))
} }
/// Decode a bibliography from loaded data sources. /// Decode a bibliography from loaded data sources.
#[comemo::memoize] #[comemo::memoize]
#[typst_macros::time(name = "load bibliography")] #[typst_macros::time(name = "load bibliography")]
fn decode( fn decode(data: &[Data]) -> SourceResult<Bibliography> {
source_span: Span,
data: &[(LibSource, Bytes)],
) -> SourceResult<Bibliography> {
let mut map = IndexMap::new(); let mut map = IndexMap::new();
// TODO: store spans of entries for duplicate key error messages // TODO: store spans of entries for duplicate key error messages
let mut duplicates = Vec::<EcoString>::new(); let mut duplicates = Vec::<EcoString>::new();
// We might have multiple bib/yaml files // We might have multiple bib/yaml files
for (source, bytes) in data.iter() { for d in data.iter() {
let library = decode_library(source_span, *source, bytes)?; let library = decode_library(d)?;
for entry in library { for entry in library {
match map.entry(Label::new(PicoStr::intern(entry.key()))) { match map.entry(Label::new(PicoStr::intern(entry.key()))) {
indexmap::map::Entry::Vacant(vacant) => { indexmap::map::Entry::Vacant(vacant) => {
@ -344,8 +325,10 @@ impl Bibliography {
} }
if !duplicates.is_empty() { if !duplicates.is_empty() {
// TODO: errors with spans of source files // TODO: errors with spans of source files,
bail!(source_span, "duplicate bibliography keys: {}", duplicates.join(", ")); // requires hayagriva entries to store the range
let span = data.first().unwrap().source.span;
bail!(span, "duplicate bibliography keys: {}", duplicates.join(", "));
} }
Ok(Bibliography(Arc::new(ManuallyHash::new(map, typst_utils::hash128(data))))) Ok(Bibliography(Arc::new(ManuallyHash::new(map, typst_utils::hash128(data)))))
@ -371,14 +354,10 @@ impl Debug for Bibliography {
} }
/// Decode one library from one data source. /// Decode one library from one data source.
fn decode_library( fn decode_library(data: &Data) -> SourceResult<Library> {
source_span: Span, let str = data.as_str()?;
source: LibSource,
data: &Bytes,
) -> SourceResult<Library> {
let data = data.as_str().map_err(FileError::from).at(source_span)?;
if let LibSource::Path(file_id) = source { if let LoadSource::Path(file_id) = data.source.v {
// If we got a path, use the extension to determine whether it is // If we got a path, use the extension to determine whether it is
// YAML or BibLaTeX. // YAML or BibLaTeX.
let ext = file_id let ext = file_id
@ -389,29 +368,25 @@ fn decode_library(
.unwrap_or_default(); .unwrap_or_default();
match ext.to_lowercase().as_str() { match ext.to_lowercase().as_str() {
"yml" | "yaml" => hayagriva::io::from_yaml_str(data).map_err(|err| { "yml" | "yaml" => hayagriva::io::from_yaml_str(str)
let start = err.location().map(|loc| loc.index()).unwrap_or(0); .map_err(|err| format_yaml_error(data, err)),
let span = Span::from_range(file_id, start..start); "bib" => hayagriva::io::from_biblatex_str(str)
eco_vec![error!(span, "failed to parse YAML {err}")] .map_err(|errors| format_biblatex_error(data, errors)),
}),
"bib" => hayagriva::io::from_biblatex_str(data).map_err(|errors| {
eco_vec![format_biblatex_error(source_span, source, data, errors)]
}),
_ => bail!( _ => bail!(
source_span, data.source.span,
"unknown bibliography format (must be .yml/.yaml or .bib)" "unknown bibliography format (must be .yml/.yaml or .bib)"
), ),
} }
} else { } else {
// If we just got bytes, we need to guess. If it can be decoded as // If we just got bytes, we need to guess. If it can be decoded as
// hayagriva YAML, we'll use that. // hayagriva YAML, we'll use that.
let haya_err = match hayagriva::io::from_yaml_str(data) { let haya_err = match hayagriva::io::from_yaml_str(str) {
Ok(library) => return Ok(library), Ok(library) => return Ok(library),
Err(err) => err, Err(err) => err,
}; };
// If it can be decoded as BibLaTeX, we use that instead. // If it can be decoded as BibLaTeX, we use that instead.
let bib_errs = match hayagriva::io::from_biblatex_str(data) { let bib_errs = match hayagriva::io::from_biblatex_str(str) {
// If the file is almost valid yaml, but contains no `@` character // If the file is almost valid yaml, but contains no `@` character
// it will be successfully parsed as an empty BibLaTeX library, // it will be successfully parsed as an empty BibLaTeX library,
// since BibLaTeX does support arbitrary text outside of entries. // since BibLaTeX does support arbitrary text outside of entries.
@ -425,7 +400,7 @@ fn decode_library(
// and emit the more appropriate error. // and emit the more appropriate error.
let mut yaml = 0; let mut yaml = 0;
let mut biblatex = 0; let mut biblatex = 0;
for c in data.chars() { for c in str.chars() {
match c { match c {
':' => yaml += 1, ':' => yaml += 1,
'{' => biblatex += 1, '{' => biblatex += 1,
@ -435,53 +410,30 @@ fn decode_library(
match bib_errs { match bib_errs {
Some(bib_errs) if biblatex >= yaml => { Some(bib_errs) if biblatex >= yaml => {
bail!(format_biblatex_error(source_span, source, data, bib_errs)) Err(format_biblatex_error(data, bib_errs))
}
_ => {
if let Some(loc) = haya_err.location() {
let line = loc.line();
bail!(source_span, "failed to parse YAML ({line}: {haya_err})")
} else {
bail!(source_span, "failed to parse YAML ({haya_err})")
}
} }
_ => Err(format_yaml_error(data, haya_err)),
} }
} }
} }
/// Format a BibLaTeX loading error. /// Format a BibLaTeX loading error.
fn format_biblatex_error( fn format_biblatex_error(
source_span: Span, data: &Data,
source: LibSource,
data: &str,
errors: Vec<BibLaTeXError>, errors: Vec<BibLaTeXError>,
) -> SourceDiagnostic { ) -> EcoVec<SourceDiagnostic> {
// TODO: return multiple errors? // TODO: return multiple errors?
let Some(error) = errors.first() else { let Some(error) = errors.into_iter().next() else {
return match source { // TODO: can this even happen, should we just unwrap?
LibSource::Path(file_id) => { return data.err_at(ReportPos::None, "failed to parse BibLaTeX", "???");
let span = Span::from_range(file_id, 0..0);
error!(span, "failed to parse BibLaTeX file")
}
LibSource::Bytes => error!(source_span, "failed to parse BibLaTeX"),
};
}; };
let (range, msg) = match error { let (range, msg) = match error {
BibLaTeXError::Parse(error) => (&error.span, error.kind.to_string()), BibLaTeXError::Parse(error) => (error.span, error.kind.to_string()),
BibLaTeXError::Type(error) => (&error.span, error.kind.to_string()), BibLaTeXError::Type(error) => (error.span, error.kind.to_string()),
}; };
match source { data.err_at(range, "failed to parse BibLaTeX", msg)
LibSource::Path(file_id) => {
let span = Span::from_range(file_id, range.clone());
error!(span, "failed to parse BibLaTeX file ({msg})")
}
LibSource::Bytes => {
let line = data.get(..range.start).unwrap_or_default().lines().count();
error!(source_span, "failed to parse BibLaTeX ({line}: {msg})")
}
}
} }
/// A loaded CSL style. /// A loaded CSL style.
@ -498,7 +450,7 @@ impl CslStyle {
CslSource::Named(style) => Self::from_archived(*style), CslSource::Named(style) => Self::from_archived(*style),
CslSource::Normal(source) => { CslSource::Normal(source) => {
let data = Spanned::new(source, span).load(world)?; let data = Spanned::new(source, span).load(world)?;
Self::from_data(data).at(span)? Self::from_data(&data)?
} }
}; };
Ok(Derived::new(source, style)) Ok(Derived::new(source, style))
@ -519,8 +471,8 @@ impl CslStyle {
/// Load a CSL style from file contents. /// Load a CSL style from file contents.
#[comemo::memoize] #[comemo::memoize]
pub fn from_data(data: Bytes) -> StrResult<CslStyle> { pub fn from_data(data: &Data) -> SourceResult<CslStyle> {
let text = data.as_str().map_err(FileError::from)?; let text = data.as_str()?;
citationberg::IndependentStyle::from_xml(text) citationberg::IndependentStyle::from_xml(text)
.map(|style| { .map(|style| {
Self(Arc::new(ManuallyHash::new( Self(Arc::new(ManuallyHash::new(
@ -528,7 +480,7 @@ impl CslStyle {
typst_utils::hash128(&(TypeId::of::<Bytes>(), data)), typst_utils::hash128(&(TypeId::of::<Bytes>(), data)),
))) )))
}) })
.map_err(|err| eco_format!("failed to load CSL style ({err})")) .map_err(|err| data.err_at(ReportPos::None, "failed to load CSL style", err))
} }
/// Get the underlying independent style. /// Get the underlying independent style.

View File

@ -3,23 +3,23 @@ use std::ops::Range;
use std::sync::{Arc, LazyLock}; use std::sync::{Arc, LazyLock};
use comemo::Tracked; use comemo::Tracked;
use ecow::{eco_format, EcoString, EcoVec}; use ecow::{EcoString, EcoVec};
use syntect::highlighting as synt; use syntect::highlighting::{self as synt};
use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder}; use syntect::parsing::{ParseSyntaxError, SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
use typst_syntax::{split_newlines, LinkedNode, Span, Spanned}; use typst_syntax::{split_newlines, LinkedNode, Span, Spanned};
use typst_utils::ManuallyHash; use typst_utils::ManuallyHash;
use unicode_segmentation::UnicodeSegmentation; use unicode_segmentation::UnicodeSegmentation;
use super::Lang; use super::Lang;
use crate::diag::{At, FileError, SourceResult, StrResult}; use crate::diag::{SourceDiagnostic, SourceResult};
use crate::engine::Engine; use crate::engine::Engine;
use crate::foundations::{ use crate::foundations::{
cast, elem, scope, Bytes, Content, Derived, NativeElement, OneOrMultiple, Packed, cast, elem, scope, Content, Derived, NativeElement, OneOrMultiple, Packed, PlainText,
PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem,
}; };
use crate::html::{tag, HtmlElem}; use crate::html::{tag, HtmlElem};
use crate::layout::{BlockBody, BlockElem, Em, HAlignment}; use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
use crate::loading::{DataSource, Load}; use crate::loading::{Data, DataSource, LineCol, Load, ReportPos};
use crate::model::{Figurable, ParElem}; use crate::model::{Figurable, ParElem};
use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize}; use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize};
use crate::visualize::Color; use crate::visualize::Color;
@ -540,32 +540,18 @@ impl RawSyntax {
sources: Spanned<OneOrMultiple<DataSource>>, sources: Spanned<OneOrMultiple<DataSource>>,
) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> { ) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> {
let data = sources.load(world)?; let data = sources.load(world)?;
let list = sources let list = data.iter().map(Self::decode).collect::<SourceResult<_>>()?;
.v
.0
.iter()
.zip(&data)
.map(|(source, data)| Self::decode(source, data))
.collect::<StrResult<_>>()
.at(sources.span)?;
Ok(Derived::new(sources.v, list)) Ok(Derived::new(sources.v, list))
} }
/// Decode a syntax from a loaded source. /// Decode a syntax from a loaded source.
#[comemo::memoize] #[comemo::memoize]
#[typst_macros::time(name = "load syntaxes")] #[typst_macros::time(name = "load syntaxes")]
fn decode(source: &DataSource, data: &Bytes) -> StrResult<RawSyntax> { fn decode(data: &Data) -> SourceResult<RawSyntax> {
let src = data.as_str().map_err(FileError::from)?; let str = data.as_str()?;
let syntax = SyntaxDefinition::load_from_str(src, false, None).map_err(
|err| match source { let syntax = SyntaxDefinition::load_from_str(str, false, None)
DataSource::Path(path) => { .map_err(|err| format_syntax_error(data, err))?;
eco_format!("failed to parse syntax file `{path}` ({err})")
}
DataSource::Bytes(_) => {
eco_format!("failed to parse syntax ({err})")
}
},
)?;
let mut builder = SyntaxSetBuilder::new(); let mut builder = SyntaxSetBuilder::new();
builder.add(syntax); builder.add(syntax);
@ -582,6 +568,24 @@ impl RawSyntax {
} }
} }
fn format_syntax_error(data: &Data, error: ParseSyntaxError) -> EcoVec<SourceDiagnostic> {
let pos = syntax_error_pos(&error);
data.err_at(pos, "failed to parse syntax", error)
}
fn syntax_error_pos(error: &ParseSyntaxError) -> ReportPos {
match error {
ParseSyntaxError::InvalidYaml(scan_error) => {
let m = scan_error.marker();
ReportPos::Full(
m.index()..m.index(),
LineCol::one_based(m.line(), m.col() + 1),
)
}
_ => ReportPos::None,
}
}
/// A loaded syntect theme. /// A loaded syntect theme.
#[derive(Debug, Clone, PartialEq, Hash)] #[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawTheme(Arc<ManuallyHash<synt::Theme>>); pub struct RawTheme(Arc<ManuallyHash<synt::Theme>>);
@ -593,16 +597,16 @@ impl RawTheme {
source: Spanned<DataSource>, source: Spanned<DataSource>,
) -> SourceResult<Derived<DataSource, Self>> { ) -> SourceResult<Derived<DataSource, Self>> {
let data = source.load(world)?; let data = source.load(world)?;
let theme = Self::decode(&data).at(source.span)?; let theme = Self::decode(&data)?;
Ok(Derived::new(source.v, theme)) Ok(Derived::new(source.v, theme))
} }
/// Decode a theme from bytes. /// Decode a theme from bytes.
#[comemo::memoize] #[comemo::memoize]
fn decode(data: &Bytes) -> StrResult<RawTheme> { fn decode(data: &Data) -> SourceResult<RawTheme> {
let mut cursor = std::io::Cursor::new(data.as_slice()); let mut cursor = std::io::Cursor::new(data.bytes.as_slice());
let theme = synt::ThemeSet::load_from_reader(&mut cursor) let theme = synt::ThemeSet::load_from_reader(&mut cursor)
.map_err(|err| eco_format!("failed to parse theme ({err})"))?; .map_err(|err| format_theme_error(data, err))?;
Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(data))))) Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(data)))))
} }
@ -612,6 +616,17 @@ impl RawTheme {
} }
} }
fn format_theme_error(
data: &Data,
error: syntect::LoadingError,
) -> EcoVec<SourceDiagnostic> {
let pos = match &error {
syntect::LoadingError::ParseSyntax(err, _) => syntax_error_pos(err),
_ => ReportPos::None,
};
data.err_at(pos, "failed to parse theme", error)
}
/// A highlighted line of raw text. /// A highlighted line of raw text.
/// ///
/// This is a helper element that is synthesized by [`raw`] elements. /// This is a helper element that is synthesized by [`raw`] elements.

View File

@ -66,7 +66,7 @@ pub struct ImageElem {
#[parse( #[parse(
let source = args.expect::<Spanned<DataSource>>("source")?; let source = args.expect::<Spanned<DataSource>>("source")?;
let data = source.load(engine.world)?; let data = source.load(engine.world)?;
Derived::new(source.v, data) Derived::new(source.v, data.bytes)
)] )]
pub source: Derived<DataSource, Bytes>, pub source: Derived<DataSource, Bytes>,
@ -155,7 +155,7 @@ pub struct ImageElem {
#[parse(match args.named::<Spanned<Smart<DataSource>>>("icc")? { #[parse(match args.named::<Spanned<Smart<DataSource>>>("icc")? {
Some(Spanned { v: Smart::Custom(source), span }) => Some(Smart::Custom({ Some(Spanned { v: Smart::Custom(source), span }) => Some(Smart::Custom({
let data = Spanned::new(&source, span).load(engine.world)?; let data = Spanned::new(&source, span).load(engine.world)?;
Derived::new(source, data) Derived::new(source, data.bytes)
})), })),
Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto), Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto),
None => None, None => None,

View File

@ -9,6 +9,7 @@ use siphasher::sip128::{Hasher128, SipHasher13};
use crate::diag::{format_xml_like_error, StrResult}; use crate::diag::{format_xml_like_error, StrResult};
use crate::foundations::Bytes; use crate::foundations::Bytes;
use crate::layout::Axes; use crate::layout::Axes;
use crate::loading::Data;
use crate::text::{ use crate::text::{
Font, FontBook, FontFlags, FontStretch, FontStyle, FontVariant, FontWeight, Font, FontBook, FontFlags, FontStretch, FontStyle, FontVariant, FontWeight,
}; };
@ -133,7 +134,12 @@ fn format_usvg_error(error: usvg::Error) -> EcoString {
usvg::Error::InvalidSize => { usvg::Error::InvalidSize => {
"failed to parse SVG (width, height, or viewbox is invalid)".into() "failed to parse SVG (width, height, or viewbox is invalid)".into()
} }
usvg::Error::ParsingFailed(error) => format_xml_like_error("SVG", error), usvg::Error::ParsingFailed(error) => {
format_xml_like_error("SVG", &Data::dummy(), error)
.pop()
.unwrap()
.message
}
} }
} }