feat: show external error messages in csv, json, toml, xml, yaml, and syntax/theme files

Tobias Schmitz 2025-05-16 12:12:09 +02:00
parent 9e820f883e
commit 7e6c3b4159
No known key found for this signature in database
14 changed files with 419 additions and 218 deletions

View File

@ -12,6 +12,7 @@ use typst_syntax::package::{PackageSpec, PackageVersion};
use typst_syntax::{Span, Spanned, SyntaxError};
use crate::engine::Engine;
use crate::loading::{Data, LineCol};
use crate::{World, WorldExt};
/// Early-return with a [`StrResult`] or [`SourceResult`].
@ -569,30 +570,28 @@ impl From<PackageError> for EcoString {
}
/// Format a user-facing error message for an XML-like file format.
pub fn format_xml_like_error(format: &str, error: roxmltree::Error) -> EcoString {
match error {
roxmltree::Error::UnexpectedCloseTag(expected, actual, pos) => {
eco_format!(
"failed to parse {format} (found closing tag '{actual}' \
instead of '{expected}' in line {})",
pos.row
)
pub fn format_xml_like_error(
format: &str,
data: &Data,
error: roxmltree::Error,
) -> EcoVec<SourceDiagnostic> {
let pos = LineCol::one_based(error.pos().row as usize, error.pos().col as usize);
let msg = format!("failed to parse {format}");
let err = match error {
roxmltree::Error::UnexpectedCloseTag(expected, actual, _) => {
format!("found closing tag '{actual}' instead of '{expected}'")
}
roxmltree::Error::UnknownEntityReference(entity, pos) => {
eco_format!(
"failed to parse {format} (unknown entity '{entity}' in line {})",
pos.row
)
roxmltree::Error::UnknownEntityReference(entity, _) => {
format!("unknown entity '{entity}'")
}
roxmltree::Error::DuplicatedAttribute(attr, pos) => {
eco_format!(
"failed to parse {format} (duplicate attribute '{attr}' in line {})",
pos.row
)
roxmltree::Error::DuplicatedAttribute(attr, _) => {
format!("duplicate attribute '{attr}'")
}
roxmltree::Error::NoRootNode => {
eco_format!("failed to parse {format} (missing root node)")
format!("missing root node")
}
err => eco_format!("failed to parse {format} ({err})"),
}
err => err.to_string(),
};
data.err_at(pos, msg, err)
}

View File

@ -152,7 +152,7 @@ pub fn plugin(
source: Spanned<DataSource>,
) -> SourceResult<Module> {
let data = source.load(engine.world)?;
Plugin::module(data).at(source.span)
Plugin::module(data.bytes).at(source.span)
}
#[scope]

View File

@ -24,7 +24,7 @@ pub fn cbor(
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let data = source.load(engine.world)?;
ciborium::from_reader(data.as_slice())
ciborium::from_reader(data.bytes.as_slice())
.map_err(|err| eco_format!("failed to parse CBOR ({err})"))
.at(source.span)
}

View File

@ -1,10 +1,10 @@
use ecow::{eco_format, EcoString};
use ecow::EcoVec;
use typst_syntax::Spanned;
use crate::diag::{bail, At, SourceResult};
use crate::diag::{bail, SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
use crate::loading::{DataSource, Load, Readable};
use crate::loading::{Data, DataSource, LineCol, Load, Readable, ReportPos};
/// Reads structured data from a CSV file.
///
@ -53,7 +53,7 @@ pub fn csv(
// Counting lines from 1 by default.
let mut line_offset: usize = 1;
let mut reader = builder.from_reader(data.as_slice());
let mut reader = builder.from_reader(data.bytes.as_slice());
let mut headers: Option<::csv::StringRecord> = None;
if has_headers {
@ -62,9 +62,8 @@ pub fn csv(
headers = Some(
reader
.headers()
.map_err(|err| format_csv_error(err, 1))
.at(source.span)?
.clone(),
.cloned()
.map_err(|err| format_csv_error(&data, err, 1))?,
);
}
@ -74,7 +73,7 @@ pub fn csv(
// incorrect with `has_headers` set to `false`. See issue:
// https://github.com/BurntSushi/rust-csv/issues/184
let line = line + line_offset;
let row = result.map_err(|err| format_csv_error(err, line)).at(source.span)?;
let row = result.map_err(|err| format_csv_error(&data, err, line))?;
let item = if let Some(headers) = &headers {
let mut dict = Dict::new();
for (field, value) in headers.iter().zip(&row) {
@ -164,15 +163,25 @@ cast! {
}
/// Format the user-facing CSV error message.
fn format_csv_error(err: ::csv::Error, line: usize) -> EcoString {
fn format_csv_error(
data: &Data,
err: ::csv::Error,
line: usize,
) -> EcoVec<SourceDiagnostic> {
let msg = "failed to parse CSV";
let pos = (err.kind().position())
.map(|pos| {
let start = pos.byte() as usize;
ReportPos::Range(start..start)
})
.unwrap_or(LineCol::one_based(line, 1).into());
match err.kind() {
::csv::ErrorKind::Utf8 { .. } => "file is not valid utf-8".into(),
::csv::ErrorKind::Utf8 { .. } => data.err_at(pos, msg, "file is not valid utf-8"),
::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
eco_format!(
"failed to parse CSV (found {len} instead of \
{expected_len} fields in line {line})"
)
let err =
format!("found {len} instead of {expected_len} fields in line {line}");
data.err_at(pos, msg, err)
}
_ => eco_format!("failed to parse CSV ({err})"),
_ => data.err_at(pos, "failed to parse CSV", err),
}
}

View File

@ -4,7 +4,7 @@ use typst_syntax::Spanned;
use crate::diag::{At, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
use crate::loading::{DataSource, Load, Readable};
use crate::loading::{DataSource, LineCol, Load, Readable};
/// Reads structured data from a JSON file.
///
@ -55,9 +55,10 @@ pub fn json(
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let data = source.load(engine.world)?;
serde_json::from_slice(data.as_slice())
.map_err(|err| eco_format!("failed to parse JSON ({err})"))
.at(source.span)
serde_json::from_slice(data.bytes.as_slice()).map_err(|err| {
let pos = LineCol::one_based(err.line(), err.column());
data.err_at(pos, "failed to parse JSON", err)
})
}
#[scope]

View File

@ -16,8 +16,8 @@ mod xml_;
mod yaml_;
use comemo::Tracked;
use ecow::EcoString;
use typst_syntax::Spanned;
use ecow::{eco_vec, EcoString, EcoVec};
use typst_syntax::{FileId, Span, Spanned};
pub use self::cbor_::*;
pub use self::csv_::*;
@ -27,7 +27,7 @@ pub use self::toml_::*;
pub use self::xml_::*;
pub use self::yaml_::*;
use crate::diag::{At, SourceResult};
use crate::diag::{error, At, FileError, SourceDiagnostic, SourceResult};
use crate::foundations::OneOrMultiple;
use crate::foundations::{cast, Bytes, Scope, Str};
use crate::World;
@ -74,39 +74,44 @@ pub trait Load {
}
impl Load for Spanned<DataSource> {
type Output = Bytes;
type Output = Data;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Bytes> {
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output> {
self.as_ref().load(world)
}
}
impl Load for Spanned<&DataSource> {
type Output = Bytes;
type Output = Data;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Bytes> {
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output> {
match &self.v {
DataSource::Path(path) => {
let file_id = self.span.resolve_path(path).at(self.span)?;
world.file(file_id).at(self.span)
let bytes = world.file(file_id).at(self.span)?;
let source = Spanned::new(LoadSource::Path(file_id), self.span);
Ok(Data::new(source, bytes))
}
DataSource::Bytes(bytes) => {
let source = Spanned::new(LoadSource::Bytes, self.span);
Ok(Data::new(source, bytes.clone()))
}
DataSource::Bytes(bytes) => Ok(bytes.clone()),
}
}
}
impl Load for Spanned<OneOrMultiple<DataSource>> {
type Output = Vec<Bytes>;
type Output = Vec<Data>;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Vec<Bytes>> {
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output> {
self.as_ref().load(world)
}
}
impl Load for Spanned<&OneOrMultiple<DataSource>> {
type Output = Vec<Bytes>;
type Output = Vec<Data>;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Vec<Bytes>> {
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output> {
self.v
.0
.iter()
@ -115,6 +120,222 @@ impl Load for Spanned<&OneOrMultiple<DataSource>> {
}
}
/// Data loaded from a [`DataSource`].
#[derive(Clone, Hash)]
pub struct Data {
pub source: Spanned<LoadSource>,
pub bytes: Bytes,
}
impl Data {
pub fn dummy() -> Self {
Data::new(
typst_syntax::Spanned::new(LoadSource::Bytes, Span::detached()),
Bytes::new([]),
)
}
pub fn new(source: Spanned<LoadSource>, bytes: Bytes) -> Self {
Self { source, bytes }
}
pub fn as_str(&self) -> SourceResult<&str> {
self.bytes.as_str().map_err(|err| {
// TODO: should the error even be reported in the file if it's possibly binary?
let start = err.valid_up_to();
let end = start + err.error_len().unwrap_or(0);
self.err_at(start..end, "failed to convert to string", FileError::from(err))
})
}
/// Report an error, possibly in an external file.
pub fn err_at(
&self,
pos: impl Into<ReportPos>,
msg: impl std::fmt::Display,
error: impl std::fmt::Display,
) -> EcoVec<SourceDiagnostic> {
let pos = pos.into();
let error = match self.source.v {
LoadSource::Path(file_id) => {
if let Some(range) = pos.range(self.bytes.as_slice()) {
let span = Span::from_range(file_id, range);
return eco_vec!(error!(span, "{msg} ({error})"));
}
// Either there was no range provided, or resolving the range
// from the line/column failed. If present, report the possibly
// wrong line/column anyway.
let span = Span::from_range(file_id, 0..self.bytes.len());
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
let (line, col) = pair.numbers();
error!(span, "{msg} ({error} at {line}:{col})")
} else {
error!(span, "{msg} ({error})")
}
}
LoadSource::Bytes => {
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
let (line, col) = pair.numbers();
error!(self.source.span, "{msg} ({error} at {line}:{col})")
} else {
error!(self.source.span, "{msg} ({error})")
}
}
};
eco_vec![error]
}
}
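// Illustrative usage (an assumed example, mirroring the format modules
// further below): with a `Data` loaded from a path, this yields a
// diagnostic whose span points into that file at line 3, column 7 (when the
// position resolves); with in-memory bytes it falls back to the span of the
// load call and appends "at 3:7" to the message.
//
// let diags = data.err_at(
//     LineCol::one_based(3, 7),
//     "failed to parse CSV",
//     "found 2 instead of 3 fields",
// );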
#[derive(Debug, Default)]
pub enum ReportPos {
/// Contains the range, and the 0-based line/column.
Full(std::ops::Range<usize>, LineCol),
/// Contains the range.
Range(std::ops::Range<usize>),
/// Contains the 0-based line/column.
LineCol(LineCol),
#[default]
None,
}
impl From<std::ops::Range<usize>> for ReportPos {
fn from(value: std::ops::Range<usize>) -> Self {
Self::Range(value)
}
}
impl From<LineCol> for ReportPos {
fn from(value: LineCol) -> Self {
Self::LineCol(value)
}
}
impl ReportPos {
fn range(&self, bytes: &[u8]) -> Option<std::ops::Range<usize>> {
match self {
ReportPos::Full(range, _) => Some(range.clone()),
ReportPos::Range(range) => Some(range.clone()),
&ReportPos::LineCol(pair) => pair.byte_pos(bytes).map(|i| i..i),
ReportPos::None => None,
}
}
fn line_col(&self, bytes: &[u8]) -> Option<LineCol> {
match self {
&ReportPos::Full(_, pair) => Some(pair),
ReportPos::Range(range) => LineCol::from_byte_pos(range.start, bytes),
&ReportPos::LineCol(pair) => Some(pair),
ReportPos::None => None,
}
}
}
#[derive(Clone, Copy, Debug)]
pub struct LineCol {
/// The 0-based line.
line: usize,
/// The 0-based column.
col: usize,
}
impl LineCol {
/// Constructs the line/column pair from 0-based indices.
pub fn zero_based(line: usize, col: usize) -> Self {
Self { line, col }
}
/// Constructs the line/column pair from 1-based numbers.
pub fn one_based(line: usize, col: usize) -> Self {
Self {
line: line.saturating_sub(1),
col: col.saturating_sub(1),
}
}
// TODO: this function should only return None if the position is out of
// bounds, not if there is invalid UTF-8
pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
let bytes = &bytes[..pos];
let mut line = 0;
let line_start = memchr::memchr_iter(b'\n', bytes)
.inspect(|_| line += 1)
.last()
.map(|i| i + 1)
.unwrap_or(bytes.len());
// TODO: streaming UTF-8 decoding that ignores invalid characters;
// might need to update error reporting too (use utf8_iter)
let str = std::str::from_utf8(&bytes[line_start..]).ok()?;
let col = str.chars().count();
Some(LineCol::zero_based(line, col))
}
pub fn byte_pos(&self, bytes: &[u8]) -> Option<usize> {
let line_offset = if let Some(idx) = self.line.checked_sub(1) {
memchr::memchr_iter(b'\n', bytes).nth(idx).map(|i| i + 1)?
} else {
0
};
let col_offset = col_offset(line_offset, self.col, bytes)?;
let pos = line_offset + col_offset;
Some(pos)
}
pub fn byte_range(
range: std::ops::Range<Self>,
bytes: &[u8],
) -> Option<std::ops::Range<usize>> {
let mut line_iter = memchr::memchr_iter(b'\n', bytes);
let start_line_offset = if let Some(idx) = range.start.line.checked_sub(1) {
line_iter.nth(idx).map(|i| i + 1)?
} else {
0
};
let line_delta = range.end.line - range.start.line;
let end_line_offset = if let Some(idx) = line_delta.checked_sub(1) {
line_iter.nth(idx).map(|i| i + 1)?
} else {
start_line_offset
};
let start_col_offset = col_offset(start_line_offset, range.start.col, bytes)?;
let end_col_offset = col_offset(end_line_offset, range.end.col, bytes)?;
let start = start_line_offset + start_col_offset;
let end = end_line_offset + end_col_offset;
Some(start..end)
}
pub fn numbers(&self) -> (usize, usize) {
(self.line + 1, self.col + 1)
}
}
// TODO: this function should only return None if the position is out of
// bounds, not if there is invalid UTF-8
fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option<usize> {
let line = &bytes[line_offset..];
// TODO: streaming UTF-8 decoding that ignores invalid characters;
// might need to update error reporting too (use utf8_iter)
// validate the whole line so it can be displayed
let len = memchr::memchr(b'\n', line).unwrap_or(line.len());
let str = std::str::from_utf8(&line[..len]).ok()?;
if let Some(idx) = col.checked_sub(1) {
str.char_indices().nth(idx).map(|(i, c)| i + c.len_utf8())
} else {
Some(0)
}
}
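// A small worked example (assumed, for illustration) of the conversions
// above: on b"ab\ncdef", line 2 column 3 (1-based) is the character 'e' at
// byte offset 5, and the two directions round-trip through that offset.
//
// let bytes: &[u8] = b"ab\ncdef";
// let pos = LineCol::one_based(2, 3);
// assert_eq!(pos.byte_pos(bytes), Some(5));
// let back = LineCol::from_byte_pos(5, bytes).unwrap();
// assert_eq!(back.numbers(), (2, 3));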
/// A loaded [`DataSource`].
#[derive(Clone, Copy, Hash)]
pub enum LoadSource {
Path(FileId),
Bytes,
}
/// A value that can be read from a file.
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum Readable {

View File

@ -1,11 +1,10 @@
use ecow::EcoString;
use typst_syntax::Spanned;
use crate::diag::{At, FileError, SourceResult};
use crate::diag::SourceResult;
use crate::engine::Engine;
use crate::foundations::{func, Cast};
use crate::loading::Readable;
use crate::World;
use crate::loading::{DataSource, Load, Readable};
/// Reads plain text or data from a file.
///
@ -36,14 +35,10 @@ pub fn read(
#[default(Some(Encoding::Utf8))]
encoding: Option<Encoding>,
) -> SourceResult<Readable> {
let Spanned { v: path, span } = path;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
let data = path.map(DataSource::Path).load(engine.world)?;
Ok(match encoding {
None => Readable::Bytes(data),
Some(Encoding::Utf8) => {
Readable::Str(data.to_str().map_err(FileError::from).at(span)?)
}
None => Readable::Bytes(data.bytes),
Some(Encoding::Utf8) => Readable::Str(data.as_str()?.into()),
})
}

View File

@ -1,10 +1,10 @@
use ecow::{eco_format, EcoString};
use typst_syntax::{is_newline, Spanned};
use ecow::{eco_format, EcoVec};
use typst_syntax::Spanned;
use crate::diag::{At, FileError, SourceResult};
use crate::diag::{At, SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
use crate::loading::{DataSource, Load, Readable};
use crate::loading::{Data, DataSource, Load, Readable, ReportPos};
/// Reads structured data from a TOML file.
///
@ -33,10 +33,8 @@ pub fn toml(
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let data = source.load(engine.world)?;
let raw = data.as_str().map_err(FileError::from).at(source.span)?;
::toml::from_str(raw)
.map_err(|err| format_toml_error(err, raw))
.at(source.span)
let raw = data.as_str()?;
::toml::from_str(raw).map_err(|err| format_toml_error(&data, err))
}
#[scope]
@ -71,15 +69,7 @@ impl toml {
}
/// Format the user-facing TOML error message.
fn format_toml_error(error: ::toml::de::Error, raw: &str) -> EcoString {
if let Some(head) = error.span().and_then(|range| raw.get(..range.start)) {
let line = head.lines().count();
let column = 1 + head.chars().rev().take_while(|&c| !is_newline(c)).count();
eco_format!(
"failed to parse TOML ({} at line {line} column {column})",
error.message(),
)
} else {
eco_format!("failed to parse TOML ({})", error.message())
}
fn format_toml_error(data: &Data, error: ::toml::de::Error) -> EcoVec<SourceDiagnostic> {
let pos = error.span().map(ReportPos::Range).unwrap_or_default();
data.err_at(pos, "failed to parse TOML", error.message())
}

View File

@ -1,11 +1,11 @@
use ecow::EcoString;
use ecow::EcoVec;
use roxmltree::ParsingOptions;
use typst_syntax::Spanned;
use crate::diag::{format_xml_like_error, At, FileError, SourceResult};
use crate::diag::{format_xml_like_error, SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
use crate::loading::{DataSource, Load, Readable};
use crate::loading::{Data, DataSource, Load, Readable};
/// Reads structured data from an XML file.
///
@ -62,13 +62,12 @@ pub fn xml(
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let data = source.load(engine.world)?;
let text = data.as_str().map_err(FileError::from).at(source.span)?;
let text = data.as_str()?;
let document = roxmltree::Document::parse_with_options(
text,
ParsingOptions { allow_dtd: true, ..Default::default() },
)
.map_err(format_xml_error)
.at(source.span)?;
.map_err(|err| format_xml_error(&data, err))?;
Ok(convert_xml(document.root()))
}
@ -111,6 +110,6 @@ fn convert_xml(node: roxmltree::Node) -> Value {
}
/// Format the user-facing XML error message.
fn format_xml_error(error: roxmltree::Error) -> EcoString {
format_xml_like_error("XML", error)
fn format_xml_error(data: &Data, error: roxmltree::Error) -> EcoVec<SourceDiagnostic> {
format_xml_like_error("XML", data, error)
}

View File

@ -1,10 +1,10 @@
use ecow::eco_format;
use ecow::{eco_format, EcoVec};
use typst_syntax::Spanned;
use crate::diag::{At, SourceResult};
use crate::diag::{At, SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
use crate::loading::{DataSource, Load, Readable};
use crate::loading::{Data, DataSource, LineCol, Load, Readable, ReportPos};
/// Reads structured data from a YAML file.
///
@ -45,9 +45,8 @@ pub fn yaml(
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let data = source.load(engine.world)?;
serde_yaml::from_slice(data.as_slice())
.map_err(|err| eco_format!("failed to parse YAML ({err})"))
.at(source.span)
serde_yaml::from_slice(data.bytes.as_slice())
.map_err(|err| format_yaml_error(&data, err))
}
#[scope]
@ -76,3 +75,18 @@ impl yaml {
.at(span)
}
}
pub fn format_yaml_error(
data: &Data,
error: serde_yaml::Error,
) -> EcoVec<SourceDiagnostic> {
let pos = error
.location()
.map(|loc| {
let line_col = LineCol::one_based(loc.line(), loc.column());
let range = loc.index()..loc.index();
ReportPos::Full(range, line_col)
})
.unwrap_or_default();
data.err_at(pos, "failed to parse YAML", error)
}

View File

@ -7,7 +7,7 @@ use std::path::Path;
use std::sync::{Arc, LazyLock};
use comemo::{Track, Tracked};
use ecow::{eco_format, eco_vec, EcoString, EcoVec};
use ecow::{eco_format, EcoString, EcoVec};
use hayagriva::archive::ArchivedStyle;
use hayagriva::io::BibLaTeXError;
use hayagriva::{
@ -16,12 +16,11 @@ use hayagriva::{
};
use indexmap::IndexMap;
use smallvec::{smallvec, SmallVec};
use typst_syntax::{FileId, Span, Spanned};
use typst_syntax::{Span, Spanned};
use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr};
use crate::diag::{
bail, error, At, FileError, HintedStrResult, SourceDiagnostic, SourceResult,
StrResult,
bail, error, At, HintedStrResult, SourceDiagnostic, SourceResult, StrResult,
};
use crate::engine::{Engine, Sink};
use crate::foundations::{
@ -34,7 +33,7 @@ use crate::layout::{
BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem,
Sides, Sizing, TrackSizings,
};
use crate::loading::{DataSource, Load};
use crate::loading::{format_yaml_error, Data, DataSource, Load, LoadSource, ReportPos};
use crate::model::{
CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem,
Url,
@ -291,46 +290,28 @@ impl LocalName for Packed<BibliographyElem> {
#[derive(Clone, PartialEq, Hash)]
pub struct Bibliography(Arc<ManuallyHash<IndexMap<Label, hayagriva::Entry>>>);
#[derive(Clone, Copy, Hash)]
enum LibSource {
Path(FileId),
Bytes,
}
impl Bibliography {
/// Load a bibliography from data sources.
fn load(
world: Tracked<dyn World + '_>,
sources: Spanned<OneOrMultiple<DataSource>>,
) -> SourceResult<Derived<OneOrMultiple<DataSource>, Self>> {
let data = (sources.v.0.iter())
.map(|source| match source {
DataSource::Path(path) => {
let file_id = sources.span.resolve_path(path).at(sources.span)?;
let bytes = world.file(file_id).at(sources.span)?;
Ok((LibSource::Path(file_id), bytes))
}
DataSource::Bytes(bytes) => Ok((LibSource::Bytes, bytes.clone())),
})
.collect::<SourceResult<Vec<_>>>()?;
let bibliography = Self::decode(sources.span, &data)?;
let data = sources.load(world)?;
let bibliography = Self::decode(&data)?;
Ok(Derived::new(sources.v, bibliography))
}
/// Decode a bibliography from loaded data sources.
#[comemo::memoize]
#[typst_macros::time(name = "load bibliography")]
fn decode(
source_span: Span,
data: &[(LibSource, Bytes)],
) -> SourceResult<Bibliography> {
fn decode(data: &[Data]) -> SourceResult<Bibliography> {
let mut map = IndexMap::new();
// TODO: store spans of entries for duplicate key error messages
let mut duplicates = Vec::<EcoString>::new();
// We might have multiple bib/yaml files
for (source, bytes) in data.iter() {
let library = decode_library(source_span, *source, bytes)?;
for d in data.iter() {
let library = decode_library(d)?;
for entry in library {
match map.entry(Label::new(PicoStr::intern(entry.key()))) {
indexmap::map::Entry::Vacant(vacant) => {
@ -344,8 +325,10 @@ impl Bibliography {
}
if !duplicates.is_empty() {
// TODO: errors with spans of source files
bail!(source_span, "duplicate bibliography keys: {}", duplicates.join(", "));
// TODO: errors with spans of source files,
// requires hayagriva entries to store the range
let span = data.first().unwrap().source.span;
bail!(span, "duplicate bibliography keys: {}", duplicates.join(", "));
}
Ok(Bibliography(Arc::new(ManuallyHash::new(map, typst_utils::hash128(data)))))
@ -371,14 +354,10 @@ impl Debug for Bibliography {
}
/// Decode one library from one data source.
fn decode_library(
source_span: Span,
source: LibSource,
data: &Bytes,
) -> SourceResult<Library> {
let data = data.as_str().map_err(FileError::from).at(source_span)?;
fn decode_library(data: &Data) -> SourceResult<Library> {
let str = data.as_str()?;
if let LibSource::Path(file_id) = source {
if let LoadSource::Path(file_id) = data.source.v {
// If we got a path, use the extension to determine whether it is
// YAML or BibLaTeX.
let ext = file_id
@ -389,29 +368,25 @@ fn decode_library(
.unwrap_or_default();
match ext.to_lowercase().as_str() {
"yml" | "yaml" => hayagriva::io::from_yaml_str(data).map_err(|err| {
let start = err.location().map(|loc| loc.index()).unwrap_or(0);
let span = Span::from_range(file_id, start..start);
eco_vec![error!(span, "failed to parse YAML {err}")]
}),
"bib" => hayagriva::io::from_biblatex_str(data).map_err(|errors| {
eco_vec![format_biblatex_error(source_span, source, data, errors)]
}),
"yml" | "yaml" => hayagriva::io::from_yaml_str(str)
.map_err(|err| format_yaml_error(data, err)),
"bib" => hayagriva::io::from_biblatex_str(str)
.map_err(|errors| format_biblatex_error(data, errors)),
_ => bail!(
source_span,
data.source.span,
"unknown bibliography format (must be .yml/.yaml or .bib)"
),
}
} else {
// If we just got bytes, we need to guess. If it can be decoded as
// hayagriva YAML, we'll use that.
let haya_err = match hayagriva::io::from_yaml_str(data) {
let haya_err = match hayagriva::io::from_yaml_str(str) {
Ok(library) => return Ok(library),
Err(err) => err,
};
// If it can be decoded as BibLaTeX, we use that instead.
let bib_errs = match hayagriva::io::from_biblatex_str(data) {
let bib_errs = match hayagriva::io::from_biblatex_str(str) {
// If the file is almost valid YAML but contains no `@` character,
// it will be successfully parsed as an empty BibLaTeX library,
// since BibLaTeX does support arbitrary text outside of entries.
@ -425,7 +400,7 @@ fn decode_library(
// and emit the more appropriate error.
let mut yaml = 0;
let mut biblatex = 0;
for c in data.chars() {
for c in str.chars() {
match c {
':' => yaml += 1,
'{' => biblatex += 1,
@ -435,53 +410,30 @@ fn decode_library(
match bib_errs {
Some(bib_errs) if biblatex >= yaml => {
bail!(format_biblatex_error(source_span, source, data, bib_errs))
}
_ => {
if let Some(loc) = haya_err.location() {
let line = loc.line();
bail!(source_span, "failed to parse YAML ({line}: {haya_err})")
} else {
bail!(source_span, "failed to parse YAML ({haya_err})")
}
Err(format_biblatex_error(data, bib_errs))
}
_ => Err(format_yaml_error(data, haya_err)),
}
}
}
/// Format a BibLaTeX loading error.
fn format_biblatex_error(
source_span: Span,
source: LibSource,
data: &str,
data: &Data,
errors: Vec<BibLaTeXError>,
) -> SourceDiagnostic {
) -> EcoVec<SourceDiagnostic> {
// TODO: return multiple errors?
let Some(error) = errors.first() else {
return match source {
LibSource::Path(file_id) => {
let span = Span::from_range(file_id, 0..0);
error!(span, "failed to parse BibLaTeX file")
}
LibSource::Bytes => error!(source_span, "failed to parse BibLaTeX"),
};
let Some(error) = errors.into_iter().next() else {
// TODO: can this even happen? Should we just unwrap?
return data.err_at(ReportPos::None, "failed to parse BibLaTeX", "???");
};
let (range, msg) = match error {
BibLaTeXError::Parse(error) => (&error.span, error.kind.to_string()),
BibLaTeXError::Type(error) => (&error.span, error.kind.to_string()),
BibLaTeXError::Parse(error) => (error.span, error.kind.to_string()),
BibLaTeXError::Type(error) => (error.span, error.kind.to_string()),
};
match source {
LibSource::Path(file_id) => {
let span = Span::from_range(file_id, range.clone());
error!(span, "failed to parse BibLaTeX file ({msg})")
}
LibSource::Bytes => {
let line = data.get(..range.start).unwrap_or_default().lines().count();
error!(source_span, "failed to parse BibLaTeX ({line}: {msg})")
}
}
data.err_at(range, "failed to parse BibLaTeX", msg)
}
/// A loaded CSL style.
@ -498,7 +450,7 @@ impl CslStyle {
CslSource::Named(style) => Self::from_archived(*style),
CslSource::Normal(source) => {
let data = Spanned::new(source, span).load(world)?;
Self::from_data(data).at(span)?
Self::from_data(&data)?
}
};
Ok(Derived::new(source, style))
@ -519,8 +471,8 @@ impl CslStyle {
/// Load a CSL style from file contents.
#[comemo::memoize]
pub fn from_data(data: Bytes) -> StrResult<CslStyle> {
let text = data.as_str().map_err(FileError::from)?;
pub fn from_data(data: &Data) -> SourceResult<CslStyle> {
let text = data.as_str()?;
citationberg::IndependentStyle::from_xml(text)
.map(|style| {
Self(Arc::new(ManuallyHash::new(
@ -528,7 +480,7 @@ impl CslStyle {
typst_utils::hash128(&(TypeId::of::<Bytes>(), data)),
)))
})
.map_err(|err| eco_format!("failed to load CSL style ({err})"))
.map_err(|err| data.err_at(ReportPos::None, "failed to load CSL style", err))
}
/// Get the underlying independent style.

View File

@ -3,23 +3,23 @@ use std::ops::Range;
use std::sync::{Arc, LazyLock};
use comemo::Tracked;
use ecow::{eco_format, EcoString, EcoVec};
use syntect::highlighting as synt;
use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
use ecow::{EcoString, EcoVec};
use syntect::highlighting::{self as synt};
use syntect::parsing::{ParseSyntaxError, SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
use typst_syntax::{split_newlines, LinkedNode, Span, Spanned};
use typst_utils::ManuallyHash;
use unicode_segmentation::UnicodeSegmentation;
use super::Lang;
use crate::diag::{At, FileError, SourceResult, StrResult};
use crate::diag::{SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{
cast, elem, scope, Bytes, Content, Derived, NativeElement, OneOrMultiple, Packed,
PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem,
cast, elem, scope, Content, Derived, NativeElement, OneOrMultiple, Packed, PlainText,
Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem,
};
use crate::html::{tag, HtmlElem};
use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
use crate::loading::{DataSource, Load};
use crate::loading::{Data, DataSource, LineCol, Load, ReportPos};
use crate::model::{Figurable, ParElem};
use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize};
use crate::visualize::Color;
@ -540,32 +540,18 @@ impl RawSyntax {
sources: Spanned<OneOrMultiple<DataSource>>,
) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> {
let data = sources.load(world)?;
let list = sources
.v
.0
.iter()
.zip(&data)
.map(|(source, data)| Self::decode(source, data))
.collect::<StrResult<_>>()
.at(sources.span)?;
let list = data.iter().map(Self::decode).collect::<SourceResult<_>>()?;
Ok(Derived::new(sources.v, list))
}
/// Decode a syntax from a loaded source.
#[comemo::memoize]
#[typst_macros::time(name = "load syntaxes")]
fn decode(source: &DataSource, data: &Bytes) -> StrResult<RawSyntax> {
let src = data.as_str().map_err(FileError::from)?;
let syntax = SyntaxDefinition::load_from_str(src, false, None).map_err(
|err| match source {
DataSource::Path(path) => {
eco_format!("failed to parse syntax file `{path}` ({err})")
}
DataSource::Bytes(_) => {
eco_format!("failed to parse syntax ({err})")
}
},
)?;
fn decode(data: &Data) -> SourceResult<RawSyntax> {
let str = data.as_str()?;
let syntax = SyntaxDefinition::load_from_str(str, false, None)
.map_err(|err| format_syntax_error(data, err))?;
let mut builder = SyntaxSetBuilder::new();
builder.add(syntax);
@ -582,6 +568,24 @@ impl RawSyntax {
}
}
fn format_syntax_error(data: &Data, error: ParseSyntaxError) -> EcoVec<SourceDiagnostic> {
let pos = syntax_error_pos(&error);
data.err_at(pos, "failed to parse syntax", error)
}
fn syntax_error_pos(error: &ParseSyntaxError) -> ReportPos {
match error {
ParseSyntaxError::InvalidYaml(scan_error) => {
let m = scan_error.marker();
ReportPos::Full(
m.index()..m.index(),
LineCol::one_based(m.line(), m.col() + 1),
)
}
_ => ReportPos::None,
}
}
/// A loaded syntect theme.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawTheme(Arc<ManuallyHash<synt::Theme>>);
@ -593,16 +597,16 @@ impl RawTheme {
source: Spanned<DataSource>,
) -> SourceResult<Derived<DataSource, Self>> {
let data = source.load(world)?;
let theme = Self::decode(&data).at(source.span)?;
let theme = Self::decode(&data)?;
Ok(Derived::new(source.v, theme))
}
/// Decode a theme from bytes.
#[comemo::memoize]
fn decode(data: &Bytes) -> StrResult<RawTheme> {
let mut cursor = std::io::Cursor::new(data.as_slice());
fn decode(data: &Data) -> SourceResult<RawTheme> {
let mut cursor = std::io::Cursor::new(data.bytes.as_slice());
let theme = synt::ThemeSet::load_from_reader(&mut cursor)
.map_err(|err| eco_format!("failed to parse theme ({err})"))?;
.map_err(|err| format_theme_error(data, err))?;
Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(data)))))
}
@ -612,6 +616,17 @@ impl RawTheme {
}
}
fn format_theme_error(
data: &Data,
error: syntect::LoadingError,
) -> EcoVec<SourceDiagnostic> {
let pos = match &error {
syntect::LoadingError::ParseSyntax(err, _) => syntax_error_pos(err),
_ => ReportPos::None,
};
data.err_at(pos, "failed to parse theme", error)
}
/// A highlighted line of raw text.
///
/// This is a helper element that is synthesized by [`raw`] elements.

View File

@ -66,7 +66,7 @@ pub struct ImageElem {
#[parse(
let source = args.expect::<Spanned<DataSource>>("source")?;
let data = source.load(engine.world)?;
Derived::new(source.v, data)
Derived::new(source.v, data.bytes)
)]
pub source: Derived<DataSource, Bytes>,
@ -155,7 +155,7 @@ pub struct ImageElem {
#[parse(match args.named::<Spanned<Smart<DataSource>>>("icc")? {
Some(Spanned { v: Smart::Custom(source), span }) => Some(Smart::Custom({
let data = Spanned::new(&source, span).load(engine.world)?;
Derived::new(source, data)
Derived::new(source, data.bytes)
})),
Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto),
None => None,

View File

@ -9,6 +9,7 @@ use siphasher::sip128::{Hasher128, SipHasher13};
use crate::diag::{format_xml_like_error, StrResult};
use crate::foundations::Bytes;
use crate::layout::Axes;
use crate::loading::Data;
use crate::text::{
Font, FontBook, FontFlags, FontStretch, FontStyle, FontVariant, FontWeight,
};
@ -133,7 +134,12 @@ fn format_usvg_error(error: usvg::Error) -> EcoString {
usvg::Error::InvalidSize => {
"failed to parse SVG (width, height, or viewbox is invalid)".into()
}
usvg::Error::ParsingFailed(error) => format_xml_like_error("SVG", error),
usvg::Error::ParsingFailed(error) => {
format_xml_like_error("SVG", &Data::dummy(), error)
.pop()
.unwrap()
.message
}
}
}