diff --git a/crates/typst-eval/src/import.rs b/crates/typst-eval/src/import.rs index 5b67c0608..2060d25f1 100644 --- a/crates/typst-eval/src/import.rs +++ b/crates/typst-eval/src/import.rs @@ -211,7 +211,7 @@ fn resolve_package( // Evaluate the manifest. let manifest_id = FileId::new(Some(spec.clone()), VirtualPath::new("typst.toml")); let bytes = engine.world.file(manifest_id).at(span)?; - let string = std::str::from_utf8(&bytes).map_err(FileError::from).at(span)?; + let string = bytes.as_str().map_err(FileError::from).at(span)?; let manifest: PackageManifest = toml::from_str(string) .map_err(|err| eco_format!("package manifest is malformed ({})", err.message())) .at(span)?; diff --git a/crates/typst-ide/src/complete.rs b/crates/typst-ide/src/complete.rs index c22ea7e40..0f8abddb7 100644 --- a/crates/typst-ide/src/complete.rs +++ b/crates/typst-ide/src/complete.rs @@ -817,19 +817,8 @@ fn param_value_completions<'a>( ) { if param.name == "font" { ctx.font_completions(); - } else if param.name == "path" { - ctx.file_completions_with_extensions(match func.name() { - Some("image") => &["png", "jpg", "jpeg", "gif", "svg", "svgz"], - Some("csv") => &["csv"], - Some("plugin") => &["wasm"], - Some("cbor") => &["cbor"], - Some("json") => &["json"], - Some("toml") => &["toml"], - Some("xml") => &["xml"], - Some("yaml") => &["yml", "yaml"], - Some("bibliography") => &["bib", "yml", "yaml"], - _ => &[], - }); + } else if let Some(extensions) = path_completion(func, param) { + ctx.file_completions_with_extensions(extensions); } else if func.name() == Some("figure") && param.name == "body" { ctx.snippet_completion("image", "image(\"${}\"),", "An image in a figure."); ctx.snippet_completion("table", "table(\n ${}\n),", "A table in a figure."); @@ -838,6 +827,28 @@ fn param_value_completions<'a>( ctx.cast_completions(&param.input); } +/// Returns which file extensions to complete for the given parameter if any. 
+fn path_completion(func: &Func, param: &ParamInfo) -> Option<&'static [&'static str]> { + Some(match (func.name(), param.name) { + (Some("image"), "source") => &["png", "jpg", "jpeg", "gif", "svg", "svgz"], + (Some("csv"), "source") => &["csv"], + (Some("plugin"), "source") => &["wasm"], + (Some("cbor"), "source") => &["cbor"], + (Some("json"), "source") => &["json"], + (Some("toml"), "source") => &["toml"], + (Some("xml"), "source") => &["xml"], + (Some("yaml"), "source") => &["yml", "yaml"], + (Some("bibliography"), "sources") => &["bib", "yml", "yaml"], + (Some("bibliography"), "style") => &["csl"], + (Some("cite"), "style") => &["csl"], + (Some("raw"), "syntaxes") => &["sublime-syntax"], + (Some("raw"), "theme") => &["tmtheme"], + (Some("embed"), "path") => &[], + (None, "path") => &[], + _ => return None, + }) +} + /// Resolve a callee expression to a global function. fn resolve_global_callee<'a>( ctx: &CompletionContext<'a>, diff --git a/crates/typst-layout/src/image.rs b/crates/typst-layout/src/image.rs index 59e2c0210..e521b993f 100644 --- a/crates/typst-layout/src/image.rs +++ b/crates/typst-layout/src/image.rs @@ -1,13 +1,13 @@ use std::ffi::OsStr; -use typst_library::diag::{bail, warning, At, SourceResult, StrResult}; +use typst_library::diag::{warning, At, SourceResult, StrResult}; use typst_library::engine::Engine; -use typst_library::foundations::{Packed, Smart, StyleChain}; +use typst_library::foundations::{Bytes, Derived, Packed, Smart, StyleChain}; use typst_library::introspection::Locator; use typst_library::layout::{ Abs, Axes, FixedAlignment, Frame, FrameItem, Point, Region, Size, }; -use typst_library::loading::Readable; +use typst_library::loading::DataSource; use typst_library::text::families; use typst_library::visualize::{ Curve, Image, ImageElem, ImageFit, ImageFormat, RasterFormat, VectorFormat, @@ -26,17 +26,17 @@ pub fn layout_image( // Take the format that was explicitly defined, or parse the extension, // or try to detect the format. 
- let data = elem.data(); + let Derived { source, derived: data } = &elem.source; let format = match elem.format(styles) { Smart::Custom(v) => v, - Smart::Auto => determine_format(elem.path().as_str(), data).at(span)?, + Smart::Auto => determine_format(source, data).at(span)?, }; // Warn the user if the image contains a foreign object. Not perfect // because the svg could also be encoded, but that's an edge case. if format == ImageFormat::Vector(VectorFormat::Svg) { let has_foreign_object = - data.as_str().is_some_and(|s| s.contains(" StrResult { - let ext = std::path::Path::new(path) - .extension() - .and_then(OsStr::to_str) - .unwrap_or_default() - .to_lowercase(); +/// Try to determine the image format based on the data. +fn determine_format(source: &DataSource, data: &Bytes) -> StrResult { + if let DataSource::Path(path) = source { + let ext = std::path::Path::new(path.as_str()) + .extension() + .and_then(OsStr::to_str) + .unwrap_or_default() + .to_lowercase(); - Ok(match ext.as_str() { - "png" => ImageFormat::Raster(RasterFormat::Png), - "jpg" | "jpeg" => ImageFormat::Raster(RasterFormat::Jpg), - "gif" => ImageFormat::Raster(RasterFormat::Gif), - "svg" | "svgz" => ImageFormat::Vector(VectorFormat::Svg), - _ => match &data { - Readable::Str(_) => ImageFormat::Vector(VectorFormat::Svg), - Readable::Bytes(bytes) => match RasterFormat::detect(bytes) { - Some(f) => ImageFormat::Raster(f), - None => bail!("unknown image format"), - }, - }, - }) + match ext.as_str() { + "png" => return Ok(ImageFormat::Raster(RasterFormat::Png)), + "jpg" | "jpeg" => return Ok(ImageFormat::Raster(RasterFormat::Jpg)), + "gif" => return Ok(ImageFormat::Raster(RasterFormat::Gif)), + "svg" | "svgz" => return Ok(ImageFormat::Vector(VectorFormat::Svg)), + _ => {} + } + } + + Ok(ImageFormat::detect(data).ok_or("unknown image format")?) 
} diff --git a/crates/typst-library/src/foundations/array.rs b/crates/typst-library/src/foundations/array.rs index 4667ee765..e79a4e930 100644 --- a/crates/typst-library/src/foundations/array.rs +++ b/crates/typst-library/src/foundations/array.rs @@ -1124,6 +1124,53 @@ impl FromValue for SmallVec<[T; N]> { } } +/// One element, or multiple provided as an array. +#[derive(Debug, Clone, PartialEq, Hash)] +pub struct OneOrMultiple(pub Vec); + +impl Reflect for OneOrMultiple { + fn input() -> CastInfo { + T::input() + Array::input() + } + + fn output() -> CastInfo { + T::output() + Array::output() + } + + fn castable(value: &Value) -> bool { + Array::castable(value) || T::castable(value) + } +} + +impl IntoValue for OneOrMultiple { + fn into_value(self) -> Value { + self.0.into_value() + } +} + +impl FromValue for OneOrMultiple { + fn from_value(value: Value) -> HintedStrResult { + if T::castable(&value) { + return Ok(Self(vec![T::from_value(value)?])); + } + if Array::castable(&value) { + return Ok(Self( + Array::from_value(value)? + .into_iter() + .map(|value| T::from_value(value)) + .collect::>()?, + )); + } + Err(Self::error(&value)) + } +} + +impl Default for OneOrMultiple { + fn default() -> Self { + Self(vec![]) + } +} + /// The error message when the array is empty. #[cold] fn array_is_empty() -> EcoString { diff --git a/crates/typst-library/src/foundations/bytes.rs b/crates/typst-library/src/foundations/bytes.rs index 20034d074..d633c99ad 100644 --- a/crates/typst-library/src/foundations/bytes.rs +++ b/crates/typst-library/src/foundations/bytes.rs @@ -2,6 +2,7 @@ use std::any::Any; use std::fmt::{self, Debug, Formatter}; use std::hash::{Hash, Hasher}; use std::ops::{Add, AddAssign, Deref}; +use std::str::Utf8Error; use std::sync::Arc; use ecow::{eco_format, EcoString}; @@ -80,16 +81,37 @@ impl Bytes { self.as_slice().is_empty() } - /// Return a view into the buffer. + /// Return a view into the bytes. 
pub fn as_slice(&self) -> &[u8] { self } - /// Return a copy of the buffer as a vector. + /// Try to view the bytes as a UTF-8 string. + /// + /// If these bytes were created via `Bytes::from_string`, UTF-8 validation + /// is skipped. + pub fn as_str(&self) -> Result<&str, Utf8Error> { + self.inner().as_str() + } + + /// Return a copy of the bytes as a vector. pub fn to_vec(&self) -> Vec { self.as_slice().to_vec() } + /// Try to turn the bytes into a `Str`. + /// + /// - If these bytes were created via `Bytes::from_string::<Str>`, the + /// string is cloned directly. + /// - If these bytes were created via `Bytes::from_string`, but from a + /// different type of string, UTF-8 validation is still skipped. + pub fn to_str(&self) -> Result { + match self.inner().as_any().downcast_ref::() { + Some(string) => Ok(string.clone()), + None => self.as_str().map(Into::into), + } + } + /// Resolve an index or throw an out of bounds error. fn locate(&self, index: i64) -> StrResult { self.locate_opt(index).ok_or_else(|| out_of_bounds(index, self.len())) @@ -104,6 +126,11 @@ impl Bytes { if index >= 0 { Some(index) } else { (len as i64).checked_add(index) }; wrapped.and_then(|v| usize::try_from(v).ok()).filter(|&v| v <= len) } + + /// Access the inner `dyn Bytelike`. + fn inner(&self) -> &dyn Bytelike { + &**self.0 + } } #[scope] @@ -203,7 +230,7 @@ impl Deref for Bytes { type Target = [u8]; fn deref(&self) -> &Self::Target { - self.0.as_bytes() + self.inner().as_bytes() } } @@ -262,6 +289,8 @@ impl Serialize for Bytes { /// Any type that can back a byte buffer. 
trait Bytelike: Send + Sync { fn as_bytes(&self) -> &[u8]; + fn as_str(&self) -> Result<&str, Utf8Error>; + fn as_any(&self) -> &dyn Any; fn as_any_mut(&mut self) -> &mut dyn Any; } @@ -273,6 +302,14 @@ where self.as_ref() } + fn as_str(&self) -> Result<&str, Utf8Error> { + std::str::from_utf8(self.as_ref()) + } + + fn as_any(&self) -> &dyn Any { + self + } + fn as_any_mut(&mut self) -> &mut dyn Any { self } @@ -295,6 +332,14 @@ where self.0.as_ref().as_bytes() } + fn as_str(&self) -> Result<&str, Utf8Error> { + Ok(self.0.as_ref()) + } + + fn as_any(&self) -> &dyn Any { + self + } + fn as_any_mut(&mut self) -> &mut dyn Any { self } diff --git a/crates/typst-library/src/foundations/cast.rs b/crates/typst-library/src/foundations/cast.rs index 84f38f36e..38f409c67 100644 --- a/crates/typst-library/src/foundations/cast.rs +++ b/crates/typst-library/src/foundations/cast.rs @@ -13,7 +13,9 @@ use typst_syntax::{Span, Spanned}; use unicode_math_class::MathClass; use crate::diag::{At, HintedStrResult, HintedString, SourceResult, StrResult}; -use crate::foundations::{array, repr, NativeElement, Packed, Repr, Str, Type, Value}; +use crate::foundations::{ + array, repr, Fold, NativeElement, Packed, Repr, Str, Type, Value, +}; /// Determine details of a type. /// @@ -497,3 +499,58 @@ cast! { /// An operator that can be both unary or binary like `+`. "vary" => MathClass::Vary, } + +/// A type that contains a user-visible source portion and something that is +/// derived from it, but not user-visible. +/// +/// An example usage would be `source` being a `DataSource` and `derived` a +/// TextMate theme parsed from it. With `Derived`, we can store both parts in +/// the `RawElem::theme` field and get automatic nice `Reflect` and `IntoValue` +/// impls. +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] +pub struct Derived { + /// The source portion. + pub source: S, + /// The derived portion. 
+ pub derived: D, +} + +impl Derived { + /// Create a new instance from the `source` and the `derived` data. + pub fn new(source: S, derived: D) -> Self { + Self { source, derived } + } +} + +impl Reflect for Derived { + fn input() -> CastInfo { + S::input() + } + + fn output() -> CastInfo { + S::output() + } + + fn castable(value: &Value) -> bool { + S::castable(value) + } + + fn error(found: &Value) -> HintedString { + S::error(found) + } +} + +impl IntoValue for Derived { + fn into_value(self) -> Value { + self.source.into_value() + } +} + +impl Fold for Derived { + fn fold(self, outer: Self) -> Self { + Self { + source: self.source.fold(outer.source), + derived: self.derived.fold(outer.derived), + } + } +} diff --git a/crates/typst-library/src/foundations/plugin.rs b/crates/typst-library/src/foundations/plugin.rs index a7c341d8c..adf23a47c 100644 --- a/crates/typst-library/src/foundations/plugin.rs +++ b/crates/typst-library/src/foundations/plugin.rs @@ -9,7 +9,7 @@ use wasmi::{AsContext, AsContextMut}; use crate::diag::{bail, At, SourceResult, StrResult}; use crate::engine::Engine; use crate::foundations::{func, repr, scope, ty, Bytes}; -use crate::World; +use crate::loading::{DataSource, Load}; /// A WebAssembly plugin. /// @@ -154,15 +154,13 @@ impl Plugin { pub fn construct( /// The engine. engine: &mut Engine, - /// Path to a WebAssembly file. + /// A path to a WebAssembly file or raw WebAssembly bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned, + /// For more details about paths, see the [Paths section]($syntax/#paths). 
+ source: Spanned, ) -> SourceResult { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - Plugin::new(data).at(span) + let data = source.load(engine.world)?; + Plugin::new(data).at(source.span) } } diff --git a/crates/typst-library/src/foundations/str.rs b/crates/typst-library/src/foundations/str.rs index 4025d1ab3..2e90b3071 100644 --- a/crates/typst-library/src/foundations/str.rs +++ b/crates/typst-library/src/foundations/str.rs @@ -784,11 +784,7 @@ cast! { v: f64 => Self::Str(repr::display_float(v).into()), v: Decimal => Self::Str(format_str!("{}", v)), v: Version => Self::Str(format_str!("{}", v)), - v: Bytes => Self::Str( - std::str::from_utf8(&v) - .map_err(|_| "bytes are not valid utf-8")? - .into() - ), + v: Bytes => Self::Str(v.to_str().map_err(|_| "bytes are not valid utf-8")?), v: Label => Self::Str(v.resolve().as_str().into()), v: Type => Self::Str(v.long_name().into()), v: Str => Self::Str(v), diff --git a/crates/typst-library/src/foundations/styles.rs b/crates/typst-library/src/foundations/styles.rs index 7354719e9..37094dcd8 100644 --- a/crates/typst-library/src/foundations/styles.rs +++ b/crates/typst-library/src/foundations/styles.rs @@ -12,7 +12,8 @@ use typst_utils::LazyHash; use crate::diag::{SourceResult, Trace, Tracepoint}; use crate::engine::Engine; use crate::foundations::{ - cast, ty, Content, Context, Element, Func, NativeElement, Repr, Selector, + cast, ty, Content, Context, Element, Func, NativeElement, OneOrMultiple, Repr, + Selector, }; use crate::text::{FontFamily, FontList, TextElem}; @@ -939,6 +940,13 @@ impl Fold for SmallVec<[T; N]> { } } +impl Fold for OneOrMultiple { + fn fold(self, mut outer: Self) -> Self { + outer.0.extend(self.0); + outer + } +} + /// A variant of fold for foldable optional (`Option`) values where an inner /// `None` value isn't respected (contrary to `Option`'s usual `Fold` /// implementation, with which folding with an 
inner `None` always returns diff --git a/crates/typst-library/src/loading/cbor.rs b/crates/typst-library/src/loading/cbor.rs index a03e5c998..13d551201 100644 --- a/crates/typst-library/src/loading/cbor.rs +++ b/crates/typst-library/src/loading/cbor.rs @@ -1,10 +1,10 @@ -use ecow::{eco_format, EcoString}; +use ecow::eco_format; use typst_syntax::Spanned; use crate::diag::{At, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Bytes, Value}; -use crate::World; +use crate::loading::{DataSource, Load}; /// Reads structured data from a CBOR file. /// @@ -21,29 +21,31 @@ use crate::World; pub fn cbor( /// The engine. engine: &mut Engine, - /// Path to a CBOR file. + /// A path to a CBOR file or raw CBOR bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned, + /// For more details about paths, see the [Paths section]($syntax/#paths). + source: Spanned, ) -> SourceResult { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - cbor::decode(Spanned::new(data, span)) + let data = source.load(engine.world)?; + ciborium::from_reader(data.as_slice()) + .map_err(|err| eco_format!("failed to parse CBOR ({err})")) + .at(source.span) } #[scope] impl cbor { /// Reads structured data from CBOR bytes. + /// + /// This function is deprecated. The [`cbor`] function now accepts bytes + /// directly. #[func(title = "Decode CBOR")] pub fn decode( - /// cbor data. + /// The engine. + engine: &mut Engine, + /// CBOR data. data: Spanned, ) -> SourceResult { - let Spanned { v: data, span } = data; - ciborium::from_reader(data.as_slice()) - .map_err(|err| eco_format!("failed to parse CBOR ({err})")) - .at(span) + cbor(engine, data.map(DataSource::Bytes)) } /// Encode structured data into CBOR bytes. 
diff --git a/crates/typst-library/src/loading/csv.rs b/crates/typst-library/src/loading/csv.rs index 6822505d3..8171c4832 100644 --- a/crates/typst-library/src/loading/csv.rs +++ b/crates/typst-library/src/loading/csv.rs @@ -4,8 +4,7 @@ use typst_syntax::Spanned; use crate::diag::{bail, At, SourceResult}; use crate::engine::Engine; use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from a CSV file. /// @@ -28,10 +27,10 @@ use crate::World; pub fn csv( /// The engine. engine: &mut Engine, - /// Path to a CSV file. + /// Path to a CSV file or raw CSV bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned, + /// For more details about paths, see the [Paths section]($syntax/#paths). + source: Spanned, /// The delimiter that separates columns in the CSV file. /// Must be a single ASCII character. #[named] @@ -48,17 +47,63 @@ pub fn csv( #[default(RowType::Array)] row_type: RowType, ) -> SourceResult { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - self::csv::decode(Spanned::new(Readable::Bytes(data), span), delimiter, row_type) + let data = source.load(engine.world)?; + + let mut builder = ::csv::ReaderBuilder::new(); + let has_headers = row_type == RowType::Dict; + builder.has_headers(has_headers); + builder.delimiter(delimiter.0 as u8); + + // Counting lines from 1 by default. + let mut line_offset: usize = 1; + let mut reader = builder.from_reader(data.as_slice()); + let mut headers: Option<::csv::StringRecord> = None; + + if has_headers { + // Counting lines from 2 because we have a header. + line_offset += 1; + headers = Some( + reader + .headers() + .map_err(|err| format_csv_error(err, 1)) + .at(source.span)? 
+ .clone(), + ); + } + + let mut array = Array::new(); + for (line, result) in reader.records().enumerate() { + // Original solution was to use line from error, but that is + // incorrect with `has_headers` set to `false`. See issue: + // https://github.com/BurntSushi/rust-csv/issues/184 + let line = line + line_offset; + let row = result.map_err(|err| format_csv_error(err, line)).at(source.span)?; + let item = if let Some(headers) = &headers { + let mut dict = Dict::new(); + for (field, value) in headers.iter().zip(&row) { + dict.insert(field.into(), value.into_value()); + } + dict.into_value() + } else { + let sub = row.into_iter().map(|field| field.into_value()).collect(); + Value::Array(sub) + }; + array.push(item); + } + + Ok(array) } #[scope] impl csv { /// Reads structured data from a CSV string/bytes. + /// + /// This function is deprecated. The [`csv`] function now accepts bytes + /// directly. #[func(title = "Decode CSV")] pub fn decode( + /// The engine. + engine: &mut Engine, /// CSV data. data: Spanned, /// The delimiter that separates columns in the CSV file. @@ -77,51 +122,7 @@ impl csv { #[default(RowType::Array)] row_type: RowType, ) -> SourceResult { - let Spanned { v: data, span } = data; - let has_headers = row_type == RowType::Dict; - - let mut builder = ::csv::ReaderBuilder::new(); - builder.has_headers(has_headers); - builder.delimiter(delimiter.0 as u8); - - // Counting lines from 1 by default. - let mut line_offset: usize = 1; - let mut reader = builder.from_reader(data.as_slice()); - let mut headers: Option<::csv::StringRecord> = None; - - if has_headers { - // Counting lines from 2 because we have a header. - line_offset += 1; - headers = Some( - reader - .headers() - .map_err(|err| format_csv_error(err, 1)) - .at(span)? 
- .clone(), - ); - } - - let mut array = Array::new(); - for (line, result) in reader.records().enumerate() { - // Original solution was to use line from error, but that is - // incorrect with `has_headers` set to `false`. See issue: - // https://github.com/BurntSushi/rust-csv/issues/184 - let line = line + line_offset; - let row = result.map_err(|err| format_csv_error(err, line)).at(span)?; - let item = if let Some(headers) = &headers { - let mut dict = Dict::new(); - for (field, value) in headers.iter().zip(&row) { - dict.insert(field.into(), value.into_value()); - } - dict.into_value() - } else { - let sub = row.into_iter().map(|field| field.into_value()).collect(); - Value::Array(sub) - }; - array.push(item); - } - - Ok(array) + csv(engine, data.map(Readable::into_source), delimiter, row_type) } } diff --git a/crates/typst-library/src/loading/json.rs b/crates/typst-library/src/loading/json.rs index 597cf4cc6..3128d77da 100644 --- a/crates/typst-library/src/loading/json.rs +++ b/crates/typst-library/src/loading/json.rs @@ -1,11 +1,10 @@ -use ecow::{eco_format, EcoString}; +use ecow::eco_format; use typst_syntax::Spanned; use crate::diag::{At, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from a JSON file. /// @@ -53,29 +52,31 @@ use crate::World; pub fn json( /// The engine. engine: &mut Engine, - /// Path to a JSON file. + /// Path to a JSON file or raw JSON bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned, + /// For more details about paths, see the [Paths section]($syntax/#paths). 
+ source: Spanned, ) -> SourceResult { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - json::decode(Spanned::new(Readable::Bytes(data), span)) + let data = source.load(engine.world)?; + serde_json::from_slice(data.as_slice()) + .map_err(|err| eco_format!("failed to parse JSON ({err})")) + .at(source.span) } #[scope] impl json { /// Reads structured data from a JSON string/bytes. + /// + /// This function is deprecated. The [`json`] function now accepts bytes + /// directly. #[func(title = "Decode JSON")] pub fn decode( + /// The engine. + engine: &mut Engine, /// JSON data. data: Spanned, ) -> SourceResult { - let Spanned { v: data, span } = data; - serde_json::from_slice(data.as_slice()) - .map_err(|err| eco_format!("failed to parse JSON ({err})")) - .at(span) + json(engine, data.map(Readable::into_source)) } /// Encodes structured data into a JSON string. diff --git a/crates/typst-library/src/loading/mod.rs b/crates/typst-library/src/loading/mod.rs index 120b3e3af..171ae651a 100644 --- a/crates/typst-library/src/loading/mod.rs +++ b/crates/typst-library/src/loading/mod.rs @@ -15,6 +15,10 @@ mod xml_; #[path = "yaml.rs"] mod yaml_; +use comemo::Tracked; +use ecow::EcoString; +use typst_syntax::Spanned; + pub use self::cbor_::*; pub use self::csv_::*; pub use self::json_::*; @@ -23,7 +27,10 @@ pub use self::toml_::*; pub use self::xml_::*; pub use self::yaml_::*; +use crate::diag::{At, SourceResult}; +use crate::foundations::OneOrMultiple; use crate::foundations::{cast, category, Bytes, Category, Scope, Str}; +use crate::World; /// Data loading from external files. /// @@ -44,6 +51,76 @@ pub(super) fn define(global: &mut Scope) { global.define_func::(); } +/// Something we can retrieve byte data from. +#[derive(Debug, Clone, PartialEq, Hash)] +pub enum DataSource { + /// A path to a file. + Path(EcoString), + /// Raw bytes. + Bytes(Bytes), +} + +cast! 
{ + DataSource, + self => match self { + Self::Path(v) => v.into_value(), + Self::Bytes(v) => v.into_value(), + }, + v: EcoString => Self::Path(v), + v: Bytes => Self::Bytes(v), +} + +/// Loads data from a path or provided bytes. +pub trait Load { + /// Bytes or a list of bytes (if there are multiple sources). + type Output; + + /// Load the bytes. + fn load(&self, world: Tracked) -> SourceResult; +} + +impl Load for Spanned { + type Output = Bytes; + + fn load(&self, world: Tracked) -> SourceResult { + self.as_ref().load(world) + } +} + +impl Load for Spanned<&DataSource> { + type Output = Bytes; + + fn load(&self, world: Tracked) -> SourceResult { + match &self.v { + DataSource::Path(path) => { + let file_id = self.span.resolve_path(path).at(self.span)?; + world.file(file_id).at(self.span) + } + DataSource::Bytes(bytes) => Ok(bytes.clone()), + } + } +} + +impl Load for Spanned> { + type Output = Vec; + + fn load(&self, world: Tracked) -> SourceResult> { + self.as_ref().load(world) + } +} + +impl Load for Spanned<&OneOrMultiple> { + type Output = Vec; + + fn load(&self, world: Tracked) -> SourceResult> { + self.v + .0 + .iter() + .map(|source| Spanned::new(source, self.span).load(world)) + .collect() + } +} + /// A value that can be read from a file. #[derive(Debug, Clone, PartialEq, Hash)] pub enum Readable { @@ -54,26 +131,16 @@ pub enum Readable { } impl Readable { - pub fn as_slice(&self) -> &[u8] { - match self { - Self::Bytes(v) => v, - Self::Str(v) => v.as_bytes(), - } - } - - pub fn as_str(&self) -> Option<&str> { - match self { - Self::Str(v) => Some(v.as_str()), - Self::Bytes(v) => std::str::from_utf8(v).ok(), - } - } - pub fn into_bytes(self) -> Bytes { match self { Self::Bytes(v) => v, Self::Str(v) => Bytes::from_string(v), } } + + pub fn into_source(self) -> DataSource { + DataSource::Bytes(self.into_bytes()) + } } cast! 
{ diff --git a/crates/typst-library/src/loading/read.rs b/crates/typst-library/src/loading/read.rs index 23e6e27e7..bf363f846 100644 --- a/crates/typst-library/src/loading/read.rs +++ b/crates/typst-library/src/loading/read.rs @@ -1,7 +1,7 @@ use ecow::EcoString; use typst_syntax::Spanned; -use crate::diag::{At, SourceResult}; +use crate::diag::{At, FileError, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, Cast}; use crate::loading::Readable; @@ -42,12 +42,9 @@ pub fn read( let data = engine.world.file(id).at(span)?; Ok(match encoding { None => Readable::Bytes(data), - Some(Encoding::Utf8) => Readable::Str( - std::str::from_utf8(&data) - .map_err(|_| "file is not valid utf-8") - .at(span)? - .into(), - ), + Some(Encoding::Utf8) => { + Readable::Str(data.to_str().map_err(FileError::from).at(span)?) + } }) } diff --git a/crates/typst-library/src/loading/toml.rs b/crates/typst-library/src/loading/toml.rs index 5167703ef..e3a01cdd5 100644 --- a/crates/typst-library/src/loading/toml.rs +++ b/crates/typst-library/src/loading/toml.rs @@ -1,11 +1,10 @@ use ecow::{eco_format, EcoString}; use typst_syntax::{is_newline, Spanned}; -use crate::diag::{At, SourceResult}; +use crate::diag::{At, FileError, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from a TOML file. /// @@ -31,32 +30,32 @@ use crate::World; pub fn toml( /// The engine. engine: &mut Engine, - /// Path to a TOML file. + /// A path to a TOML file or raw TOML bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned, + /// For more details about paths, see the [Paths section]($syntax/#paths). 
+ source: Spanned, ) -> SourceResult { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - toml::decode(Spanned::new(Readable::Bytes(data), span)) + let data = source.load(engine.world)?; + let raw = data.as_str().map_err(FileError::from).at(source.span)?; + ::toml::from_str(raw) + .map_err(|err| format_toml_error(err, raw)) + .at(source.span) } #[scope] impl toml { /// Reads structured data from a TOML string/bytes. + /// + /// This function is deprecated. The [`toml`] function now accepts bytes + /// directly. #[func(title = "Decode TOML")] pub fn decode( + /// The engine. + engine: &mut Engine, /// TOML data. data: Spanned, ) -> SourceResult { - let Spanned { v: data, span } = data; - let raw = std::str::from_utf8(data.as_slice()) - .map_err(|_| "file is not valid utf-8") - .at(span)?; - ::toml::from_str(raw) - .map_err(|err| format_toml_error(err, raw)) - .at(span) + toml(engine, data.map(Readable::into_source)) } /// Encodes structured data into a TOML string. diff --git a/crates/typst-library/src/loading/xml.rs b/crates/typst-library/src/loading/xml.rs index 3b1a9674b..53ec3d93b 100644 --- a/crates/typst-library/src/loading/xml.rs +++ b/crates/typst-library/src/loading/xml.rs @@ -5,8 +5,7 @@ use typst_syntax::Spanned; use crate::diag::{format_xml_like_error, At, FileError, SourceResult}; use crate::engine::Engine; use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from an XML file. /// @@ -60,36 +59,36 @@ use crate::World; pub fn xml( /// The engine. engine: &mut Engine, - /// Path to an XML file. + /// A path to an XML file or raw XML bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned, + /// For more details about paths, see the [Paths section]($syntax/#paths). 
+ source: Spanned, ) -> SourceResult { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - xml::decode(Spanned::new(Readable::Bytes(data), span)) + let data = source.load(engine.world)?; + let text = data.as_str().map_err(FileError::from).at(source.span)?; + let document = roxmltree::Document::parse_with_options( + text, + ParsingOptions { allow_dtd: true, ..Default::default() }, + ) + .map_err(format_xml_error) + .at(source.span)?; + Ok(convert_xml(document.root())) } #[scope] impl xml { /// Reads structured data from an XML string/bytes. + /// + /// This function is deprecated. The [`xml`] function now accepts bytes + /// directly. #[func(title = "Decode XML")] pub fn decode( + /// The engine. + engine: &mut Engine, /// XML data. data: Spanned, ) -> SourceResult { - let Spanned { v: data, span } = data; - let text = std::str::from_utf8(data.as_slice()) - .map_err(FileError::from) - .at(span)?; - let document = roxmltree::Document::parse_with_options( - text, - ParsingOptions { allow_dtd: true, ..Default::default() }, - ) - .map_err(format_xml_error) - .at(span)?; - Ok(convert_xml(document.root())) + xml(engine, data.map(Readable::into_source)) } } diff --git a/crates/typst-library/src/loading/yaml.rs b/crates/typst-library/src/loading/yaml.rs index 0e8ca3fb0..2eb26be8f 100644 --- a/crates/typst-library/src/loading/yaml.rs +++ b/crates/typst-library/src/loading/yaml.rs @@ -1,11 +1,10 @@ -use ecow::{eco_format, EcoString}; +use ecow::eco_format; use typst_syntax::Spanned; use crate::diag::{At, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::Readable; -use crate::World; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from a YAML file. /// @@ -43,29 +42,31 @@ use crate::World; pub fn yaml( /// The engine. engine: &mut Engine, - /// Path to a YAML file. 
+ /// A path to a YAML file or raw YAML bytes. /// - /// For more details, see the [Paths section]($syntax/#paths). - path: Spanned, + /// For more details about paths, see the [Paths section]($syntax/#paths). + source: Spanned, ) -> SourceResult { - let Spanned { v: path, span } = path; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - yaml::decode(Spanned::new(Readable::Bytes(data), span)) + let data = source.load(engine.world)?; + serde_yaml::from_slice(data.as_slice()) + .map_err(|err| eco_format!("failed to parse YAML ({err})")) + .at(source.span) } #[scope] impl yaml { /// Reads structured data from a YAML string/bytes. + /// + /// This function is deprecated. The [`yaml`] function now accepts bytes + /// directly. #[func(title = "Decode YAML")] pub fn decode( + /// The engine. + engine: &mut Engine, /// YAML data. data: Spanned, ) -> SourceResult { - let Spanned { v: data, span } = data; - serde_yaml::from_slice(data.as_slice()) - .map_err(|err| eco_format!("failed to parse YAML ({err})")) - .at(span) + yaml(engine, data.map(Readable::into_source)) } /// Encode structured data into a YAML string. 
diff --git a/crates/typst-library/src/model/bibliography.rs b/crates/typst-library/src/model/bibliography.rs index 280ac4a42..4ab4ff22c 100644 --- a/crates/typst-library/src/model/bibliography.rs +++ b/crates/typst-library/src/model/bibliography.rs @@ -1,7 +1,7 @@ +use std::any::TypeId; use std::collections::HashMap; use std::ffi::OsStr; use std::fmt::{self, Debug, Formatter}; -use std::hash::{Hash, Hasher}; use std::num::NonZeroUsize; use std::path::Path; use std::sync::{Arc, LazyLock}; @@ -12,26 +12,26 @@ use hayagriva::archive::ArchivedStyle; use hayagriva::io::BibLaTeXError; use hayagriva::{ citationberg, BibliographyDriver, BibliographyRequest, CitationItem, CitationRequest, - SpecificLocator, + Library, SpecificLocator, }; use indexmap::IndexMap; use smallvec::{smallvec, SmallVec}; -use typed_arena::Arena; use typst_syntax::{Span, Spanned}; -use typst_utils::{LazyHash, NonZeroExt, PicoStr}; +use typst_utils::{ManuallyHash, NonZeroExt, PicoStr}; use crate::diag::{bail, error, At, FileError, HintedStrResult, SourceResult, StrResult}; use crate::engine::Engine; use crate::foundations::{ - cast, elem, ty, Args, Array, Bytes, CastInfo, Content, FromValue, IntoValue, Label, - NativeElement, Packed, Reflect, Repr, Scope, Show, ShowSet, Smart, Str, StyleChain, - Styles, Synthesize, Type, Value, + elem, Bytes, CastInfo, Content, Derived, FromValue, IntoValue, Label, NativeElement, + OneOrMultiple, Packed, Reflect, Scope, Show, ShowSet, Smart, StyleChain, Styles, + Synthesize, Value, }; use crate::introspection::{Introspector, Locatable, Location}; use crate::layout::{ BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem, Sizing, TrackSizings, VElem, }; +use crate::loading::{DataSource, Load}; use crate::model::{ CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem, Url, @@ -86,13 +86,20 @@ use crate::World; /// ``` #[elem(Locatable, Synthesize, Show, ShowSet, LocalName)] pub struct BibliographyElem { - /// 
Path(s) to Hayagriva `.yml` and/or BibLaTeX `.bib` files. + /// One or multiple paths to or raw bytes for Hayagriva `.yml` and/or + /// BibLaTeX `.bib` files. + /// + /// This can be: + /// - A path string to load a bibliography file from the given path. For + /// more details about paths, see the [Paths section]($syntax/#paths). + /// - Raw bytes from which the bibliography should be decoded. + /// - An array where each item is one of the above. #[required] #[parse( - let (paths, bibliography) = Bibliography::parse(engine, args)?; - paths + let sources = args.expect("sources")?; + Bibliography::load(engine.world, sources)? )] - pub path: BibliographyPaths, + pub sources: Derived, Bibliography>, /// The title of the bibliography. /// @@ -116,19 +123,22 @@ pub struct BibliographyElem { /// The bibliography style. /// - /// Should be either one of the built-in styles (see below) or a path to - /// a [CSL file](https://citationstyles.org/). Some of the styles listed - /// below appear twice, once with their full name and once with a short - /// alias. - #[parse(CslStyle::parse(engine, args)?)] - #[default(CslStyle::from_name("ieee").unwrap())] - pub style: CslStyle, - - /// The loaded bibliography. - #[internal] - #[required] - #[parse(bibliography)] - pub bibliography: Bibliography, + /// This can be: + /// - A string with the name of one of the built-in styles (see below). Some + /// of the styles listed below appear twice, once with their full name and + /// once with a short alias. + /// - A path string to a [CSL file](https://citationstyles.org/). For more + /// details about paths, see the [Paths section]($syntax/#paths). + /// - Raw bytes from which a CSL style should be decoded. + #[parse(match args.named::>("style")? 
{ + Some(source) => Some(CslStyle::load(engine.world, source)?), + None => None, + })] + #[default({ + let default = ArchivedStyle::InstituteOfElectricalAndElectronicsEngineers; + Derived::new(CslSource::Named(default), CslStyle::from_archived(default)) + })] + pub style: Derived, /// The language setting where the bibliography is. #[internal] @@ -141,17 +151,6 @@ pub struct BibliographyElem { pub region: Option, } -/// A list of bibliography file paths. -#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)] -pub struct BibliographyPaths(Vec); - -cast! { - BibliographyPaths, - self => self.0.into_value(), - v: EcoString => Self(vec![v]), - v: Array => Self(v.into_iter().map(Value::cast).collect::>()?), -} - impl BibliographyElem { /// Find the document's bibliography. pub fn find(introspector: Tracked) -> StrResult> { @@ -169,13 +168,12 @@ impl BibliographyElem { } /// Whether the bibliography contains the given key. - pub fn has(engine: &Engine, key: impl Into) -> bool { - let key = key.into(); + pub fn has(engine: &Engine, key: Label) -> bool { engine .introspector .query(&Self::elem().select()) .iter() - .any(|elem| elem.to_packed::().unwrap().bibliography().has(key)) + .any(|elem| elem.to_packed::().unwrap().sources.derived.has(key)) } /// Find all bibliography keys. @@ -183,9 +181,9 @@ impl BibliographyElem { let mut vec = vec![]; for elem in introspector.query(&Self::elem().select()).iter() { let this = elem.to_packed::().unwrap(); - for (key, entry) in this.bibliography().iter() { + for (key, entry) in this.sources.derived.iter() { let detail = entry.title().map(|title| title.value.to_str().into()); - vec.push((Label::new(key), detail)) + vec.push((key, detail)) } } vec @@ -282,63 +280,35 @@ impl LocalName for Packed { } /// A loaded bibliography. -#[derive(Clone, PartialEq)] -pub struct Bibliography { - map: Arc>, - hash: u128, -} +#[derive(Clone, PartialEq, Hash)] +pub struct Bibliography(Arc>>); impl Bibliography { - /// Parse the bibliography argument. 
- fn parse( - engine: &mut Engine, - args: &mut Args, - ) -> SourceResult<(BibliographyPaths, Bibliography)> { - let Spanned { v: paths, span } = - args.expect::>("path to bibliography file")?; - - // Load bibliography files. - let data = paths - .0 - .iter() - .map(|path| { - let id = span.resolve_path(path).at(span)?; - engine.world.file(id).at(span) - }) - .collect::>>()?; - - // Parse. - let bibliography = Self::load(&paths, &data).at(span)?; - - Ok((paths, bibliography)) + /// Load a bibliography from data sources. + fn load( + world: Tracked, + sources: Spanned>, + ) -> SourceResult, Self>> { + let data = sources.load(world)?; + let bibliography = Self::decode(&sources.v, &data).at(sources.span)?; + Ok(Derived::new(sources.v, bibliography)) } - /// Load bibliography entries from paths. + /// Decode a bibliography from loaded data sources. #[comemo::memoize] #[typst_macros::time(name = "load bibliography")] - fn load(paths: &BibliographyPaths, data: &[Bytes]) -> StrResult { + fn decode( + sources: &OneOrMultiple, + data: &[Bytes], + ) -> StrResult { let mut map = IndexMap::new(); let mut duplicates = Vec::::new(); // We might have multiple bib/yaml files - for (path, bytes) in paths.0.iter().zip(data) { - let src = std::str::from_utf8(bytes).map_err(FileError::from)?; - - let ext = Path::new(path.as_str()) - .extension() - .and_then(OsStr::to_str) - .unwrap_or_default(); - - let library = match ext.to_lowercase().as_str() { - "yml" | "yaml" => hayagriva::io::from_yaml_str(src) - .map_err(|err| eco_format!("failed to parse YAML ({err})"))?, - "bib" => hayagriva::io::from_biblatex_str(src) - .map_err(|errors| format_biblatex_error(path, src, errors))?, - _ => bail!("unknown bibliography format (must be .yml/.yaml or .bib)"), - }; - + for (source, data) in sources.0.iter().zip(data) { + let library = decode_library(source, data)?; for entry in library { - match map.entry(PicoStr::intern(entry.key())) { + match map.entry(Label::new(PicoStr::intern(entry.key()))) { 
indexmap::map::Entry::Vacant(vacant) => { vacant.insert(entry); } @@ -353,182 +323,210 @@ impl Bibliography { bail!("duplicate bibliography keys: {}", duplicates.join(", ")); } - Ok(Bibliography { - map: Arc::new(map), - hash: typst_utils::hash128(data), - }) + Ok(Bibliography(Arc::new(ManuallyHash::new(map, typst_utils::hash128(data))))) } - fn has(&self, key: impl Into) -> bool { - self.map.contains_key(&key.into()) + fn has(&self, key: Label) -> bool { + self.0.contains_key(&key) } - fn iter(&self) -> impl Iterator { - self.map.iter().map(|(&k, v)| (k, v)) + fn get(&self, key: Label) -> Option<&hayagriva::Entry> { + self.0.get(&key) + } + + fn iter(&self) -> impl Iterator { + self.0.iter().map(|(&k, v)| (k, v)) } } impl Debug for Bibliography { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.debug_set().entries(self.map.keys()).finish() + f.debug_set().entries(self.0.keys()).finish() } } -impl Hash for Bibliography { - fn hash(&self, state: &mut H) { - self.hash.hash(state); +/// Decode one library from one data source. +fn decode_library(source: &DataSource, data: &Bytes) -> StrResult { + let src = data.as_str().map_err(FileError::from)?; + + if let DataSource::Path(path) = source { + // If we got a path, use the extension to determine whether it is + // YAML or BibLaTeX. + let ext = Path::new(path.as_str()) + .extension() + .and_then(OsStr::to_str) + .unwrap_or_default(); + + match ext.to_lowercase().as_str() { + "yml" | "yaml" => hayagriva::io::from_yaml_str(src) + .map_err(|err| eco_format!("failed to parse YAML ({err})")), + "bib" => hayagriva::io::from_biblatex_str(src) + .map_err(|errors| format_biblatex_error(src, Some(path), errors)), + _ => bail!("unknown bibliography format (must be .yml/.yaml or .bib)"), + } + } else { + // If we just got bytes, we need to guess. If it can be decoded as + // hayagriva YAML, we'll use that. 
+ let haya_err = match hayagriva::io::from_yaml_str(src) { + Ok(library) => return Ok(library), + Err(err) => err, + }; + + // If it can be decoded as BibLaTeX, we use that instead. + let bib_errs = match hayagriva::io::from_biblatex_str(src) { + Ok(library) => return Ok(library), + Err(err) => err, + }; + + // If neither decoded correctly, check whether `:` or `{` appears + // more often to guess whether it's more likely to be YAML or BibLaTeX + // and emit the more appropriate error. + let mut yaml = 0; + let mut biblatex = 0; + for c in src.chars() { + match c { + ':' => yaml += 1, + '{' => biblatex += 1, + _ => {} + } + } + + if yaml > biblatex { + bail!("failed to parse YAML ({haya_err})") + } else { + Err(format_biblatex_error(src, None, bib_errs)) + } } } /// Format a BibLaTeX loading error. -fn format_biblatex_error(path: &str, src: &str, errors: Vec) -> EcoString { +fn format_biblatex_error( + src: &str, + path: Option<&str>, + errors: Vec, +) -> EcoString { let Some(error) = errors.first() else { - return eco_format!("failed to parse BibLaTeX file ({path})"); + return match path { + Some(path) => eco_format!("failed to parse BibLaTeX file ({path})"), + None => eco_format!("failed to parse BibLaTeX"), + }; }; let (span, msg) = match error { BibLaTeXError::Parse(error) => (&error.span, error.kind.to_string()), BibLaTeXError::Type(error) => (&error.span, error.kind.to_string()), }; + let line = src.get(..span.start).unwrap_or_default().lines().count(); - eco_format!("failed to parse BibLaTeX file ({path}:{line}: {msg})") + match path { + Some(path) => eco_format!("failed to parse BibLaTeX file ({path}:{line}: {msg})"), + None => eco_format!("failed to parse BibLaTeX ({line}: {msg})"), + } } /// A loaded CSL style. -#[ty(cast)] #[derive(Debug, Clone, PartialEq, Hash)] -pub struct CslStyle { - name: Option, - style: Arc>, -} +pub struct CslStyle(Arc>); impl CslStyle { - /// Parse the style argument. 
- pub fn parse(engine: &mut Engine, args: &mut Args) -> SourceResult> { - let Some(Spanned { v: string, span }) = - args.named::>("style")? - else { - return Ok(None); - }; - - Ok(Some(Self::parse_impl(engine, &string, span).at(span)?)) - } - - /// Parse the style argument with `Smart`. - pub fn parse_smart( - engine: &mut Engine, - args: &mut Args, - ) -> SourceResult>> { - let Some(Spanned { v: smart, span }) = - args.named::>>("style")? - else { - return Ok(None); - }; - - Ok(Some(match smart { - Smart::Auto => Smart::Auto, - Smart::Custom(string) => { - Smart::Custom(Self::parse_impl(engine, &string, span).at(span)?) + /// Load a CSL style from a data source. + pub fn load( + world: Tracked, + Spanned { v: source, span }: Spanned, + ) -> SourceResult> { + let style = match &source { + CslSource::Named(style) => Self::from_archived(*style), + CslSource::Normal(source) => { + let data = Spanned::new(source, span).load(world)?; + Self::from_data(data).at(span)? } - })) - } - - /// Parse internally. - fn parse_impl(engine: &mut Engine, string: &str, span: Span) -> StrResult { - let ext = Path::new(string) - .extension() - .and_then(OsStr::to_str) - .unwrap_or_default() - .to_lowercase(); - - if ext == "csl" { - let id = span.resolve_path(string)?; - let data = engine.world.file(id)?; - CslStyle::from_data(&data) - } else { - CslStyle::from_name(string) - } + }; + Ok(Derived::new(source, style)) } /// Load a built-in CSL style. 
#[comemo::memoize] - pub fn from_name(name: &str) -> StrResult { - match hayagriva::archive::ArchivedStyle::by_name(name).map(ArchivedStyle::get) { - Some(citationberg::Style::Independent(style)) => Ok(Self { - name: Some(name.into()), - style: Arc::new(LazyHash::new(style)), - }), - _ => bail!("unknown style: `{name}`"), + pub fn from_archived(archived: ArchivedStyle) -> CslStyle { + match archived.get() { + citationberg::Style::Independent(style) => Self(Arc::new(ManuallyHash::new( + style, + typst_utils::hash128(&(TypeId::of::(), archived)), + ))), + // Ensured by `test_bibliography_load_builtin_styles`. + _ => unreachable!("archive should not contain dependant styles"), } } /// Load a CSL style from file contents. #[comemo::memoize] - pub fn from_data(data: &Bytes) -> StrResult { - let text = std::str::from_utf8(data.as_slice()).map_err(FileError::from)?; + pub fn from_data(data: Bytes) -> StrResult { + let text = data.as_str().map_err(FileError::from)?; citationberg::IndependentStyle::from_xml(text) - .map(|style| Self { name: None, style: Arc::new(LazyHash::new(style)) }) + .map(|style| { + Self(Arc::new(ManuallyHash::new( + style, + typst_utils::hash128(&(TypeId::of::(), data)), + ))) + }) .map_err(|err| eco_format!("failed to load CSL style ({err})")) } /// Get the underlying independent style. pub fn get(&self) -> &citationberg::IndependentStyle { - self.style.as_ref() + self.0.as_ref() } } -// This Reflect impl is technically a bit wrong because it doesn't say what -// FromValue and IntoValue really do. Instead, it says what the `style` argument -// on `bibliography` and `cite` expect (through manual parsing). -impl Reflect for CslStyle { +/// Source for a CSL style. +#[derive(Debug, Clone, PartialEq, Hash)] +pub enum CslSource { + /// A predefined named style. + Named(ArchivedStyle), + /// A normal data source. 
+ Normal(DataSource), +} + +impl Reflect for CslSource { #[comemo::memoize] fn input() -> CastInfo { - let ty = std::iter::once(CastInfo::Type(Type::of::())); - let options = hayagriva::archive::ArchivedStyle::all().iter().map(|name| { + let source = std::iter::once(DataSource::input()); + let names = ArchivedStyle::all().iter().map(|name| { CastInfo::Value(name.names()[0].into_value(), name.display_name()) }); - CastInfo::Union(ty.chain(options).collect()) + CastInfo::Union(source.into_iter().chain(names).collect()) } fn output() -> CastInfo { - EcoString::output() + DataSource::output() } fn castable(value: &Value) -> bool { - if let Value::Dyn(dynamic) = &value { - if dynamic.is::() { - return true; - } - } - - false + DataSource::castable(value) } } -impl FromValue for CslStyle { +impl FromValue for CslSource { fn from_value(value: Value) -> HintedStrResult { - if let Value::Dyn(dynamic) = &value { - if let Some(concrete) = dynamic.downcast::() { - return Ok(concrete.clone()); + if EcoString::castable(&value) { + let string = EcoString::from_value(value.clone())?; + if Path::new(string.as_str()).extension().is_none() { + let style = ArchivedStyle::by_name(&string) + .ok_or_else(|| eco_format!("unknown style: {}", string))?; + return Ok(CslSource::Named(style)); } } - Err(::error(&value)) + DataSource::from_value(value).map(CslSource::Normal) } } -impl IntoValue for CslStyle { +impl IntoValue for CslSource { fn into_value(self) -> Value { - Value::dynamic(self) - } -} - -impl Repr for CslStyle { - fn repr(&self) -> EcoString { - self.name - .as_ref() - .map(|name| name.repr()) - .unwrap_or_else(|| "..".into()) + match self { + // We prefer the shorter names which are at the back of the array. 
+ Self::Named(v) => v.names().last().unwrap().into_value(), + Self::Normal(v) => v.into_value(), + } } } @@ -632,9 +630,8 @@ impl<'a> Generator<'a> { static LOCALES: LazyLock> = LazyLock::new(hayagriva::archive::locales); - let database = self.bibliography.bibliography(); - let bibliography_style = self.bibliography.style(StyleChain::default()); - let styles = Arena::new(); + let database = &self.bibliography.sources.derived; + let bibliography_style = &self.bibliography.style(StyleChain::default()).derived; // Process all citation groups. let mut driver = BibliographyDriver::new(); @@ -654,7 +651,7 @@ impl<'a> Generator<'a> { // Create infos and items for each child in the group. for child in children { let key = *child.key(); - let Some(entry) = database.map.get(&key.into_inner()) else { + let Some(entry) = database.get(key) else { errors.push(error!( child.span(), "key `{}` does not exist in the bibliography", @@ -695,8 +692,8 @@ impl<'a> Generator<'a> { } let style = match first.style(StyleChain::default()) { - Smart::Auto => &bibliography_style.style, - Smart::Custom(style) => styles.alloc(style.style), + Smart::Auto => bibliography_style.get(), + Smart::Custom(style) => style.derived.get(), }; self.infos.push(GroupInfo { @@ -727,7 +724,7 @@ impl<'a> Generator<'a> { // Add hidden items for everything if we should print the whole // bibliography. 
if self.bibliography.full(StyleChain::default()) { - for entry in database.map.values() { + for (_, entry) in database.iter() { driver.citation(CitationRequest::new( vec![CitationItem::new(entry, None, None, true, None)], bibliography_style.get(), @@ -1097,3 +1094,15 @@ fn locale(lang: Lang, region: Option) -> citationberg::LocaleCode { } citationberg::LocaleCode(value) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bibliography_load_builtin_styles() { + for &archived in ArchivedStyle::all() { + let _ = CslStyle::from_archived(archived); + } + } +} diff --git a/crates/typst-library/src/model/cite.rs b/crates/typst-library/src/model/cite.rs index ac0cfa790..29497993d 100644 --- a/crates/typst-library/src/model/cite.rs +++ b/crates/typst-library/src/model/cite.rs @@ -1,11 +1,14 @@ +use typst_syntax::Spanned; + use crate::diag::{error, At, HintedString, SourceResult}; use crate::engine::Engine; use crate::foundations::{ - cast, elem, Cast, Content, Label, Packed, Show, Smart, StyleChain, Synthesize, + cast, elem, Cast, Content, Derived, Label, Packed, Show, Smart, StyleChain, + Synthesize, }; use crate::introspection::Locatable; use crate::model::bibliography::Works; -use crate::model::CslStyle; +use crate::model::{CslSource, CslStyle}; use crate::text::{Lang, Region, TextElem}; /// Cite a work from the bibliography. @@ -87,15 +90,24 @@ pub struct CiteElem { /// The citation style. /// - /// Should be either `{auto}`, one of the built-in styles (see below) or a - /// path to a [CSL file](https://citationstyles.org/). Some of the styles - /// listed below appear twice, once with their full name and once with a - /// short alias. - /// - /// When set to `{auto}`, automatically use the - /// [bibliography's style]($bibliography.style) for the citations. - #[parse(CslStyle::parse_smart(engine, args)?)] - pub style: Smart, + /// This can be: + /// - `{auto}` to automatically use the + /// [bibliography's style]($bibliography.style) for citations. 
+ /// - A string with the name of one of the built-in styles (see below). Some + /// of the styles listed below appear twice, once with their full name and + /// once with a short alias. + /// - A path string to a [CSL file](https://citationstyles.org/). For more + /// details about paths, see the [Paths section]($syntax/#paths). + /// - Raw bytes from which a CSL style should be decoded. + #[parse(match args.named::>>("style")? { + Some(Spanned { v: Smart::Custom(source), span }) => Some(Smart::Custom( + CslStyle::load(engine.world, Spanned::new(source, span))? + )), + Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto), + None => None, + })] + #[borrowed] + pub style: Smart>, /// The text language setting where the citation is. #[internal] diff --git a/crates/typst-library/src/model/document.rs b/crates/typst-library/src/model/document.rs index 5124b2487..1bce6b357 100644 --- a/crates/typst-library/src/model/document.rs +++ b/crates/typst-library/src/model/document.rs @@ -3,8 +3,8 @@ use ecow::EcoString; use crate::diag::{bail, HintedStrResult, SourceResult}; use crate::engine::Engine; use crate::foundations::{ - cast, elem, Args, Array, Construct, Content, Datetime, Fields, Smart, StyleChain, - Styles, Value, + cast, elem, Args, Array, Construct, Content, Datetime, Fields, OneOrMultiple, Smart, + StyleChain, Styles, Value, }; /// The root element of a document and its metadata. @@ -35,7 +35,7 @@ pub struct DocumentElem { /// The document's authors. #[ghost] - pub author: Author, + pub author: OneOrMultiple, /// The document's description. #[ghost] @@ -43,7 +43,7 @@ pub struct DocumentElem { /// The document's keywords. #[ghost] - pub keywords: Keywords, + pub keywords: OneOrMultiple, /// The document's creation date. /// @@ -93,7 +93,7 @@ cast! { pub struct DocumentInfo { /// The document's title. pub title: Option, - /// The document's author. + /// The document's author(s). pub author: Vec, /// The document's description. 
pub description: Option, diff --git a/crates/typst-library/src/pdf/embed.rs b/crates/typst-library/src/pdf/embed.rs index db4986225..f9ca3ca09 100644 --- a/crates/typst-library/src/pdf/embed.rs +++ b/crates/typst-library/src/pdf/embed.rs @@ -1,13 +1,10 @@ use ecow::EcoString; -use typst_syntax::{Span, Spanned}; +use typst_syntax::Spanned; -use crate::diag::{At, SourceResult, StrResult}; +use crate::diag::{At, SourceResult}; use crate::engine::Engine; -use crate::foundations::{ - elem, func, scope, Cast, Content, NativeElement, Packed, Show, StyleChain, -}; +use crate::foundations::{elem, Bytes, Cast, Content, Derived, Packed, Show, StyleChain}; use crate::introspection::Locatable; -use crate::loading::Readable; use crate::World; /// A file that will be embedded into the output PDF. @@ -33,33 +30,40 @@ use crate::World; /// - This element is ignored if exporting to a format other than PDF. /// - File embeddings are not currently supported for PDF/A-2, even if the /// embedded file conforms to PDF/A-1 or PDF/A-2. -#[elem(scope, Show, Locatable)] +#[elem(Show, Locatable)] pub struct EmbedElem { - /// Path to a file to be embedded. + /// Path of the file to be embedded. /// - /// For more details, see the [Paths section]($syntax/#paths). + /// Must always be specified, but is only read from if no data is provided + /// in the following argument. + /// + /// For more details about paths, see the [Paths section]($syntax/#paths). #[required] #[parse( let Spanned { v: path, span } = - args.expect::>("path to the file to be embedded")?; + args.expect::>("path")?; let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - path + // The derived part is the project-relative resolved path. + let resolved = id.vpath().as_rootless_path().to_string_lossy().replace("\\", "/").into(); + Derived::new(path.clone(), resolved) )] #[borrowed] - pub path: EcoString, + pub path: Derived, - /// The resolved project-relative path. 
- #[internal] + /// Raw file data, optionally. + /// + /// If omitted, the data is read from the specified path. + #[positional] + // Not actually required as an argument, but always present as a field. + // We can't distinguish between the two at the moment. #[required] - #[parse(id.vpath().as_rootless_path().to_string_lossy().replace("\\", "/").into())] - pub resolved_path: EcoString, - - /// The raw file data. - #[internal] - #[required] - #[parse(Readable::Bytes(data))] - pub data: Readable, + #[parse( + match args.find::()? { + Some(data) => data, + None => engine.world.file(id).at(span)?, + } + )] + pub data: Bytes, /// The relationship of the embedded file to the document. /// @@ -75,42 +79,6 @@ pub struct EmbedElem { pub description: Option, } -#[scope] -impl EmbedElem { - /// Decode a file embedding from bytes or a string. - #[func(title = "Embed Data")] - fn decode( - /// The call span of this function. - span: Span, - /// The path that will be written into the PDF. Typst will not read from - /// this path since the data is provided in the following argument. - path: EcoString, - /// The data to embed as a file. - data: Readable, - /// The relationship of the embedded file to the document. - #[named] - relationship: Option>, - /// The MIME type of the embedded file. - #[named] - mime_type: Option>, - /// A description for the embedded file. 
- #[named] - description: Option>, - ) -> StrResult { - let mut elem = EmbedElem::new(path.clone(), path, data); - if let Some(description) = description { - elem.push_description(description); - } - if let Some(mime_type) = mime_type { - elem.push_mime_type(mime_type); - } - if let Some(relationship) = relationship { - elem.push_relationship(relationship); - } - Ok(elem.pack().spanned(span)) - } -} - impl Show for Packed { fn show(&self, _: &mut Engine, _: StyleChain) -> SourceResult { Ok(Content::empty()) diff --git a/crates/typst-library/src/text/raw.rs b/crates/typst-library/src/text/raw.rs index 10a7cfee1..cd718d2a1 100644 --- a/crates/typst-library/src/text/raw.rs +++ b/crates/typst-library/src/text/raw.rs @@ -1,23 +1,25 @@ use std::cell::LazyCell; -use std::hash::Hash; use std::ops::Range; use std::sync::{Arc, LazyLock}; +use comemo::Tracked; use ecow::{eco_format, EcoString, EcoVec}; -use syntect::highlighting::{self as synt, Theme}; +use syntect::highlighting as synt; use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder}; use typst_syntax::{split_newlines, LinkedNode, Span, Spanned}; +use typst_utils::ManuallyHash; use unicode_segmentation::UnicodeSegmentation; use super::Lang; -use crate::diag::{At, FileError, HintedStrResult, SourceResult, StrResult}; +use crate::diag::{At, FileError, SourceResult, StrResult}; use crate::engine::Engine; use crate::foundations::{ - cast, elem, scope, Args, Array, Bytes, Content, Fold, NativeElement, Packed, - PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem, Value, + cast, elem, scope, Bytes, Content, Derived, NativeElement, OneOrMultiple, Packed, + PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem, }; use crate::html::{tag, HtmlElem}; use crate::layout::{BlockBody, BlockElem, Em, HAlignment}; +use crate::loading::{DataSource, Load}; use crate::model::{Figurable, ParElem}; use crate::text::{ FontFamily, FontList, Hyphenate, LinebreakElem, LocalName, 
TextElem, TextSize, @@ -25,12 +27,6 @@ use crate::text::{ use crate::visualize::Color; use crate::World; -// Shorthand for highlighter closures. -type StyleFn<'a> = - &'a mut dyn FnMut(usize, &LinkedNode, Range, synt::Style) -> Content; -type LineFn<'a> = &'a mut dyn FnMut(usize, Range, &mut Vec); -type ThemeArgType = Smart>; - /// Raw text with optional syntax highlighting. /// /// Displays the text verbatim and in a monospace font. This is typically used @@ -186,9 +182,15 @@ pub struct RawElem { #[default(HAlignment::Start)] pub align: HAlignment, - /// One or multiple additional syntax definitions to load. The syntax - /// definitions should be in the - /// [`sublime-syntax` file format](https://www.sublimetext.com/docs/syntax.html). + /// Additional syntax definitions to load. The syntax definitions should be + /// in the [`sublime-syntax` file format](https://www.sublimetext.com/docs/syntax.html). + /// + /// You can pass any of the following values: + /// + /// - A path string to load a syntax file from the given path. For more + /// details about paths, see the [Paths section]($syntax/#paths). + /// - Raw bytes from which the syntax should be decoded. + /// - An array where each item is one of the above. /// /// ````example /// #set raw(syntaxes: "SExpressions.sublime-syntax") @@ -201,22 +203,24 @@ pub struct RawElem { /// (* x (factorial (- x 1))))) /// ``` /// ```` - #[parse( - let (syntaxes, syntaxes_data) = parse_syntaxes(engine, args)?; - syntaxes - )] + #[parse(match args.named("syntaxes")? { + Some(sources) => Some(RawSyntax::load(engine.world, sources)?), + None => None, + })] #[fold] - pub syntaxes: SyntaxPaths, + pub syntaxes: Derived, Vec>, - /// The raw file buffers of syntax definition files. - #[internal] - #[parse(syntaxes_data)] - #[fold] - pub syntaxes_data: Vec, - - /// The theme to use for syntax highlighting. Theme files should be in the + /// The theme to use for syntax highlighting. 
Themes should be in the /// [`tmTheme` file format](https://www.sublimetext.com/docs/color_schemes_tmtheme.html). /// + /// You can pass any of the following values: + /// + /// - `{none}`: Disables syntax highlighting. + /// - `{auto}`: Highlights with Typst's default theme. + /// - A path string to load a theme file from the given path. For more + /// details about paths, see the [Paths section]($syntax/#paths). + /// - Raw bytes from which the theme should be decoded. + /// /// Applying a theme only affects the color of specifically highlighted /// text. It does not consider the theme's foreground and background /// properties, so that you retain control over the color of raw text. You @@ -224,8 +228,6 @@ pub struct RawElem { /// the background with a [filled block]($block.fill). You could also use /// the [`xml`] function to extract these properties from the theme. /// - /// Additionally, you can set the theme to `{none}` to disable highlighting. - /// /// ````example /// #set raw(theme: "halcyon.tmTheme") /// #show raw: it => block( @@ -240,18 +242,16 @@ pub struct RawElem { /// #let hi = "Hello World" /// ``` /// ```` - #[parse( - let (theme_path, theme_data) = parse_theme(engine, args)?; - theme_path - )] + #[parse(match args.named::>>>("theme")? { + Some(Spanned { v: Smart::Custom(Some(source)), span }) => Some(Smart::Custom( + Some(RawTheme::load(engine.world, Spanned::new(source, span))?) + )), + Some(Spanned { v: Smart::Custom(None), .. }) => Some(Smart::Custom(None)), + Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto), + None => None, + })] #[borrowed] - pub theme: ThemeArgType, - - /// The raw file buffer of syntax theme file. - #[internal] - #[parse(theme_data.map(Some))] - #[borrowed] - pub theme_data: Option, + pub theme: Smart>>, /// The size for a tab stop in spaces. A tab is replaced with enough spaces to /// align with the next multiple of the size. 
@@ -325,9 +325,6 @@ impl Packed { .map(|s| s.to_lowercase()) .or(Some("txt".into())); - let extra_syntaxes = LazyCell::new(|| { - load_syntaxes(&elem.syntaxes(styles), &elem.syntaxes_data(styles)).unwrap() - }); let non_highlighted_result = |lines: EcoVec<(EcoString, Span)>| { lines.into_iter().enumerate().map(|(i, (line, line_span))| { Packed::new(RawLine::new( @@ -340,17 +337,13 @@ impl Packed { }) }; - let theme = elem.theme(styles).as_ref().as_ref().map(|theme_path| { - theme_path.as_ref().map(|path| { - load_theme(path, elem.theme_data(styles).as_ref().as_ref().unwrap()) - .unwrap() - }) - }); - let theme: &Theme = match theme { + let syntaxes = LazyCell::new(|| elem.syntaxes(styles)); + let theme: &synt::Theme = match elem.theme(styles) { Smart::Auto => &RAW_THEME, - Smart::Custom(Some(ref theme)) => theme, + Smart::Custom(Some(theme)) => theme.derived.get(), Smart::Custom(None) => return non_highlighted_result(lines).collect(), }; + let foreground = theme.settings.foreground.unwrap_or(synt::Color::BLACK); let mut seq = vec![]; @@ -391,13 +384,14 @@ impl Packed { ) .highlight(); } else if let Some((syntax_set, syntax)) = lang.and_then(|token| { - RAW_SYNTAXES - .find_syntax_by_token(&token) - .map(|syntax| (&*RAW_SYNTAXES, syntax)) - .or_else(|| { - extra_syntaxes - .find_syntax_by_token(&token) - .map(|syntax| (&**extra_syntaxes, syntax)) + // Prefer user-provided syntaxes over built-in ones. + syntaxes + .derived + .iter() + .map(|syntax| syntax.get()) + .chain(std::iter::once(&*RAW_SYNTAXES)) + .find_map(|set| { + set.find_syntax_by_token(&token).map(|syntax| (set, syntax)) }) }) { let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme); @@ -532,6 +526,89 @@ cast! { v: EcoString => Self::Text(v), } +/// A loaded syntax. +#[derive(Debug, Clone, PartialEq, Hash)] +pub struct RawSyntax(Arc>); + +impl RawSyntax { + /// Load syntaxes from sources. 
+ fn load( + world: Tracked, + sources: Spanned>, + ) -> SourceResult, Vec>> { + let data = sources.load(world)?; + let list = sources + .v + .0 + .iter() + .zip(&data) + .map(|(source, data)| Self::decode(source, data)) + .collect::>() + .at(sources.span)?; + Ok(Derived::new(sources.v, list)) + } + + /// Decode a syntax from a loaded source. + #[comemo::memoize] + #[typst_macros::time(name = "load syntaxes")] + fn decode(source: &DataSource, data: &Bytes) -> StrResult { + let src = data.as_str().map_err(FileError::from)?; + let syntax = SyntaxDefinition::load_from_str(src, false, None).map_err( + |err| match source { + DataSource::Path(path) => { + eco_format!("failed to parse syntax file `{path}` ({err})") + } + DataSource::Bytes(_) => { + eco_format!("failed to parse syntax ({err})") + } + }, + )?; + + let mut builder = SyntaxSetBuilder::new(); + builder.add(syntax); + + Ok(RawSyntax(Arc::new(ManuallyHash::new( + builder.build(), + typst_utils::hash128(data), + )))) + } + + /// Return the underlying syntax set. + fn get(&self) -> &SyntaxSet { + self.0.as_ref() + } +} + +/// A loaded syntect theme. +#[derive(Debug, Clone, PartialEq, Hash)] +pub struct RawTheme(Arc>); + +impl RawTheme { + /// Load a theme from a data source. + fn load( + world: Tracked, + source: Spanned, + ) -> SourceResult> { + let data = source.load(world)?; + let theme = Self::decode(&data).at(source.span)?; + Ok(Derived::new(source.v, theme)) + } + + /// Decode a theme from bytes. + #[comemo::memoize] + fn decode(data: &Bytes) -> StrResult { + let mut cursor = std::io::Cursor::new(data.as_slice()); + let theme = synt::ThemeSet::load_from_reader(&mut cursor) + .map_err(|err| eco_format!("failed to parse theme ({err})"))?; + Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(data))))) + } + + /// Get the underlying syntect theme. + pub fn get(&self) -> &synt::Theme { + self.0.as_ref() + } +} + /// A highlighted line of raw text. 
/// /// This is a helper element that is synthesized by [`raw`] elements. @@ -593,6 +670,11 @@ struct ThemedHighlighter<'a> { line_fn: LineFn<'a>, } +// Shorthands for highlighter closures. +type StyleFn<'a> = + &'a mut dyn FnMut(usize, &LinkedNode, Range, synt::Style) -> Content; +type LineFn<'a> = &'a mut dyn FnMut(usize, Range, &mut Vec); + impl<'a> ThemedHighlighter<'a> { pub fn new( code: &'a str, @@ -738,108 +820,50 @@ fn to_syn(color: Color) -> synt::Color { synt::Color { r, g, b, a } } -/// A list of raw syntax file paths. -#[derive(Debug, Default, Clone, PartialEq, Hash)] -pub struct SyntaxPaths(Vec); - -cast! { - SyntaxPaths, - self => self.0.into_value(), - v: EcoString => Self(vec![v]), - v: Array => Self(v.into_iter().map(Value::cast).collect::>()?), -} - -impl Fold for SyntaxPaths { - fn fold(self, outer: Self) -> Self { - Self(self.0.fold(outer.0)) +/// Create a syntect theme item. +fn item( + scope: &str, + color: Option<&str>, + font_style: Option, +) -> synt::ThemeItem { + synt::ThemeItem { + scope: scope.parse().unwrap(), + style: synt::StyleModifier { + foreground: color.map(|s| to_syn(s.parse::().unwrap())), + background: None, + font_style, + }, } } -/// Load a syntax set from a list of syntax file paths. -#[comemo::memoize] -#[typst_macros::time(name = "load syntaxes")] -fn load_syntaxes(paths: &SyntaxPaths, bytes: &[Bytes]) -> StrResult> { - let mut out = SyntaxSetBuilder::new(); +/// Replace tabs with spaces to align with multiples of `tab_size`. 
+fn align_tabs(text: &str, tab_size: usize) -> EcoString { + let replacement = " ".repeat(tab_size); + let divisor = tab_size.max(1); + let amount = text.chars().filter(|&c| c == '\t').count(); - // We might have multiple sublime-syntax/yaml files - for (path, bytes) in paths.0.iter().zip(bytes.iter()) { - let src = std::str::from_utf8(bytes).map_err(FileError::from)?; - out.add(SyntaxDefinition::load_from_str(src, false, None).map_err(|err| { - eco_format!("failed to parse syntax file `{path}` ({err})") - })?); + let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size); + let mut column = 0; + + for grapheme in text.graphemes(true) { + match grapheme { + "\t" => { + let required = tab_size - column % divisor; + res.push_str(&replacement[..required]); + column += required; + } + "\n" => { + res.push_str(grapheme); + column = 0; + } + _ => { + res.push_str(grapheme); + column += 1; + } + } } - Ok(Arc::new(out.build())) -} - -/// Function to parse the syntaxes argument. -/// Much nicer than having it be part of the `element` macro. -fn parse_syntaxes( - engine: &mut Engine, - args: &mut Args, -) -> SourceResult<(Option, Option>)> { - let Some(Spanned { v: paths, span }) = - args.named::>("syntaxes")? - else { - return Ok((None, None)); - }; - - // Load syntax files. - let data = paths - .0 - .iter() - .map(|path| { - let id = span.resolve_path(path).at(span)?; - engine.world.file(id).at(span) - }) - .collect::>>()?; - - // Check that parsing works. - let _ = load_syntaxes(&paths, &data).at(span)?; - - Ok((Some(paths), Some(data))) -} - -#[comemo::memoize] -#[typst_macros::time(name = "load theme")] -fn load_theme(path: &str, bytes: &Bytes) -> StrResult> { - let mut cursor = std::io::Cursor::new(bytes.as_slice()); - - synt::ThemeSet::load_from_reader(&mut cursor) - .map(Arc::new) - .map_err(|err| eco_format!("failed to parse theme file `{path}` ({err})")) -} - -/// Function to parse the theme argument. 
-/// Much nicer than having it be part of the `element` macro. -fn parse_theme( - engine: &mut Engine, - args: &mut Args, -) -> SourceResult<(Option, Option)> { - let Some(Spanned { v: path, span }) = args.named::>("theme")? - else { - // Argument `theme` not found. - return Ok((None, None)); - }; - - let Smart::Custom(path) = path else { - // Argument `theme` is `auto`. - return Ok((Some(Smart::Auto), None)); - }; - - let Some(path) = path else { - // Argument `theme` is `none`. - return Ok((Some(Smart::Custom(None)), None)); - }; - - // Load theme file. - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - - // Check that parsing works. - let _ = load_theme(&path, &data).at(span)?; - - Ok((Some(Smart::Custom(Some(path))), Some(data))) + res } /// The syntect syntax definitions. @@ -886,49 +910,3 @@ pub static RAW_THEME: LazyLock = LazyLock::new(|| synt::Theme { item("markup.deleted, meta.diff.header.from-file", Some("#d73a49"), None), ], }); - -/// Create a syntect theme item. -fn item( - scope: &str, - color: Option<&str>, - font_style: Option, -) -> synt::ThemeItem { - synt::ThemeItem { - scope: scope.parse().unwrap(), - style: synt::StyleModifier { - foreground: color.map(|s| to_syn(s.parse::().unwrap())), - background: None, - font_style, - }, - } -} - -/// Replace tabs with spaces to align with multiples of `tab_size`. 
-fn align_tabs(text: &str, tab_size: usize) -> EcoString { - let replacement = " ".repeat(tab_size); - let divisor = tab_size.max(1); - let amount = text.chars().filter(|&c| c == '\t').count(); - - let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size); - let mut column = 0; - - for grapheme in text.graphemes(true) { - match grapheme { - "\t" => { - let required = tab_size - column % divisor; - res.push_str(&replacement[..required]); - column += required; - } - "\n" => { - res.push_str(grapheme); - column = 0; - } - _ => { - res.push_str(grapheme); - column += 1; - } - } - } - - res -} diff --git a/crates/typst-library/src/visualize/image/mod.rs b/crates/typst-library/src/visualize/image/mod.rs index 452bb65c1..0f0602011 100644 --- a/crates/typst-library/src/visualize/image/mod.rs +++ b/crates/typst-library/src/visualize/image/mod.rs @@ -14,14 +14,14 @@ use ecow::EcoString; use typst_syntax::{Span, Spanned}; use typst_utils::LazyHash; -use crate::diag::{At, SourceResult, StrResult}; +use crate::diag::{SourceResult, StrResult}; use crate::engine::Engine; use crate::foundations::{ - cast, elem, func, scope, Bytes, Cast, Content, NativeElement, Packed, Show, Smart, - StyleChain, + cast, elem, func, scope, Bytes, Cast, Content, Derived, NativeElement, Packed, Show, + Smart, StyleChain, }; use crate::layout::{BlockElem, Length, Rel, Sizing}; -use crate::loading::Readable; +use crate::loading::{DataSource, Load, Readable}; use crate::model::Figurable; use crate::text::LocalName; use crate::World; @@ -46,25 +46,16 @@ use crate::World; /// ``` #[elem(scope, Show, LocalName, Figurable)] pub struct ImageElem { - /// Path to an image file. + /// A path to an image file or raw bytes making up an encoded image. /// - /// For more details, see the [Paths section]($syntax/#paths). + /// For more details about paths, see the [Paths section]($syntax/#paths). 
#[required] #[parse( - let Spanned { v: path, span } = - args.expect::>("path to image file")?; - let id = span.resolve_path(&path).at(span)?; - let data = engine.world.file(id).at(span)?; - path + let source = args.expect::>("source")?; + let data = source.load(engine.world)?; + Derived::new(source.v, data) )] - #[borrowed] - pub path: EcoString, - - /// The raw file data. - #[internal] - #[required] - #[parse(Readable::Bytes(data))] - pub data: Readable, + pub source: Derived, /// The image's format. Detected automatically by default. /// @@ -106,6 +97,9 @@ pub struct ImageElem { impl ImageElem { /// Decode a raster or vector graphic from bytes or a string. /// + /// This function is deprecated. The [`image`] function now accepts bytes + /// directly. + /// /// ```example /// #let original = read("diagram.svg") /// #let changed = original.replace( @@ -138,7 +132,9 @@ impl ImageElem { #[named] fit: Option, ) -> StrResult { - let mut elem = ImageElem::new(EcoString::new(), data); + let bytes = data.into_bytes(); + let source = Derived::new(DataSource::Bytes(bytes.clone()), bytes); + let mut elem = ImageElem::new(source); if let Some(format) = format { elem.push_format(format); } @@ -337,6 +333,22 @@ pub enum ImageFormat { Vector(VectorFormat), } +impl ImageFormat { + /// Try to detect the format of an image from data. + pub fn detect(data: &[u8]) -> Option { + if let Some(format) = RasterFormat::detect(data) { + return Some(Self::Raster(format)); + } + + // SVG or compressed SVG. 
+ if data.starts_with(b"().unwrap(); - if embed.resolved_path.len() > Str::PDFA_LIMIT { + if embed.path.derived.len() > Str::PDFA_LIMIT { bail!(embed.span(), "embedded file path is too long"); } let id = embed_file(ctx, &mut chunk, embed)?; - if embedded_files.insert(embed.resolved_path.clone(), id).is_some() { + if embedded_files.insert(embed.path.derived.clone(), id).is_some() { bail!( elem.span(), - "duplicate embedded file for path `{}`", embed.resolved_path; + "duplicate embedded file for path `{}`", embed.path.derived; hint: "embedded file paths must be unique", ); } @@ -92,8 +92,8 @@ fn embed_file( embedded_file.finish(); let mut file_spec = chunk.file_spec(file_spec_dict_ref); - file_spec.path(Str(embed.resolved_path.as_bytes())); - file_spec.unic_file(TextStr(&embed.resolved_path)); + file_spec.path(Str(embed.path.derived.as_bytes())); + file_spec.unic_file(TextStr(&embed.path.derived)); file_spec .insert(Name(b"EF")) .dict() diff --git a/crates/typst-utils/src/hash.rs b/crates/typst-utils/src/hash.rs index 3dbadbe20..9687da20b 100644 --- a/crates/typst-utils/src/hash.rs +++ b/crates/typst-utils/src/hash.rs @@ -162,3 +162,74 @@ impl Debug for LazyHash { self.value.fmt(f) } } + +/// A wrapper type with a manually computed hash. +/// +/// This can be used to turn an unhashable type into a hashable one where the +/// hash is provided manually. Typically, the hash is derived from the data +/// which was used to construct to the unhashable type. +/// +/// For instance, you could hash the bytes that were parsed into an unhashable +/// data structure. +/// +/// # Equality +/// Because Typst uses high-quality 128 bit hashes in all places, the risk of a +/// hash collision is reduced to an absolute minimum. Therefore, this type +/// additionally provides `PartialEq` and `Eq` implementations that compare by +/// hash instead of by value. 
For this to be correct, your hash implementation +/// **must feed all information relevant to the `PartialEq` impl to the +/// hasher.** +#[derive(Clone)] +pub struct ManuallyHash { + /// A manually computed hash. + hash: u128, + /// The underlying value. + value: T, +} + +impl ManuallyHash { + /// Wraps an item with a pre-computed hash. + /// + /// The hash should be computed with `typst_utils::hash128`. + #[inline] + pub fn new(value: T, hash: u128) -> Self { + Self { hash, value } + } + + /// Returns the wrapped value. + #[inline] + pub fn into_inner(self) -> T { + self.value + } +} + +impl Hash for ManuallyHash { + #[inline] + fn hash(&self, state: &mut H) { + state.write_u128(self.hash); + } +} + +impl Eq for ManuallyHash {} + +impl PartialEq for ManuallyHash { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash + } +} + +impl Deref for ManuallyHash { + type Target = T; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.value + } +} + +impl Debug for ManuallyHash { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.value.fmt(f) + } +} diff --git a/crates/typst-utils/src/lib.rs b/crates/typst-utils/src/lib.rs index 61703250a..d392e4093 100644 --- a/crates/typst-utils/src/lib.rs +++ b/crates/typst-utils/src/lib.rs @@ -15,7 +15,7 @@ mod scalar; pub use self::bitset::{BitSet, SmallBitSet}; pub use self::deferred::Deferred; pub use self::duration::format_duration; -pub use self::hash::LazyHash; +pub use self::hash::{LazyHash, ManuallyHash}; pub use self::pico::{PicoStr, ResolvedPicoStr}; pub use self::round::{round_int_with_precision, round_with_precision}; pub use self::scalar::Scalar; diff --git a/tests/suite/pdf/embed.typ b/tests/suite/pdf/embed.typ index bb5c9316c..83f006d63 100644 --- a/tests/suite/pdf/embed.typ +++ b/tests/suite/pdf/embed.typ @@ -10,6 +10,16 @@ description: "Information about a secret project", ) +--- pdf-embed-bytes --- +#pdf.embed("hello.txt", read("/assets/text/hello.txt", encoding: none)) 
+#pdf.embed( + "a_file_name.txt", + read("/assets/text/hello.txt", encoding: none), + relationship: "supplement", + mime-type: "text/plain", + description: "A description", +) + --- pdf-embed-invalid-relationship --- #pdf.embed( "/assets/text/hello.txt", @@ -18,13 +28,3 @@ mime-type: "text/plain", description: "A test file", ) - ---- pdf-embed-decode --- -#pdf.embed.decode("hello.txt", read("/assets/text/hello.txt")) -#pdf.embed.decode( - "a_file_name.txt", - read("/assets/text/hello.txt"), - relationship: "supplement", - mime-type: "text/plain", - description: "A description", -)