Revamp data loading and deprecate decode functions (#5671)

Laurenz 2025-01-09 10:34:16 +01:00 committed by GitHub
parent dacd6acd5e
commit e2b37fef33
28 changed files with 1000 additions and 722 deletions

View File

@ -211,7 +211,7 @@ fn resolve_package(
// Evaluate the manifest.
let manifest_id = FileId::new(Some(spec.clone()), VirtualPath::new("typst.toml"));
let bytes = engine.world.file(manifest_id).at(span)?;
let string = std::str::from_utf8(&bytes).map_err(FileError::from).at(span)?;
let string = bytes.as_str().map_err(FileError::from).at(span)?;
let manifest: PackageManifest = toml::from_str(string)
.map_err(|err| eco_format!("package manifest is malformed ({})", err.message()))
.at(span)?;

View File

@ -817,19 +817,8 @@ fn param_value_completions<'a>(
) {
if param.name == "font" {
ctx.font_completions();
} else if param.name == "path" {
ctx.file_completions_with_extensions(match func.name() {
Some("image") => &["png", "jpg", "jpeg", "gif", "svg", "svgz"],
Some("csv") => &["csv"],
Some("plugin") => &["wasm"],
Some("cbor") => &["cbor"],
Some("json") => &["json"],
Some("toml") => &["toml"],
Some("xml") => &["xml"],
Some("yaml") => &["yml", "yaml"],
Some("bibliography") => &["bib", "yml", "yaml"],
_ => &[],
});
} else if let Some(extensions) = path_completion(func, param) {
ctx.file_completions_with_extensions(extensions);
} else if func.name() == Some("figure") && param.name == "body" {
ctx.snippet_completion("image", "image(\"${}\"),", "An image in a figure.");
ctx.snippet_completion("table", "table(\n ${}\n),", "A table in a figure.");
@ -838,6 +827,28 @@ fn param_value_completions<'a>(
ctx.cast_completions(&param.input);
}
/// Returns which file extensions to complete for the given parameter if any.
fn path_completion(func: &Func, param: &ParamInfo) -> Option<&'static [&'static str]> {
Some(match (func.name(), param.name) {
(Some("image"), "source") => &["png", "jpg", "jpeg", "gif", "svg", "svgz"],
(Some("csv"), "source") => &["csv"],
(Some("plugin"), "source") => &["wasm"],
(Some("cbor"), "source") => &["cbor"],
(Some("json"), "source") => &["json"],
(Some("toml"), "source") => &["toml"],
(Some("xml"), "source") => &["xml"],
(Some("yaml"), "source") => &["yml", "yaml"],
(Some("bibliography"), "sources") => &["bib", "yml", "yaml"],
(Some("bibliography"), "style") => &["csl"],
(Some("cite"), "style") => &["csl"],
(Some("raw"), "syntaxes") => &["sublime-syntax"],
(Some("raw"), "theme") => &["tmtheme"],
(Some("embed"), "path") => &[],
(None, "path") => &[],
_ => return None,
})
}
/// Resolve a callee expression to a global function.
fn resolve_global_callee<'a>(
ctx: &CompletionContext<'a>,

View File

@ -1,13 +1,13 @@
use std::ffi::OsStr;
use typst_library::diag::{bail, warning, At, SourceResult, StrResult};
use typst_library::diag::{warning, At, SourceResult, StrResult};
use typst_library::engine::Engine;
use typst_library::foundations::{Packed, Smart, StyleChain};
use typst_library::foundations::{Bytes, Derived, Packed, Smart, StyleChain};
use typst_library::introspection::Locator;
use typst_library::layout::{
Abs, Axes, FixedAlignment, Frame, FrameItem, Point, Region, Size,
};
use typst_library::loading::Readable;
use typst_library::loading::DataSource;
use typst_library::text::families;
use typst_library::visualize::{
Curve, Image, ImageElem, ImageFit, ImageFormat, RasterFormat, VectorFormat,
@ -26,17 +26,17 @@ pub fn layout_image(
// Take the format that was explicitly defined, or parse the extension,
// or try to detect the format.
let data = elem.data();
let Derived { source, derived: data } = &elem.source;
let format = match elem.format(styles) {
Smart::Custom(v) => v,
Smart::Auto => determine_format(elem.path().as_str(), data).at(span)?,
Smart::Auto => determine_format(source, data).at(span)?,
};
// Warn the user if the image contains a foreign object. Not perfect
// because the svg could also be encoded, but that's an edge case.
if format == ImageFormat::Vector(VectorFormat::Svg) {
let has_foreign_object =
data.as_str().is_some_and(|s| s.contains("<foreignObject"));
data.as_str().is_ok_and(|s| s.contains("<foreignObject"));
if has_foreign_object {
engine.sink.warn(warning!(
@ -50,7 +50,7 @@ pub fn layout_image(
// Construct the image itself.
let image = Image::with_fonts(
data.clone().into_bytes(),
data.clone(),
format,
elem.alt(styles),
engine.world,
@ -119,25 +119,23 @@ pub fn layout_image(
Ok(frame)
}
/// Determine the image format based on path and data.
fn determine_format(path: &str, data: &Readable) -> StrResult<ImageFormat> {
let ext = std::path::Path::new(path)
.extension()
.and_then(OsStr::to_str)
.unwrap_or_default()
.to_lowercase();
/// Try to determine the image format based on the data.
fn determine_format(source: &DataSource, data: &Bytes) -> StrResult<ImageFormat> {
if let DataSource::Path(path) = source {
let ext = std::path::Path::new(path.as_str())
.extension()
.and_then(OsStr::to_str)
.unwrap_or_default()
.to_lowercase();
Ok(match ext.as_str() {
"png" => ImageFormat::Raster(RasterFormat::Png),
"jpg" | "jpeg" => ImageFormat::Raster(RasterFormat::Jpg),
"gif" => ImageFormat::Raster(RasterFormat::Gif),
"svg" | "svgz" => ImageFormat::Vector(VectorFormat::Svg),
_ => match &data {
Readable::Str(_) => ImageFormat::Vector(VectorFormat::Svg),
Readable::Bytes(bytes) => match RasterFormat::detect(bytes) {
Some(f) => ImageFormat::Raster(f),
None => bail!("unknown image format"),
},
},
})
match ext.as_str() {
"png" => return Ok(ImageFormat::Raster(RasterFormat::Png)),
"jpg" | "jpeg" => return Ok(ImageFormat::Raster(RasterFormat::Jpg)),
"gif" => return Ok(ImageFormat::Raster(RasterFormat::Gif)),
"svg" | "svgz" => return Ok(ImageFormat::Vector(VectorFormat::Svg)),
_ => {}
}
}
Ok(ImageFormat::detect(data).ok_or("unknown image format")?)
}
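
The new fallback chain — explicit `format` argument, then the file extension (when a path was given), then content sniffing via `ImageFormat::detect` — can be sketched standalone like this. The `sniff_format` helper and its return values are illustrative only, not typst-library API; only the PNG/JPEG/GIF magic numbers are standard.

```rust
// Standalone sketch of the detection order above: extension first, then
// magic-byte sniffing as a simplified stand-in for `ImageFormat::detect`.
fn sniff_format(path: Option<&str>, data: &[u8]) -> Option<&'static str> {
    if let Some(path) = path {
        let ext = std::path::Path::new(path)
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or_default()
            .to_lowercase();
        match ext.as_str() {
            "png" => return Some("png"),
            "jpg" | "jpeg" => return Some("jpg"),
            "gif" => return Some("gif"),
            "svg" | "svgz" => return Some("svg"),
            _ => {}
        }
    }
    // No path or unknown extension: fall back to sniffing the bytes.
    if data.starts_with(b"\x89PNG\r\n\x1a\n") {
        Some("png")
    } else if data.starts_with(&[0xFF, 0xD8, 0xFF]) {
        Some("jpg")
    } else if data.starts_with(b"GIF87a") || data.starts_with(b"GIF89a") {
        Some("gif")
    } else {
        None
    }
}

fn main() {
    assert_eq!(sniff_format(Some("tiger.svg"), b""), Some("svg"));
    assert_eq!(sniff_format(None, &[0xFF, 0xD8, 0xFF, 0xE0]), Some("jpg"));
}
```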

View File

@ -1124,6 +1124,53 @@ impl<T: FromValue, const N: usize> FromValue for SmallVec<[T; N]> {
}
}
/// One element, or multiple provided as an array.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct OneOrMultiple<T>(pub Vec<T>);
impl<T: Reflect> Reflect for OneOrMultiple<T> {
fn input() -> CastInfo {
T::input() + Array::input()
}
fn output() -> CastInfo {
T::output() + Array::output()
}
fn castable(value: &Value) -> bool {
Array::castable(value) || T::castable(value)
}
}
impl<T: IntoValue + Clone> IntoValue for OneOrMultiple<T> {
fn into_value(self) -> Value {
self.0.into_value()
}
}
impl<T: FromValue> FromValue for OneOrMultiple<T> {
fn from_value(value: Value) -> HintedStrResult<Self> {
if T::castable(&value) {
return Ok(Self(vec![T::from_value(value)?]));
}
if Array::castable(&value) {
return Ok(Self(
Array::from_value(value)?
.into_iter()
.map(|value| T::from_value(value))
.collect::<HintedStrResult<_>>()?,
));
}
Err(Self::error(&value))
}
}
impl<T> Default for OneOrMultiple<T> {
fn default() -> Self {
Self(vec![])
}
}
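
A rough standalone model of this cast (a toy `Value` enum rather than typst's real `Reflect`/`FromValue` machinery) shows how a single value and an array both end up as a vector:

```rust
// Toy model of the `OneOrMultiple` cast above: a lone value becomes a
// one-element vector, an array is collected element-wise.
#[derive(Debug)]
enum Value {
    Str(String),
    Array(Vec<Value>),
}

#[derive(Debug, Default)]
struct OneOrMultiple(Vec<String>);

fn cast(value: Value) -> Result<OneOrMultiple, &'static str> {
    match value {
        Value::Str(s) => Ok(OneOrMultiple(vec![s])),
        Value::Array(items) => items
            .into_iter()
            .map(|item| match item {
                Value::Str(s) => Ok(s),
                Value::Array(_) => Err("expected string, found array"),
            })
            .collect::<Result<Vec<_>, _>>()
            .map(OneOrMultiple),
    }
}

fn main() {
    // Mirrors how e.g. `bibliography("a.bib")` and
    // `bibliography(("a.bib", "b.yml"))` both yield a list of sources.
    println!("{:?}", cast(Value::Str("a.bib".into())));
    println!("{:?}", cast(Value::Array(vec![
        Value::Str("a.bib".into()),
        Value::Str("b.yml".into()),
    ])));
}
```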
/// The error message when the array is empty.
#[cold]
fn array_is_empty() -> EcoString {

View File

@ -2,6 +2,7 @@ use std::any::Any;
use std::fmt::{self, Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::{Add, AddAssign, Deref};
use std::str::Utf8Error;
use std::sync::Arc;
use ecow::{eco_format, EcoString};
@ -80,16 +81,37 @@ impl Bytes {
self.as_slice().is_empty()
}
/// Return a view into the buffer.
/// Return a view into the bytes.
pub fn as_slice(&self) -> &[u8] {
self
}
/// Return a copy of the buffer as a vector.
/// Try to view the bytes as a UTF-8 string.
///
/// If these bytes were created via `Bytes::from_string`, UTF-8 validation
/// is skipped.
pub fn as_str(&self) -> Result<&str, Utf8Error> {
self.inner().as_str()
}
/// Return a copy of the bytes as a vector.
pub fn to_vec(&self) -> Vec<u8> {
self.as_slice().to_vec()
}
/// Try to turn the bytes into a `Str`.
///
/// - If these bytes were created via `Bytes::from_string::<Str>`, the
/// string is cloned directly.
/// - If these bytes were created via `Bytes::from_string`, but from a
/// different type of string, UTF-8 validation is still skipped.
pub fn to_str(&self) -> Result<Str, Utf8Error> {
match self.inner().as_any().downcast_ref::<Str>() {
Some(string) => Ok(string.clone()),
None => self.as_str().map(Into::into),
}
}
/// Resolve an index or throw an out of bounds error.
fn locate(&self, index: i64) -> StrResult<usize> {
self.locate_opt(index).ok_or_else(|| out_of_bounds(index, self.len()))
@ -104,6 +126,11 @@ impl Bytes {
if index >= 0 { Some(index) } else { (len as i64).checked_add(index) };
wrapped.and_then(|v| usize::try_from(v).ok()).filter(|&v| v <= len)
}
/// Access the inner `dyn Bytelike`.
fn inner(&self) -> &dyn Bytelike {
&**self.0
}
}
#[scope]
@ -203,7 +230,7 @@ impl Deref for Bytes {
type Target = [u8];
fn deref(&self) -> &Self::Target {
self.0.as_bytes()
self.inner().as_bytes()
}
}
@ -262,6 +289,8 @@ impl Serialize for Bytes {
/// Any type that can back a byte buffer.
trait Bytelike: Send + Sync {
fn as_bytes(&self) -> &[u8];
fn as_str(&self) -> Result<&str, Utf8Error>;
fn as_any(&self) -> &dyn Any;
fn as_any_mut(&mut self) -> &mut dyn Any;
}
@ -273,6 +302,14 @@ where
self.as_ref()
}
fn as_str(&self) -> Result<&str, Utf8Error> {
std::str::from_utf8(self.as_ref())
}
fn as_any(&self) -> &dyn Any {
self
}
fn as_any_mut(&mut self) -> &mut dyn Any {
self
}
@ -295,6 +332,14 @@ where
self.0.as_ref().as_bytes()
}
fn as_str(&self) -> Result<&str, Utf8Error> {
Ok(self.0.as_ref())
}
fn as_any(&self) -> &dyn Any {
self
}
fn as_any_mut(&mut self) -> &mut dyn Any {
self
}

View File

@ -13,7 +13,9 @@ use typst_syntax::{Span, Spanned};
use unicode_math_class::MathClass;
use crate::diag::{At, HintedStrResult, HintedString, SourceResult, StrResult};
use crate::foundations::{array, repr, NativeElement, Packed, Repr, Str, Type, Value};
use crate::foundations::{
array, repr, Fold, NativeElement, Packed, Repr, Str, Type, Value,
};
/// Determine details of a type.
///
@ -497,3 +499,58 @@ cast! {
/// An operator that can be both unary or binary like `+`.
"vary" => MathClass::Vary,
}
/// A type that contains a user-visible source portion and something that is
/// derived from it, but not user-visible.
///
/// An example usage would be `source` being a `DataSource` and `derived` a
/// TextMate theme parsed from it. With `Derived`, we can store both parts in
/// the `RawElem::theme` field and get automatic nice `Reflect` and `IntoValue`
/// impls.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Derived<S, D> {
/// The source portion.
pub source: S,
/// The derived portion.
pub derived: D,
}
impl<S, D> Derived<S, D> {
/// Create a new instance from the `source` and the `derived` data.
pub fn new(source: S, derived: D) -> Self {
Self { source, derived }
}
}
impl<S: Reflect, D> Reflect for Derived<S, D> {
fn input() -> CastInfo {
S::input()
}
fn output() -> CastInfo {
S::output()
}
fn castable(value: &Value) -> bool {
S::castable(value)
}
fn error(found: &Value) -> HintedString {
S::error(found)
}
}
impl<S: IntoValue, D> IntoValue for Derived<S, D> {
fn into_value(self) -> Value {
self.source.into_value()
}
}
impl<S: Fold, D: Fold> Fold for Derived<S, D> {
fn fold(self, outer: Self) -> Self {
Self {
source: self.source.fold(outer.source),
derived: self.derived.fold(outer.derived),
}
}
}
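
For illustration, a minimal standalone sketch of the pattern (hypothetical values; the real impls above additionally wire up `Reflect`, `IntoValue`, and `Fold` so that only the source half is user-visible):

```rust
// Minimal sketch of `Derived`: pair a user-visible source with a value
// derived from it, e.g. a theme path and its parsed form.
#[derive(Debug, Clone)]
struct Derived<S, D> {
    source: S,
    derived: D,
}

impl<S, D> Derived<S, D> {
    fn new(source: S, derived: D) -> Self {
        Self { source, derived }
    }
}

fn main() {
    // Hypothetical example: the "parsed theme" is just a list of scope names.
    let theme = Derived::new(
        String::from("halcyon.tmTheme"),
        vec!["keyword", "string", "comment"],
    );
    println!("set via {:?}, {} scopes loaded", theme.source, theme.derived.len());
}
```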

View File

@ -9,7 +9,7 @@ use wasmi::{AsContext, AsContextMut};
use crate::diag::{bail, At, SourceResult, StrResult};
use crate::engine::Engine;
use crate::foundations::{func, repr, scope, ty, Bytes};
use crate::World;
use crate::loading::{DataSource, Load};
/// A WebAssembly plugin.
///
@ -154,15 +154,13 @@ impl Plugin {
pub fn construct(
/// The engine.
engine: &mut Engine,
/// Path to a WebAssembly file.
/// A path to a WebAssembly file or raw WebAssembly bytes.
///
/// For more details, see the [Paths section]($syntax/#paths).
path: Spanned<EcoString>,
/// For more details about paths, see the [Paths section]($syntax/#paths).
source: Spanned<DataSource>,
) -> SourceResult<Plugin> {
let Spanned { v: path, span } = path;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
Plugin::new(data).at(span)
let data = source.load(engine.world)?;
Plugin::new(data).at(source.span)
}
}

View File

@ -784,11 +784,7 @@ cast! {
v: f64 => Self::Str(repr::display_float(v).into()),
v: Decimal => Self::Str(format_str!("{}", v)),
v: Version => Self::Str(format_str!("{}", v)),
v: Bytes => Self::Str(
std::str::from_utf8(&v)
.map_err(|_| "bytes are not valid utf-8")?
.into()
),
v: Bytes => Self::Str(v.to_str().map_err(|_| "bytes are not valid utf-8")?),
v: Label => Self::Str(v.resolve().as_str().into()),
v: Type => Self::Str(v.long_name().into()),
v: Str => Self::Str(v),

View File

@ -12,7 +12,8 @@ use typst_utils::LazyHash;
use crate::diag::{SourceResult, Trace, Tracepoint};
use crate::engine::Engine;
use crate::foundations::{
cast, ty, Content, Context, Element, Func, NativeElement, Repr, Selector,
cast, ty, Content, Context, Element, Func, NativeElement, OneOrMultiple, Repr,
Selector,
};
use crate::text::{FontFamily, FontList, TextElem};
@ -939,6 +940,13 @@ impl<T, const N: usize> Fold for SmallVec<[T; N]> {
}
}
impl<T> Fold for OneOrMultiple<T> {
fn fold(self, mut outer: Self) -> Self {
outer.0.extend(self.0);
outer
}
}
/// A variant of fold for foldable optional (`Option<T>`) values where an inner
/// `None` value isn't respected (contrary to `Option`'s usual `Fold`
/// implementation, with which folding with an inner `None` always returns

View File

@ -1,10 +1,10 @@
use ecow::{eco_format, EcoString};
use ecow::eco_format;
use typst_syntax::Spanned;
use crate::diag::{At, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Bytes, Value};
use crate::World;
use crate::loading::{DataSource, Load};
/// Reads structured data from a CBOR file.
///
@ -21,29 +21,31 @@ use crate::World;
pub fn cbor(
/// The engine.
engine: &mut Engine,
/// Path to a CBOR file.
/// A path to a CBOR file or raw CBOR bytes.
///
/// For more details, see the [Paths section]($syntax/#paths).
path: Spanned<EcoString>,
/// For more details about paths, see the [Paths section]($syntax/#paths).
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let Spanned { v: path, span } = path;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
cbor::decode(Spanned::new(data, span))
let data = source.load(engine.world)?;
ciborium::from_reader(data.as_slice())
.map_err(|err| eco_format!("failed to parse CBOR ({err})"))
.at(source.span)
}
#[scope]
impl cbor {
/// Reads structured data from CBOR bytes.
///
/// This function is deprecated. The [`cbor`] function now accepts bytes
/// directly.
#[func(title = "Decode CBOR")]
pub fn decode(
/// cbor data.
/// The engine.
engine: &mut Engine,
/// CBOR data.
data: Spanned<Bytes>,
) -> SourceResult<Value> {
let Spanned { v: data, span } = data;
ciborium::from_reader(data.as_slice())
.map_err(|err| eco_format!("failed to parse CBOR ({err})"))
.at(span)
cbor(engine, data.map(DataSource::Bytes))
}
/// Encode structured data into CBOR bytes.

View File

@ -4,8 +4,7 @@ use typst_syntax::Spanned;
use crate::diag::{bail, At, SourceResult};
use crate::engine::Engine;
use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
use crate::loading::Readable;
use crate::World;
use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from a CSV file.
///
@ -28,10 +27,10 @@ use crate::World;
pub fn csv(
/// The engine.
engine: &mut Engine,
/// Path to a CSV file.
/// Path to a CSV file or raw CSV bytes.
///
/// For more details, see the [Paths section]($syntax/#paths).
path: Spanned<EcoString>,
/// For more details about paths, see the [Paths section]($syntax/#paths).
source: Spanned<DataSource>,
/// The delimiter that separates columns in the CSV file.
/// Must be a single ASCII character.
#[named]
@ -48,17 +47,63 @@ pub fn csv(
#[default(RowType::Array)]
row_type: RowType,
) -> SourceResult<Array> {
let Spanned { v: path, span } = path;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
self::csv::decode(Spanned::new(Readable::Bytes(data), span), delimiter, row_type)
let data = source.load(engine.world)?;
let mut builder = ::csv::ReaderBuilder::new();
let has_headers = row_type == RowType::Dict;
builder.has_headers(has_headers);
builder.delimiter(delimiter.0 as u8);
// Counting lines from 1 by default.
let mut line_offset: usize = 1;
let mut reader = builder.from_reader(data.as_slice());
let mut headers: Option<::csv::StringRecord> = None;
if has_headers {
// Counting lines from 2 because we have a header.
line_offset += 1;
headers = Some(
reader
.headers()
.map_err(|err| format_csv_error(err, 1))
.at(source.span)?
.clone(),
);
}
let mut array = Array::new();
for (line, result) in reader.records().enumerate() {
// Original solution was to use line from error, but that is
// incorrect with `has_headers` set to `false`. See issue:
// https://github.com/BurntSushi/rust-csv/issues/184
let line = line + line_offset;
let row = result.map_err(|err| format_csv_error(err, line)).at(source.span)?;
let item = if let Some(headers) = &headers {
let mut dict = Dict::new();
for (field, value) in headers.iter().zip(&row) {
dict.insert(field.into(), value.into_value());
}
dict.into_value()
} else {
let sub = row.into_iter().map(|field| field.into_value()).collect();
Value::Array(sub)
};
array.push(item);
}
Ok(array)
}
#[scope]
impl csv {
/// Reads structured data from a CSV string/bytes.
///
/// This function is deprecated. The [`csv`] function now accepts bytes
/// directly.
#[func(title = "Decode CSV")]
pub fn decode(
/// The engine.
engine: &mut Engine,
/// CSV data.
data: Spanned<Readable>,
/// The delimiter that separates columns in the CSV file.
@ -77,51 +122,7 @@ impl csv {
#[default(RowType::Array)]
row_type: RowType,
) -> SourceResult<Array> {
let Spanned { v: data, span } = data;
let has_headers = row_type == RowType::Dict;
let mut builder = ::csv::ReaderBuilder::new();
builder.has_headers(has_headers);
builder.delimiter(delimiter.0 as u8);
// Counting lines from 1 by default.
let mut line_offset: usize = 1;
let mut reader = builder.from_reader(data.as_slice());
let mut headers: Option<::csv::StringRecord> = None;
if has_headers {
// Counting lines from 2 because we have a header.
line_offset += 1;
headers = Some(
reader
.headers()
.map_err(|err| format_csv_error(err, 1))
.at(span)?
.clone(),
);
}
let mut array = Array::new();
for (line, result) in reader.records().enumerate() {
// Original solution was to use line from error, but that is
// incorrect with `has_headers` set to `false`. See issue:
// https://github.com/BurntSushi/rust-csv/issues/184
let line = line + line_offset;
let row = result.map_err(|err| format_csv_error(err, line)).at(span)?;
let item = if let Some(headers) = &headers {
let mut dict = Dict::new();
for (field, value) in headers.iter().zip(&row) {
dict.insert(field.into(), value.into_value());
}
dict.into_value()
} else {
let sub = row.into_iter().map(|field| field.into_value()).collect();
Value::Array(sub)
};
array.push(item);
}
Ok(array)
csv(engine, data.map(Readable::into_source), delimiter, row_type)
}
}

View File

@ -1,11 +1,10 @@
use ecow::{eco_format, EcoString};
use ecow::eco_format;
use typst_syntax::Spanned;
use crate::diag::{At, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
use crate::loading::Readable;
use crate::World;
use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from a JSON file.
///
@ -53,29 +52,31 @@ use crate::World;
pub fn json(
/// The engine.
engine: &mut Engine,
/// Path to a JSON file.
/// Path to a JSON file or raw JSON bytes.
///
/// For more details, see the [Paths section]($syntax/#paths).
path: Spanned<EcoString>,
/// For more details about paths, see the [Paths section]($syntax/#paths).
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let Spanned { v: path, span } = path;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
json::decode(Spanned::new(Readable::Bytes(data), span))
let data = source.load(engine.world)?;
serde_json::from_slice(data.as_slice())
.map_err(|err| eco_format!("failed to parse JSON ({err})"))
.at(source.span)
}
#[scope]
impl json {
/// Reads structured data from a JSON string/bytes.
///
/// This function is deprecated. The [`json`] function now accepts bytes
/// directly.
#[func(title = "Decode JSON")]
pub fn decode(
/// The engine.
engine: &mut Engine,
/// JSON data.
data: Spanned<Readable>,
) -> SourceResult<Value> {
let Spanned { v: data, span } = data;
serde_json::from_slice(data.as_slice())
.map_err(|err| eco_format!("failed to parse JSON ({err})"))
.at(span)
json(engine, data.map(Readable::into_source))
}
/// Encodes structured data into a JSON string.

View File

@ -15,6 +15,10 @@ mod xml_;
#[path = "yaml.rs"]
mod yaml_;
use comemo::Tracked;
use ecow::EcoString;
use typst_syntax::Spanned;
pub use self::cbor_::*;
pub use self::csv_::*;
pub use self::json_::*;
@ -23,7 +27,10 @@ pub use self::toml_::*;
pub use self::xml_::*;
pub use self::yaml_::*;
use crate::diag::{At, SourceResult};
use crate::foundations::OneOrMultiple;
use crate::foundations::{cast, category, Bytes, Category, Scope, Str};
use crate::World;
/// Data loading from external files.
///
@ -44,6 +51,76 @@ pub(super) fn define(global: &mut Scope) {
global.define_func::<xml>();
}
/// Something we can retrieve byte data from.
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum DataSource {
/// A path to a file.
Path(EcoString),
/// Raw bytes.
Bytes(Bytes),
}
cast! {
DataSource,
self => match self {
Self::Path(v) => v.into_value(),
Self::Bytes(v) => v.into_value(),
},
v: EcoString => Self::Path(v),
v: Bytes => Self::Bytes(v),
}
/// Loads data from a path or provided bytes.
pub trait Load {
/// Bytes or a list of bytes (if there are multiple sources).
type Output;
/// Load the bytes.
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Self::Output>;
}
impl Load for Spanned<DataSource> {
type Output = Bytes;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Bytes> {
self.as_ref().load(world)
}
}
impl Load for Spanned<&DataSource> {
type Output = Bytes;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Bytes> {
match &self.v {
DataSource::Path(path) => {
let file_id = self.span.resolve_path(path).at(self.span)?;
world.file(file_id).at(self.span)
}
DataSource::Bytes(bytes) => Ok(bytes.clone()),
}
}
}
impl Load for Spanned<OneOrMultiple<DataSource>> {
type Output = Vec<Bytes>;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Vec<Bytes>> {
self.as_ref().load(world)
}
}
impl Load for Spanned<&OneOrMultiple<DataSource>> {
type Output = Vec<Bytes>;
fn load(&self, world: Tracked<dyn World + '_>) -> SourceResult<Vec<Bytes>> {
self.v
.0
.iter()
.map(|source| Spanned::new(source, self.span).load(world))
.collect()
}
}
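
A simplified standalone version of this pattern (plain `std::io` in place of the tracked `World` and spanned diagnostics) shows the shape now shared by the revamped loaders (`cbor`, `csv`, `json`, `toml`, `xml`, `yaml`, `plugin`, `bibliography`):

```rust
use std::{fs, io};

// Simplified stand-in for `DataSource`/`Load`: a path is read from storage,
// raw bytes pass through unchanged, and a list of sources yields a list of
// buffers.
enum DataSource {
    Path(String),
    Bytes(Vec<u8>),
}

fn load(source: &DataSource) -> io::Result<Vec<u8>> {
    match source {
        // The real implementation resolves the path relative to the calling
        // file and fetches the bytes through the compiler's `World`.
        DataSource::Path(path) => fs::read(path),
        DataSource::Bytes(bytes) => Ok(bytes.clone()),
    }
}

fn load_all(sources: &[DataSource]) -> io::Result<Vec<Vec<u8>>> {
    sources.iter().map(load).collect()
}

fn main() -> io::Result<()> {
    let inline = load(&DataSource::Bytes(br#"{"a": 1}"#.to_vec()))?;
    println!("{} bytes from inline data", inline.len());
    let _all = load_all(&[DataSource::Bytes(vec![1, 2, 3])])?;
    Ok(())
}
```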
/// A value that can be read from a file.
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum Readable {
@ -54,26 +131,16 @@ pub enum Readable {
}
impl Readable {
pub fn as_slice(&self) -> &[u8] {
match self {
Self::Bytes(v) => v,
Self::Str(v) => v.as_bytes(),
}
}
pub fn as_str(&self) -> Option<&str> {
match self {
Self::Str(v) => Some(v.as_str()),
Self::Bytes(v) => std::str::from_utf8(v).ok(),
}
}
pub fn into_bytes(self) -> Bytes {
match self {
Self::Bytes(v) => v,
Self::Str(v) => Bytes::from_string(v),
}
}
pub fn into_source(self) -> DataSource {
DataSource::Bytes(self.into_bytes())
}
}
cast! {

View File

@ -1,7 +1,7 @@
use ecow::EcoString;
use typst_syntax::Spanned;
use crate::diag::{At, SourceResult};
use crate::diag::{At, FileError, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, Cast};
use crate::loading::Readable;
@ -42,12 +42,9 @@ pub fn read(
let data = engine.world.file(id).at(span)?;
Ok(match encoding {
None => Readable::Bytes(data),
Some(Encoding::Utf8) => Readable::Str(
std::str::from_utf8(&data)
.map_err(|_| "file is not valid utf-8")
.at(span)?
.into(),
),
Some(Encoding::Utf8) => {
Readable::Str(data.to_str().map_err(FileError::from).at(span)?)
}
})
}

View File

@ -1,11 +1,10 @@
use ecow::{eco_format, EcoString};
use typst_syntax::{is_newline, Spanned};
use crate::diag::{At, SourceResult};
use crate::diag::{At, FileError, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
use crate::loading::Readable;
use crate::World;
use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from a TOML file.
///
@ -31,32 +30,32 @@ use crate::World;
pub fn toml(
/// The engine.
engine: &mut Engine,
/// Path to a TOML file.
/// A path to a TOML file or raw TOML bytes.
///
/// For more details, see the [Paths section]($syntax/#paths).
path: Spanned<EcoString>,
/// For more details about paths, see the [Paths section]($syntax/#paths).
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let Spanned { v: path, span } = path;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
toml::decode(Spanned::new(Readable::Bytes(data), span))
let data = source.load(engine.world)?;
let raw = data.as_str().map_err(FileError::from).at(source.span)?;
::toml::from_str(raw)
.map_err(|err| format_toml_error(err, raw))
.at(source.span)
}
#[scope]
impl toml {
/// Reads structured data from a TOML string/bytes.
///
/// This function is deprecated. The [`toml`] function now accepts bytes
/// directly.
#[func(title = "Decode TOML")]
pub fn decode(
/// The engine.
engine: &mut Engine,
/// TOML data.
data: Spanned<Readable>,
) -> SourceResult<Value> {
let Spanned { v: data, span } = data;
let raw = std::str::from_utf8(data.as_slice())
.map_err(|_| "file is not valid utf-8")
.at(span)?;
::toml::from_str(raw)
.map_err(|err| format_toml_error(err, raw))
.at(span)
toml(engine, data.map(Readable::into_source))
}
/// Encodes structured data into a TOML string.

View File

@ -5,8 +5,7 @@ use typst_syntax::Spanned;
use crate::diag::{format_xml_like_error, At, FileError, SourceResult};
use crate::engine::Engine;
use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
use crate::loading::Readable;
use crate::World;
use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from an XML file.
///
@ -60,36 +59,36 @@ use crate::World;
pub fn xml(
/// The engine.
engine: &mut Engine,
/// Path to an XML file.
/// A path to an XML file or raw XML bytes.
///
/// For more details, see the [Paths section]($syntax/#paths).
path: Spanned<EcoString>,
/// For more details about paths, see the [Paths section]($syntax/#paths).
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let Spanned { v: path, span } = path;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
xml::decode(Spanned::new(Readable::Bytes(data), span))
let data = source.load(engine.world)?;
let text = data.as_str().map_err(FileError::from).at(source.span)?;
let document = roxmltree::Document::parse_with_options(
text,
ParsingOptions { allow_dtd: true, ..Default::default() },
)
.map_err(format_xml_error)
.at(source.span)?;
Ok(convert_xml(document.root()))
}
#[scope]
impl xml {
/// Reads structured data from an XML string/bytes.
///
/// This function is deprecated. The [`xml`] function now accepts bytes
/// directly.
#[func(title = "Decode XML")]
pub fn decode(
/// The engine.
engine: &mut Engine,
/// XML data.
data: Spanned<Readable>,
) -> SourceResult<Value> {
let Spanned { v: data, span } = data;
let text = std::str::from_utf8(data.as_slice())
.map_err(FileError::from)
.at(span)?;
let document = roxmltree::Document::parse_with_options(
text,
ParsingOptions { allow_dtd: true, ..Default::default() },
)
.map_err(format_xml_error)
.at(span)?;
Ok(convert_xml(document.root()))
xml(engine, data.map(Readable::into_source))
}
}

View File

@ -1,11 +1,10 @@
use ecow::{eco_format, EcoString};
use ecow::eco_format;
use typst_syntax::Spanned;
use crate::diag::{At, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
use crate::loading::Readable;
use crate::World;
use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from a YAML file.
///
@ -43,29 +42,31 @@ use crate::World;
pub fn yaml(
/// The engine.
engine: &mut Engine,
/// Path to a YAML file.
/// A path to a YAML file or raw YAML bytes.
///
/// For more details, see the [Paths section]($syntax/#paths).
path: Spanned<EcoString>,
/// For more details about paths, see the [Paths section]($syntax/#paths).
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let Spanned { v: path, span } = path;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
yaml::decode(Spanned::new(Readable::Bytes(data), span))
let data = source.load(engine.world)?;
serde_yaml::from_slice(data.as_slice())
.map_err(|err| eco_format!("failed to parse YAML ({err})"))
.at(source.span)
}
#[scope]
impl yaml {
/// Reads structured data from a YAML string/bytes.
///
/// This function is deprecated. The [`yaml`] function now accepts bytes
/// directly.
#[func(title = "Decode YAML")]
pub fn decode(
/// The engine.
engine: &mut Engine,
/// YAML data.
data: Spanned<Readable>,
) -> SourceResult<Value> {
let Spanned { v: data, span } = data;
serde_yaml::from_slice(data.as_slice())
.map_err(|err| eco_format!("failed to parse YAML ({err})"))
.at(span)
yaml(engine, data.map(Readable::into_source))
}
/// Encode structured data into a YAML string.

View File

@ -1,7 +1,7 @@
use std::any::TypeId;
use std::collections::HashMap;
use std::ffi::OsStr;
use std::fmt::{self, Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::num::NonZeroUsize;
use std::path::Path;
use std::sync::{Arc, LazyLock};
@ -12,26 +12,26 @@ use hayagriva::archive::ArchivedStyle;
use hayagriva::io::BibLaTeXError;
use hayagriva::{
citationberg, BibliographyDriver, BibliographyRequest, CitationItem, CitationRequest,
SpecificLocator,
Library, SpecificLocator,
};
use indexmap::IndexMap;
use smallvec::{smallvec, SmallVec};
use typed_arena::Arena;
use typst_syntax::{Span, Spanned};
use typst_utils::{LazyHash, NonZeroExt, PicoStr};
use typst_utils::{ManuallyHash, NonZeroExt, PicoStr};
use crate::diag::{bail, error, At, FileError, HintedStrResult, SourceResult, StrResult};
use crate::engine::Engine;
use crate::foundations::{
cast, elem, ty, Args, Array, Bytes, CastInfo, Content, FromValue, IntoValue, Label,
NativeElement, Packed, Reflect, Repr, Scope, Show, ShowSet, Smart, Str, StyleChain,
Styles, Synthesize, Type, Value,
elem, Bytes, CastInfo, Content, Derived, FromValue, IntoValue, Label, NativeElement,
OneOrMultiple, Packed, Reflect, Scope, Show, ShowSet, Smart, StyleChain, Styles,
Synthesize, Value,
};
use crate::introspection::{Introspector, Locatable, Location};
use crate::layout::{
BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem,
Sizing, TrackSizings, VElem,
};
use crate::loading::{DataSource, Load};
use crate::model::{
CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem,
Url,
@ -86,13 +86,20 @@ use crate::World;
/// ```
#[elem(Locatable, Synthesize, Show, ShowSet, LocalName)]
pub struct BibliographyElem {
/// Path(s) to Hayagriva `.yml` and/or BibLaTeX `.bib` files.
/// One or multiple paths to or raw bytes for Hayagriva `.yml` and/or
/// BibLaTeX `.bib` files.
///
/// This can be:
/// - A path string to load a bibliography file from the given path. For
/// more details about paths, see the [Paths section]($syntax/#paths).
/// - Raw bytes from which the bibliography should be decoded.
/// - An array where each item is one of the above.
#[required]
#[parse(
let (paths, bibliography) = Bibliography::parse(engine, args)?;
paths
let sources = args.expect("sources")?;
Bibliography::load(engine.world, sources)?
)]
pub path: BibliographyPaths,
pub sources: Derived<OneOrMultiple<DataSource>, Bibliography>,
/// The title of the bibliography.
///
@ -116,19 +123,22 @@ pub struct BibliographyElem {
/// The bibliography style.
///
/// Should be either one of the built-in styles (see below) or a path to
/// a [CSL file](https://citationstyles.org/). Some of the styles listed
/// below appear twice, once with their full name and once with a short
/// alias.
#[parse(CslStyle::parse(engine, args)?)]
#[default(CslStyle::from_name("ieee").unwrap())]
pub style: CslStyle,
/// The loaded bibliography.
#[internal]
#[required]
#[parse(bibliography)]
pub bibliography: Bibliography,
/// This can be:
/// - A string with the name of one of the built-in styles (see below). Some
/// of the styles listed below appear twice, once with their full name and
/// once with a short alias.
/// - A path string to a [CSL file](https://citationstyles.org/). For more
/// details about paths, see the [Paths section]($syntax/#paths).
/// - Raw bytes from which a CSL style should be decoded.
#[parse(match args.named::<Spanned<CslSource>>("style")? {
Some(source) => Some(CslStyle::load(engine.world, source)?),
None => None,
})]
#[default({
let default = ArchivedStyle::InstituteOfElectricalAndElectronicsEngineers;
Derived::new(CslSource::Named(default), CslStyle::from_archived(default))
})]
pub style: Derived<CslSource, CslStyle>,
/// The language setting where the bibliography is.
#[internal]
@ -141,17 +151,6 @@ pub struct BibliographyElem {
pub region: Option<Region>,
}
/// A list of bibliography file paths.
#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
pub struct BibliographyPaths(Vec<EcoString>);
cast! {
BibliographyPaths,
self => self.0.into_value(),
v: EcoString => Self(vec![v]),
v: Array => Self(v.into_iter().map(Value::cast).collect::<HintedStrResult<_>>()?),
}
impl BibliographyElem {
/// Find the document's bibliography.
pub fn find(introspector: Tracked<Introspector>) -> StrResult<Packed<Self>> {
@ -169,13 +168,12 @@ impl BibliographyElem {
}
/// Whether the bibliography contains the given key.
pub fn has(engine: &Engine, key: impl Into<PicoStr>) -> bool {
let key = key.into();
pub fn has(engine: &Engine, key: Label) -> bool {
engine
.introspector
.query(&Self::elem().select())
.iter()
.any(|elem| elem.to_packed::<Self>().unwrap().bibliography().has(key))
.any(|elem| elem.to_packed::<Self>().unwrap().sources.derived.has(key))
}
/// Find all bibliography keys.
@ -183,9 +181,9 @@ impl BibliographyElem {
let mut vec = vec![];
for elem in introspector.query(&Self::elem().select()).iter() {
let this = elem.to_packed::<Self>().unwrap();
for (key, entry) in this.bibliography().iter() {
for (key, entry) in this.sources.derived.iter() {
let detail = entry.title().map(|title| title.value.to_str().into());
vec.push((Label::new(key), detail))
vec.push((key, detail))
}
}
vec
@ -282,63 +280,35 @@ impl LocalName for Packed<BibliographyElem> {
}
/// A loaded bibliography.
#[derive(Clone, PartialEq)]
pub struct Bibliography {
map: Arc<IndexMap<PicoStr, hayagriva::Entry>>,
hash: u128,
}
#[derive(Clone, PartialEq, Hash)]
pub struct Bibliography(Arc<ManuallyHash<IndexMap<Label, hayagriva::Entry>>>);
impl Bibliography {
/// Parse the bibliography argument.
fn parse(
engine: &mut Engine,
args: &mut Args,
) -> SourceResult<(BibliographyPaths, Bibliography)> {
let Spanned { v: paths, span } =
args.expect::<Spanned<BibliographyPaths>>("path to bibliography file")?;
// Load bibliography files.
let data = paths
.0
.iter()
.map(|path| {
let id = span.resolve_path(path).at(span)?;
engine.world.file(id).at(span)
})
.collect::<SourceResult<Vec<Bytes>>>()?;
// Parse.
let bibliography = Self::load(&paths, &data).at(span)?;
Ok((paths, bibliography))
/// Load a bibliography from data sources.
fn load(
world: Tracked<dyn World + '_>,
sources: Spanned<OneOrMultiple<DataSource>>,
) -> SourceResult<Derived<OneOrMultiple<DataSource>, Self>> {
let data = sources.load(world)?;
let bibliography = Self::decode(&sources.v, &data).at(sources.span)?;
Ok(Derived::new(sources.v, bibliography))
}
/// Load bibliography entries from paths.
/// Decode a bibliography from loaded data sources.
#[comemo::memoize]
#[typst_macros::time(name = "load bibliography")]
fn load(paths: &BibliographyPaths, data: &[Bytes]) -> StrResult<Bibliography> {
fn decode(
sources: &OneOrMultiple<DataSource>,
data: &[Bytes],
) -> StrResult<Bibliography> {
let mut map = IndexMap::new();
let mut duplicates = Vec::<EcoString>::new();
// We might have multiple bib/yaml files
for (path, bytes) in paths.0.iter().zip(data) {
let src = std::str::from_utf8(bytes).map_err(FileError::from)?;
let ext = Path::new(path.as_str())
.extension()
.and_then(OsStr::to_str)
.unwrap_or_default();
let library = match ext.to_lowercase().as_str() {
"yml" | "yaml" => hayagriva::io::from_yaml_str(src)
.map_err(|err| eco_format!("failed to parse YAML ({err})"))?,
"bib" => hayagriva::io::from_biblatex_str(src)
.map_err(|errors| format_biblatex_error(path, src, errors))?,
_ => bail!("unknown bibliography format (must be .yml/.yaml or .bib)"),
};
for (source, data) in sources.0.iter().zip(data) {
let library = decode_library(source, data)?;
for entry in library {
match map.entry(PicoStr::intern(entry.key())) {
match map.entry(Label::new(PicoStr::intern(entry.key()))) {
indexmap::map::Entry::Vacant(vacant) => {
vacant.insert(entry);
}
@ -353,182 +323,210 @@ impl Bibliography {
bail!("duplicate bibliography keys: {}", duplicates.join(", "));
}
Ok(Bibliography {
map: Arc::new(map),
hash: typst_utils::hash128(data),
})
Ok(Bibliography(Arc::new(ManuallyHash::new(map, typst_utils::hash128(data)))))
}
fn has(&self, key: impl Into<PicoStr>) -> bool {
self.map.contains_key(&key.into())
fn has(&self, key: Label) -> bool {
self.0.contains_key(&key)
}
fn iter(&self) -> impl Iterator<Item = (PicoStr, &hayagriva::Entry)> {
self.map.iter().map(|(&k, v)| (k, v))
fn get(&self, key: Label) -> Option<&hayagriva::Entry> {
self.0.get(&key)
}
fn iter(&self) -> impl Iterator<Item = (Label, &hayagriva::Entry)> {
self.0.iter().map(|(&k, v)| (k, v))
}
}
impl Debug for Bibliography {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
f.debug_set().entries(self.map.keys()).finish()
f.debug_set().entries(self.0.keys()).finish()
}
}
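
The manual `Hash` impl removed just below is replaced by wrapping the entry map in `ManuallyHash` with a precomputed fingerprint. A standalone sketch of that pattern (an assumed simplification of `typst_utils::ManuallyHash`):

```rust
use std::hash::{Hash, Hasher};
use std::ops::Deref;

// Sketch of the `ManuallyHash` pattern: store a value together with a
// precomputed fingerprint and hash only the fingerprint, so hashing a large
// parsed bibliography stays cheap and the inner type needs no `Hash` impl.
struct ManuallyHash<T> {
    value: T,
    hash: u128,
}

impl<T> ManuallyHash<T> {
    fn new(value: T, hash: u128) -> Self {
        Self { value, hash }
    }
}

impl<T> Hash for ManuallyHash<T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Only the fingerprint (e.g. a hash of the raw source bytes) is fed
        // to the hasher.
        self.hash.hash(state);
    }
}

impl<T> Deref for ManuallyHash<T> {
    type Target = T;
    fn deref(&self) -> &T {
        &self.value
    }
}

fn main() {
    let wrapped = ManuallyHash::new(vec!["entry-a", "entry-b"], 0xdead_beef);
    println!("{} entries", wrapped.len());
}
```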
impl Hash for Bibliography {
fn hash<H: Hasher>(&self, state: &mut H) {
self.hash.hash(state);
/// Decode one library from one data source.
fn decode_library(source: &DataSource, data: &Bytes) -> StrResult<Library> {
let src = data.as_str().map_err(FileError::from)?;
if let DataSource::Path(path) = source {
// If we got a path, use the extension to determine whether it is
// YAML or BibLaTeX.
let ext = Path::new(path.as_str())
.extension()
.and_then(OsStr::to_str)
.unwrap_or_default();
match ext.to_lowercase().as_str() {
"yml" | "yaml" => hayagriva::io::from_yaml_str(src)
.map_err(|err| eco_format!("failed to parse YAML ({err})")),
"bib" => hayagriva::io::from_biblatex_str(src)
.map_err(|errors| format_biblatex_error(src, Some(path), errors)),
_ => bail!("unknown bibliography format (must be .yml/.yaml or .bib)"),
}
} else {
// If we just got bytes, we need to guess. If it can be decoded as
// hayagriva YAML, we'll use that.
let haya_err = match hayagriva::io::from_yaml_str(src) {
Ok(library) => return Ok(library),
Err(err) => err,
};
// If it can be decoded as BibLaTeX, we use that instead.
let bib_errs = match hayagriva::io::from_biblatex_str(src) {
Ok(library) => return Ok(library),
Err(err) => err,
};
// If neither decoded correctly, check whether `:` or `{` appears
// more often to guess whether it's more likely to be YAML or BibLaTeX
// and emit the more appropriate error.
let mut yaml = 0;
let mut biblatex = 0;
for c in src.chars() {
match c {
':' => yaml += 1,
'{' => biblatex += 1,
_ => {}
}
}
if yaml > biblatex {
bail!("failed to parse YAML ({haya_err})")
} else {
Err(format_biblatex_error(src, None, bib_errs))
}
}
}
/// Format a BibLaTeX loading error.
fn format_biblatex_error(path: &str, src: &str, errors: Vec<BibLaTeXError>) -> EcoString {
fn format_biblatex_error(
src: &str,
path: Option<&str>,
errors: Vec<BibLaTeXError>,
) -> EcoString {
let Some(error) = errors.first() else {
return eco_format!("failed to parse BibLaTeX file ({path})");
return match path {
Some(path) => eco_format!("failed to parse BibLaTeX file ({path})"),
None => eco_format!("failed to parse BibLaTeX"),
};
};
let (span, msg) = match error {
BibLaTeXError::Parse(error) => (&error.span, error.kind.to_string()),
BibLaTeXError::Type(error) => (&error.span, error.kind.to_string()),
};
let line = src.get(..span.start).unwrap_or_default().lines().count();
eco_format!("failed to parse BibLaTeX file ({path}:{line}: {msg})")
match path {
Some(path) => eco_format!("failed to parse BibLaTeX file ({path}:{line}: {msg})"),
None => eco_format!("failed to parse BibLaTeX ({line}: {msg})"),
}
}
/// A loaded CSL style.
#[ty(cast)]
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct CslStyle {
name: Option<EcoString>,
style: Arc<LazyHash<citationberg::IndependentStyle>>,
}
pub struct CslStyle(Arc<ManuallyHash<citationberg::IndependentStyle>>);
impl CslStyle {
/// Parse the style argument.
pub fn parse(engine: &mut Engine, args: &mut Args) -> SourceResult<Option<CslStyle>> {
let Some(Spanned { v: string, span }) =
args.named::<Spanned<EcoString>>("style")?
else {
return Ok(None);
};
Ok(Some(Self::parse_impl(engine, &string, span).at(span)?))
}
/// Parse the style argument with `Smart`.
pub fn parse_smart(
engine: &mut Engine,
args: &mut Args,
) -> SourceResult<Option<Smart<CslStyle>>> {
let Some(Spanned { v: smart, span }) =
args.named::<Spanned<Smart<EcoString>>>("style")?
else {
return Ok(None);
};
Ok(Some(match smart {
Smart::Auto => Smart::Auto,
Smart::Custom(string) => {
Smart::Custom(Self::parse_impl(engine, &string, span).at(span)?)
/// Load a CSL style from a data source.
pub fn load(
world: Tracked<dyn World + '_>,
Spanned { v: source, span }: Spanned<CslSource>,
) -> SourceResult<Derived<CslSource, Self>> {
let style = match &source {
CslSource::Named(style) => Self::from_archived(*style),
CslSource::Normal(source) => {
let data = Spanned::new(source, span).load(world)?;
Self::from_data(data).at(span)?
}
}))
}
/// Parse internally.
fn parse_impl(engine: &mut Engine, string: &str, span: Span) -> StrResult<CslStyle> {
let ext = Path::new(string)
.extension()
.and_then(OsStr::to_str)
.unwrap_or_default()
.to_lowercase();
if ext == "csl" {
let id = span.resolve_path(string)?;
let data = engine.world.file(id)?;
CslStyle::from_data(&data)
} else {
CslStyle::from_name(string)
}
};
Ok(Derived::new(source, style))
}
/// Load a built-in CSL style.
#[comemo::memoize]
pub fn from_name(name: &str) -> StrResult<CslStyle> {
match hayagriva::archive::ArchivedStyle::by_name(name).map(ArchivedStyle::get) {
Some(citationberg::Style::Independent(style)) => Ok(Self {
name: Some(name.into()),
style: Arc::new(LazyHash::new(style)),
}),
_ => bail!("unknown style: `{name}`"),
pub fn from_archived(archived: ArchivedStyle) -> CslStyle {
match archived.get() {
citationberg::Style::Independent(style) => Self(Arc::new(ManuallyHash::new(
style,
typst_utils::hash128(&(TypeId::of::<ArchivedStyle>(), archived)),
))),
// Ensured by `test_bibliography_load_builtin_styles`.
_ => unreachable!("archive should not contain dependent styles"),
}
}
/// Load a CSL style from file contents.
#[comemo::memoize]
pub fn from_data(data: &Bytes) -> StrResult<CslStyle> {
let text = std::str::from_utf8(data.as_slice()).map_err(FileError::from)?;
pub fn from_data(data: Bytes) -> StrResult<CslStyle> {
let text = data.as_str().map_err(FileError::from)?;
citationberg::IndependentStyle::from_xml(text)
.map(|style| Self { name: None, style: Arc::new(LazyHash::new(style)) })
.map(|style| {
Self(Arc::new(ManuallyHash::new(
style,
typst_utils::hash128(&(TypeId::of::<Bytes>(), data)),
)))
})
.map_err(|err| eco_format!("failed to load CSL style ({err})"))
}
/// Get the underlying independent style.
pub fn get(&self) -> &citationberg::IndependentStyle {
self.style.as_ref()
self.0.as_ref()
}
}
// This Reflect impl is technically a bit wrong because it doesn't say what
// FromValue and IntoValue really do. Instead, it says what the `style` argument
// on `bibliography` and `cite` expect (through manual parsing).
impl Reflect for CslStyle {
/// Source for a CSL style.
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum CslSource {
/// A predefined named style.
Named(ArchivedStyle),
/// A normal data source.
Normal(DataSource),
}
impl Reflect for CslSource {
#[comemo::memoize]
fn input() -> CastInfo {
let ty = std::iter::once(CastInfo::Type(Type::of::<Str>()));
let options = hayagriva::archive::ArchivedStyle::all().iter().map(|name| {
let source = std::iter::once(DataSource::input());
let names = ArchivedStyle::all().iter().map(|name| {
CastInfo::Value(name.names()[0].into_value(), name.display_name())
});
CastInfo::Union(ty.chain(options).collect())
CastInfo::Union(source.into_iter().chain(names).collect())
}
fn output() -> CastInfo {
EcoString::output()
DataSource::output()
}
fn castable(value: &Value) -> bool {
if let Value::Dyn(dynamic) = &value {
if dynamic.is::<Self>() {
return true;
}
}
false
DataSource::castable(value)
}
}
impl FromValue for CslStyle {
impl FromValue for CslSource {
fn from_value(value: Value) -> HintedStrResult<Self> {
if let Value::Dyn(dynamic) = &value {
if let Some(concrete) = dynamic.downcast::<Self>() {
return Ok(concrete.clone());
if EcoString::castable(&value) {
let string = EcoString::from_value(value.clone())?;
if Path::new(string.as_str()).extension().is_none() {
let style = ArchivedStyle::by_name(&string)
.ok_or_else(|| eco_format!("unknown style: {}", string))?;
return Ok(CslSource::Named(style));
}
}
Err(<Self as Reflect>::error(&value))
DataSource::from_value(value).map(CslSource::Normal)
}
}
impl IntoValue for CslStyle {
impl IntoValue for CslSource {
fn into_value(self) -> Value {
Value::dynamic(self)
}
}
impl Repr for CslStyle {
fn repr(&self) -> EcoString {
self.name
.as_ref()
.map(|name| name.repr())
.unwrap_or_else(|| "..".into())
match self {
// We prefer the shorter names which are at the back of the array.
Self::Named(v) => v.names().last().unwrap().into_value(),
Self::Normal(v) => v.into_value(),
}
}
}
@ -632,9 +630,8 @@ impl<'a> Generator<'a> {
static LOCALES: LazyLock<Vec<citationberg::Locale>> =
LazyLock::new(hayagriva::archive::locales);
let database = self.bibliography.bibliography();
let bibliography_style = self.bibliography.style(StyleChain::default());
let styles = Arena::new();
let database = &self.bibliography.sources.derived;
let bibliography_style = &self.bibliography.style(StyleChain::default()).derived;
// Process all citation groups.
let mut driver = BibliographyDriver::new();
@ -654,7 +651,7 @@ impl<'a> Generator<'a> {
// Create infos and items for each child in the group.
for child in children {
let key = *child.key();
let Some(entry) = database.map.get(&key.into_inner()) else {
let Some(entry) = database.get(key) else {
errors.push(error!(
child.span(),
"key `{}` does not exist in the bibliography",
@ -695,8 +692,8 @@ impl<'a> Generator<'a> {
}
let style = match first.style(StyleChain::default()) {
Smart::Auto => &bibliography_style.style,
Smart::Custom(style) => styles.alloc(style.style),
Smart::Auto => bibliography_style.get(),
Smart::Custom(style) => style.derived.get(),
};
self.infos.push(GroupInfo {
@ -727,7 +724,7 @@ impl<'a> Generator<'a> {
// Add hidden items for everything if we should print the whole
// bibliography.
if self.bibliography.full(StyleChain::default()) {
for entry in database.map.values() {
for (_, entry) in database.iter() {
driver.citation(CitationRequest::new(
vec![CitationItem::new(entry, None, None, true, None)],
bibliography_style.get(),
@ -1097,3 +1094,15 @@ fn locale(lang: Lang, region: Option<Region>) -> citationberg::LocaleCode {
}
citationberg::LocaleCode(value)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_bibliography_load_builtin_styles() {
for &archived in ArchivedStyle::all() {
let _ = CslStyle::from_archived(archived);
}
}
}

View File

@ -1,11 +1,14 @@
use typst_syntax::Spanned;
use crate::diag::{error, At, HintedString, SourceResult};
use crate::engine::Engine;
use crate::foundations::{
cast, elem, Cast, Content, Label, Packed, Show, Smart, StyleChain, Synthesize,
cast, elem, Cast, Content, Derived, Label, Packed, Show, Smart, StyleChain,
Synthesize,
};
use crate::introspection::Locatable;
use crate::model::bibliography::Works;
use crate::model::CslStyle;
use crate::model::{CslSource, CslStyle};
use crate::text::{Lang, Region, TextElem};
/// Cite a work from the bibliography.
@ -87,15 +90,24 @@ pub struct CiteElem {
/// The citation style.
///
/// Should be either `{auto}`, one of the built-in styles (see below) or a
/// path to a [CSL file](https://citationstyles.org/). Some of the styles
/// listed below appear twice, once with their full name and once with a
/// short alias.
///
/// When set to `{auto}`, automatically use the
/// [bibliography's style]($bibliography.style) for the citations.
#[parse(CslStyle::parse_smart(engine, args)?)]
pub style: Smart<CslStyle>,
/// This can be:
/// - `{auto}` to automatically use the
/// [bibliography's style]($bibliography.style) for citations.
/// - A string with the name of one of the built-in styles (see below). Some
/// of the styles listed below appear twice, once with their full name and
/// once with a short alias.
/// - A path string to a [CSL file](https://citationstyles.org/). For more
/// details about paths, see the [Paths section]($syntax/#paths).
/// - Raw bytes from which a CSL style should be decoded.
#[parse(match args.named::<Spanned<Smart<CslSource>>>("style")? {
Some(Spanned { v: Smart::Custom(source), span }) => Some(Smart::Custom(
CslStyle::load(engine.world, Spanned::new(source, span))?
)),
Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto),
None => None,
})]
#[borrowed]
pub style: Smart<Derived<CslSource, CslStyle>>,
/// The text language setting where the citation is.
#[internal]

View File

@ -3,8 +3,8 @@ use ecow::EcoString;
use crate::diag::{bail, HintedStrResult, SourceResult};
use crate::engine::Engine;
use crate::foundations::{
cast, elem, Args, Array, Construct, Content, Datetime, Fields, Smart, StyleChain,
Styles, Value,
cast, elem, Args, Array, Construct, Content, Datetime, Fields, OneOrMultiple, Smart,
StyleChain, Styles, Value,
};
/// The root element of a document and its metadata.
@ -35,7 +35,7 @@ pub struct DocumentElem {
/// The document's authors.
#[ghost]
pub author: Author,
pub author: OneOrMultiple<EcoString>,
/// The document's description.
#[ghost]
@ -43,7 +43,7 @@ pub struct DocumentElem {
/// The document's keywords.
#[ghost]
pub keywords: Keywords,
pub keywords: OneOrMultiple<EcoString>,
/// The document's creation date.
///
@ -93,7 +93,7 @@ cast! {
pub struct DocumentInfo {
/// The document's title.
pub title: Option<EcoString>,
/// The document's author.
/// The document's author(s).
pub author: Vec<EcoString>,
/// The document's description.
pub description: Option<EcoString>,

View File

@ -1,13 +1,10 @@
use ecow::EcoString;
use typst_syntax::{Span, Spanned};
use typst_syntax::Spanned;
use crate::diag::{At, SourceResult, StrResult};
use crate::diag::{At, SourceResult};
use crate::engine::Engine;
use crate::foundations::{
elem, func, scope, Cast, Content, NativeElement, Packed, Show, StyleChain,
};
use crate::foundations::{elem, Bytes, Cast, Content, Derived, Packed, Show, StyleChain};
use crate::introspection::Locatable;
use crate::loading::Readable;
use crate::World;
/// A file that will be embedded into the output PDF.
@ -33,33 +30,40 @@ use crate::World;
/// - This element is ignored if exporting to a format other than PDF.
/// - File embeddings are not currently supported for PDF/A-2, even if the
/// embedded file conforms to PDF/A-1 or PDF/A-2.
#[elem(scope, Show, Locatable)]
#[elem(Show, Locatable)]
pub struct EmbedElem {
/// Path to a file to be embedded.
/// Path of the file to be embedded.
///
/// For more details, see the [Paths section]($syntax/#paths).
/// Must always be specified, but is only read from if no data is provided
/// in the following argument.
///
/// For more details about paths, see the [Paths section]($syntax/#paths).
#[required]
#[parse(
let Spanned { v: path, span } =
args.expect::<Spanned<EcoString>>("path to the file to be embedded")?;
args.expect::<Spanned<EcoString>>("path")?;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
path
// The derived part is the project-relative resolved path.
let resolved = id.vpath().as_rootless_path().to_string_lossy().replace("\\", "/").into();
Derived::new(path.clone(), resolved)
)]
#[borrowed]
pub path: EcoString,
pub path: Derived<EcoString, EcoString>,
/// The resolved project-relative path.
#[internal]
/// Raw file data, optionally.
///
/// If omitted, the data is read from the specified path.
#[positional]
// Not actually required as an argument, but always present as a field.
// We can't distinguish between the two at the moment.
#[required]
#[parse(id.vpath().as_rootless_path().to_string_lossy().replace("\\", "/").into())]
pub resolved_path: EcoString,
/// The raw file data.
#[internal]
#[required]
#[parse(Readable::Bytes(data))]
pub data: Readable,
#[parse(
match args.find::<Bytes>()? {
Some(data) => data,
None => engine.world.file(id).at(span)?,
}
)]
pub data: Bytes,
/// The relationship of the embedded file to the document.
///
@ -75,42 +79,6 @@ pub struct EmbedElem {
pub description: Option<EcoString>,
}
#[scope]
impl EmbedElem {
/// Decode a file embedding from bytes or a string.
#[func(title = "Embed Data")]
fn decode(
/// The call span of this function.
span: Span,
/// The path that will be written into the PDF. Typst will not read from
/// this path since the data is provided in the following argument.
path: EcoString,
/// The data to embed as a file.
data: Readable,
/// The relationship of the embedded file to the document.
#[named]
relationship: Option<Option<EmbeddedFileRelationship>>,
/// The MIME type of the embedded file.
#[named]
mime_type: Option<Option<EcoString>>,
/// A description for the embedded file.
#[named]
description: Option<Option<EcoString>>,
) -> StrResult<Content> {
let mut elem = EmbedElem::new(path.clone(), path, data);
if let Some(description) = description {
elem.push_description(description);
}
if let Some(mime_type) = mime_type {
elem.push_mime_type(mime_type);
}
if let Some(relationship) = relationship {
elem.push_relationship(relationship);
}
Ok(elem.pack().spanned(span))
}
}
impl Show for Packed<EmbedElem> {
fn show(&self, _: &mut Engine, _: StyleChain) -> SourceResult<Content> {
Ok(Content::empty())

View File

@ -1,23 +1,25 @@
use std::cell::LazyCell;
use std::hash::Hash;
use std::ops::Range;
use std::sync::{Arc, LazyLock};
use comemo::Tracked;
use ecow::{eco_format, EcoString, EcoVec};
use syntect::highlighting::{self as synt, Theme};
use syntect::highlighting as synt;
use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
use typst_syntax::{split_newlines, LinkedNode, Span, Spanned};
use typst_utils::ManuallyHash;
use unicode_segmentation::UnicodeSegmentation;
use super::Lang;
use crate::diag::{At, FileError, HintedStrResult, SourceResult, StrResult};
use crate::diag::{At, FileError, SourceResult, StrResult};
use crate::engine::Engine;
use crate::foundations::{
cast, elem, scope, Args, Array, Bytes, Content, Fold, NativeElement, Packed,
PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem, Value,
cast, elem, scope, Bytes, Content, Derived, NativeElement, OneOrMultiple, Packed,
PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem,
};
use crate::html::{tag, HtmlElem};
use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
use crate::loading::{DataSource, Load};
use crate::model::{Figurable, ParElem};
use crate::text::{
FontFamily, FontList, Hyphenate, LinebreakElem, LocalName, TextElem, TextSize,
@ -25,12 +27,6 @@ use crate::text::{
use crate::visualize::Color;
use crate::World;
// Shorthand for highlighter closures.
type StyleFn<'a> =
&'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
type ThemeArgType = Smart<Option<EcoString>>;
/// Raw text with optional syntax highlighting.
///
/// Displays the text verbatim and in a monospace font. This is typically used
@ -186,9 +182,15 @@ pub struct RawElem {
#[default(HAlignment::Start)]
pub align: HAlignment,
/// One or multiple additional syntax definitions to load. The syntax
/// definitions should be in the
/// [`sublime-syntax` file format](https://www.sublimetext.com/docs/syntax.html).
/// Additional syntax definitions to load. The syntax definitions should be
/// in the [`sublime-syntax` file format](https://www.sublimetext.com/docs/syntax.html).
///
/// You can pass any of the following values:
///
/// - A path string to load a syntax file from the given path. For more
/// details about paths, see the [Paths section]($syntax/#paths).
/// - Raw bytes from which the syntax should be decoded.
    /// - An array where each item is one of the above.
///
/// ````example
/// #set raw(syntaxes: "SExpressions.sublime-syntax")
@ -201,22 +203,24 @@ pub struct RawElem {
/// (* x (factorial (- x 1)))))
/// ```
/// ````
#[parse(
let (syntaxes, syntaxes_data) = parse_syntaxes(engine, args)?;
syntaxes
)]
#[parse(match args.named("syntaxes")? {
Some(sources) => Some(RawSyntax::load(engine.world, sources)?),
None => None,
})]
#[fold]
pub syntaxes: SyntaxPaths,
pub syntaxes: Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>,
/// The raw file buffers of syntax definition files.
#[internal]
#[parse(syntaxes_data)]
#[fold]
pub syntaxes_data: Vec<Bytes>,
/// The theme to use for syntax highlighting. Theme files should be in the
/// The theme to use for syntax highlighting. Themes should be in the
/// [`tmTheme` file format](https://www.sublimetext.com/docs/color_schemes_tmtheme.html).
///
/// You can pass any of the following values:
///
/// - `{none}`: Disables syntax highlighting.
/// - `{auto}`: Highlights with Typst's default theme.
/// - A path string to load a theme file from the given path. For more
/// details about paths, see the [Paths section]($syntax/#paths).
/// - Raw bytes from which the theme should be decoded.
///
/// Applying a theme only affects the color of specifically highlighted
/// text. It does not consider the theme's foreground and background
/// properties, so that you retain control over the color of raw text. You
@ -224,8 +228,6 @@ pub struct RawElem {
/// the background with a [filled block]($block.fill). You could also use
/// the [`xml`] function to extract these properties from the theme.
///
/// Additionally, you can set the theme to `{none}` to disable highlighting.
///
/// ````example
/// #set raw(theme: "halcyon.tmTheme")
/// #show raw: it => block(
@ -240,18 +242,16 @@ pub struct RawElem {
/// #let hi = "Hello World"
/// ```
/// ````
#[parse(
let (theme_path, theme_data) = parse_theme(engine, args)?;
theme_path
)]
#[parse(match args.named::<Spanned<Smart<Option<DataSource>>>>("theme")? {
Some(Spanned { v: Smart::Custom(Some(source)), span }) => Some(Smart::Custom(
Some(RawTheme::load(engine.world, Spanned::new(source, span))?)
)),
Some(Spanned { v: Smart::Custom(None), .. }) => Some(Smart::Custom(None)),
Some(Spanned { v: Smart::Auto, .. }) => Some(Smart::Auto),
None => None,
})]
#[borrowed]
pub theme: ThemeArgType,
/// The raw file buffer of syntax theme file.
#[internal]
#[parse(theme_data.map(Some))]
#[borrowed]
pub theme_data: Option<Bytes>,
pub theme: Smart<Option<Derived<DataSource, RawTheme>>>,
/// The size for a tab stop in spaces. A tab is replaced with enough spaces to
/// align with the next multiple of the size.
@ -325,9 +325,6 @@ impl Packed<RawElem> {
.map(|s| s.to_lowercase())
.or(Some("txt".into()));
let extra_syntaxes = LazyCell::new(|| {
load_syntaxes(&elem.syntaxes(styles), &elem.syntaxes_data(styles)).unwrap()
});
let non_highlighted_result = |lines: EcoVec<(EcoString, Span)>| {
lines.into_iter().enumerate().map(|(i, (line, line_span))| {
Packed::new(RawLine::new(
@ -340,17 +337,13 @@ impl Packed<RawElem> {
})
};
let theme = elem.theme(styles).as_ref().as_ref().map(|theme_path| {
theme_path.as_ref().map(|path| {
load_theme(path, elem.theme_data(styles).as_ref().as_ref().unwrap())
.unwrap()
})
});
let theme: &Theme = match theme {
let syntaxes = LazyCell::new(|| elem.syntaxes(styles));
let theme: &synt::Theme = match elem.theme(styles) {
Smart::Auto => &RAW_THEME,
Smart::Custom(Some(ref theme)) => theme,
Smart::Custom(Some(theme)) => theme.derived.get(),
Smart::Custom(None) => return non_highlighted_result(lines).collect(),
};
let foreground = theme.settings.foreground.unwrap_or(synt::Color::BLACK);
let mut seq = vec![];
@ -391,13 +384,14 @@ impl Packed<RawElem> {
)
.highlight();
} else if let Some((syntax_set, syntax)) = lang.and_then(|token| {
RAW_SYNTAXES
.find_syntax_by_token(&token)
.map(|syntax| (&*RAW_SYNTAXES, syntax))
.or_else(|| {
extra_syntaxes
.find_syntax_by_token(&token)
.map(|syntax| (&**extra_syntaxes, syntax))
// Prefer user-provided syntaxes over built-in ones.
syntaxes
.derived
.iter()
.map(|syntax| syntax.get())
.chain(std::iter::once(&*RAW_SYNTAXES))
.find_map(|set| {
set.find_syntax_by_token(&token).map(|syntax| (set, syntax))
})
}) {
let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
@ -532,6 +526,89 @@ cast! {
v: EcoString => Self::Text(v),
}
/// A loaded syntax.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawSyntax(Arc<ManuallyHash<SyntaxSet>>);
impl RawSyntax {
/// Load syntaxes from sources.
fn load(
world: Tracked<dyn World + '_>,
sources: Spanned<OneOrMultiple<DataSource>>,
) -> SourceResult<Derived<OneOrMultiple<DataSource>, Vec<RawSyntax>>> {
let data = sources.load(world)?;
let list = sources
.v
.0
.iter()
.zip(&data)
.map(|(source, data)| Self::decode(source, data))
.collect::<StrResult<_>>()
.at(sources.span)?;
Ok(Derived::new(sources.v, list))
}
/// Decode a syntax from a loaded source.
#[comemo::memoize]
#[typst_macros::time(name = "load syntaxes")]
fn decode(source: &DataSource, data: &Bytes) -> StrResult<RawSyntax> {
let src = data.as_str().map_err(FileError::from)?;
let syntax = SyntaxDefinition::load_from_str(src, false, None).map_err(
|err| match source {
DataSource::Path(path) => {
eco_format!("failed to parse syntax file `{path}` ({err})")
}
DataSource::Bytes(_) => {
eco_format!("failed to parse syntax ({err})")
}
},
)?;
let mut builder = SyntaxSetBuilder::new();
builder.add(syntax);
Ok(RawSyntax(Arc::new(ManuallyHash::new(
builder.build(),
typst_utils::hash128(data),
))))
}
/// Return the underlying syntax set.
fn get(&self) -> &SyntaxSet {
self.0.as_ref()
}
}
/// A loaded syntect theme.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct RawTheme(Arc<ManuallyHash<synt::Theme>>);
impl RawTheme {
/// Load a theme from a data source.
fn load(
world: Tracked<dyn World + '_>,
source: Spanned<DataSource>,
) -> SourceResult<Derived<DataSource, Self>> {
let data = source.load(world)?;
let theme = Self::decode(&data).at(source.span)?;
Ok(Derived::new(source.v, theme))
}
/// Decode a theme from bytes.
#[comemo::memoize]
fn decode(data: &Bytes) -> StrResult<RawTheme> {
let mut cursor = std::io::Cursor::new(data.as_slice());
let theme = synt::ThemeSet::load_from_reader(&mut cursor)
.map_err(|err| eco_format!("failed to parse theme ({err})"))?;
Ok(RawTheme(Arc::new(ManuallyHash::new(theme, typst_utils::hash128(data)))))
}
/// Get the underlying syntect theme.
pub fn get(&self) -> &synt::Theme {
self.0.as_ref()
}
}
/// A highlighted line of raw text.
///
/// This is a helper element that is synthesized by [`raw`] elements.
@ -593,6 +670,11 @@ struct ThemedHighlighter<'a> {
line_fn: LineFn<'a>,
}
// Shorthands for highlighter closures.
type StyleFn<'a> =
&'a mut dyn FnMut(usize, &LinkedNode, Range<usize>, synt::Style) -> Content;
type LineFn<'a> = &'a mut dyn FnMut(usize, Range<usize>, &mut Vec<Content>);
impl<'a> ThemedHighlighter<'a> {
pub fn new(
code: &'a str,
@ -738,108 +820,50 @@ fn to_syn(color: Color) -> synt::Color {
synt::Color { r, g, b, a }
}
/// A list of raw syntax file paths.
#[derive(Debug, Default, Clone, PartialEq, Hash)]
pub struct SyntaxPaths(Vec<EcoString>);
cast! {
SyntaxPaths,
self => self.0.into_value(),
v: EcoString => Self(vec![v]),
v: Array => Self(v.into_iter().map(Value::cast).collect::<HintedStrResult<_>>()?),
}
impl Fold for SyntaxPaths {
fn fold(self, outer: Self) -> Self {
Self(self.0.fold(outer.0))
/// Create a syntect theme item.
fn item(
scope: &str,
color: Option<&str>,
font_style: Option<synt::FontStyle>,
) -> synt::ThemeItem {
synt::ThemeItem {
scope: scope.parse().unwrap(),
style: synt::StyleModifier {
foreground: color.map(|s| to_syn(s.parse::<Color>().unwrap())),
background: None,
font_style,
},
}
}
/// Load a syntax set from a list of syntax file paths.
#[comemo::memoize]
#[typst_macros::time(name = "load syntaxes")]
fn load_syntaxes(paths: &SyntaxPaths, bytes: &[Bytes]) -> StrResult<Arc<SyntaxSet>> {
let mut out = SyntaxSetBuilder::new();
/// Replace tabs with spaces to align with multiples of `tab_size`.
fn align_tabs(text: &str, tab_size: usize) -> EcoString {
let replacement = " ".repeat(tab_size);
let divisor = tab_size.max(1);
let amount = text.chars().filter(|&c| c == '\t').count();
// We might have multiple sublime-syntax/yaml files
for (path, bytes) in paths.0.iter().zip(bytes.iter()) {
let src = std::str::from_utf8(bytes).map_err(FileError::from)?;
out.add(SyntaxDefinition::load_from_str(src, false, None).map_err(|err| {
eco_format!("failed to parse syntax file `{path}` ({err})")
})?);
let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size);
let mut column = 0;
for grapheme in text.graphemes(true) {
match grapheme {
"\t" => {
let required = tab_size - column % divisor;
res.push_str(&replacement[..required]);
column += required;
}
"\n" => {
res.push_str(grapheme);
column = 0;
}
_ => {
res.push_str(grapheme);
column += 1;
}
}
}
Ok(Arc::new(out.build()))
}
/// Function to parse the syntaxes argument.
/// Much nicer than having it be part of the `element` macro.
fn parse_syntaxes(
engine: &mut Engine,
args: &mut Args,
) -> SourceResult<(Option<SyntaxPaths>, Option<Vec<Bytes>>)> {
let Some(Spanned { v: paths, span }) =
args.named::<Spanned<SyntaxPaths>>("syntaxes")?
else {
return Ok((None, None));
};
// Load syntax files.
let data = paths
.0
.iter()
.map(|path| {
let id = span.resolve_path(path).at(span)?;
engine.world.file(id).at(span)
})
.collect::<SourceResult<Vec<Bytes>>>()?;
// Check that parsing works.
let _ = load_syntaxes(&paths, &data).at(span)?;
Ok((Some(paths), Some(data)))
}
#[comemo::memoize]
#[typst_macros::time(name = "load theme")]
fn load_theme(path: &str, bytes: &Bytes) -> StrResult<Arc<synt::Theme>> {
let mut cursor = std::io::Cursor::new(bytes.as_slice());
synt::ThemeSet::load_from_reader(&mut cursor)
.map(Arc::new)
.map_err(|err| eco_format!("failed to parse theme file `{path}` ({err})"))
}
/// Function to parse the theme argument.
/// Much nicer than having it be part of the `element` macro.
fn parse_theme(
engine: &mut Engine,
args: &mut Args,
) -> SourceResult<(Option<ThemeArgType>, Option<Bytes>)> {
let Some(Spanned { v: path, span }) = args.named::<Spanned<ThemeArgType>>("theme")?
else {
// Argument `theme` not found.
return Ok((None, None));
};
let Smart::Custom(path) = path else {
// Argument `theme` is `auto`.
return Ok((Some(Smart::Auto), None));
};
let Some(path) = path else {
// Argument `theme` is `none`.
return Ok((Some(Smart::Custom(None)), None));
};
// Load theme file.
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
// Check that parsing works.
let _ = load_theme(&path, &data).at(span)?;
Ok((Some(Smart::Custom(Some(path))), Some(data)))
res
}
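A minimal, self-contained sketch (not part of the commit) of the tab-expansion rule that `align_tabs` implements above: a tab advances the column to the next multiple of `tab_size`. `expand_tabs` is a hypothetical, simplified stand-in that uses `char`s and `String` instead of graphemes and `EcoString`.

```rust
fn expand_tabs(text: &str, tab_size: usize) -> String {
    let divisor = tab_size.max(1);
    let mut res = String::new();
    let mut column = 0;
    for c in text.chars() {
        match c {
            '\t' => {
                // Pad out to the next multiple of the tab size.
                let required = tab_size - column % divisor;
                res.extend(std::iter::repeat(' ').take(required));
                column += required;
            }
            '\n' => {
                res.push(c);
                column = 0;
            }
            _ => {
                res.push(c);
                column += 1;
            }
        }
    }
    res
}

fn main() {
    // "ab\tc" with a tab size of 4: the tab fills columns 3 and 4.
    assert_eq!(expand_tabs("ab\tc", 4), "ab  c");
    // A tab at the start of a line expands to a full stop width.
    assert_eq!(expand_tabs("\tx", 4), "    x");
}
```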
/// The syntect syntax definitions.
@ -886,49 +910,3 @@ pub static RAW_THEME: LazyLock<synt::Theme> = LazyLock::new(|| synt::Theme {
item("markup.deleted, meta.diff.header.from-file", Some("#d73a49"), None),
],
});
/// Create a syntect theme item.
fn item(
scope: &str,
color: Option<&str>,
font_style: Option<synt::FontStyle>,
) -> synt::ThemeItem {
synt::ThemeItem {
scope: scope.parse().unwrap(),
style: synt::StyleModifier {
foreground: color.map(|s| to_syn(s.parse::<Color>().unwrap())),
background: None,
font_style,
},
}
}
/// Replace tabs with spaces to align with multiples of `tab_size`.
fn align_tabs(text: &str, tab_size: usize) -> EcoString {
let replacement = " ".repeat(tab_size);
let divisor = tab_size.max(1);
let amount = text.chars().filter(|&c| c == '\t').count();
let mut res = EcoString::with_capacity(text.len() - amount + amount * tab_size);
let mut column = 0;
for grapheme in text.graphemes(true) {
match grapheme {
"\t" => {
let required = tab_size - column % divisor;
res.push_str(&replacement[..required]);
column += required;
}
"\n" => {
res.push_str(grapheme);
column = 0;
}
_ => {
res.push_str(grapheme);
column += 1;
}
}
}
res
}

View File

@ -14,14 +14,14 @@ use ecow::EcoString;
use typst_syntax::{Span, Spanned};
use typst_utils::LazyHash;
use crate::diag::{At, SourceResult, StrResult};
use crate::diag::{SourceResult, StrResult};
use crate::engine::Engine;
use crate::foundations::{
cast, elem, func, scope, Bytes, Cast, Content, NativeElement, Packed, Show, Smart,
StyleChain,
cast, elem, func, scope, Bytes, Cast, Content, Derived, NativeElement, Packed, Show,
Smart, StyleChain,
};
use crate::layout::{BlockElem, Length, Rel, Sizing};
use crate::loading::Readable;
use crate::loading::{DataSource, Load, Readable};
use crate::model::Figurable;
use crate::text::LocalName;
use crate::World;
@ -46,25 +46,16 @@ use crate::World;
/// ```
#[elem(scope, Show, LocalName, Figurable)]
pub struct ImageElem {
/// Path to an image file.
/// A path to an image file or raw bytes making up an encoded image.
///
/// For more details, see the [Paths section]($syntax/#paths).
/// For more details about paths, see the [Paths section]($syntax/#paths).
#[required]
#[parse(
let Spanned { v: path, span } =
args.expect::<Spanned<EcoString>>("path to image file")?;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
path
let source = args.expect::<Spanned<DataSource>>("source")?;
let data = source.load(engine.world)?;
Derived::new(source.v, data)
)]
#[borrowed]
pub path: EcoString,
/// The raw file data.
#[internal]
#[required]
#[parse(Readable::Bytes(data))]
pub data: Readable,
pub source: Derived<DataSource, Bytes>,
/// The image's format. Detected automatically by default.
///
@ -106,6 +97,9 @@ pub struct ImageElem {
impl ImageElem {
/// Decode a raster or vector graphic from bytes or a string.
///
/// This function is deprecated. The [`image`] function now accepts bytes
/// directly.
///
/// ```example
/// #let original = read("diagram.svg")
/// #let changed = original.replace(
@ -138,7 +132,9 @@ impl ImageElem {
#[named]
fit: Option<ImageFit>,
) -> StrResult<Content> {
let mut elem = ImageElem::new(EcoString::new(), data);
let bytes = data.into_bytes();
let source = Derived::new(DataSource::Bytes(bytes.clone()), bytes);
let mut elem = ImageElem::new(source);
if let Some(format) = format {
elem.push_format(format);
}
@ -337,6 +333,22 @@ pub enum ImageFormat {
Vector(VectorFormat),
}
impl ImageFormat {
/// Try to detect the format of an image from data.
pub fn detect(data: &[u8]) -> Option<Self> {
if let Some(format) = RasterFormat::detect(data) {
return Some(Self::Raster(format));
}
// SVG or compressed SVG.
if data.starts_with(b"<svg") || data.starts_with(&[0x1f, 0x8b]) {
return Some(Self::Vector(VectorFormat::Svg));
}
None
}
}
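A small hedged sketch (not part of the commit) of how the new `detect` helper behaves, assuming the usual magic-byte checks in `RasterFormat::detect`:

```rust
use typst_library::visualize::{ImageFormat, VectorFormat};

fn main() {
    // Plain SVG markup is recognized by its `<svg` prefix.
    assert!(matches!(
        ImageFormat::detect(b"<svg xmlns=\"http://www.w3.org/2000/svg\"/>"),
        Some(ImageFormat::Vector(VectorFormat::Svg))
    ));
    // Gzip data (e.g. `.svgz`) starts with the 0x1f 0x8b magic bytes.
    assert!(matches!(
        ImageFormat::detect(&[0x1f, 0x8b, 0x08]),
        Some(ImageFormat::Vector(VectorFormat::Svg))
    ));
    // Data that matches neither a raster magic number nor SVG yields `None`.
    assert!(ImageFormat::detect(b"plain text").is_none());
}
```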
/// A vector graphics format.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)]
pub enum VectorFormat {

View File

@ -110,6 +110,7 @@ impl Hash for Repr {
// all used fonts gives us something similar.
self.data.hash(state);
self.font_hash.hash(state);
self.flatten_text.hash(state);
}
}

View File

@ -33,15 +33,15 @@ pub fn write_embedded_files(
}
let embed = elem.to_packed::<EmbedElem>().unwrap();
if embed.resolved_path.len() > Str::PDFA_LIMIT {
if embed.path.derived.len() > Str::PDFA_LIMIT {
bail!(embed.span(), "embedded file path is too long");
}
let id = embed_file(ctx, &mut chunk, embed)?;
if embedded_files.insert(embed.resolved_path.clone(), id).is_some() {
if embedded_files.insert(embed.path.derived.clone(), id).is_some() {
bail!(
elem.span(),
"duplicate embedded file for path `{}`", embed.resolved_path;
"duplicate embedded file for path `{}`", embed.path.derived;
hint: "embedded file paths must be unique",
);
}
@ -92,8 +92,8 @@ fn embed_file(
embedded_file.finish();
let mut file_spec = chunk.file_spec(file_spec_dict_ref);
file_spec.path(Str(embed.resolved_path.as_bytes()));
file_spec.unic_file(TextStr(&embed.resolved_path));
file_spec.path(Str(embed.path.derived.as_bytes()));
file_spec.unic_file(TextStr(&embed.path.derived));
file_spec
.insert(Name(b"EF"))
.dict()

View File

@ -162,3 +162,74 @@ impl<T: Debug> Debug for LazyHash<T> {
self.value.fmt(f)
}
}
/// A wrapper type with a manually computed hash.
///
/// This can be used to turn an unhashable type into a hashable one where the
/// hash is provided manually. Typically, the hash is derived from the data
/// which was used to construct the unhashable type.
///
/// For instance, you could hash the bytes that were parsed into an unhashable
/// data structure.
///
/// # Equality
/// Because Typst uses high-quality 128-bit hashes in all places, the risk of a
/// hash collision is reduced to an absolute minimum. Therefore, this type
/// additionally provides `PartialEq` and `Eq` implementations that compare by
/// hash instead of by value. For this to be correct, your hash implementation
/// **must feed all information relevant to the `PartialEq` impl to the
/// hasher.**
#[derive(Clone)]
pub struct ManuallyHash<T: ?Sized> {
/// A manually computed hash.
hash: u128,
/// The underlying value.
value: T,
}
impl<T> ManuallyHash<T> {
/// Wraps an item with a pre-computed hash.
///
/// The hash should be computed with `typst_utils::hash128`.
#[inline]
pub fn new(value: T, hash: u128) -> Self {
Self { hash, value }
}
/// Returns the wrapped value.
#[inline]
pub fn into_inner(self) -> T {
self.value
}
}
impl<T: ?Sized> Hash for ManuallyHash<T> {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
state.write_u128(self.hash);
}
}
impl<T: ?Sized> Eq for ManuallyHash<T> {}
impl<T: ?Sized> PartialEq for ManuallyHash<T> {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.hash == other.hash
}
}
impl<T: ?Sized> Deref for ManuallyHash<T> {
type Target = T;
#[inline]
fn deref(&self) -> &Self::Target {
&self.value
}
}
impl<T: Debug> Debug for ManuallyHash<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.value.fmt(f)
}
}
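A minimal usage sketch (not part of the commit) of the intended pattern: wrap an unhashable parse result and hash it by the bytes it was parsed from, mirroring how `RawSyntax::decode` and `RawTheme::decode` use `ManuallyHash` above. `ParsedConfig`, `parse`, and `load` are hypothetical names for illustration only.

```rust
use std::sync::Arc;
use typst_utils::{hash128, ManuallyHash};

// Hypothetical parse result without a `Hash` implementation of its own.
#[derive(Debug)]
struct ParsedConfig {
    entries: Vec<(String, String)>,
}

fn parse(_bytes: &[u8]) -> ParsedConfig {
    // Parsing elided; the result only matters for illustration.
    ParsedConfig { entries: Vec::new() }
}

fn load(bytes: &[u8]) -> Arc<ManuallyHash<ParsedConfig>> {
    // Hash the source bytes: two values parsed from equal bytes then compare
    // and hash equal, which is what the `PartialEq`/`Eq` impls rely on.
    Arc::new(ManuallyHash::new(parse(bytes), hash128(bytes)))
}

fn main() {
    let a = load(b"key = value");
    let b = load(b"key = value");
    // Equal inputs yield values that compare equal by hash.
    assert_eq!(a, b);
}
```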

View File

@ -15,7 +15,7 @@ mod scalar;
pub use self::bitset::{BitSet, SmallBitSet};
pub use self::deferred::Deferred;
pub use self::duration::format_duration;
pub use self::hash::LazyHash;
pub use self::hash::{LazyHash, ManuallyHash};
pub use self::pico::{PicoStr, ResolvedPicoStr};
pub use self::round::{round_int_with_precision, round_with_precision};
pub use self::scalar::Scalar;

View File

@ -10,6 +10,16 @@
description: "Information about a secret project",
)
--- pdf-embed-bytes ---
#pdf.embed("hello.txt", read("/assets/text/hello.txt", encoding: none))
#pdf.embed(
"a_file_name.txt",
read("/assets/text/hello.txt", encoding: none),
relationship: "supplement",
mime-type: "text/plain",
description: "A description",
)
--- pdf-embed-invalid-relationship ---
#pdf.embed(
"/assets/text/hello.txt",
@ -18,13 +28,3 @@
mime-type: "text/plain",
description: "A test file",
)
--- pdf-embed-decode ---
#pdf.embed.decode("hello.txt", read("/assets/text/hello.txt"))
#pdf.embed.decode(
"a_file_name.txt",
read("/assets/text/hello.txt"),
relationship: "supplement",
mime-type: "text/plain",
description: "A description",
)