diff --git a/crates/typst-cli/src/args.rs b/crates/typst-cli/src/args.rs index 83c4c8f9e..d6855d100 100644 --- a/crates/typst-cli/src/args.rs +++ b/crates/typst-cli/src/args.rs @@ -473,6 +473,9 @@ pub enum PdfStandard { /// PDF/A-2b. #[value(name = "a-2b")] A_2b, + /// PDF/A-3b. + #[value(name = "a-3b")] + A_3b, } display_possible_values!(PdfStandard); diff --git a/crates/typst-cli/src/compile.rs b/crates/typst-cli/src/compile.rs index adeef0f2d..515a777a2 100644 --- a/crates/typst-cli/src/compile.rs +++ b/crates/typst-cli/src/compile.rs @@ -136,6 +136,7 @@ impl CompileConfig { .map(|standard| match standard { PdfStandard::V_1_7 => typst_pdf::PdfStandard::V_1_7, PdfStandard::A_2b => typst_pdf::PdfStandard::A_2b, + PdfStandard::A_3b => typst_pdf::PdfStandard::A_3b, }) .collect::>(); PdfStandards::new(&list)? diff --git a/crates/typst-library/src/lib.rs b/crates/typst-library/src/lib.rs index 87b2fcb44..2ea77eaa5 100644 --- a/crates/typst-library/src/lib.rs +++ b/crates/typst-library/src/lib.rs @@ -21,6 +21,7 @@ pub mod layout; pub mod loading; pub mod math; pub mod model; +pub mod pdf; pub mod routines; pub mod symbols; pub mod text; @@ -249,6 +250,7 @@ fn global(math: Module, inputs: Dict, features: &Features) -> Module { self::introspection::define(&mut global); self::loading::define(&mut global); self::symbols::define(&mut global); + self::pdf::define(&mut global); global.reset_category(); if features.is_enabled(Feature::Html) { global.define_module(self::html::module()); diff --git a/crates/typst-library/src/pdf/embed.rs b/crates/typst-library/src/pdf/embed.rs new file mode 100644 index 000000000..db4986225 --- /dev/null +++ b/crates/typst-library/src/pdf/embed.rs @@ -0,0 +1,131 @@ +use ecow::EcoString; +use typst_syntax::{Span, Spanned}; + +use crate::diag::{At, SourceResult, StrResult}; +use crate::engine::Engine; +use crate::foundations::{ + elem, func, scope, Cast, Content, NativeElement, Packed, Show, StyleChain, +}; +use crate::introspection::Locatable; +use crate::loading::Readable; +use crate::World; + +/// A file that will be embedded into the output PDF. +/// +/// This can be used to distribute additional files that are related to the PDF +/// within it. PDF readers will display the files in a file listing. +/// +/// Some international standards use this mechanism to embed machine-readable +/// data (e.g., ZUGFeRD/Factur-X for invoices) that mirrors the visual content +/// of the PDF. +/// +/// # Example +/// ```typ +/// #pdf.embed( +/// "experiment.csv", +/// relationship: "supplement", +/// mime-type: "text/csv", +/// description: "Raw Oxygen readings from the Arctic experiment", +/// ) +/// ``` +/// +/// # Notes +/// - This element is ignored if exporting to a format other than PDF. +/// - File embeddings are not currently supported for PDF/A-2, even if the +/// embedded file conforms to PDF/A-1 or PDF/A-2. +#[elem(scope, Show, Locatable)] +pub struct EmbedElem { + /// Path to a file to be embedded. + /// + /// For more details, see the [Paths section]($syntax/#paths). + #[required] + #[parse( + let Spanned { v: path, span } = + args.expect::>("path to the file to be embedded")?; + let id = span.resolve_path(&path).at(span)?; + let data = engine.world.file(id).at(span)?; + path + )] + #[borrowed] + pub path: EcoString, + + /// The resolved project-relative path. + #[internal] + #[required] + #[parse(id.vpath().as_rootless_path().to_string_lossy().replace("\\", "/").into())] + pub resolved_path: EcoString, + + /// The raw file data. + #[internal] + #[required] + #[parse(Readable::Bytes(data))] + pub data: Readable, + + /// The relationship of the embedded file to the document. + /// + /// Ignored if export doesn't target PDF/A-3. + pub relationship: Option, + + /// The MIME type of the embedded file. + #[borrowed] + pub mime_type: Option, + + /// A description for the embedded file. + #[borrowed] + pub description: Option, +} + +#[scope] +impl EmbedElem { + /// Decode a file embedding from bytes or a string. + #[func(title = "Embed Data")] + fn decode( + /// The call span of this function. + span: Span, + /// The path that will be written into the PDF. Typst will not read from + /// this path since the data is provided in the following argument. + path: EcoString, + /// The data to embed as a file. + data: Readable, + /// The relationship of the embedded file to the document. + #[named] + relationship: Option>, + /// The MIME type of the embedded file. + #[named] + mime_type: Option>, + /// A description for the embedded file. + #[named] + description: Option>, + ) -> StrResult { + let mut elem = EmbedElem::new(path.clone(), path, data); + if let Some(description) = description { + elem.push_description(description); + } + if let Some(mime_type) = mime_type { + elem.push_mime_type(mime_type); + } + if let Some(relationship) = relationship { + elem.push_relationship(relationship); + } + Ok(elem.pack().spanned(span)) + } +} + +impl Show for Packed { + fn show(&self, _: &mut Engine, _: StyleChain) -> SourceResult { + Ok(Content::empty()) + } +} + +/// The relationship of an embedded file with the document. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum EmbeddedFileRelationship { + /// The PDF document was created from the source file. + Source, + /// The file was used to derive a visual presentation in the PDF. + Data, + /// An alternative representation of the document. + Alternative, + /// Additional resources for the document. + Supplement, +} diff --git a/crates/typst-library/src/pdf/mod.rs b/crates/typst-library/src/pdf/mod.rs new file mode 100644 index 000000000..669835d4c --- /dev/null +++ b/crates/typst-library/src/pdf/mod.rs @@ -0,0 +1,24 @@ +//! PDF-specific functionality. + +mod embed; + +pub use self::embed::*; + +use crate::foundations::{category, Category, Module, Scope}; + +/// PDF-specific functionality. +#[category] +pub static PDF: Category; + +/// Hook up the `pdf` module. +pub(super) fn define(global: &mut Scope) { + global.category(PDF); + global.define_module(module()); +} + +/// Hook up all `pdf` definitions. +pub fn module() -> Module { + let mut scope = Scope::deduplicating(); + scope.define_elem::(); + Module::new("pdf", scope) +} diff --git a/crates/typst-pdf/src/catalog.rs b/crates/typst-pdf/src/catalog.rs index c4b0e2e83..709b01553 100644 --- a/crates/typst-pdf/src/catalog.rs +++ b/crates/typst-pdf/src/catalog.rs @@ -12,7 +12,7 @@ use typst_syntax::Span; use xmp_writer::{DateTime, LangId, RenditionClass, XmpWriter}; use crate::page::PdfPageLabel; -use crate::{hash_base64, outline, TextStrExt, Timezone, WithEverything}; +use crate::{hash_base64, outline, TextStrExt, Timestamp, Timezone, WithEverything}; /// Write the document catalog. pub fn write_catalog( @@ -86,23 +86,10 @@ pub fn write_catalog( info.keywords(TextStr::trimmed(&joined)); xmp.pdf_keywords(&joined); } - - // (1) If the `document.date` is set to specific `datetime` or `none`, use it. - // (2) If the `document.date` is set to `auto` or not set, try to use the - // date from the options. - // (3) Otherwise, we don't write date metadata. - let (date, tz) = match (ctx.document.info.date, ctx.options.timestamp) { - (Smart::Custom(date), _) => (date, None), - (Smart::Auto, Some(timestamp)) => { - (Some(timestamp.datetime), Some(timestamp.timezone)) - } - _ => (None, None), - }; - if let Some(date) = date { - if let Some(pdf_date) = pdf_date(date, tz) { - info.creation_date(pdf_date); - info.modified_date(pdf_date); - } + let (date, tz) = document_date(ctx.document.info.date, ctx.options.timestamp); + if let Some(pdf_date) = date.and_then(|date| pdf_date(date, tz)) { + info.creation_date(pdf_date); + info.modified_date(pdf_date); } info.finish(); @@ -154,7 +141,7 @@ pub fn write_catalog( } // Assert dominance. - if ctx.options.standards.pdfa { + if let Some((part, conformance)) = ctx.options.standards.pdfa_part { let mut extension_schemas = xmp.extension_schemas(); extension_schemas .xmp_media_management() @@ -162,8 +149,8 @@ pub fn write_catalog( .describe_instance_id(); extension_schemas.pdf().properties().describe_all(); extension_schemas.finish(); - xmp.pdfa_part(2); - xmp.pdfa_conformance("B"); + xmp.pdfa_part(part); + xmp.pdfa_conformance(conformance); } let xmp_buf = xmp.finish(None); @@ -182,13 +169,35 @@ pub fn write_catalog( catalog.viewer_preferences().direction(dir); catalog.metadata(meta_ref); - // Write the named destination tree if there are any entries. - if !ctx.references.named_destinations.dests.is_empty() { + let has_dests = !ctx.references.named_destinations.dests.is_empty(); + let has_embeddings = !ctx.references.embedded_files.is_empty(); + + // Write the `/Names` dictionary. + if has_dests || has_embeddings { + // Write the named destination tree if there are any entries. let mut name_dict = catalog.names(); - let mut dests_name_tree = name_dict.destinations(); - let mut names = dests_name_tree.names(); - for &(name, dest_ref, ..) in &ctx.references.named_destinations.dests { - names.insert(Str(name.resolve().as_bytes()), dest_ref); + if has_dests { + let mut dests_name_tree = name_dict.destinations(); + let mut names = dests_name_tree.names(); + for &(name, dest_ref, ..) in &ctx.references.named_destinations.dests { + names.insert(Str(name.resolve().as_bytes()), dest_ref); + } + } + + if has_embeddings { + let mut embedded_files = name_dict.embedded_files(); + let mut names = embedded_files.names(); + for (name, file_ref) in &ctx.references.embedded_files { + names.insert(Str(name.as_bytes()), *file_ref); + } + } + } + + if has_embeddings && ctx.options.standards.pdfa { + // PDF 2.0, but ISO 19005-3 (PDF/A-3) Annex E allows it for PDF/A-3. + let mut associated_files = catalog.insert(Name(b"AF")).array().typed(); + for (_, file_ref) in ctx.references.embedded_files { + associated_files.item(file_ref).finish(); } } @@ -289,8 +298,27 @@ pub(crate) fn write_page_labels( result } +/// Resolve the document date. +/// +/// (1) If the `document.date` is set to specific `datetime` or `none`, use it. +/// (2) If the `document.date` is set to `auto` or not set, try to use the +/// date from the options. +/// (3) Otherwise, we don't write date metadata. +pub fn document_date( + document_date: Smart>, + timestamp: Option, +) -> (Option, Option) { + match (document_date, timestamp) { + (Smart::Custom(date), _) => (date, None), + (Smart::Auto, Some(timestamp)) => { + (Some(timestamp.datetime), Some(timestamp.timezone)) + } + _ => (None, None), + } +} + /// Converts a datetime to a pdf-writer date. -fn pdf_date(datetime: Datetime, tz: Option) -> Option { +pub fn pdf_date(datetime: Datetime, tz: Option) -> Option { let year = datetime.year().filter(|&y| y >= 0)? as u16; let mut pdf_date = pdf_writer::Date::new(year); diff --git a/crates/typst-pdf/src/embed.rs b/crates/typst-pdf/src/embed.rs new file mode 100644 index 000000000..b32f6e45d --- /dev/null +++ b/crates/typst-pdf/src/embed.rs @@ -0,0 +1,122 @@ +use std::collections::BTreeMap; + +use ecow::EcoString; +use pdf_writer::types::AssociationKind; +use pdf_writer::{Filter, Finish, Name, Ref, Str, TextStr}; +use typst_library::diag::{bail, SourceResult}; +use typst_library::foundations::{NativeElement, Packed, StyleChain}; +use typst_library::pdf::{EmbedElem, EmbeddedFileRelationship}; + +use crate::catalog::{document_date, pdf_date}; +use crate::{deflate, NameExt, PdfChunk, StrExt, WithGlobalRefs}; + +/// Query for all [`EmbedElem`] and write them and their file specifications. +/// +/// This returns a map of embedding names and references so that we can later +/// add them to the catalog's `/Names` dictionary. +pub fn write_embedded_files( + ctx: &WithGlobalRefs, +) -> SourceResult<(PdfChunk, BTreeMap)> { + let mut chunk = PdfChunk::new(); + let mut embedded_files = BTreeMap::default(); + + let elements = ctx.document.introspector.query(&EmbedElem::elem().select()); + for elem in &elements { + if !ctx.options.standards.embedded_files { + // PDF/A-2 requires embedded files to be PDF/A-1 or PDF/A-2, + // which we don't currently check. + bail!( + elem.span(), + "file embeddings are not currently supported for PDF/A-2"; + hint: "PDF/A-3 supports arbitrary embedded files" + ); + } + + let embed = elem.to_packed::().unwrap(); + if embed.resolved_path.len() > Str::PDFA_LIMIT { + bail!(embed.span(), "embedded file path is too long"); + } + + let id = embed_file(ctx, &mut chunk, embed)?; + if embedded_files.insert(embed.resolved_path.clone(), id).is_some() { + bail!( + elem.span(), + "duplicate embedded file for path `{}`", embed.resolved_path; + hint: "embedded file paths must be unique", + ); + } + } + + Ok((chunk, embedded_files)) +} + +/// Write the embedded file stream and its file specification. +fn embed_file( + ctx: &WithGlobalRefs, + chunk: &mut PdfChunk, + embed: &Packed, +) -> SourceResult { + let embedded_file_stream_ref = chunk.alloc.bump(); + let file_spec_dict_ref = chunk.alloc.bump(); + + let data = embed.data().as_slice(); + let compressed = deflate(data); + + let mut embedded_file = chunk.embedded_file(embedded_file_stream_ref, &compressed); + embedded_file.filter(Filter::FlateDecode); + + if let Some(mime_type) = embed.mime_type(StyleChain::default()) { + if mime_type.len() > Name::PDFA_LIMIT { + bail!(embed.span(), "embedded file MIME type is too long"); + } + embedded_file.subtype(Name(mime_type.as_bytes())); + } else if ctx.options.standards.pdfa { + bail!(embed.span(), "embedded files must have a MIME type in PDF/A-3"); + } + + let mut params = embedded_file.params(); + params.size(data.len() as i32); + + let (date, tz) = document_date(ctx.document.info.date, ctx.options.timestamp); + if let Some(pdf_date) = date.and_then(|date| pdf_date(date, tz)) { + params.modification_date(pdf_date); + } else if ctx.options.standards.pdfa { + bail!( + embed.span(), + "the document must have a date when embedding files in PDF/A-3"; + hint: "`set document(date: none)` must not be used in this case" + ); + } + + params.finish(); + embedded_file.finish(); + + let mut file_spec = chunk.file_spec(file_spec_dict_ref); + file_spec.path(Str(embed.resolved_path.as_bytes())); + file_spec.unic_file(TextStr(&embed.resolved_path)); + file_spec + .insert(Name(b"EF")) + .dict() + .pair(Name(b"F"), embedded_file_stream_ref) + .pair(Name(b"UF"), embedded_file_stream_ref); + + if ctx.options.standards.pdfa { + // PDF 2.0, but ISO 19005-3 (PDF/A-3) Annex E allows it for PDF/A-3. + file_spec.association_kind(match embed.relationship(StyleChain::default()) { + Some(EmbeddedFileRelationship::Source) => AssociationKind::Source, + Some(EmbeddedFileRelationship::Data) => AssociationKind::Data, + Some(EmbeddedFileRelationship::Alternative) => AssociationKind::Alternative, + Some(EmbeddedFileRelationship::Supplement) => AssociationKind::Supplement, + None => AssociationKind::Unspecified, + }); + } + + if let Some(description) = embed.description(StyleChain::default()) { + if description.len() > Str::PDFA_LIMIT { + bail!(embed.span(), "embedded file description is too long"); + } + file_spec.description(TextStr(description)); + } + + Ok(file_spec_dict_ref) +} diff --git a/crates/typst-pdf/src/lib.rs b/crates/typst-pdf/src/lib.rs index f45c62bb5..88e62389c 100644 --- a/crates/typst-pdf/src/lib.rs +++ b/crates/typst-pdf/src/lib.rs @@ -4,6 +4,7 @@ mod catalog; mod color; mod color_font; mod content; +mod embed; mod extg; mod font; mod gradient; @@ -14,12 +15,13 @@ mod page; mod resources; mod tiling; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::fmt::{self, Debug, Formatter}; use std::hash::Hash; use std::ops::{Deref, DerefMut}; use base64::Engine; +use ecow::EcoString; use pdf_writer::{Chunk, Name, Pdf, Ref, Str, TextStr}; use serde::{Deserialize, Serialize}; use typst_library::diag::{bail, SourceResult, StrResult}; @@ -33,6 +35,7 @@ use typst_utils::Deferred; use crate::catalog::write_catalog; use crate::color::{alloc_color_functions_refs, ColorFunctionRefs}; use crate::color_font::{write_color_fonts, ColorFontSlice}; +use crate::embed::write_embedded_files; use crate::extg::{write_graphic_states, ExtGState}; use crate::font::write_fonts; use crate::gradient::{write_gradients, PdfGradient}; @@ -67,6 +70,7 @@ pub fn pdf(document: &PagedDocument, options: &PdfOptions) -> SourceResult, } impl PdfStandards { /// Validates a list of PDF standards for compatibility and returns their /// encapsulated representation. pub fn new(list: &[PdfStandard]) -> StrResult { - Ok(Self { pdfa: list.contains(&PdfStandard::A_2b) }) + let a2b = list.contains(&PdfStandard::A_2b); + let a3b = list.contains(&PdfStandard::A_3b); + + if a2b && a3b { + bail!("PDF cannot conform to A-2B and A-3B at the same time") + } + + let pdfa = a2b || a3b; + Ok(Self { + pdfa, + embedded_files: !a2b, + pdfa_part: pdfa.then_some((if a2b { 2 } else { 3 }, "B")), + }) } } @@ -166,10 +188,9 @@ impl Debug for PdfStandards { } } -#[allow(clippy::derivable_impls)] impl Default for PdfStandards { fn default() -> Self { - Self { pdfa: false } + Self { pdfa: false, embedded_files: true, pdfa_part: None } } } @@ -186,6 +207,9 @@ pub enum PdfStandard { /// PDF/A-2b. #[serde(rename = "a-2b")] A_2b, + /// PDF/A-3b. + #[serde(rename = "a-3b")] + A_3b, } /// A struct to build a PDF following a fixed succession of phases. @@ -316,6 +340,8 @@ struct References { tilings: HashMap, /// The IDs of written external graphics states. ext_gs: HashMap, + /// The names and references for embedded files. + embedded_files: BTreeMap, } /// At this point, the references have been assigned to all resources. The page @@ -481,6 +507,14 @@ impl Renumber for HashMap { } } +impl Renumber for BTreeMap { + fn renumber(&mut self, offset: i32) { + for v in self.values_mut() { + v.renumber(offset); + } + } +} + impl Renumber for Option { fn renumber(&mut self, offset: i32) { if let Some(r) = self { diff --git a/docs/src/lib.rs b/docs/src/lib.rs index 5ca3724ab..e92799718 100644 --- a/docs/src/lib.rs +++ b/docs/src/lib.rs @@ -25,6 +25,7 @@ use typst::layout::{Abs, Margin, PageElem, PagedDocument, LAYOUT}; use typst::loading::DATA_LOADING; use typst::math::MATH; use typst::model::MODEL; +use typst::pdf::PDF; use typst::symbols::SYMBOLS; use typst::text::{Font, FontBook, TEXT}; use typst::utils::LazyHash; @@ -163,6 +164,7 @@ fn reference_pages(resolver: &dyn Resolver) -> PageModel { category_page(resolver, VISUALIZE), category_page(resolver, INTROSPECTION), category_page(resolver, DATA_LOADING), + category_page(resolver, PDF), ]; page } diff --git a/tests/suite/pdf/embed.typ b/tests/suite/pdf/embed.typ new file mode 100644 index 000000000..bb5c9316c --- /dev/null +++ b/tests/suite/pdf/embed.typ @@ -0,0 +1,30 @@ +// Test file embeddings. The tests here so far are unsatisfactory because we +// have no PDF testing infrastructure. That should be improved in the future. + +--- pdf-embed --- +#pdf.embed("/assets/text/hello.txt") +#pdf.embed( + "/assets/data/details.toml", + relationship: "supplement", + mime-type: "application/toml", + description: "Information about a secret project", +) + +--- pdf-embed-invalid-relationship --- +#pdf.embed( + "/assets/text/hello.txt", + // Error: 17-23 expected "source", "data", "alternative", "supplement", or none + relationship: "test", + mime-type: "text/plain", + description: "A test file", +) + +--- pdf-embed-decode --- +#pdf.embed.decode("hello.txt", read("/assets/text/hello.txt")) +#pdf.embed.decode( + "a_file_name.txt", + read("/assets/text/hello.txt"), + relationship: "supplement", + mime-type: "text/plain", + description: "A description", +)