From a0093ad8a7906068b4bd641c28f614804d3e099d Mon Sep 17 00:00:00 2001 From: Laurenz Date: Tue, 1 Oct 2024 11:24:18 +0200 Subject: [PATCH] Minimal PDF/A support (#5075) Co-authored-by: Martin Haug --- Cargo.lock | 18 +-- Cargo.toml | 8 +- crates/typst-cli/src/args.rs | 17 +++ crates/typst-cli/src/compile.rs | 17 ++- crates/typst-pdf/src/catalog.rs | 84 ++++++++++--- crates/typst-pdf/src/color.rs | 32 +++-- crates/typst-pdf/src/color_font.rs | 13 +- crates/typst-pdf/src/content.rs | 43 +++++-- crates/typst-pdf/src/font.rs | 47 +++++-- crates/typst-pdf/src/gradient.rs | 8 +- crates/typst-pdf/src/image.rs | 13 +- crates/typst-pdf/src/lib.rs | 142 +++++++++++++++++----- crates/typst-pdf/src/named_destination.rs | 10 +- crates/typst-pdf/src/outline.rs | 4 +- crates/typst/src/layout/page.rs | 2 +- crates/typst/src/model/link.rs | 2 +- crates/typst/src/text/font/book.rs | 6 + crates/typst/src/visualize/color.rs | 3 + 18 files changed, 361 insertions(+), 108 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba5194568..a3d1e9207 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1681,9 +1681,9 @@ checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" [[package]] name = "pdf-writer" -version = "0.10.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af6a7882fda7808481d43c51cadfc3ec934c6af72612a1fe6985ce329a2f0469" +checksum = "be17f48d7fbbd22c6efedb58af5d409aa578e407f40b29a0bcb4e66ed84c5c98" dependencies = [ "bitflags 2.6.0", "itoa", @@ -2354,13 +2354,15 @@ dependencies = [ [[package]] name = "subsetter" -version = "0.11.0" -source = "git+https://github.com/typst/subsetter?rev=4e0058b#4e0058b4b9a0948a5f79894111948d95e59ba350" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74f98178f34057d4d4de93d68104007c6dea4dfac930204a69ab4622daefa648" [[package]] name = "svg2pdf" -version = "0.11.0" -source = "git+https://github.com/typst/svg2pdf?rev=5963e1e#5963e1e890ac89fbf6b4750b3470ebd5765ef606" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5014c9dadcf318fb7ef8c16438e95abcc9de1ae24d60d5bccc64c55100c50364" dependencies = [ "fontdb", "image", @@ -3489,9 +3491,9 @@ checksum = "ec7a2a501ed189703dba8b08142f057e887dfc4b2cc4db2d343ac6376ba3e0b9" [[package]] name = "xmp-writer" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4543ba138f64a94b19e1e9c66c165bca7e03d470e1c066cb76ea279d9d0e1989" +checksum = "8254499146a4fd0c86e3e99cf4a9f468f595808fb49ff8f3e495f2b117bf4ebc" [[package]] name = "xz2" diff --git a/Cargo.toml b/Cargo.toml index a0a6264ae..d0273339e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,7 +77,7 @@ oxipng = { version = "9.0", default-features = false, features = ["filetime", "p palette = { version = "0.7.3", default-features = false, features = ["approx", "libm"] } parking_lot = "0.12.1" pathdiff = "0.2" -pdf-writer = "0.10.0" +pdf-writer = "0.12" phf = { version = "0.11", features = ["macros"] } pixglyph = "0.5" png = "0.17" @@ -102,8 +102,8 @@ shell-escape = "0.1.5" siphasher = "1" smallvec = { version = "1.11.1", features = ["union", "const_generics", "const_new"] } stacker = "0.1.15" -subsetter = { git = "https://github.com/typst/subsetter", rev = "4e0058b" } -svg2pdf = { git = "https://github.com/typst/svg2pdf", rev = "5963e1e" } +subsetter = "0.2" +svg2pdf = "0.12" syn = { version = "2", features = ["full", "extra-traits"] } syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] } tar = "0.4" @@ -127,7 +127,7 @@ walkdir = "2" wasmi = "0.35.0" xmlparser = "0.13.5" xmlwriter = "0.1.0" -xmp-writer = "0.2" +xmp-writer = "0.3" xz2 = { version = "0.1", features = ["static"] } yaml-front-matter = "0.1" zip = { version = "2", default-features = false, features = ["deflate"] } diff --git a/crates/typst-cli/src/args.rs b/crates/typst-cli/src/args.rs index d1cbefae1..6505994bd 100644 --- a/crates/typst-cli/src/args.rs +++ b/crates/typst-cli/src/args.rs @@ -122,6 +122,23 @@ pub struct CompileCommand { /// apart from file names and line numbers. #[arg(long = "timings", value_name = "OUTPUT_JSON")] pub timings: Option>, + + /// One (or multiple comma-separated) PDF standards that Typst will enforce + /// conformance with. + #[arg(long = "pdf-standard", value_delimiter = ',')] + pub pdf_standard: Vec, +} + +/// A PDF standard. +#[derive(Debug, Copy, Clone, Eq, PartialEq, ValueEnum)] +#[allow(non_camel_case_types)] +pub enum PdfStandard { + /// PDF 1.7. + #[value(name = "1.7")] + V_1_7, + /// PDF/A-2b. + #[value(name = "a-2b")] + A_2b, } /// Initializes a new project from a template diff --git a/crates/typst-cli/src/compile.rs b/crates/typst-cli/src/compile.rs index 58745c80c..b17869412 100644 --- a/crates/typst-cli/src/compile.rs +++ b/crates/typst-cli/src/compile.rs @@ -16,10 +16,11 @@ use typst::layout::{Frame, Page, PageRanges}; use typst::model::Document; use typst::syntax::{FileId, Source, Span}; use typst::WorldExt; -use typst_pdf::PdfOptions; +use typst_pdf::{PdfOptions, PdfStandards}; use crate::args::{ CompileCommand, DiagnosticFormat, Input, Output, OutputFormat, PageRangeArgument, + PdfStandard, }; use crate::timings::Timer; use crate::watch::Status; @@ -78,6 +79,19 @@ impl CompileCommand { ) }) } + + /// The PDF standards to try to conform with. + pub fn pdf_standards(&self) -> StrResult { + let list = self + .pdf_standard + .iter() + .map(|standard| match standard { + PdfStandard::V_1_7 => typst_pdf::PdfStandard::V_1_7, + PdfStandard::A_2b => typst_pdf::PdfStandard::A_2b, + }) + .collect::>(); + PdfStandards::new(&list) + } } /// Execute a compilation command. @@ -179,6 +193,7 @@ fn export_pdf(document: &Document, command: &CompileCommand) -> SourceResult<()> command.common.creation_timestamp.unwrap_or_else(chrono::Utc::now), ), page_ranges: command.exported_page_ranges(), + standards: command.pdf_standards().at(Span::detached())?, }; let buffer = typst_pdf::pdf(document, &options)?; command diff --git a/crates/typst-pdf/src/catalog.rs b/crates/typst-pdf/src/catalog.rs index 18f121e63..7aba0eb66 100644 --- a/crates/typst-pdf/src/catalog.rs +++ b/crates/typst-pdf/src/catalog.rs @@ -4,14 +4,15 @@ use ecow::eco_format; use pdf_writer::types::Direction; use pdf_writer::writers::PageLabel; use pdf_writer::{Finish, Name, Pdf, Ref, Str, TextStr}; -use typst::diag::SourceResult; +use typst::diag::{bail, SourceResult}; use typst::foundations::{Datetime, Smart}; use typst::layout::Dir; +use typst::syntax::Span; use typst::text::Lang; use xmp_writer::{DateTime, LangId, RenditionClass, Timezone, XmpWriter}; use crate::page::PdfPageLabel; -use crate::{hash_base64, outline, WithEverything}; +use crate::{hash_base64, outline, TextStrExt, WithEverything}; /// Write the document catalog. pub fn write_catalog( @@ -43,7 +44,7 @@ pub fn write_catalog( let mut info = pdf.document_info(info_ref); let mut xmp = XmpWriter::new(); if let Some(title) = &ctx.document.info.title { - info.title(TextStr(title)); + info.title(TextStr::trimmed(title)); xmp.title([(None, title.as_str())]); } @@ -66,7 +67,7 @@ pub fn write_catalog( // bit weird to not use the array (and it makes Acrobat show the author // list in quotes), but there's not much we can do about that. let joined = authors.join(", "); - info.author(TextStr(&joined)); + info.author(TextStr::trimmed(&joined)); xmp.creator([joined.as_str()]); } @@ -77,26 +78,20 @@ pub fn write_catalog( let keywords = &ctx.document.info.keywords; if !keywords.is_empty() { let joined = keywords.join(", "); - info.keywords(TextStr(&joined)); + info.keywords(TextStr::trimmed(&joined)); xmp.pdf_keywords(&joined); } - if let Some(date) = ctx.document.info.date.unwrap_or(ctx.options.timestamp) { - let tz = ctx.document.info.date.is_auto(); + let date = ctx.document.info.date.unwrap_or(ctx.options.timestamp); + let tz = ctx.document.info.date.is_auto(); + if let Some(date) = date { if let Some(pdf_date) = pdf_date(date, tz) { info.creation_date(pdf_date); info.modified_date(pdf_date); } - if let Some(xmp_date) = xmp_date(date, tz) { - xmp.create_date(xmp_date); - xmp.modify_date(xmp_date); - } } info.finish(); - xmp.num_pages(ctx.document.pages.len() as u32); - xmp.format("application/pdf"); - xmp.language(ctx.resources.languages.keys().map(|lang| LangId(lang.as_str()))); // A unique ID for this instance of the document. Changes if anything // changes in the frames. @@ -116,13 +111,46 @@ pub fn write_catalog( instance_id.clone() }; - // Write IDs. xmp.document_id(&doc_id); xmp.instance_id(&instance_id); - pdf.set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes())); - - xmp.rendition_class(RenditionClass::Proof); + xmp.format("application/pdf"); xmp.pdf_version("1.7"); + xmp.language(ctx.resources.languages.keys().map(|lang| LangId(lang.as_str()))); + xmp.num_pages(ctx.document.pages.len() as u32); + xmp.rendition_class(RenditionClass::Proof); + + if let Some(xmp_date) = date.and_then(|date| xmp_date(date, tz)) { + xmp.create_date(xmp_date); + xmp.modify_date(xmp_date); + + if ctx.options.standards.pdfa { + let mut history = xmp.history(); + history + .add_event() + .action(xmp_writer::ResourceEventAction::Saved) + .when(xmp_date) + .instance_id(&eco_format!("{instance_id}_source")); + history + .add_event() + .action(xmp_writer::ResourceEventAction::Converted) + .when(xmp_date) + .instance_id(&instance_id) + .software_agent(&creator); + } + } + + // Assert dominance. + if ctx.options.standards.pdfa { + let mut extension_schemas = xmp.extension_schemas(); + extension_schemas + .xmp_media_management() + .properties() + .describe_instance_id(); + extension_schemas.pdf().properties().describe_all(); + extension_schemas.finish(); + xmp.pdfa_part(2); + xmp.pdfa_conformance("B"); + } let xmp_buf = xmp.finish(None); let meta_ref = alloc.bump(); @@ -130,6 +158,9 @@ pub fn write_catalog( .pair(Name(b"Type"), Name(b"Metadata")) .pair(Name(b"Subtype"), Name(b"XML")); + // Set IDs only now, so that we don't need to clone them. + pdf.set_file_id((doc_id.into_bytes(), instance_id.into_bytes())); + // Write the document catalog. let catalog_ref = alloc.bump(); let mut catalog = pdf.catalog(catalog_ref); @@ -164,8 +195,23 @@ pub fn write_catalog( catalog.lang(TextStr(lang.as_str())); } + if ctx.options.standards.pdfa { + catalog + .output_intents() + .push() + .subtype(pdf_writer::types::OutputIntentSubtype::PDFA) + .output_condition(TextStr("sRGB")) + .output_condition_identifier(TextStr("Custom")) + .info(TextStr("sRGB IEC61966-2.1")) + .dest_output_profile(ctx.globals.color_functions.srgb.unwrap()); + } + catalog.finish(); + if ctx.options.standards.pdfa && pdf.refs().count() > 8388607 { + bail!(Span::detached(), "too many PDF objects"); + } + Ok(()) } @@ -211,7 +257,7 @@ pub(crate) fn write_page_labels( // Only add what is actually provided. Don't add empty prefix string if // it wasn't given for example. if let Some(prefix) = &label.prefix { - entry.prefix(TextStr(prefix)); + entry.prefix(TextStr::trimmed(prefix)); } if let Some(style) = label.style { diff --git a/crates/typst-pdf/src/color.rs b/crates/typst-pdf/src/color.rs index 2a015ce6a..a19d776a4 100644 --- a/crates/typst-pdf/src/color.rs +++ b/crates/typst-pdf/src/color.rs @@ -1,10 +1,11 @@ use arrayvec::ArrayVec; use once_cell::sync::Lazy; use pdf_writer::{writers, Chunk, Dict, Filter, Name, Ref}; -use typst::diag::SourceResult; +use typst::diag::{bail, SourceResult}; +use typst::syntax::Span; use typst::visualize::{Color, ColorSpace, Paint}; -use crate::{content, deflate, PdfChunk, Renumber, WithResources}; +use crate::{content, deflate, PdfChunk, PdfOptions, Renumber, WithResources}; // The names of the color spaces. pub const SRGB: Name<'static> = Name(b"srgb"); @@ -65,18 +66,18 @@ impl ColorSpaces { /// PDF file. pub fn write_functions(&self, chunk: &mut Chunk, refs: &ColorFunctionRefs) { // Write the sRGB color space. - if self.use_srgb { + if let Some(id) = refs.srgb { chunk - .icc_profile(refs.srgb.unwrap(), &SRGB_ICC_DEFLATED) + .icc_profile(id, &SRGB_ICC_DEFLATED) .n(3) .range([0.0, 1.0, 0.0, 1.0, 0.0, 1.0]) .filter(Filter::FlateDecode); } // Write the gray color space. - if self.use_d65_gray { + if let Some(id) = refs.d65_gray { chunk - .icc_profile(refs.d65_gray.unwrap(), &GRAY_ICC_DEFLATED) + .icc_profile(id, &GRAY_ICC_DEFLATED) .n(1) .range([0.0, 1.0]) .filter(Filter::FlateDecode); @@ -125,7 +126,7 @@ pub fn write( /// needed) in the final document, and be shared by all color space /// dictionaries. pub struct ColorFunctionRefs { - srgb: Option, + pub srgb: Option, d65_gray: Option, } @@ -147,6 +148,10 @@ pub fn alloc_color_functions_refs( let mut chunk = PdfChunk::new(); let mut used_color_spaces = ColorSpaces::default(); + if context.options.standards.pdfa { + used_color_spaces.mark_as_used(ColorSpace::Srgb); + } + context.resources.traverse(&mut |r| { used_color_spaces.merge(&r.colors); Ok(()) @@ -269,6 +274,7 @@ impl PaintEncode for Color { ctx.content.set_fill_color([r, g, b]); } Color::Cmyk(_) => { + check_cmyk_allowed(ctx.options)?; ctx.reset_fill_color_space(); let [c, m, y, k] = ColorSpace::Cmyk.encode(*self); @@ -312,6 +318,7 @@ impl PaintEncode for Color { ctx.content.set_stroke_color([r, g, b]); } Color::Cmyk(_) => { + check_cmyk_allowed(ctx.options)?; ctx.reset_stroke_color_space(); let [c, m, y, k] = ColorSpace::Cmyk.encode(*self); @@ -373,3 +380,14 @@ impl QuantizedColor for f32 { color.clamp(min, max) } } + +/// Fails with an error if PDF/A processing is enabled. +pub(super) fn check_cmyk_allowed(options: &PdfOptions) -> SourceResult<()> { + if options.standards.pdfa { + bail!( + Span::detached(), + "cmyk colors are not currently supported by PDF/A export" + ); + } + Ok(()) +} diff --git a/crates/typst-pdf/src/color_font.rs b/crates/typst-pdf/src/color_font.rs index 026c0bcee..5182a0594 100644 --- a/crates/typst-pdf/src/color_font.rs +++ b/crates/typst-pdf/src/color_font.rs @@ -10,15 +10,15 @@ use std::collections::HashMap; use ecow::eco_format; use indexmap::IndexMap; use pdf_writer::types::UnicodeCmap; +use pdf_writer::writers::WMode; use pdf_writer::{Filter, Finish, Name, Rect, Ref}; -use ttf_parser::name_id; use typst::diag::SourceResult; use typst::layout::Em; use typst::text::color::frame_for_glyph; use typst::text::Font; use crate::content; -use crate::font::{subset_tag, write_font_descriptor, CMAP_NAME, SYSTEM_INFO}; +use crate::font::{base_font_name, write_font_descriptor, CMAP_NAME, SYSTEM_INFO}; use crate::resources::{Resources, ResourcesRefs}; use crate::{EmExt, PdfChunk, PdfOptions, WithGlobalRefs}; @@ -84,12 +84,7 @@ pub fn write_color_fonts( // Determine the base font name. gids.sort(); - let subset_tag = subset_tag(&gids); - let postscript_name = font_slice - .font - .find_name(name_id::POST_SCRIPT_NAME) - .unwrap_or_else(|| "unknown".to_string()); - let base_font = eco_format!("{subset_tag}+{postscript_name}"); + let base_font = base_font_name(&font_slice.font, &gids); // Write the Type3 font object. let mut pdf_font = chunk.type3_font(subfont_id); @@ -134,7 +129,7 @@ pub fn write_color_fonts( cmap.pair_with_multiple(index as u8, text.chars()); } } - chunk.cmap(cmap_ref, &cmap.finish()); + chunk.cmap(cmap_ref, &cmap.finish()).writing_mode(WMode::Horizontal); // Write the font descriptor. write_font_descriptor( diff --git a/crates/typst-pdf/src/content.rs b/crates/typst-pdf/src/content.rs index 60f91470e..aa20e55af 100644 --- a/crates/typst-pdf/src/content.rs +++ b/crates/typst-pdf/src/content.rs @@ -8,8 +8,10 @@ use ecow::eco_format; use pdf_writer::types::{ ColorSpaceOperand, LineCapStyle, LineJoinStyle, TextRenderingMode, }; +use pdf_writer::writers::PositionedItems; use pdf_writer::{Content, Finish, Name, Rect, Str}; -use typst::diag::SourceResult; +use typst::diag::{bail, SourceResult}; +use typst::foundations::Repr; use typst::layout::{ Abs, Em, Frame, FrameItem, GroupItem, Point, Ratio, Size, Transform, }; @@ -28,7 +30,7 @@ use crate::color_font::ColorFontMap; use crate::extg::ExtGState; use crate::image::deferred_image; use crate::resources::Resources; -use crate::{deflate_deferred, AbsExt, EmExt, PdfOptions}; +use crate::{deflate_deferred, AbsExt, ContentExt, EmExt, PdfOptions, StrExt}; /// Encode a [`Frame`] into a content stream. /// @@ -201,8 +203,7 @@ pub(super) struct Transforms { impl Builder<'_, ()> { fn save_state(&mut self) -> SourceResult<()> { self.saves.push(self.state.clone()); - self.content.save_state(); - Ok(()) + self.content.save_state_checked() } fn restore_state(&mut self) { @@ -417,6 +418,19 @@ fn write_group(ctx: &mut Builder, pos: Point, group: &GroupItem) -> SourceResult /// Encode a text run into the content stream. fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) -> SourceResult<()> { + if ctx.options.standards.pdfa { + let last_resort = text.font.info().is_last_resort(); + for g in &text.glyphs { + if last_resort || g.id == 0 { + bail!( + g.span.0, + "the text {} could not be displayed with any font", + text.text[g.range()].repr() + ); + } + } + } + let ttf = text.font.ttf(); let tables = ttf.tables(); @@ -526,7 +540,7 @@ fn write_normal_text( if !adjustment.is_zero() { if !encoded.is_empty() { - items.show(Str(&encoded)); + show_text(&mut items, &encoded); encoded.clear(); } @@ -565,7 +579,7 @@ fn write_normal_text( } if !encoded.is_empty() { - items.show(Str(&encoded)); + show_text(&mut items, &encoded); } items.finish(); @@ -575,6 +589,14 @@ fn write_normal_text( Ok(()) } +/// Shows text, ensuring that each individual string doesn't exceed the +/// implementation limits. +fn show_text(items: &mut PositionedItems, encoded: &[u8]) { + for chunk in encoded.chunks(Str::PDFA_LIMIT) { + items.show(Str(chunk)); + } +} + /// Encodes a text run made only of color glyphs into the content stream fn write_color_glyphs( ctx: &mut Builder, @@ -723,7 +745,8 @@ fn write_image( ) -> SourceResult<()> { let index = ctx.resources.images.insert(image.clone()); ctx.resources.deferred_images.entry(index).or_insert_with(|| { - let (image, color_space) = deferred_image(image.clone()); + let (image, color_space) = + deferred_image(image.clone(), ctx.options.standards.pdfa); if let Some(color_space) = color_space { ctx.resources.colors.mark_as_used(color_space); } @@ -735,10 +758,14 @@ fn write_image( let name = eco_format!("Im{index}"); let w = size.x.to_f32(); let h = size.y.to_f32(); - ctx.content.save_state(); + ctx.content.save_state_checked()?; ctx.content.transform([w, 0.0, 0.0, -h, x, y + h]); if let Some(alt) = image.alt() { + if ctx.options.standards.pdfa && alt.len() > Str::PDFA_LIMIT { + bail!(span, "the image's alt text is too long"); + } + let mut image_span = ctx.content.begin_marked_content_with_properties(Name(b"Span")); let mut image_alt = image_span.properties(); diff --git a/crates/typst-pdf/src/font.rs b/crates/typst-pdf/src/font.rs index d0cd29034..c870d32a9 100644 --- a/crates/typst-pdf/src/font.rs +++ b/crates/typst-pdf/src/font.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use ecow::{eco_format, EcoString}; use pdf_writer::types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap}; -use pdf_writer::writers::FontDescriptor; +use pdf_writer::writers::{FontDescriptor, WMode}; use pdf_writer::{Chunk, Filter, Finish, Name, Rect, Ref, Str}; use subsetter::GlyphRemapper; use ttf_parser::{name_id, GlyphId, Tag}; @@ -13,10 +13,14 @@ use typst::syntax::Span; use typst::text::Font; use typst::utils::SliceExt; -use crate::{deflate, EmExt, PdfChunk, WithGlobalRefs}; +use crate::{deflate, EmExt, NameExt, PdfChunk, WithGlobalRefs}; const CFF: Tag = Tag::from_bytes(b"CFF "); const CFF2: Tag = Tag::from_bytes(b"CFF2"); + +const SUBSET_TAG_LEN: usize = 6; +const IDENTITY_H: &str = "Identity-H"; + pub(crate) const CMAP_NAME: Name = Name(b"Custom"); pub(crate) const SYSTEM_INFO: SystemInfo = SystemInfo { registry: Str(b"Adobe"), @@ -58,14 +62,9 @@ pub fn write_fonts( .or_else(|| ttf.raw_face().table(CFF2)) .is_some(); - let postscript_name = font - .find_name(name_id::POST_SCRIPT_NAME) - .unwrap_or_else(|| "unknown".to_string()); - - let subset_tag = subset_tag(glyph_set); - let base_font = eco_format!("{subset_tag}+{postscript_name}"); + let base_font = base_font_name(font, glyph_set); let base_font_type0 = if is_cff { - eco_format!("{base_font}-Identity-H") + eco_format!("{base_font}-{IDENTITY_H}") } else { base_font.clone() }; @@ -74,7 +73,7 @@ pub fn write_fonts( chunk .type0_font(type0_ref) .base_font(Name(base_font_type0.as_bytes())) - .encoding_predefined(Name(b"Identity-H")) + .encoding_predefined(Name(IDENTITY_H.as_bytes())) .descendant_font(cid_ref) .to_unicode(cmap_ref); @@ -118,7 +117,10 @@ pub fn write_fonts( // Write the /ToUnicode character map, which maps glyph ids back to // unicode codepoints to enable copying out of the PDF. let cmap = create_cmap(glyph_set, glyph_remapper); - chunk.cmap(cmap_ref, &cmap).filter(Filter::FlateDecode); + chunk + .cmap(cmap_ref, &cmap) + .writing_mode(WMode::Horizontal) + .filter(Filter::FlateDecode); let subset = subset_font(font, glyph_remapper) .map_err(|err| { @@ -224,12 +226,31 @@ fn subset_font( Ok(Arc::new(deflate(data))) } +/// Creates the base font name for a font with a specific glyph subset. +/// Consists of a subset tag and the PostScript name of the font. +/// +/// Returns a string of length maximum 116, so that even with `-Identity-H` +/// added it does not exceed the maximum PDF/A name length of 127. +pub(crate) fn base_font_name(font: &Font, glyphs: &T) -> EcoString { + const MAX_LEN: usize = Name::PDFA_LIMIT - REST_LEN; + const REST_LEN: usize = SUBSET_TAG_LEN + 1 + 1 + IDENTITY_H.len(); + + let postscript_name = font.find_name(name_id::POST_SCRIPT_NAME); + let name = postscript_name.as_deref().unwrap_or("unknown"); + let trimmed = &name[..name.len().min(MAX_LEN)]; + + // Hash the full name (we might have trimmed) and the glyphs to produce + // a fairly unique subset tag. + let subset_tag = subset_tag(&(name, glyphs)); + + eco_format!("{subset_tag}+{trimmed}") +} + /// Produce a unique 6 letter tag for a glyph set. pub(crate) fn subset_tag(glyphs: &T) -> EcoString { - const LEN: usize = 6; const BASE: u128 = 26; let mut hash = typst::utils::hash128(&glyphs); - let mut letter = [b'A'; LEN]; + let mut letter = [b'A'; SUBSET_TAG_LEN]; for l in letter.iter_mut() { *l = b'A' + (hash % BASE) as u8; hash /= BASE; diff --git a/crates/typst-pdf/src/gradient.rs b/crates/typst-pdf/src/gradient.rs index 2cfd480b3..be0a3ea04 100644 --- a/crates/typst-pdf/src/gradient.rs +++ b/crates/typst-pdf/src/gradient.rs @@ -13,7 +13,9 @@ use typst::visualize::{ Color, ColorSpace, Gradient, RatioOrAngle, RelativeTo, WeightedColor, }; -use crate::color::{self, ColorSpaceExt, PaintEncode, QuantizedColor}; +use crate::color::{ + self, check_cmyk_allowed, ColorSpaceExt, PaintEncode, QuantizedColor, +}; use crate::{content, WithGlobalRefs}; use crate::{deflate, transform_to_array, AbsExt, PdfChunk}; @@ -56,6 +58,10 @@ pub fn write_gradients( gradient.space() }; + if color_space == ColorSpace::Cmyk { + check_cmyk_allowed(context.options)?; + } + let mut shading_pattern = match &gradient { Gradient::Linear(_) => { let shading_function = diff --git a/crates/typst-pdf/src/image.rs b/crates/typst-pdf/src/image.rs index 44ed8d83b..bff09e096 100644 --- a/crates/typst-pdf/src/image.rs +++ b/crates/typst-pdf/src/image.rs @@ -118,6 +118,7 @@ pub fn write_images( #[comemo::memoize] pub fn deferred_image( image: Image, + pdfa: bool, ) -> (Deferred>, Option) { let color_space = match image.kind() { ImageKind::Raster(raster) if raster.icc().is_none() => { @@ -151,7 +152,7 @@ pub fn deferred_image( }) } ImageKind::Svg(svg) => { - let (chunk, id) = encode_svg(svg) + let (chunk, id) = encode_svg(svg, pdfa) .map_err(|err| eco_format!("failed to convert SVG to PDF: {err}"))?; Ok(EncodedImage::Svg(chunk, id)) } @@ -201,8 +202,14 @@ fn encode_alpha(raster: &RasterImage) -> (Vec, Filter) { /// Encode an SVG into a chunk of PDF objects. #[typst_macros::time(name = "encode svg")] -fn encode_svg(svg: &SvgImage) -> Result<(Chunk, Ref), svg2pdf::ConversionError> { - svg2pdf::to_chunk(svg.tree(), svg2pdf::ConversionOptions::default()) +fn encode_svg( + svg: &SvgImage, + pdfa: bool, +) -> Result<(Chunk, Ref), svg2pdf::ConversionError> { + svg2pdf::to_chunk( + svg.tree(), + svg2pdf::ConversionOptions { pdfa, ..Default::default() }, + ) } /// A pre-encoded image. diff --git a/crates/typst-pdf/src/lib.rs b/crates/typst-pdf/src/lib.rs index b2b3acc10..587f66cb1 100644 --- a/crates/typst-pdf/src/lib.rs +++ b/crates/typst-pdf/src/lib.rs @@ -15,15 +15,17 @@ mod pattern; mod resources; use std::collections::HashMap; +use std::fmt::{self, Debug, Formatter}; use std::hash::Hash; use std::ops::{Deref, DerefMut}; use base64::Engine; -use pdf_writer::{Chunk, Pdf, Ref}; -use typst::diag::SourceResult; +use pdf_writer::{Chunk, Name, Pdf, Ref, Str, TextStr}; +use typst::diag::{bail, SourceResult, StrResult}; use typst::foundations::{Datetime, Smart}; use typst::layout::{Abs, Em, PageRanges, Transform}; use typst::model::Document; +use typst::syntax::Span; use typst::text::Font; use typst::utils::Deferred; use typst::visualize::Image; @@ -45,25 +47,6 @@ use crate::resources::{ /// Export a document into a PDF file. /// /// Returns the raw bytes making up the PDF file. -/// -/// The `ident` parameter, if given, shall be a string that uniquely and stably -/// identifies the document. It should not change between compilations of the -/// same document. **If you cannot provide such a stable identifier, just pass -/// `Smart::Auto` rather than trying to come up with one.** The CLI, for -/// example, does not have a well-defined notion of a long-lived project and as -/// such just passes `Smart::Auto`. -/// -/// If an `ident` is given, the hash of it will be used to create a PDF document -/// identifier (the identifier itself is not leaked). If `ident` is `Auto`, a -/// hash of the document's title and author is used instead (which is reasonably -/// unique and stable). -/// -/// The `timestamp`, if given, is expected to be the creation date of the -/// document as a UTC datetime. It will only be used if `set document(date: ..)` -/// is `auto`. -/// -/// The `page_ranges` option specifies which ranges of pages should be exported -/// in the PDF. When `None`, all pages should be exported. #[typst_macros::time(name = "pdf")] pub fn pdf(document: &Document, options: &PdfOptions) -> SourceResult> { PdfBuilder::new(document, options) @@ -92,26 +75,70 @@ pub fn pdf(document: &Document, options: &PdfOptions) -> SourceResult> { } /// Settings for PDF export. -#[derive(Default)] +#[derive(Debug, Default)] pub struct PdfOptions<'a> { - /// If given, shall be a string that uniquely and stably identifies the - /// document. It should not change between compilations of the same - /// document. **If you cannot provide such a stable identifier, just pass - /// `Smart::Auto` rather than trying to come up with one.** The CLI, for - /// example, does not have a well-defined notion of a long-lived project and - /// as such just passes `Smart::Auto`. + /// If not `Smart::Auto`, shall be a string that uniquely and stably + /// identifies the document. It should not change between compilations of + /// the same document. **If you cannot provide such a stable identifier, + /// just pass `Smart::Auto` rather than trying to come up with one.** The + /// CLI, for example, does not have a well-defined notion of a long-lived + /// project and as such just passes `Smart::Auto`. /// /// If an `ident` is given, the hash of it will be used to create a PDF /// document identifier (the identifier itself is not leaked). If `ident` is /// `Auto`, a hash of the document's title and author is used instead (which /// is reasonably unique and stable). pub ident: Smart<&'a str>, - /// If given, is expected to be the creation date of the document as a UTC + /// If not `None`, shall be the creation date of the document as a UTC /// datetime. It will only be used if `set document(date: ..)` is `auto`. pub timestamp: Option, /// Specifies which ranges of pages should be exported in the PDF. When /// `None`, all pages should be exported. pub page_ranges: Option, + /// A list of PDF standards that Typst will enforce conformance with. + pub standards: PdfStandards, +} + +/// Encapsulates a list of compatible PDF standards. +#[derive(Clone)] +pub struct PdfStandards { + /// For now, we simplify to just PDF/A, since we only support PDF/A-2b. But + /// it can be more fine-grained in the future. + pub(crate) pdfa: bool, +} + +impl PdfStandards { + /// Validates a list of PDF standards for compatibility and returns their + /// encapsulated representation. + pub fn new(list: &[PdfStandard]) -> StrResult { + Ok(Self { pdfa: list.contains(&PdfStandard::A_2b) }) + } +} + +impl Debug for PdfStandards { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.pad("PdfStandards(..)") + } +} + +#[allow(clippy::derivable_impls)] +impl Default for PdfStandards { + fn default() -> Self { + Self { pdfa: false } + } +} + +/// A PDF standard. +/// +/// Support for more standards is planned. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[allow(non_camel_case_types)] +#[non_exhaustive] +pub enum PdfStandard { + /// PDF 1.7. + V_1_7, + /// PDF/A-2b. + A_2b, } /// A struct to build a PDF following a fixed succession of phases. @@ -515,6 +542,63 @@ impl EmExt for Em { } } +trait NameExt<'a> { + /// The maximum length of a name in PDF/A. + const PDFA_LIMIT: usize = 127; +} + +impl<'a> NameExt<'a> for Name<'a> {} + +/// Additional methods for [`Str`]. +trait StrExt<'a>: Sized { + /// The maximum length of a string in PDF/A. + const PDFA_LIMIT: usize = 32767; + + /// Create a string that satisfies the constraints of PDF/A. + #[allow(unused)] + fn trimmed(string: &'a [u8]) -> Self; +} + +impl<'a> StrExt<'a> for Str<'a> { + fn trimmed(string: &'a [u8]) -> Self { + Self(&string[..string.len().min(Self::PDFA_LIMIT)]) + } +} + +/// Additional methods for [`TextStr`]. +trait TextStrExt<'a>: Sized { + /// The maximum length of a string in PDF/A. + const PDFA_LIMIT: usize = Str::PDFA_LIMIT; + + /// Create a text string that satisfies the constraints of PDF/A. + fn trimmed(string: &'a str) -> Self; +} + +impl<'a> TextStrExt<'a> for TextStr<'a> { + fn trimmed(string: &'a str) -> Self { + Self(&string[..string.len().min(Self::PDFA_LIMIT)]) + } +} + +/// Extension trait for [`Content`](pdf_writer::Content). +trait ContentExt { + fn save_state_checked(&mut self) -> SourceResult<()>; +} + +impl ContentExt for pdf_writer::Content { + fn save_state_checked(&mut self) -> SourceResult<()> { + self.save_state(); + if self.state_nesting_depth() > 28 { + bail!( + Span::detached(), + "maximum PDF grouping depth exceeding"; + hint: "try to avoid excessive nesting of layout containers", + ); + } + Ok(()) + } +} + /// Convert to an array of floats. fn transform_to_array(ts: Transform) -> [f32; 6] { [ diff --git a/crates/typst-pdf/src/named_destination.rs b/crates/typst-pdf/src/named_destination.rs index 8dfdc4f30..2d893526e 100644 --- a/crates/typst-pdf/src/named_destination.rs +++ b/crates/typst-pdf/src/named_destination.rs @@ -1,14 +1,14 @@ use std::collections::{HashMap, HashSet}; use pdf_writer::writers::Destination; -use pdf_writer::Ref; +use pdf_writer::{Ref, Str}; use typst::diag::SourceResult; use typst::foundations::{Label, NativeElement}; use typst::introspection::Location; use typst::layout::Abs; use typst::model::HeadingElem; -use crate::{AbsExt, PdfChunk, Renumber, WithGlobalRefs}; +use crate::{AbsExt, PdfChunk, Renumber, StrExt, WithGlobalRefs}; /// A list of destinations in the PDF document (a specific point on a specific /// page), that have a name associated with them. @@ -56,6 +56,12 @@ pub fn write_named_destinations( matches.sort_by_key(|&(_, label)| label); for (loc, label) in matches { + // Don't encode named destinations that would exceed the limit. Those + // will instead be encoded as normal links. + if label.as_str().len() > Str::PDFA_LIMIT { + continue; + } + let pos = context.document.introspector.position(loc); let index = pos.page.get() - 1; let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero()); diff --git a/crates/typst-pdf/src/outline.rs b/crates/typst-pdf/src/outline.rs index 23cc4e976..5c099b89e 100644 --- a/crates/typst-pdf/src/outline.rs +++ b/crates/typst-pdf/src/outline.rs @@ -5,7 +5,7 @@ use typst::foundations::{NativeElement, Packed, StyleChain}; use typst::layout::Abs; use typst::model::HeadingElem; -use crate::{AbsExt, WithEverything}; +use crate::{AbsExt, TextStrExt, WithEverything}; /// Construct the outline for the document. pub(crate) fn write_outline( @@ -185,7 +185,7 @@ fn write_outline_item( } let body = node.element.body(); - outline.title(TextStr(body.plain_text().trim())); + outline.title(TextStr::trimmed(body.plain_text().trim())); let loc = node.element.location().unwrap(); let pos = ctx.document.introspector.position(loc); diff --git a/crates/typst/src/layout/page.rs b/crates/typst/src/layout/page.rs index ac7834bb8..5d8fc04fe 100644 --- a/crates/typst/src/layout/page.rs +++ b/crates/typst/src/layout/page.rs @@ -652,7 +652,7 @@ cast! { } /// A list of page ranges to be exported. - +#[derive(Debug, Clone)] pub struct PageRanges(Vec); /// A range of pages to export. diff --git a/crates/typst/src/model/link.rs b/crates/typst/src/model/link.rs index b583a6fd0..31c65a1df 100644 --- a/crates/typst/src/model/link.rs +++ b/crates/typst/src/model/link.rs @@ -182,7 +182,7 @@ cast! { pub struct Url(EcoString); impl Url { - /// Create an URL from a string, checking the maximum length. + /// Create a URL from a string, checking the maximum length. pub fn new(url: impl Into) -> StrResult { let url = url.into(); if url.len() > 8000 { diff --git a/crates/typst/src/text/font/book.rs b/crates/typst/src/text/font/book.rs index 131e666fe..23e27f64c 100644 --- a/crates/typst/src/text/font/book.rs +++ b/crates/typst/src/text/font/book.rs @@ -291,6 +291,12 @@ impl FontInfo { coverage: Coverage::from_vec(codepoints), }) } + + /// Whether this is the macOS LastResort font. It can yield tofus with + /// glyph ID != 0. + pub fn is_last_resort(&self) -> bool { + self.family == "LastResort" + } } /// Try to find and decode the name with the given id. diff --git a/crates/typst/src/visualize/color.rs b/crates/typst/src/visualize/color.rs index 0af520941..6e05f0e3c 100644 --- a/crates/typst/src/visualize/color.rs +++ b/crates/typst/src/visualize/color.rs @@ -544,6 +544,9 @@ impl Color { /// These components are also available using the /// [`components`]($color.components) method. /// + /// Note that CMYK colors are not currently supported when PDF/A output is + /// enabled. + /// /// ```example /// #square( /// fill: cmyk(27%, 0%, 3%, 5%)