Minimal PDF/A support (#5075)

Co-authored-by: Martin Haug <mhaug@live.de>
This commit is contained in:
Laurenz 2024-10-01 11:24:18 +02:00 committed by GitHub
parent 8eee3ec8d1
commit a0093ad8a7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 361 additions and 108 deletions

18
Cargo.lock generated
View File

@ -1681,9 +1681,9 @@ checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd"
[[package]] [[package]]
name = "pdf-writer" name = "pdf-writer"
version = "0.10.0" version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af6a7882fda7808481d43c51cadfc3ec934c6af72612a1fe6985ce329a2f0469" checksum = "be17f48d7fbbd22c6efedb58af5d409aa578e407f40b29a0bcb4e66ed84c5c98"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.6.0",
"itoa", "itoa",
@ -2354,13 +2354,15 @@ dependencies = [
[[package]] [[package]]
name = "subsetter" name = "subsetter"
version = "0.11.0" version = "0.2.0"
source = "git+https://github.com/typst/subsetter?rev=4e0058b#4e0058b4b9a0948a5f79894111948d95e59ba350" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74f98178f34057d4d4de93d68104007c6dea4dfac930204a69ab4622daefa648"
[[package]] [[package]]
name = "svg2pdf" name = "svg2pdf"
version = "0.11.0" version = "0.12.0"
source = "git+https://github.com/typst/svg2pdf?rev=5963e1e#5963e1e890ac89fbf6b4750b3470ebd5765ef606" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5014c9dadcf318fb7ef8c16438e95abcc9de1ae24d60d5bccc64c55100c50364"
dependencies = [ dependencies = [
"fontdb", "fontdb",
"image", "image",
@ -3489,9 +3491,9 @@ checksum = "ec7a2a501ed189703dba8b08142f057e887dfc4b2cc4db2d343ac6376ba3e0b9"
[[package]] [[package]]
name = "xmp-writer" name = "xmp-writer"
version = "0.2.0" version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4543ba138f64a94b19e1e9c66c165bca7e03d470e1c066cb76ea279d9d0e1989" checksum = "8254499146a4fd0c86e3e99cf4a9f468f595808fb49ff8f3e495f2b117bf4ebc"
[[package]] [[package]]
name = "xz2" name = "xz2"

View File

@ -77,7 +77,7 @@ oxipng = { version = "9.0", default-features = false, features = ["filetime", "p
palette = { version = "0.7.3", default-features = false, features = ["approx", "libm"] } palette = { version = "0.7.3", default-features = false, features = ["approx", "libm"] }
parking_lot = "0.12.1" parking_lot = "0.12.1"
pathdiff = "0.2" pathdiff = "0.2"
pdf-writer = "0.10.0" pdf-writer = "0.12"
phf = { version = "0.11", features = ["macros"] } phf = { version = "0.11", features = ["macros"] }
pixglyph = "0.5" pixglyph = "0.5"
png = "0.17" png = "0.17"
@ -102,8 +102,8 @@ shell-escape = "0.1.5"
siphasher = "1" siphasher = "1"
smallvec = { version = "1.11.1", features = ["union", "const_generics", "const_new"] } smallvec = { version = "1.11.1", features = ["union", "const_generics", "const_new"] }
stacker = "0.1.15" stacker = "0.1.15"
subsetter = { git = "https://github.com/typst/subsetter", rev = "4e0058b" } subsetter = "0.2"
svg2pdf = { git = "https://github.com/typst/svg2pdf", rev = "5963e1e" } svg2pdf = "0.12"
syn = { version = "2", features = ["full", "extra-traits"] } syn = { version = "2", features = ["full", "extra-traits"] }
syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] } syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] }
tar = "0.4" tar = "0.4"
@ -127,7 +127,7 @@ walkdir = "2"
wasmi = "0.35.0" wasmi = "0.35.0"
xmlparser = "0.13.5" xmlparser = "0.13.5"
xmlwriter = "0.1.0" xmlwriter = "0.1.0"
xmp-writer = "0.2" xmp-writer = "0.3"
xz2 = { version = "0.1", features = ["static"] } xz2 = { version = "0.1", features = ["static"] }
yaml-front-matter = "0.1" yaml-front-matter = "0.1"
zip = { version = "2", default-features = false, features = ["deflate"] } zip = { version = "2", default-features = false, features = ["deflate"] }

View File

@ -122,6 +122,23 @@ pub struct CompileCommand {
/// apart from file names and line numbers. /// apart from file names and line numbers.
#[arg(long = "timings", value_name = "OUTPUT_JSON")] #[arg(long = "timings", value_name = "OUTPUT_JSON")]
pub timings: Option<Option<PathBuf>>, pub timings: Option<Option<PathBuf>>,
/// One (or multiple comma-separated) PDF standards that Typst will enforce
/// conformance with.
#[arg(long = "pdf-standard", value_delimiter = ',')]
pub pdf_standard: Vec<PdfStandard>,
}
/// A PDF standard.
#[derive(Debug, Copy, Clone, Eq, PartialEq, ValueEnum)]
#[allow(non_camel_case_types)]
pub enum PdfStandard {
/// PDF 1.7.
#[value(name = "1.7")]
V_1_7,
/// PDF/A-2b.
#[value(name = "a-2b")]
A_2b,
} }
/// Initializes a new project from a template /// Initializes a new project from a template

View File

@ -16,10 +16,11 @@ use typst::layout::{Frame, Page, PageRanges};
use typst::model::Document; use typst::model::Document;
use typst::syntax::{FileId, Source, Span}; use typst::syntax::{FileId, Source, Span};
use typst::WorldExt; use typst::WorldExt;
use typst_pdf::PdfOptions; use typst_pdf::{PdfOptions, PdfStandards};
use crate::args::{ use crate::args::{
CompileCommand, DiagnosticFormat, Input, Output, OutputFormat, PageRangeArgument, CompileCommand, DiagnosticFormat, Input, Output, OutputFormat, PageRangeArgument,
PdfStandard,
}; };
use crate::timings::Timer; use crate::timings::Timer;
use crate::watch::Status; use crate::watch::Status;
@ -78,6 +79,19 @@ impl CompileCommand {
) )
}) })
} }
/// The PDF standards to try to conform with.
///
/// Translates every `--pdf-standard` value given on the command line into
/// its `typst_pdf` counterpart and hands the resulting list to
/// `PdfStandards::new`, whose result (including any error) is returned.
pub fn pdf_standards(&self) -> StrResult<PdfStandards> {
    let mut requested = Vec::with_capacity(self.pdf_standard.len());
    for standard in &self.pdf_standard {
        // Exhaustive on purpose: a new CLI variant must be mapped here.
        let mapped = match standard {
            PdfStandard::V_1_7 => typst_pdf::PdfStandard::V_1_7,
            PdfStandard::A_2b => typst_pdf::PdfStandard::A_2b,
        };
        requested.push(mapped);
    }
    PdfStandards::new(&requested)
}
} }
/// Execute a compilation command. /// Execute a compilation command.
@ -179,6 +193,7 @@ fn export_pdf(document: &Document, command: &CompileCommand) -> SourceResult<()>
command.common.creation_timestamp.unwrap_or_else(chrono::Utc::now), command.common.creation_timestamp.unwrap_or_else(chrono::Utc::now),
), ),
page_ranges: command.exported_page_ranges(), page_ranges: command.exported_page_ranges(),
standards: command.pdf_standards().at(Span::detached())?,
}; };
let buffer = typst_pdf::pdf(document, &options)?; let buffer = typst_pdf::pdf(document, &options)?;
command command

View File

@ -4,14 +4,15 @@ use ecow::eco_format;
use pdf_writer::types::Direction; use pdf_writer::types::Direction;
use pdf_writer::writers::PageLabel; use pdf_writer::writers::PageLabel;
use pdf_writer::{Finish, Name, Pdf, Ref, Str, TextStr}; use pdf_writer::{Finish, Name, Pdf, Ref, Str, TextStr};
use typst::diag::SourceResult; use typst::diag::{bail, SourceResult};
use typst::foundations::{Datetime, Smart}; use typst::foundations::{Datetime, Smart};
use typst::layout::Dir; use typst::layout::Dir;
use typst::syntax::Span;
use typst::text::Lang; use typst::text::Lang;
use xmp_writer::{DateTime, LangId, RenditionClass, Timezone, XmpWriter}; use xmp_writer::{DateTime, LangId, RenditionClass, Timezone, XmpWriter};
use crate::page::PdfPageLabel; use crate::page::PdfPageLabel;
use crate::{hash_base64, outline, WithEverything}; use crate::{hash_base64, outline, TextStrExt, WithEverything};
/// Write the document catalog. /// Write the document catalog.
pub fn write_catalog( pub fn write_catalog(
@ -43,7 +44,7 @@ pub fn write_catalog(
let mut info = pdf.document_info(info_ref); let mut info = pdf.document_info(info_ref);
let mut xmp = XmpWriter::new(); let mut xmp = XmpWriter::new();
if let Some(title) = &ctx.document.info.title { if let Some(title) = &ctx.document.info.title {
info.title(TextStr(title)); info.title(TextStr::trimmed(title));
xmp.title([(None, title.as_str())]); xmp.title([(None, title.as_str())]);
} }
@ -66,7 +67,7 @@ pub fn write_catalog(
// bit weird to not use the array (and it makes Acrobat show the author // bit weird to not use the array (and it makes Acrobat show the author
// list in quotes), but there's not much we can do about that. // list in quotes), but there's not much we can do about that.
let joined = authors.join(", "); let joined = authors.join(", ");
info.author(TextStr(&joined)); info.author(TextStr::trimmed(&joined));
xmp.creator([joined.as_str()]); xmp.creator([joined.as_str()]);
} }
@ -77,26 +78,20 @@ pub fn write_catalog(
let keywords = &ctx.document.info.keywords; let keywords = &ctx.document.info.keywords;
if !keywords.is_empty() { if !keywords.is_empty() {
let joined = keywords.join(", "); let joined = keywords.join(", ");
info.keywords(TextStr(&joined)); info.keywords(TextStr::trimmed(&joined));
xmp.pdf_keywords(&joined); xmp.pdf_keywords(&joined);
} }
if let Some(date) = ctx.document.info.date.unwrap_or(ctx.options.timestamp) { let date = ctx.document.info.date.unwrap_or(ctx.options.timestamp);
let tz = ctx.document.info.date.is_auto(); let tz = ctx.document.info.date.is_auto();
if let Some(date) = date {
if let Some(pdf_date) = pdf_date(date, tz) { if let Some(pdf_date) = pdf_date(date, tz) {
info.creation_date(pdf_date); info.creation_date(pdf_date);
info.modified_date(pdf_date); info.modified_date(pdf_date);
} }
if let Some(xmp_date) = xmp_date(date, tz) {
xmp.create_date(xmp_date);
xmp.modify_date(xmp_date);
}
} }
info.finish(); info.finish();
xmp.num_pages(ctx.document.pages.len() as u32);
xmp.format("application/pdf");
xmp.language(ctx.resources.languages.keys().map(|lang| LangId(lang.as_str())));
// A unique ID for this instance of the document. Changes if anything // A unique ID for this instance of the document. Changes if anything
// changes in the frames. // changes in the frames.
@ -116,13 +111,46 @@ pub fn write_catalog(
instance_id.clone() instance_id.clone()
}; };
// Write IDs.
xmp.document_id(&doc_id); xmp.document_id(&doc_id);
xmp.instance_id(&instance_id); xmp.instance_id(&instance_id);
pdf.set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes())); xmp.format("application/pdf");
xmp.rendition_class(RenditionClass::Proof);
xmp.pdf_version("1.7"); xmp.pdf_version("1.7");
xmp.language(ctx.resources.languages.keys().map(|lang| LangId(lang.as_str())));
xmp.num_pages(ctx.document.pages.len() as u32);
xmp.rendition_class(RenditionClass::Proof);
if let Some(xmp_date) = date.and_then(|date| xmp_date(date, tz)) {
xmp.create_date(xmp_date);
xmp.modify_date(xmp_date);
if ctx.options.standards.pdfa {
let mut history = xmp.history();
history
.add_event()
.action(xmp_writer::ResourceEventAction::Saved)
.when(xmp_date)
.instance_id(&eco_format!("{instance_id}_source"));
history
.add_event()
.action(xmp_writer::ResourceEventAction::Converted)
.when(xmp_date)
.instance_id(&instance_id)
.software_agent(&creator);
}
}
// Assert dominance.
if ctx.options.standards.pdfa {
let mut extension_schemas = xmp.extension_schemas();
extension_schemas
.xmp_media_management()
.properties()
.describe_instance_id();
extension_schemas.pdf().properties().describe_all();
extension_schemas.finish();
xmp.pdfa_part(2);
xmp.pdfa_conformance("B");
}
let xmp_buf = xmp.finish(None); let xmp_buf = xmp.finish(None);
let meta_ref = alloc.bump(); let meta_ref = alloc.bump();
@ -130,6 +158,9 @@ pub fn write_catalog(
.pair(Name(b"Type"), Name(b"Metadata")) .pair(Name(b"Type"), Name(b"Metadata"))
.pair(Name(b"Subtype"), Name(b"XML")); .pair(Name(b"Subtype"), Name(b"XML"));
// Set IDs only now, so that we don't need to clone them.
pdf.set_file_id((doc_id.into_bytes(), instance_id.into_bytes()));
// Write the document catalog. // Write the document catalog.
let catalog_ref = alloc.bump(); let catalog_ref = alloc.bump();
let mut catalog = pdf.catalog(catalog_ref); let mut catalog = pdf.catalog(catalog_ref);
@ -164,8 +195,23 @@ pub fn write_catalog(
catalog.lang(TextStr(lang.as_str())); catalog.lang(TextStr(lang.as_str()));
} }
if ctx.options.standards.pdfa {
catalog
.output_intents()
.push()
.subtype(pdf_writer::types::OutputIntentSubtype::PDFA)
.output_condition(TextStr("sRGB"))
.output_condition_identifier(TextStr("Custom"))
.info(TextStr("sRGB IEC61966-2.1"))
.dest_output_profile(ctx.globals.color_functions.srgb.unwrap());
}
catalog.finish(); catalog.finish();
if ctx.options.standards.pdfa && pdf.refs().count() > 8388607 {
bail!(Span::detached(), "too many PDF objects");
}
Ok(()) Ok(())
} }
@ -211,7 +257,7 @@ pub(crate) fn write_page_labels(
// Only add what is actually provided. Don't add empty prefix string if // Only add what is actually provided. Don't add empty prefix string if
// it wasn't given for example. // it wasn't given for example.
if let Some(prefix) = &label.prefix { if let Some(prefix) = &label.prefix {
entry.prefix(TextStr(prefix)); entry.prefix(TextStr::trimmed(prefix));
} }
if let Some(style) = label.style { if let Some(style) = label.style {

View File

@ -1,10 +1,11 @@
use arrayvec::ArrayVec; use arrayvec::ArrayVec;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use pdf_writer::{writers, Chunk, Dict, Filter, Name, Ref}; use pdf_writer::{writers, Chunk, Dict, Filter, Name, Ref};
use typst::diag::SourceResult; use typst::diag::{bail, SourceResult};
use typst::syntax::Span;
use typst::visualize::{Color, ColorSpace, Paint}; use typst::visualize::{Color, ColorSpace, Paint};
use crate::{content, deflate, PdfChunk, Renumber, WithResources}; use crate::{content, deflate, PdfChunk, PdfOptions, Renumber, WithResources};
// The names of the color spaces. // The names of the color spaces.
pub const SRGB: Name<'static> = Name(b"srgb"); pub const SRGB: Name<'static> = Name(b"srgb");
@ -65,18 +66,18 @@ impl ColorSpaces {
/// PDF file. /// PDF file.
pub fn write_functions(&self, chunk: &mut Chunk, refs: &ColorFunctionRefs) { pub fn write_functions(&self, chunk: &mut Chunk, refs: &ColorFunctionRefs) {
// Write the sRGB color space. // Write the sRGB color space.
if self.use_srgb { if let Some(id) = refs.srgb {
chunk chunk
.icc_profile(refs.srgb.unwrap(), &SRGB_ICC_DEFLATED) .icc_profile(id, &SRGB_ICC_DEFLATED)
.n(3) .n(3)
.range([0.0, 1.0, 0.0, 1.0, 0.0, 1.0]) .range([0.0, 1.0, 0.0, 1.0, 0.0, 1.0])
.filter(Filter::FlateDecode); .filter(Filter::FlateDecode);
} }
// Write the gray color space. // Write the gray color space.
if self.use_d65_gray { if let Some(id) = refs.d65_gray {
chunk chunk
.icc_profile(refs.d65_gray.unwrap(), &GRAY_ICC_DEFLATED) .icc_profile(id, &GRAY_ICC_DEFLATED)
.n(1) .n(1)
.range([0.0, 1.0]) .range([0.0, 1.0])
.filter(Filter::FlateDecode); .filter(Filter::FlateDecode);
@ -125,7 +126,7 @@ pub fn write(
/// needed) in the final document, and be shared by all color space /// needed) in the final document, and be shared by all color space
/// dictionaries. /// dictionaries.
pub struct ColorFunctionRefs { pub struct ColorFunctionRefs {
srgb: Option<Ref>, pub srgb: Option<Ref>,
d65_gray: Option<Ref>, d65_gray: Option<Ref>,
} }
@ -147,6 +148,10 @@ pub fn alloc_color_functions_refs(
let mut chunk = PdfChunk::new(); let mut chunk = PdfChunk::new();
let mut used_color_spaces = ColorSpaces::default(); let mut used_color_spaces = ColorSpaces::default();
if context.options.standards.pdfa {
used_color_spaces.mark_as_used(ColorSpace::Srgb);
}
context.resources.traverse(&mut |r| { context.resources.traverse(&mut |r| {
used_color_spaces.merge(&r.colors); used_color_spaces.merge(&r.colors);
Ok(()) Ok(())
@ -269,6 +274,7 @@ impl PaintEncode for Color {
ctx.content.set_fill_color([r, g, b]); ctx.content.set_fill_color([r, g, b]);
} }
Color::Cmyk(_) => { Color::Cmyk(_) => {
check_cmyk_allowed(ctx.options)?;
ctx.reset_fill_color_space(); ctx.reset_fill_color_space();
let [c, m, y, k] = ColorSpace::Cmyk.encode(*self); let [c, m, y, k] = ColorSpace::Cmyk.encode(*self);
@ -312,6 +318,7 @@ impl PaintEncode for Color {
ctx.content.set_stroke_color([r, g, b]); ctx.content.set_stroke_color([r, g, b]);
} }
Color::Cmyk(_) => { Color::Cmyk(_) => {
check_cmyk_allowed(ctx.options)?;
ctx.reset_stroke_color_space(); ctx.reset_stroke_color_space();
let [c, m, y, k] = ColorSpace::Cmyk.encode(*self); let [c, m, y, k] = ColorSpace::Cmyk.encode(*self);
@ -373,3 +380,14 @@ impl QuantizedColor for f32 {
color.clamp(min, max) color.clamp(min, max)
} }
} }
/// Rejects CMYK usage when exporting with PDF/A enabled.
///
/// Returns `Ok(())` when `options.standards.pdfa` is unset; otherwise bails
/// with a detached-span error.
pub(super) fn check_cmyk_allowed(options: &PdfOptions) -> SourceResult<()> {
    // Nothing to enforce outside of PDF/A export.
    if !options.standards.pdfa {
        return Ok(());
    }

    bail!(
        Span::detached(),
        "cmyk colors are not currently supported by PDF/A export"
    );
}

View File

@ -10,15 +10,15 @@ use std::collections::HashMap;
use ecow::eco_format; use ecow::eco_format;
use indexmap::IndexMap; use indexmap::IndexMap;
use pdf_writer::types::UnicodeCmap; use pdf_writer::types::UnicodeCmap;
use pdf_writer::writers::WMode;
use pdf_writer::{Filter, Finish, Name, Rect, Ref}; use pdf_writer::{Filter, Finish, Name, Rect, Ref};
use ttf_parser::name_id;
use typst::diag::SourceResult; use typst::diag::SourceResult;
use typst::layout::Em; use typst::layout::Em;
use typst::text::color::frame_for_glyph; use typst::text::color::frame_for_glyph;
use typst::text::Font; use typst::text::Font;
use crate::content; use crate::content;
use crate::font::{subset_tag, write_font_descriptor, CMAP_NAME, SYSTEM_INFO}; use crate::font::{base_font_name, write_font_descriptor, CMAP_NAME, SYSTEM_INFO};
use crate::resources::{Resources, ResourcesRefs}; use crate::resources::{Resources, ResourcesRefs};
use crate::{EmExt, PdfChunk, PdfOptions, WithGlobalRefs}; use crate::{EmExt, PdfChunk, PdfOptions, WithGlobalRefs};
@ -84,12 +84,7 @@ pub fn write_color_fonts(
// Determine the base font name. // Determine the base font name.
gids.sort(); gids.sort();
let subset_tag = subset_tag(&gids); let base_font = base_font_name(&font_slice.font, &gids);
let postscript_name = font_slice
.font
.find_name(name_id::POST_SCRIPT_NAME)
.unwrap_or_else(|| "unknown".to_string());
let base_font = eco_format!("{subset_tag}+{postscript_name}");
// Write the Type3 font object. // Write the Type3 font object.
let mut pdf_font = chunk.type3_font(subfont_id); let mut pdf_font = chunk.type3_font(subfont_id);
@ -134,7 +129,7 @@ pub fn write_color_fonts(
cmap.pair_with_multiple(index as u8, text.chars()); cmap.pair_with_multiple(index as u8, text.chars());
} }
} }
chunk.cmap(cmap_ref, &cmap.finish()); chunk.cmap(cmap_ref, &cmap.finish()).writing_mode(WMode::Horizontal);
// Write the font descriptor. // Write the font descriptor.
write_font_descriptor( write_font_descriptor(

View File

@ -8,8 +8,10 @@ use ecow::eco_format;
use pdf_writer::types::{ use pdf_writer::types::{
ColorSpaceOperand, LineCapStyle, LineJoinStyle, TextRenderingMode, ColorSpaceOperand, LineCapStyle, LineJoinStyle, TextRenderingMode,
}; };
use pdf_writer::writers::PositionedItems;
use pdf_writer::{Content, Finish, Name, Rect, Str}; use pdf_writer::{Content, Finish, Name, Rect, Str};
use typst::diag::SourceResult; use typst::diag::{bail, SourceResult};
use typst::foundations::Repr;
use typst::layout::{ use typst::layout::{
Abs, Em, Frame, FrameItem, GroupItem, Point, Ratio, Size, Transform, Abs, Em, Frame, FrameItem, GroupItem, Point, Ratio, Size, Transform,
}; };
@ -28,7 +30,7 @@ use crate::color_font::ColorFontMap;
use crate::extg::ExtGState; use crate::extg::ExtGState;
use crate::image::deferred_image; use crate::image::deferred_image;
use crate::resources::Resources; use crate::resources::Resources;
use crate::{deflate_deferred, AbsExt, EmExt, PdfOptions}; use crate::{deflate_deferred, AbsExt, ContentExt, EmExt, PdfOptions, StrExt};
/// Encode a [`Frame`] into a content stream. /// Encode a [`Frame`] into a content stream.
/// ///
@ -201,8 +203,7 @@ pub(super) struct Transforms {
impl Builder<'_, ()> { impl Builder<'_, ()> {
fn save_state(&mut self) -> SourceResult<()> { fn save_state(&mut self) -> SourceResult<()> {
self.saves.push(self.state.clone()); self.saves.push(self.state.clone());
self.content.save_state(); self.content.save_state_checked()
Ok(())
} }
fn restore_state(&mut self) { fn restore_state(&mut self) {
@ -417,6 +418,19 @@ fn write_group(ctx: &mut Builder, pos: Point, group: &GroupItem) -> SourceResult
/// Encode a text run into the content stream. /// Encode a text run into the content stream.
fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) -> SourceResult<()> { fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) -> SourceResult<()> {
if ctx.options.standards.pdfa {
let last_resort = text.font.info().is_last_resort();
for g in &text.glyphs {
if last_resort || g.id == 0 {
bail!(
g.span.0,
"the text {} could not be displayed with any font",
text.text[g.range()].repr()
);
}
}
}
let ttf = text.font.ttf(); let ttf = text.font.ttf();
let tables = ttf.tables(); let tables = ttf.tables();
@ -526,7 +540,7 @@ fn write_normal_text(
if !adjustment.is_zero() { if !adjustment.is_zero() {
if !encoded.is_empty() { if !encoded.is_empty() {
items.show(Str(&encoded)); show_text(&mut items, &encoded);
encoded.clear(); encoded.clear();
} }
@ -565,7 +579,7 @@ fn write_normal_text(
} }
if !encoded.is_empty() { if !encoded.is_empty() {
items.show(Str(&encoded)); show_text(&mut items, &encoded);
} }
items.finish(); items.finish();
@ -575,6 +589,14 @@ fn write_normal_text(
Ok(()) Ok(())
} }
/// Shows text, ensuring that each individual string doesn't exceed the
/// implementation limits.
fn show_text(items: &mut PositionedItems, encoded: &[u8]) {
for chunk in encoded.chunks(Str::PDFA_LIMIT) {
items.show(Str(chunk));
}
}
/// Encodes a text run made only of color glyphs into the content stream /// Encodes a text run made only of color glyphs into the content stream
fn write_color_glyphs( fn write_color_glyphs(
ctx: &mut Builder, ctx: &mut Builder,
@ -723,7 +745,8 @@ fn write_image(
) -> SourceResult<()> { ) -> SourceResult<()> {
let index = ctx.resources.images.insert(image.clone()); let index = ctx.resources.images.insert(image.clone());
ctx.resources.deferred_images.entry(index).or_insert_with(|| { ctx.resources.deferred_images.entry(index).or_insert_with(|| {
let (image, color_space) = deferred_image(image.clone()); let (image, color_space) =
deferred_image(image.clone(), ctx.options.standards.pdfa);
if let Some(color_space) = color_space { if let Some(color_space) = color_space {
ctx.resources.colors.mark_as_used(color_space); ctx.resources.colors.mark_as_used(color_space);
} }
@ -735,10 +758,14 @@ fn write_image(
let name = eco_format!("Im{index}"); let name = eco_format!("Im{index}");
let w = size.x.to_f32(); let w = size.x.to_f32();
let h = size.y.to_f32(); let h = size.y.to_f32();
ctx.content.save_state(); ctx.content.save_state_checked()?;
ctx.content.transform([w, 0.0, 0.0, -h, x, y + h]); ctx.content.transform([w, 0.0, 0.0, -h, x, y + h]);
if let Some(alt) = image.alt() { if let Some(alt) = image.alt() {
if ctx.options.standards.pdfa && alt.len() > Str::PDFA_LIMIT {
bail!(span, "the image's alt text is too long");
}
let mut image_span = let mut image_span =
ctx.content.begin_marked_content_with_properties(Name(b"Span")); ctx.content.begin_marked_content_with_properties(Name(b"Span"));
let mut image_alt = image_span.properties(); let mut image_alt = image_span.properties();

View File

@ -4,7 +4,7 @@ use std::sync::Arc;
use ecow::{eco_format, EcoString}; use ecow::{eco_format, EcoString};
use pdf_writer::types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap}; use pdf_writer::types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap};
use pdf_writer::writers::FontDescriptor; use pdf_writer::writers::{FontDescriptor, WMode};
use pdf_writer::{Chunk, Filter, Finish, Name, Rect, Ref, Str}; use pdf_writer::{Chunk, Filter, Finish, Name, Rect, Ref, Str};
use subsetter::GlyphRemapper; use subsetter::GlyphRemapper;
use ttf_parser::{name_id, GlyphId, Tag}; use ttf_parser::{name_id, GlyphId, Tag};
@ -13,10 +13,14 @@ use typst::syntax::Span;
use typst::text::Font; use typst::text::Font;
use typst::utils::SliceExt; use typst::utils::SliceExt;
use crate::{deflate, EmExt, PdfChunk, WithGlobalRefs}; use crate::{deflate, EmExt, NameExt, PdfChunk, WithGlobalRefs};
const CFF: Tag = Tag::from_bytes(b"CFF "); const CFF: Tag = Tag::from_bytes(b"CFF ");
const CFF2: Tag = Tag::from_bytes(b"CFF2"); const CFF2: Tag = Tag::from_bytes(b"CFF2");
const SUBSET_TAG_LEN: usize = 6;
const IDENTITY_H: &str = "Identity-H";
pub(crate) const CMAP_NAME: Name = Name(b"Custom"); pub(crate) const CMAP_NAME: Name = Name(b"Custom");
pub(crate) const SYSTEM_INFO: SystemInfo = SystemInfo { pub(crate) const SYSTEM_INFO: SystemInfo = SystemInfo {
registry: Str(b"Adobe"), registry: Str(b"Adobe"),
@ -58,14 +62,9 @@ pub fn write_fonts(
.or_else(|| ttf.raw_face().table(CFF2)) .or_else(|| ttf.raw_face().table(CFF2))
.is_some(); .is_some();
let postscript_name = font let base_font = base_font_name(font, glyph_set);
.find_name(name_id::POST_SCRIPT_NAME)
.unwrap_or_else(|| "unknown".to_string());
let subset_tag = subset_tag(glyph_set);
let base_font = eco_format!("{subset_tag}+{postscript_name}");
let base_font_type0 = if is_cff { let base_font_type0 = if is_cff {
eco_format!("{base_font}-Identity-H") eco_format!("{base_font}-{IDENTITY_H}")
} else { } else {
base_font.clone() base_font.clone()
}; };
@ -74,7 +73,7 @@ pub fn write_fonts(
chunk chunk
.type0_font(type0_ref) .type0_font(type0_ref)
.base_font(Name(base_font_type0.as_bytes())) .base_font(Name(base_font_type0.as_bytes()))
.encoding_predefined(Name(b"Identity-H")) .encoding_predefined(Name(IDENTITY_H.as_bytes()))
.descendant_font(cid_ref) .descendant_font(cid_ref)
.to_unicode(cmap_ref); .to_unicode(cmap_ref);
@ -118,7 +117,10 @@ pub fn write_fonts(
// Write the /ToUnicode character map, which maps glyph ids back to // Write the /ToUnicode character map, which maps glyph ids back to
// unicode codepoints to enable copying out of the PDF. // unicode codepoints to enable copying out of the PDF.
let cmap = create_cmap(glyph_set, glyph_remapper); let cmap = create_cmap(glyph_set, glyph_remapper);
chunk.cmap(cmap_ref, &cmap).filter(Filter::FlateDecode); chunk
.cmap(cmap_ref, &cmap)
.writing_mode(WMode::Horizontal)
.filter(Filter::FlateDecode);
let subset = subset_font(font, glyph_remapper) let subset = subset_font(font, glyph_remapper)
.map_err(|err| { .map_err(|err| {
@ -224,12 +226,31 @@ fn subset_font(
Ok(Arc::new(deflate(data))) Ok(Arc::new(deflate(data)))
} }
/// Creates the base font name for a font with a specific glyph subset.
/// Consists of a subset tag and the PostScript name of the font.
///
/// The PostScript name is cut to at most `MAX_LEN` bytes so that the result,
/// even with `-Identity-H` appended, does not exceed `Name::PDFA_LIMIT`
/// (the maximum PDF/A name length). If the font has no PostScript name,
/// `"unknown"` is used instead.
///
/// The cut is always made on a UTF-8 character boundary, so a name
/// containing multi-byte characters can never cause a slicing panic; in that
/// case the trimmed name may be a few bytes shorter than `MAX_LEN`.
pub(crate) fn base_font_name<T: Hash>(font: &Font, glyphs: &T) -> EcoString {
    const MAX_LEN: usize = Name::PDFA_LIMIT - REST_LEN;
    // Subset tag, the `+` separator, the `-` separator and `Identity-H`.
    const REST_LEN: usize = SUBSET_TAG_LEN + 1 + 1 + IDENTITY_H.len();

    let postscript_name = font.find_name(name_id::POST_SCRIPT_NAME);
    let name = postscript_name.as_deref().unwrap_or("unknown");

    // Slicing a `str` at a byte offset inside a multi-byte character panics,
    // so back off to the closest preceding character boundary. Index 0 is
    // always a boundary, which guarantees termination.
    let mut end = name.len().min(MAX_LEN);
    while !name.is_char_boundary(end) {
        end -= 1;
    }
    let trimmed = &name[..end];

    // Hash the full name (we might have trimmed) and the glyphs to produce
    // a fairly unique subset tag.
    let subset_tag = subset_tag(&(name, glyphs));

    eco_format!("{subset_tag}+{trimmed}")
}
/// Produce a unique 6 letter tag for a glyph set. /// Produce a unique 6 letter tag for a glyph set.
pub(crate) fn subset_tag<T: Hash>(glyphs: &T) -> EcoString { pub(crate) fn subset_tag<T: Hash>(glyphs: &T) -> EcoString {
const LEN: usize = 6;
const BASE: u128 = 26; const BASE: u128 = 26;
let mut hash = typst::utils::hash128(&glyphs); let mut hash = typst::utils::hash128(&glyphs);
let mut letter = [b'A'; LEN]; let mut letter = [b'A'; SUBSET_TAG_LEN];
for l in letter.iter_mut() { for l in letter.iter_mut() {
*l = b'A' + (hash % BASE) as u8; *l = b'A' + (hash % BASE) as u8;
hash /= BASE; hash /= BASE;

View File

@ -13,7 +13,9 @@ use typst::visualize::{
Color, ColorSpace, Gradient, RatioOrAngle, RelativeTo, WeightedColor, Color, ColorSpace, Gradient, RatioOrAngle, RelativeTo, WeightedColor,
}; };
use crate::color::{self, ColorSpaceExt, PaintEncode, QuantizedColor}; use crate::color::{
self, check_cmyk_allowed, ColorSpaceExt, PaintEncode, QuantizedColor,
};
use crate::{content, WithGlobalRefs}; use crate::{content, WithGlobalRefs};
use crate::{deflate, transform_to_array, AbsExt, PdfChunk}; use crate::{deflate, transform_to_array, AbsExt, PdfChunk};
@ -56,6 +58,10 @@ pub fn write_gradients(
gradient.space() gradient.space()
}; };
if color_space == ColorSpace::Cmyk {
check_cmyk_allowed(context.options)?;
}
let mut shading_pattern = match &gradient { let mut shading_pattern = match &gradient {
Gradient::Linear(_) => { Gradient::Linear(_) => {
let shading_function = let shading_function =

View File

@ -118,6 +118,7 @@ pub fn write_images(
#[comemo::memoize] #[comemo::memoize]
pub fn deferred_image( pub fn deferred_image(
image: Image, image: Image,
pdfa: bool,
) -> (Deferred<StrResult<EncodedImage>>, Option<ColorSpace>) { ) -> (Deferred<StrResult<EncodedImage>>, Option<ColorSpace>) {
let color_space = match image.kind() { let color_space = match image.kind() {
ImageKind::Raster(raster) if raster.icc().is_none() => { ImageKind::Raster(raster) if raster.icc().is_none() => {
@ -151,7 +152,7 @@ pub fn deferred_image(
}) })
} }
ImageKind::Svg(svg) => { ImageKind::Svg(svg) => {
let (chunk, id) = encode_svg(svg) let (chunk, id) = encode_svg(svg, pdfa)
.map_err(|err| eco_format!("failed to convert SVG to PDF: {err}"))?; .map_err(|err| eco_format!("failed to convert SVG to PDF: {err}"))?;
Ok(EncodedImage::Svg(chunk, id)) Ok(EncodedImage::Svg(chunk, id))
} }
@ -201,8 +202,14 @@ fn encode_alpha(raster: &RasterImage) -> (Vec<u8>, Filter) {
/// Encode an SVG into a chunk of PDF objects. /// Encode an SVG into a chunk of PDF objects.
#[typst_macros::time(name = "encode svg")] #[typst_macros::time(name = "encode svg")]
fn encode_svg(svg: &SvgImage) -> Result<(Chunk, Ref), svg2pdf::ConversionError> { fn encode_svg(
svg2pdf::to_chunk(svg.tree(), svg2pdf::ConversionOptions::default()) svg: &SvgImage,
pdfa: bool,
) -> Result<(Chunk, Ref), svg2pdf::ConversionError> {
svg2pdf::to_chunk(
svg.tree(),
svg2pdf::ConversionOptions { pdfa, ..Default::default() },
)
} }
/// A pre-encoded image. /// A pre-encoded image.

View File

@ -15,15 +15,17 @@ mod pattern;
mod resources; mod resources;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use std::hash::Hash; use std::hash::Hash;
use std::ops::{Deref, DerefMut}; use std::ops::{Deref, DerefMut};
use base64::Engine; use base64::Engine;
use pdf_writer::{Chunk, Pdf, Ref}; use pdf_writer::{Chunk, Name, Pdf, Ref, Str, TextStr};
use typst::diag::SourceResult; use typst::diag::{bail, SourceResult, StrResult};
use typst::foundations::{Datetime, Smart}; use typst::foundations::{Datetime, Smart};
use typst::layout::{Abs, Em, PageRanges, Transform}; use typst::layout::{Abs, Em, PageRanges, Transform};
use typst::model::Document; use typst::model::Document;
use typst::syntax::Span;
use typst::text::Font; use typst::text::Font;
use typst::utils::Deferred; use typst::utils::Deferred;
use typst::visualize::Image; use typst::visualize::Image;
@ -45,25 +47,6 @@ use crate::resources::{
/// Export a document into a PDF file. /// Export a document into a PDF file.
/// ///
/// Returns the raw bytes making up the PDF file. /// Returns the raw bytes making up the PDF file.
///
/// The `ident` parameter, if given, shall be a string that uniquely and stably
/// identifies the document. It should not change between compilations of the
/// same document. **If you cannot provide such a stable identifier, just pass
/// `Smart::Auto` rather than trying to come up with one.** The CLI, for
/// example, does not have a well-defined notion of a long-lived project and as
/// such just passes `Smart::Auto`.
///
/// If an `ident` is given, the hash of it will be used to create a PDF document
/// identifier (the identifier itself is not leaked). If `ident` is `Auto`, a
/// hash of the document's title and author is used instead (which is reasonably
/// unique and stable).
///
/// The `timestamp`, if given, is expected to be the creation date of the
/// document as a UTC datetime. It will only be used if `set document(date: ..)`
/// is `auto`.
///
/// The `page_ranges` option specifies which ranges of pages should be exported
/// in the PDF. When `None`, all pages should be exported.
#[typst_macros::time(name = "pdf")] #[typst_macros::time(name = "pdf")]
pub fn pdf(document: &Document, options: &PdfOptions) -> SourceResult<Vec<u8>> { pub fn pdf(document: &Document, options: &PdfOptions) -> SourceResult<Vec<u8>> {
PdfBuilder::new(document, options) PdfBuilder::new(document, options)
@ -92,26 +75,70 @@ pub fn pdf(document: &Document, options: &PdfOptions) -> SourceResult<Vec<u8>> {
} }
/// Settings for PDF export. /// Settings for PDF export.
#[derive(Default)] #[derive(Debug, Default)]
pub struct PdfOptions<'a> { pub struct PdfOptions<'a> {
/// If given, shall be a string that uniquely and stably identifies the /// If not `Smart::Auto`, shall be a string that uniquely and stably
/// document. It should not change between compilations of the same /// identifies the document. It should not change between compilations of
/// document. **If you cannot provide such a stable identifier, just pass /// the same document. **If you cannot provide such a stable identifier,
/// `Smart::Auto` rather than trying to come up with one.** The CLI, for /// just pass `Smart::Auto` rather than trying to come up with one.** The
/// example, does not have a well-defined notion of a long-lived project and /// CLI, for example, does not have a well-defined notion of a long-lived
/// as such just passes `Smart::Auto`. /// project and as such just passes `Smart::Auto`.
/// ///
/// If an `ident` is given, the hash of it will be used to create a PDF /// If an `ident` is given, the hash of it will be used to create a PDF
/// document identifier (the identifier itself is not leaked). If `ident` is /// document identifier (the identifier itself is not leaked). If `ident` is
/// `Auto`, a hash of the document's title and author is used instead (which /// `Auto`, a hash of the document's title and author is used instead (which
/// is reasonably unique and stable). /// is reasonably unique and stable).
pub ident: Smart<&'a str>, pub ident: Smart<&'a str>,
/// If given, is expected to be the creation date of the document as a UTC /// If not `None`, shall be the creation date of the document as a UTC
/// datetime. It will only be used if `set document(date: ..)` is `auto`. /// datetime. It will only be used if `set document(date: ..)` is `auto`.
pub timestamp: Option<Datetime>, pub timestamp: Option<Datetime>,
/// Specifies which ranges of pages should be exported in the PDF. When /// Specifies which ranges of pages should be exported in the PDF. When
/// `None`, all pages should be exported. /// `None`, all pages should be exported.
pub page_ranges: Option<PageRanges>, pub page_ranges: Option<PageRanges>,
/// A list of PDF standards that Typst will enforce conformance with.
pub standards: PdfStandards,
}
/// Encapsulates a list of compatible PDF standards.
///
/// Constructed from a slice of [`PdfStandard`]s through
/// [`PdfStandards::new`]. The `Default` value has PDF/A turned off.
#[derive(Clone)]
pub struct PdfStandards {
    /// For now, we simplify to just PDF/A, since we only support PDF/A-2b. But
    /// it can be more fine-grained in the future.
    pub(crate) pdfa: bool,
}
impl PdfStandards {
    /// Builds the encapsulated representation from a list of requested PDF
    /// standards.
    ///
    /// PDF/A mode is enabled exactly when `list` contains
    /// [`PdfStandard::A_2b`]. No combination is rejected at the moment, so the
    /// result is currently always `Ok`.
    pub fn new(list: &[PdfStandard]) -> StrResult<Self> {
        let pdfa = list.iter().any(|standard| *standard == PdfStandard::A_2b);
        Ok(Self { pdfa })
    }
}
impl Debug for PdfStandards {
    /// Prints an opaque placeholder instead of the individual fields.
    fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
        // `pad` honours any width/alignment/precision flags of the caller.
        formatter.pad("PdfStandards(..)")
    }
}
#[allow(clippy::derivable_impls)]
impl Default for PdfStandards {
fn default() -> Self {
Self { pdfa: false }
}
}
/// A PDF standard.
///
/// Support for more standards is planned.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[allow(non_camel_case_types)]
#[non_exhaustive]
pub enum PdfStandard {
/// PDF 1.7.
V_1_7,
/// PDF/A-2b.
A_2b,
} }
/// A struct to build a PDF following a fixed succession of phases. /// A struct to build a PDF following a fixed succession of phases.
@ -515,6 +542,63 @@ impl EmExt for Em {
} }
} }
/// Additional constants for [`Name`].
trait NameExt<'a> {
    /// The maximum length of a name in PDF/A.
    const PDFA_LIMIT: usize = 127;
}
// The trait only carries a defaulted constant, so the impl body is empty.
impl<'a> NameExt<'a> for Name<'a> {}
/// Additional methods for [`Str`].
trait StrExt<'a>: Sized {
    /// The maximum length of a string in PDF/A.
    const PDFA_LIMIT: usize = 32767;
    /// Create a string that satisfies the constraints of PDF/A.
    ///
    /// The implementation for [`Str`] keeps at most [`Self::PDFA_LIMIT`]
    /// leading bytes of `string` and drops the rest.
    // NOTE(review): no caller is visible in this part of the crate, which is
    // presumably why the lint is silenced — confirm it is still unused.
    #[allow(unused)]
    fn trimmed(string: &'a [u8]) -> Self;
}
impl<'a> StrExt<'a> for Str<'a> {
    /// Keeps at most `PDFA_LIMIT` leading bytes of `string`.
    fn trimmed(string: &'a [u8]) -> Self {
        // `get` is `None` only when the input is shorter than the limit; in
        // that case it already fits and is passed through unchanged.
        Self(string.get(..Self::PDFA_LIMIT).unwrap_or(string))
    }
}
/// Additional methods for [`TextStr`].
trait TextStrExt<'a>: Sized {
    /// The maximum length of a string in PDF/A.
    // Reuses the limit declared for `Str`.
    const PDFA_LIMIT: usize = Str::PDFA_LIMIT;
    /// Create a text string that satisfies the constraints of PDF/A.
    ///
    /// The implementation for [`TextStr`] keeps at most
    /// [`Self::PDFA_LIMIT`] bytes from the start of `string`.
    fn trimmed(string: &'a str) -> Self;
}
impl<'a> TextStrExt<'a> for TextStr<'a> {
    /// Returns the longest prefix of `string` that is at most `PDFA_LIMIT`
    /// bytes long and ends on a UTF-8 character boundary.
    ///
    /// Strings that already fit are returned unchanged. The cut never lands
    /// inside a multi-byte character, so this cannot panic for any input.
    fn trimmed(string: &'a str) -> Self {
        let mut end = string.len().min(Self::PDFA_LIMIT);
        // Slicing a `str` at a non-boundary index panics, so back off to the
        // previous boundary. Index 0 is always a boundary, which bounds the
        // loop (at most three steps for valid UTF-8).
        while !string.is_char_boundary(end) {
            end -= 1;
        }
        Self(&string[..end])
    }
}
/// Extension trait for [`Content`](pdf_writer::Content).
trait ContentExt {
    /// Like `save_state`, but reports an error when the nesting gets too deep.
    ///
    /// The implementation for [`Content`](pdf_writer::Content) calls
    /// `save_state` first and then returns an error if
    /// `state_nesting_depth()` is greater than 28.
    fn save_state_checked(&mut self) -> SourceResult<()>;
}
impl ContentExt for pdf_writer::Content {
    /// Saves the state and then checks that the resulting nesting depth is
    /// still acceptable.
    ///
    /// Returns an error (with a detached span) once the depth is above 28.
    fn save_state_checked(&mut self) -> SourceResult<()> {
        self.save_state();

        // NOTE(review): 28 is presumably the nesting limit imposed by PDF/A —
        // confirm against the standard.
        if self.state_nesting_depth() <= 28 {
            return Ok(());
        }

        bail!(
            Span::detached(),
            "maximum PDF grouping depth exceeding";
            hint: "try to avoid excessive nesting of layout containers",
        );
    }
}
/// Convert to an array of floats. /// Convert to an array of floats.
fn transform_to_array(ts: Transform) -> [f32; 6] { fn transform_to_array(ts: Transform) -> [f32; 6] {
[ [

View File

@ -1,14 +1,14 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use pdf_writer::writers::Destination; use pdf_writer::writers::Destination;
use pdf_writer::Ref; use pdf_writer::{Ref, Str};
use typst::diag::SourceResult; use typst::diag::SourceResult;
use typst::foundations::{Label, NativeElement}; use typst::foundations::{Label, NativeElement};
use typst::introspection::Location; use typst::introspection::Location;
use typst::layout::Abs; use typst::layout::Abs;
use typst::model::HeadingElem; use typst::model::HeadingElem;
use crate::{AbsExt, PdfChunk, Renumber, WithGlobalRefs}; use crate::{AbsExt, PdfChunk, Renumber, StrExt, WithGlobalRefs};
/// A list of destinations in the PDF document (a specific point on a specific /// A list of destinations in the PDF document (a specific point on a specific
/// page), that have a name associated with them. /// page), that have a name associated with them.
@ -56,6 +56,12 @@ pub fn write_named_destinations(
matches.sort_by_key(|&(_, label)| label); matches.sort_by_key(|&(_, label)| label);
for (loc, label) in matches { for (loc, label) in matches {
// Don't encode named destinations that would exceed the limit. Those
// will instead be encoded as normal links.
if label.as_str().len() > Str::PDFA_LIMIT {
continue;
}
let pos = context.document.introspector.position(loc); let pos = context.document.introspector.position(loc);
let index = pos.page.get() - 1; let index = pos.page.get() - 1;
let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero()); let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero());

View File

@ -5,7 +5,7 @@ use typst::foundations::{NativeElement, Packed, StyleChain};
use typst::layout::Abs; use typst::layout::Abs;
use typst::model::HeadingElem; use typst::model::HeadingElem;
use crate::{AbsExt, WithEverything}; use crate::{AbsExt, TextStrExt, WithEverything};
/// Construct the outline for the document. /// Construct the outline for the document.
pub(crate) fn write_outline( pub(crate) fn write_outline(
@ -185,7 +185,7 @@ fn write_outline_item(
} }
let body = node.element.body(); let body = node.element.body();
outline.title(TextStr(body.plain_text().trim())); outline.title(TextStr::trimmed(body.plain_text().trim()));
let loc = node.element.location().unwrap(); let loc = node.element.location().unwrap();
let pos = ctx.document.introspector.position(loc); let pos = ctx.document.introspector.position(loc);

View File

@ -652,7 +652,7 @@ cast! {
} }
/// A list of page ranges to be exported. /// A list of page ranges to be exported.
#[derive(Debug, Clone)]
pub struct PageRanges(Vec<PageRange>); pub struct PageRanges(Vec<PageRange>);
/// A range of pages to export. /// A range of pages to export.

View File

@ -182,7 +182,7 @@ cast! {
pub struct Url(EcoString); pub struct Url(EcoString);
impl Url { impl Url {
/// Create an URL from a string, checking the maximum length. /// Create a URL from a string, checking the maximum length.
pub fn new(url: impl Into<EcoString>) -> StrResult<Self> { pub fn new(url: impl Into<EcoString>) -> StrResult<Self> {
let url = url.into(); let url = url.into();
if url.len() > 8000 { if url.len() > 8000 {

View File

@ -291,6 +291,12 @@ impl FontInfo {
coverage: Coverage::from_vec(codepoints), coverage: Coverage::from_vec(codepoints),
}) })
} }
/// Whether this is the macOS LastResort font. It can yield tofus with
/// glyph ID != 0.
///
/// Detection is an exact comparison of the family name with `"LastResort"`.
pub fn is_last_resort(&self) -> bool {
    self.family == "LastResort"
}
} }
/// Try to find and decode the name with the given id. /// Try to find and decode the name with the given id.

View File

@ -544,6 +544,9 @@ impl Color {
/// These components are also available using the /// These components are also available using the
/// [`components`]($color.components) method. /// [`components`]($color.components) method.
/// ///
/// Note that CMYK colors are not currently supported when PDF/A output is
/// enabled.
///
/// ```example /// ```example
/// #square( /// #square(
/// fill: cmyk(27%, 0%, 3%, 5%) /// fill: cmyk(27%, 0%, 3%, 5%)