Minimal PDF/A support (#5075)

Co-authored-by: Martin Haug <mhaug@live.de>
This commit is contained in:
Laurenz 2024-10-01 11:24:18 +02:00 committed by GitHub
parent 8eee3ec8d1
commit a0093ad8a7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 361 additions and 108 deletions

18
Cargo.lock generated
View File

@ -1681,9 +1681,9 @@ checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd"
[[package]]
name = "pdf-writer"
version = "0.10.0"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af6a7882fda7808481d43c51cadfc3ec934c6af72612a1fe6985ce329a2f0469"
checksum = "be17f48d7fbbd22c6efedb58af5d409aa578e407f40b29a0bcb4e66ed84c5c98"
dependencies = [
"bitflags 2.6.0",
"itoa",
@ -2354,13 +2354,15 @@ dependencies = [
[[package]]
name = "subsetter"
version = "0.11.0"
source = "git+https://github.com/typst/subsetter?rev=4e0058b#4e0058b4b9a0948a5f79894111948d95e59ba350"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74f98178f34057d4d4de93d68104007c6dea4dfac930204a69ab4622daefa648"
[[package]]
name = "svg2pdf"
version = "0.11.0"
source = "git+https://github.com/typst/svg2pdf?rev=5963e1e#5963e1e890ac89fbf6b4750b3470ebd5765ef606"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5014c9dadcf318fb7ef8c16438e95abcc9de1ae24d60d5bccc64c55100c50364"
dependencies = [
"fontdb",
"image",
@ -3489,9 +3491,9 @@ checksum = "ec7a2a501ed189703dba8b08142f057e887dfc4b2cc4db2d343ac6376ba3e0b9"
[[package]]
name = "xmp-writer"
version = "0.2.0"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4543ba138f64a94b19e1e9c66c165bca7e03d470e1c066cb76ea279d9d0e1989"
checksum = "8254499146a4fd0c86e3e99cf4a9f468f595808fb49ff8f3e495f2b117bf4ebc"
[[package]]
name = "xz2"

View File

@ -77,7 +77,7 @@ oxipng = { version = "9.0", default-features = false, features = ["filetime", "p
palette = { version = "0.7.3", default-features = false, features = ["approx", "libm"] }
parking_lot = "0.12.1"
pathdiff = "0.2"
pdf-writer = "0.10.0"
pdf-writer = "0.12"
phf = { version = "0.11", features = ["macros"] }
pixglyph = "0.5"
png = "0.17"
@ -102,8 +102,8 @@ shell-escape = "0.1.5"
siphasher = "1"
smallvec = { version = "1.11.1", features = ["union", "const_generics", "const_new"] }
stacker = "0.1.15"
subsetter = { git = "https://github.com/typst/subsetter", rev = "4e0058b" }
svg2pdf = { git = "https://github.com/typst/svg2pdf", rev = "5963e1e" }
subsetter = "0.2"
svg2pdf = "0.12"
syn = { version = "2", features = ["full", "extra-traits"] }
syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] }
tar = "0.4"
@ -127,7 +127,7 @@ walkdir = "2"
wasmi = "0.35.0"
xmlparser = "0.13.5"
xmlwriter = "0.1.0"
xmp-writer = "0.2"
xmp-writer = "0.3"
xz2 = { version = "0.1", features = ["static"] }
yaml-front-matter = "0.1"
zip = { version = "2", default-features = false, features = ["deflate"] }

View File

@ -122,6 +122,23 @@ pub struct CompileCommand {
/// apart from file names and line numbers.
#[arg(long = "timings", value_name = "OUTPUT_JSON")]
pub timings: Option<Option<PathBuf>>,
/// One (or multiple comma-separated) PDF standards that Typst will enforce
/// conformance with.
#[arg(long = "pdf-standard", value_delimiter = ',')]
pub pdf_standard: Vec<PdfStandard>,
}
/// A PDF standard.
// Command-line counterpart of `typst_pdf::PdfStandard`; `pdf_standards()` maps
// each variant to the exporter's variant of the same name.
#[derive(Debug, Copy, Clone, Eq, PartialEq, ValueEnum)]
// The variant names follow the spelling of the standards (`V_1_7`, `A_2b`)
// rather than CamelCase, hence the lint exemption.
#[allow(non_camel_case_types)]
pub enum PdfStandard {
/// PDF 1.7.
// NOTE(review): the `name` is presumably the literal value accepted by
// `--pdf-standard` — confirm against the clap `ValueEnum` documentation.
#[value(name = "1.7")]
V_1_7,
/// PDF/A-2b.
#[value(name = "a-2b")]
A_2b,
}
/// Initializes a new project from a template

View File

@ -16,10 +16,11 @@ use typst::layout::{Frame, Page, PageRanges};
use typst::model::Document;
use typst::syntax::{FileId, Source, Span};
use typst::WorldExt;
use typst_pdf::PdfOptions;
use typst_pdf::{PdfOptions, PdfStandards};
use crate::args::{
CompileCommand, DiagnosticFormat, Input, Output, OutputFormat, PageRangeArgument,
PdfStandard,
};
use crate::timings::Timer;
use crate::watch::Status;
@ -78,6 +79,19 @@ impl CompileCommand {
)
})
}
/// Translates the standards requested via `--pdf-standard` into the
/// exporter's [`PdfStandards`] representation.
///
/// Returns whatever error `PdfStandards::new` reports for the requested list.
pub fn pdf_standards(&self) -> StrResult<PdfStandards> {
    let mut requested = Vec::with_capacity(self.pdf_standard.len());
    for &standard in &self.pdf_standard {
        // One-to-one mapping from the CLI enum to the exporter's enum.
        requested.push(match standard {
            PdfStandard::V_1_7 => typst_pdf::PdfStandard::V_1_7,
            PdfStandard::A_2b => typst_pdf::PdfStandard::A_2b,
        });
    }
    PdfStandards::new(&requested)
}
}
/// Execute a compilation command.
@ -179,6 +193,7 @@ fn export_pdf(document: &Document, command: &CompileCommand) -> SourceResult<()>
command.common.creation_timestamp.unwrap_or_else(chrono::Utc::now),
),
page_ranges: command.exported_page_ranges(),
standards: command.pdf_standards().at(Span::detached())?,
};
let buffer = typst_pdf::pdf(document, &options)?;
command

View File

@ -4,14 +4,15 @@ use ecow::eco_format;
use pdf_writer::types::Direction;
use pdf_writer::writers::PageLabel;
use pdf_writer::{Finish, Name, Pdf, Ref, Str, TextStr};
use typst::diag::SourceResult;
use typst::diag::{bail, SourceResult};
use typst::foundations::{Datetime, Smart};
use typst::layout::Dir;
use typst::syntax::Span;
use typst::text::Lang;
use xmp_writer::{DateTime, LangId, RenditionClass, Timezone, XmpWriter};
use crate::page::PdfPageLabel;
use crate::{hash_base64, outline, WithEverything};
use crate::{hash_base64, outline, TextStrExt, WithEverything};
/// Write the document catalog.
pub fn write_catalog(
@ -43,7 +44,7 @@ pub fn write_catalog(
let mut info = pdf.document_info(info_ref);
let mut xmp = XmpWriter::new();
if let Some(title) = &ctx.document.info.title {
info.title(TextStr(title));
info.title(TextStr::trimmed(title));
xmp.title([(None, title.as_str())]);
}
@ -66,7 +67,7 @@ pub fn write_catalog(
// bit weird to not use the array (and it makes Acrobat show the author
// list in quotes), but there's not much we can do about that.
let joined = authors.join(", ");
info.author(TextStr(&joined));
info.author(TextStr::trimmed(&joined));
xmp.creator([joined.as_str()]);
}
@ -77,26 +78,20 @@ pub fn write_catalog(
let keywords = &ctx.document.info.keywords;
if !keywords.is_empty() {
let joined = keywords.join(", ");
info.keywords(TextStr(&joined));
info.keywords(TextStr::trimmed(&joined));
xmp.pdf_keywords(&joined);
}
if let Some(date) = ctx.document.info.date.unwrap_or(ctx.options.timestamp) {
let tz = ctx.document.info.date.is_auto();
let date = ctx.document.info.date.unwrap_or(ctx.options.timestamp);
let tz = ctx.document.info.date.is_auto();
if let Some(date) = date {
if let Some(pdf_date) = pdf_date(date, tz) {
info.creation_date(pdf_date);
info.modified_date(pdf_date);
}
if let Some(xmp_date) = xmp_date(date, tz) {
xmp.create_date(xmp_date);
xmp.modify_date(xmp_date);
}
}
info.finish();
xmp.num_pages(ctx.document.pages.len() as u32);
xmp.format("application/pdf");
xmp.language(ctx.resources.languages.keys().map(|lang| LangId(lang.as_str())));
// A unique ID for this instance of the document. Changes if anything
// changes in the frames.
@ -116,13 +111,46 @@ pub fn write_catalog(
instance_id.clone()
};
// Write IDs.
xmp.document_id(&doc_id);
xmp.instance_id(&instance_id);
pdf.set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes()));
xmp.rendition_class(RenditionClass::Proof);
xmp.format("application/pdf");
xmp.pdf_version("1.7");
xmp.language(ctx.resources.languages.keys().map(|lang| LangId(lang.as_str())));
xmp.num_pages(ctx.document.pages.len() as u32);
xmp.rendition_class(RenditionClass::Proof);
if let Some(xmp_date) = date.and_then(|date| xmp_date(date, tz)) {
xmp.create_date(xmp_date);
xmp.modify_date(xmp_date);
if ctx.options.standards.pdfa {
let mut history = xmp.history();
history
.add_event()
.action(xmp_writer::ResourceEventAction::Saved)
.when(xmp_date)
.instance_id(&eco_format!("{instance_id}_source"));
history
.add_event()
.action(xmp_writer::ResourceEventAction::Converted)
.when(xmp_date)
.instance_id(&instance_id)
.software_agent(&creator);
}
}
// Assert dominance.
if ctx.options.standards.pdfa {
let mut extension_schemas = xmp.extension_schemas();
extension_schemas
.xmp_media_management()
.properties()
.describe_instance_id();
extension_schemas.pdf().properties().describe_all();
extension_schemas.finish();
xmp.pdfa_part(2);
xmp.pdfa_conformance("B");
}
let xmp_buf = xmp.finish(None);
let meta_ref = alloc.bump();
@ -130,6 +158,9 @@ pub fn write_catalog(
.pair(Name(b"Type"), Name(b"Metadata"))
.pair(Name(b"Subtype"), Name(b"XML"));
// Set IDs only now, so that we don't need to clone them.
pdf.set_file_id((doc_id.into_bytes(), instance_id.into_bytes()));
// Write the document catalog.
let catalog_ref = alloc.bump();
let mut catalog = pdf.catalog(catalog_ref);
@ -164,8 +195,23 @@ pub fn write_catalog(
catalog.lang(TextStr(lang.as_str()));
}
if ctx.options.standards.pdfa {
catalog
.output_intents()
.push()
.subtype(pdf_writer::types::OutputIntentSubtype::PDFA)
.output_condition(TextStr("sRGB"))
.output_condition_identifier(TextStr("Custom"))
.info(TextStr("sRGB IEC61966-2.1"))
.dest_output_profile(ctx.globals.color_functions.srgb.unwrap());
}
catalog.finish();
if ctx.options.standards.pdfa && pdf.refs().count() > 8388607 {
bail!(Span::detached(), "too many PDF objects");
}
Ok(())
}
@ -211,7 +257,7 @@ pub(crate) fn write_page_labels(
// Only add what is actually provided. Don't add empty prefix string if
// it wasn't given for example.
if let Some(prefix) = &label.prefix {
entry.prefix(TextStr(prefix));
entry.prefix(TextStr::trimmed(prefix));
}
if let Some(style) = label.style {

View File

@ -1,10 +1,11 @@
use arrayvec::ArrayVec;
use once_cell::sync::Lazy;
use pdf_writer::{writers, Chunk, Dict, Filter, Name, Ref};
use typst::diag::SourceResult;
use typst::diag::{bail, SourceResult};
use typst::syntax::Span;
use typst::visualize::{Color, ColorSpace, Paint};
use crate::{content, deflate, PdfChunk, Renumber, WithResources};
use crate::{content, deflate, PdfChunk, PdfOptions, Renumber, WithResources};
// The names of the color spaces.
pub const SRGB: Name<'static> = Name(b"srgb");
@ -65,18 +66,18 @@ impl ColorSpaces {
/// PDF file.
pub fn write_functions(&self, chunk: &mut Chunk, refs: &ColorFunctionRefs) {
// Write the sRGB color space.
if self.use_srgb {
if let Some(id) = refs.srgb {
chunk
.icc_profile(refs.srgb.unwrap(), &SRGB_ICC_DEFLATED)
.icc_profile(id, &SRGB_ICC_DEFLATED)
.n(3)
.range([0.0, 1.0, 0.0, 1.0, 0.0, 1.0])
.filter(Filter::FlateDecode);
}
// Write the gray color space.
if self.use_d65_gray {
if let Some(id) = refs.d65_gray {
chunk
.icc_profile(refs.d65_gray.unwrap(), &GRAY_ICC_DEFLATED)
.icc_profile(id, &GRAY_ICC_DEFLATED)
.n(1)
.range([0.0, 1.0])
.filter(Filter::FlateDecode);
@ -125,7 +126,7 @@ pub fn write(
/// needed) in the final document, and be shared by all color space
/// dictionaries.
pub struct ColorFunctionRefs {
srgb: Option<Ref>,
pub srgb: Option<Ref>,
d65_gray: Option<Ref>,
}
@ -147,6 +148,10 @@ pub fn alloc_color_functions_refs(
let mut chunk = PdfChunk::new();
let mut used_color_spaces = ColorSpaces::default();
if context.options.standards.pdfa {
used_color_spaces.mark_as_used(ColorSpace::Srgb);
}
context.resources.traverse(&mut |r| {
used_color_spaces.merge(&r.colors);
Ok(())
@ -269,6 +274,7 @@ impl PaintEncode for Color {
ctx.content.set_fill_color([r, g, b]);
}
Color::Cmyk(_) => {
check_cmyk_allowed(ctx.options)?;
ctx.reset_fill_color_space();
let [c, m, y, k] = ColorSpace::Cmyk.encode(*self);
@ -312,6 +318,7 @@ impl PaintEncode for Color {
ctx.content.set_stroke_color([r, g, b]);
}
Color::Cmyk(_) => {
check_cmyk_allowed(ctx.options)?;
ctx.reset_stroke_color_space();
let [c, m, y, k] = ColorSpace::Cmyk.encode(*self);
@ -373,3 +380,14 @@ impl QuantizedColor for f32 {
color.clamp(min, max)
}
}
/// Rejects CMYK usage while PDF/A export is active.
///
/// Returns `Ok(())` when `options.standards.pdfa` is unset; otherwise fails
/// with an error attached to a detached span.
pub(super) fn check_cmyk_allowed(options: &PdfOptions) -> SourceResult<()> {
    if !options.standards.pdfa {
        return Ok(());
    }

    bail!(
        Span::detached(),
        "cmyk colors are not currently supported by PDF/A export"
    );
}

View File

@ -10,15 +10,15 @@ use std::collections::HashMap;
use ecow::eco_format;
use indexmap::IndexMap;
use pdf_writer::types::UnicodeCmap;
use pdf_writer::writers::WMode;
use pdf_writer::{Filter, Finish, Name, Rect, Ref};
use ttf_parser::name_id;
use typst::diag::SourceResult;
use typst::layout::Em;
use typst::text::color::frame_for_glyph;
use typst::text::Font;
use crate::content;
use crate::font::{subset_tag, write_font_descriptor, CMAP_NAME, SYSTEM_INFO};
use crate::font::{base_font_name, write_font_descriptor, CMAP_NAME, SYSTEM_INFO};
use crate::resources::{Resources, ResourcesRefs};
use crate::{EmExt, PdfChunk, PdfOptions, WithGlobalRefs};
@ -84,12 +84,7 @@ pub fn write_color_fonts(
// Determine the base font name.
gids.sort();
let subset_tag = subset_tag(&gids);
let postscript_name = font_slice
.font
.find_name(name_id::POST_SCRIPT_NAME)
.unwrap_or_else(|| "unknown".to_string());
let base_font = eco_format!("{subset_tag}+{postscript_name}");
let base_font = base_font_name(&font_slice.font, &gids);
// Write the Type3 font object.
let mut pdf_font = chunk.type3_font(subfont_id);
@ -134,7 +129,7 @@ pub fn write_color_fonts(
cmap.pair_with_multiple(index as u8, text.chars());
}
}
chunk.cmap(cmap_ref, &cmap.finish());
chunk.cmap(cmap_ref, &cmap.finish()).writing_mode(WMode::Horizontal);
// Write the font descriptor.
write_font_descriptor(

View File

@ -8,8 +8,10 @@ use ecow::eco_format;
use pdf_writer::types::{
ColorSpaceOperand, LineCapStyle, LineJoinStyle, TextRenderingMode,
};
use pdf_writer::writers::PositionedItems;
use pdf_writer::{Content, Finish, Name, Rect, Str};
use typst::diag::SourceResult;
use typst::diag::{bail, SourceResult};
use typst::foundations::Repr;
use typst::layout::{
Abs, Em, Frame, FrameItem, GroupItem, Point, Ratio, Size, Transform,
};
@ -28,7 +30,7 @@ use crate::color_font::ColorFontMap;
use crate::extg::ExtGState;
use crate::image::deferred_image;
use crate::resources::Resources;
use crate::{deflate_deferred, AbsExt, EmExt, PdfOptions};
use crate::{deflate_deferred, AbsExt, ContentExt, EmExt, PdfOptions, StrExt};
/// Encode a [`Frame`] into a content stream.
///
@ -201,8 +203,7 @@ pub(super) struct Transforms {
impl Builder<'_, ()> {
fn save_state(&mut self) -> SourceResult<()> {
self.saves.push(self.state.clone());
self.content.save_state();
Ok(())
self.content.save_state_checked()
}
fn restore_state(&mut self) {
@ -417,6 +418,19 @@ fn write_group(ctx: &mut Builder, pos: Point, group: &GroupItem) -> SourceResult
/// Encode a text run into the content stream.
fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) -> SourceResult<()> {
if ctx.options.standards.pdfa {
let last_resort = text.font.info().is_last_resort();
for g in &text.glyphs {
if last_resort || g.id == 0 {
bail!(
g.span.0,
"the text {} could not be displayed with any font",
text.text[g.range()].repr()
);
}
}
}
let ttf = text.font.ttf();
let tables = ttf.tables();
@ -526,7 +540,7 @@ fn write_normal_text(
if !adjustment.is_zero() {
if !encoded.is_empty() {
items.show(Str(&encoded));
show_text(&mut items, &encoded);
encoded.clear();
}
@ -565,7 +579,7 @@ fn write_normal_text(
}
if !encoded.is_empty() {
items.show(Str(&encoded));
show_text(&mut items, &encoded);
}
items.finish();
@ -575,6 +589,14 @@ fn write_normal_text(
Ok(())
}
/// Shows text, ensuring that each individual string doesn't exceed the
/// implementation limits.
///
/// The encoded bytes are split into several show operations when they are
/// longer than the PDF/A string limit. That limit (`Str::PDFA_LIMIT`) is an
/// odd number, so splitting exactly at it could cut a multi-byte character
/// code in half and corrupt the glyph at the seam. Chunks are therefore capped
/// at the largest *even* length that still fits the limit, which keeps
/// two-byte codes intact and is equally valid for single-byte codes.
// NOTE(review): the caller presumably encodes each glyph as a two-byte CID
// for the Identity-H fonts — confirm in `write_normal_text`.
fn show_text(items: &mut PositionedItems, encoded: &[u8]) {
    /// Largest even chunk length that does not exceed the PDF/A limit.
    const CHUNK_LEN: usize = Str::PDFA_LIMIT - Str::PDFA_LIMIT % 2;

    for chunk in encoded.chunks(CHUNK_LEN) {
        items.show(Str(chunk));
    }
}
/// Encodes a text run made only of color glyphs into the content stream
fn write_color_glyphs(
ctx: &mut Builder,
@ -723,7 +745,8 @@ fn write_image(
) -> SourceResult<()> {
let index = ctx.resources.images.insert(image.clone());
ctx.resources.deferred_images.entry(index).or_insert_with(|| {
let (image, color_space) = deferred_image(image.clone());
let (image, color_space) =
deferred_image(image.clone(), ctx.options.standards.pdfa);
if let Some(color_space) = color_space {
ctx.resources.colors.mark_as_used(color_space);
}
@ -735,10 +758,14 @@ fn write_image(
let name = eco_format!("Im{index}");
let w = size.x.to_f32();
let h = size.y.to_f32();
ctx.content.save_state();
ctx.content.save_state_checked()?;
ctx.content.transform([w, 0.0, 0.0, -h, x, y + h]);
if let Some(alt) = image.alt() {
if ctx.options.standards.pdfa && alt.len() > Str::PDFA_LIMIT {
bail!(span, "the image's alt text is too long");
}
let mut image_span =
ctx.content.begin_marked_content_with_properties(Name(b"Span"));
let mut image_alt = image_span.properties();

View File

@ -4,7 +4,7 @@ use std::sync::Arc;
use ecow::{eco_format, EcoString};
use pdf_writer::types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap};
use pdf_writer::writers::FontDescriptor;
use pdf_writer::writers::{FontDescriptor, WMode};
use pdf_writer::{Chunk, Filter, Finish, Name, Rect, Ref, Str};
use subsetter::GlyphRemapper;
use ttf_parser::{name_id, GlyphId, Tag};
@ -13,10 +13,14 @@ use typst::syntax::Span;
use typst::text::Font;
use typst::utils::SliceExt;
use crate::{deflate, EmExt, PdfChunk, WithGlobalRefs};
use crate::{deflate, EmExt, NameExt, PdfChunk, WithGlobalRefs};
const CFF: Tag = Tag::from_bytes(b"CFF ");
const CFF2: Tag = Tag::from_bytes(b"CFF2");
const SUBSET_TAG_LEN: usize = 6;
const IDENTITY_H: &str = "Identity-H";
pub(crate) const CMAP_NAME: Name = Name(b"Custom");
pub(crate) const SYSTEM_INFO: SystemInfo = SystemInfo {
registry: Str(b"Adobe"),
@ -58,14 +62,9 @@ pub fn write_fonts(
.or_else(|| ttf.raw_face().table(CFF2))
.is_some();
let postscript_name = font
.find_name(name_id::POST_SCRIPT_NAME)
.unwrap_or_else(|| "unknown".to_string());
let subset_tag = subset_tag(glyph_set);
let base_font = eco_format!("{subset_tag}+{postscript_name}");
let base_font = base_font_name(font, glyph_set);
let base_font_type0 = if is_cff {
eco_format!("{base_font}-Identity-H")
eco_format!("{base_font}-{IDENTITY_H}")
} else {
base_font.clone()
};
@ -74,7 +73,7 @@ pub fn write_fonts(
chunk
.type0_font(type0_ref)
.base_font(Name(base_font_type0.as_bytes()))
.encoding_predefined(Name(b"Identity-H"))
.encoding_predefined(Name(IDENTITY_H.as_bytes()))
.descendant_font(cid_ref)
.to_unicode(cmap_ref);
@ -118,7 +117,10 @@ pub fn write_fonts(
// Write the /ToUnicode character map, which maps glyph ids back to
// unicode codepoints to enable copying out of the PDF.
let cmap = create_cmap(glyph_set, glyph_remapper);
chunk.cmap(cmap_ref, &cmap).filter(Filter::FlateDecode);
chunk
.cmap(cmap_ref, &cmap)
.writing_mode(WMode::Horizontal)
.filter(Filter::FlateDecode);
let subset = subset_font(font, glyph_remapper)
.map_err(|err| {
@ -224,12 +226,31 @@ fn subset_font(
Ok(Arc::new(deflate(data)))
}
/// Creates the base font name for a font with a specific glyph subset.
/// Consists of a subset tag and the PostScript name of the font.
///
/// The PostScript name falls back to `"unknown"` when the font has none and
/// is truncated to at most `MAX_LEN` bytes. The returned string is therefore
/// at most 116 bytes long, so that even with `-Identity-H` added it does not
/// exceed the maximum PDF/A name length of 127.
///
/// Truncation never splits a UTF-8 character: if the byte limit falls inside
/// a multi-byte character, the cut moves back to the previous character
/// boundary instead of panicking.
pub(crate) fn base_font_name<T: Hash>(font: &Font, glyphs: &T) -> EcoString {
    const MAX_LEN: usize = Name::PDFA_LIMIT - REST_LEN;
    // Subset tag, the `+` separator, the `-` separator and `Identity-H`.
    const REST_LEN: usize = SUBSET_TAG_LEN + 1 + 1 + IDENTITY_H.len();

    let postscript_name = font.find_name(name_id::POST_SCRIPT_NAME);
    let name = postscript_name.as_deref().unwrap_or("unknown");

    // Back off to a character boundary; index 0 always is one, so this
    // terminates.
    let mut end = name.len().min(MAX_LEN);
    while !name.is_char_boundary(end) {
        end -= 1;
    }
    let trimmed = &name[..end];

    // Hash the full name (we might have trimmed) and the glyphs to produce
    // a fairly unique subset tag.
    let subset_tag = subset_tag(&(name, glyphs));

    eco_format!("{subset_tag}+{trimmed}")
}
/// Produce a unique 6 letter tag for a glyph set.
pub(crate) fn subset_tag<T: Hash>(glyphs: &T) -> EcoString {
const LEN: usize = 6;
const BASE: u128 = 26;
let mut hash = typst::utils::hash128(&glyphs);
let mut letter = [b'A'; LEN];
let mut letter = [b'A'; SUBSET_TAG_LEN];
for l in letter.iter_mut() {
*l = b'A' + (hash % BASE) as u8;
hash /= BASE;

View File

@ -13,7 +13,9 @@ use typst::visualize::{
Color, ColorSpace, Gradient, RatioOrAngle, RelativeTo, WeightedColor,
};
use crate::color::{self, ColorSpaceExt, PaintEncode, QuantizedColor};
use crate::color::{
self, check_cmyk_allowed, ColorSpaceExt, PaintEncode, QuantizedColor,
};
use crate::{content, WithGlobalRefs};
use crate::{deflate, transform_to_array, AbsExt, PdfChunk};
@ -56,6 +58,10 @@ pub fn write_gradients(
gradient.space()
};
if color_space == ColorSpace::Cmyk {
check_cmyk_allowed(context.options)?;
}
let mut shading_pattern = match &gradient {
Gradient::Linear(_) => {
let shading_function =

View File

@ -118,6 +118,7 @@ pub fn write_images(
#[comemo::memoize]
pub fn deferred_image(
image: Image,
pdfa: bool,
) -> (Deferred<StrResult<EncodedImage>>, Option<ColorSpace>) {
let color_space = match image.kind() {
ImageKind::Raster(raster) if raster.icc().is_none() => {
@ -151,7 +152,7 @@ pub fn deferred_image(
})
}
ImageKind::Svg(svg) => {
let (chunk, id) = encode_svg(svg)
let (chunk, id) = encode_svg(svg, pdfa)
.map_err(|err| eco_format!("failed to convert SVG to PDF: {err}"))?;
Ok(EncodedImage::Svg(chunk, id))
}
@ -201,8 +202,14 @@ fn encode_alpha(raster: &RasterImage) -> (Vec<u8>, Filter) {
/// Encode an SVG into a chunk of PDF objects.
#[typst_macros::time(name = "encode svg")]
fn encode_svg(svg: &SvgImage) -> Result<(Chunk, Ref), svg2pdf::ConversionError> {
svg2pdf::to_chunk(svg.tree(), svg2pdf::ConversionOptions::default())
fn encode_svg(
svg: &SvgImage,
pdfa: bool,
) -> Result<(Chunk, Ref), svg2pdf::ConversionError> {
svg2pdf::to_chunk(
svg.tree(),
svg2pdf::ConversionOptions { pdfa, ..Default::default() },
)
}
/// A pre-encoded image.

View File

@ -15,15 +15,17 @@ mod pattern;
mod resources;
use std::collections::HashMap;
use std::fmt::{self, Debug, Formatter};
use std::hash::Hash;
use std::ops::{Deref, DerefMut};
use base64::Engine;
use pdf_writer::{Chunk, Pdf, Ref};
use typst::diag::SourceResult;
use pdf_writer::{Chunk, Name, Pdf, Ref, Str, TextStr};
use typst::diag::{bail, SourceResult, StrResult};
use typst::foundations::{Datetime, Smart};
use typst::layout::{Abs, Em, PageRanges, Transform};
use typst::model::Document;
use typst::syntax::Span;
use typst::text::Font;
use typst::utils::Deferred;
use typst::visualize::Image;
@ -45,25 +47,6 @@ use crate::resources::{
/// Export a document into a PDF file.
///
/// Returns the raw bytes making up the PDF file.
///
/// The `ident` parameter, if given, shall be a string that uniquely and stably
/// identifies the document. It should not change between compilations of the
/// same document. **If you cannot provide such a stable identifier, just pass
/// `Smart::Auto` rather than trying to come up with one.** The CLI, for
/// example, does not have a well-defined notion of a long-lived project and as
/// such just passes `Smart::Auto`.
///
/// If an `ident` is given, the hash of it will be used to create a PDF document
/// identifier (the identifier itself is not leaked). If `ident` is `Auto`, a
/// hash of the document's title and author is used instead (which is reasonably
/// unique and stable).
///
/// The `timestamp`, if given, is expected to be the creation date of the
/// document as a UTC datetime. It will only be used if `set document(date: ..)`
/// is `auto`.
///
/// The `page_ranges` option specifies which ranges of pages should be exported
/// in the PDF. When `None`, all pages should be exported.
#[typst_macros::time(name = "pdf")]
pub fn pdf(document: &Document, options: &PdfOptions) -> SourceResult<Vec<u8>> {
PdfBuilder::new(document, options)
@ -92,26 +75,70 @@ pub fn pdf(document: &Document, options: &PdfOptions) -> SourceResult<Vec<u8>> {
}
/// Settings for PDF export.
#[derive(Default)]
#[derive(Debug, Default)]
pub struct PdfOptions<'a> {
/// If given, shall be a string that uniquely and stably identifies the
/// document. It should not change between compilations of the same
/// document. **If you cannot provide such a stable identifier, just pass
/// `Smart::Auto` rather than trying to come up with one.** The CLI, for
/// example, does not have a well-defined notion of a long-lived project and
/// as such just passes `Smart::Auto`.
/// If not `Smart::Auto`, shall be a string that uniquely and stably
/// identifies the document. It should not change between compilations of
/// the same document. **If you cannot provide such a stable identifier,
/// just pass `Smart::Auto` rather than trying to come up with one.** The
/// CLI, for example, does not have a well-defined notion of a long-lived
/// project and as such just passes `Smart::Auto`.
///
/// If an `ident` is given, the hash of it will be used to create a PDF
/// document identifier (the identifier itself is not leaked). If `ident` is
/// `Auto`, a hash of the document's title and author is used instead (which
/// is reasonably unique and stable).
pub ident: Smart<&'a str>,
/// If given, is expected to be the creation date of the document as a UTC
/// If not `None`, shall be the creation date of the document as a UTC
/// datetime. It will only be used if `set document(date: ..)` is `auto`.
pub timestamp: Option<Datetime>,
/// Specifies which ranges of pages should be exported in the PDF. When
/// `None`, all pages should be exported.
pub page_ranges: Option<PageRanges>,
/// A list of PDF standards that Typst will enforce conformance with.
pub standards: PdfStandards,
}
/// Encapsulates a list of compatible PDF standards.
///
/// Built from a slice of [`PdfStandard`]s via [`PdfStandards::new`]. The
/// [`Default`] value enforces no PDF/A conformance.
#[derive(Clone)]
pub struct PdfStandards {
/// For now, we simplify to just PDF/A, since we only support PDF/A-2b. But
/// it can be more fine-grained in the future.
pub(crate) pdfa: bool,
}
impl PdfStandards {
    /// Builds the encapsulated representation from a list of requested PDF
    /// standards.
    ///
    /// PDF/A enforcement is enabled exactly when the list contains
    /// [`PdfStandard::A_2b`]. The `StrResult` return type leaves room for
    /// rejecting incompatible combinations; at present no combination is
    /// rejected, so the result is always `Ok`.
    pub fn new(list: &[PdfStandard]) -> StrResult<Self> {
        let pdfa = list.iter().any(|standard| *standard == PdfStandard::A_2b);
        Ok(Self { pdfa })
    }
}
impl Debug for PdfStandards {
    /// Writes an opaque placeholder instead of the internal fields, honoring
    /// any width and alignment given in the format specification.
    fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
        let placeholder = "PdfStandards(..)";
        formatter.pad(placeholder)
    }
}
#[allow(clippy::derivable_impls)]
impl Default for PdfStandards {
fn default() -> Self {
Self { pdfa: false }
}
}
/// A PDF standard.
///
/// Support for more standards is planned. The enum is `#[non_exhaustive]`,
/// so `match`es outside this crate need a wildcard arm.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
// The variant names follow the spelling of the standards (`V_1_7`, `A_2b`)
// rather than CamelCase, hence the lint exemption.
#[allow(non_camel_case_types)]
#[non_exhaustive]
pub enum PdfStandard {
/// PDF 1.7.
V_1_7,
/// PDF/A-2b.
A_2b,
}
/// A struct to build a PDF following a fixed succession of phases.
@ -515,6 +542,63 @@ impl EmExt for Em {
}
}
/// Additional constants for [`Name`].
trait NameExt<'a> {
/// The maximum length of a name in PDF/A.
const PDFA_LIMIT: usize = 127;
}
// The trait only carries the constant; its default value is used unchanged.
impl<'a> NameExt<'a> for Name<'a> {}
/// Extra PDF/A-related helpers for [`Str`].
trait StrExt<'a>: Sized {
    /// The maximum length (in bytes) of a string in PDF/A.
    const PDFA_LIMIT: usize = 32767;

    /// Creates a string that satisfies the length constraint of PDF/A by
    /// keeping at most the first `PDFA_LIMIT` bytes of `string`.
    #[allow(unused)]
    fn trimmed(string: &'a [u8]) -> Self;
}

impl<'a> StrExt<'a> for Str<'a> {
    fn trimmed(string: &'a [u8]) -> Self {
        // `get` only fails when the input is shorter than the limit, in which
        // case the whole input is kept.
        let kept = string.get(..Self::PDFA_LIMIT).unwrap_or(string);
        Self(kept)
    }
}
/// Additional methods for [`TextStr`].
trait TextStrExt<'a>: Sized {
    /// The maximum length of a string in PDF/A.
    const PDFA_LIMIT: usize = Str::PDFA_LIMIT;

    /// Create a text string that satisfies the constraints of PDF/A.
    ///
    /// Keeps at most the first `PDFA_LIMIT` bytes of `string`. The cut never
    /// splits a UTF-8 character: if the limit falls inside a multi-byte
    /// character, that character is dropped entirely.
    fn trimmed(string: &'a str) -> Self;
}

impl<'a> TextStrExt<'a> for TextStr<'a> {
    fn trimmed(string: &'a str) -> Self {
        // Slicing a `str` at a non-boundary byte index panics, so back off
        // to the previous character boundary. Index 0 always is one, so this
        // terminates.
        // NOTE(review): the limit is applied to the UTF-8 byte length; the
        // string as written to the file may be longer if it is re-encoded —
        // confirm against pdf-writer's `TextStr` encoding.
        let mut end = string.len().min(Self::PDFA_LIMIT);
        while !string.is_char_boundary(end) {
            end -= 1;
        }
        Self(&string[..end])
    }
}
/// Extension trait for [`Content`](pdf_writer::Content).
trait ContentExt {
    /// Saves the graphics state like `save_state`, but fails if the nesting
    /// depth after the save is greater than 28.
    ///
    /// The state is saved before the depth is checked, and the check applies
    /// regardless of which PDF standards are enabled.
    fn save_state_checked(&mut self) -> SourceResult<()>;
}

impl ContentExt for pdf_writer::Content {
    fn save_state_checked(&mut self) -> SourceResult<()> {
        self.save_state();

        // NOTE(review): 28 is presumably the PDF/A limit for nested
        // graphics state saves — confirm against the standard.
        if self.state_nesting_depth() <= 28 {
            return Ok(());
        }

        bail!(
            Span::detached(),
            "maximum PDF grouping depth exceeding";
            hint: "try to avoid excessive nesting of layout containers",
        );
    }
}
/// Convert to an array of floats.
fn transform_to_array(ts: Transform) -> [f32; 6] {
[

View File

@ -1,14 +1,14 @@
use std::collections::{HashMap, HashSet};
use pdf_writer::writers::Destination;
use pdf_writer::Ref;
use pdf_writer::{Ref, Str};
use typst::diag::SourceResult;
use typst::foundations::{Label, NativeElement};
use typst::introspection::Location;
use typst::layout::Abs;
use typst::model::HeadingElem;
use crate::{AbsExt, PdfChunk, Renumber, WithGlobalRefs};
use crate::{AbsExt, PdfChunk, Renumber, StrExt, WithGlobalRefs};
/// A list of destinations in the PDF document (a specific point on a specific
/// page), that have a name associated with them.
@ -56,6 +56,12 @@ pub fn write_named_destinations(
matches.sort_by_key(|&(_, label)| label);
for (loc, label) in matches {
// Don't encode named destinations that would exceed the limit. Those
// will instead be encoded as normal links.
if label.as_str().len() > Str::PDFA_LIMIT {
continue;
}
let pos = context.document.introspector.position(loc);
let index = pos.page.get() - 1;
let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero());

View File

@ -5,7 +5,7 @@ use typst::foundations::{NativeElement, Packed, StyleChain};
use typst::layout::Abs;
use typst::model::HeadingElem;
use crate::{AbsExt, WithEverything};
use crate::{AbsExt, TextStrExt, WithEverything};
/// Construct the outline for the document.
pub(crate) fn write_outline(
@ -185,7 +185,7 @@ fn write_outline_item(
}
let body = node.element.body();
outline.title(TextStr(body.plain_text().trim()));
outline.title(TextStr::trimmed(body.plain_text().trim()));
let loc = node.element.location().unwrap();
let pos = ctx.document.introspector.position(loc);

View File

@ -652,7 +652,7 @@ cast! {
}
/// A list of page ranges to be exported.
#[derive(Debug, Clone)]
pub struct PageRanges(Vec<PageRange>);
/// A range of pages to export.

View File

@ -182,7 +182,7 @@ cast! {
pub struct Url(EcoString);
impl Url {
/// Create an URL from a string, checking the maximum length.
/// Create a URL from a string, checking the maximum length.
pub fn new(url: impl Into<EcoString>) -> StrResult<Self> {
let url = url.into();
if url.len() > 8000 {

View File

@ -291,6 +291,12 @@ impl FontInfo {
coverage: Coverage::from_vec(codepoints),
})
}
/// Whether this is the macOS LastResort font. It can yield tofus with
/// glyph ID != 0.
///
/// The check is an exact, case-sensitive comparison of the family name with
/// `"LastResort"`.
pub fn is_last_resort(&self) -> bool {
self.family == "LastResort"
}
}
/// Try to find and decode the name with the given id.

View File

@ -544,6 +544,9 @@ impl Color {
/// These components are also available using the
/// [`components`]($color.components) method.
///
/// Note that CMYK colors are not currently supported when PDF/A output is
/// enabled.
///
/// ```example
/// #square(
/// fill: cmyk(27%, 0%, 3%, 5%)