//! Exporting into PDF documents. mod color; mod extg; mod font; mod gradient; mod image; mod outline; mod page; mod pattern; use std::cmp::Eq; use std::collections::{BTreeMap, HashMap, HashSet}; use std::hash::Hash; use std::sync::Arc; use base64::Engine; use ecow::{eco_format, EcoString}; use pdf_writer::types::Direction; use pdf_writer::writers::Destination; use pdf_writer::{Finish, Name, Pdf, Ref, Str, TextStr}; use typst::foundations::{Datetime, Label, NativeElement}; use typst::introspection::Location; use typst::layout::{Abs, Dir, Em, Transform}; use typst::model::{Document, HeadingElem}; use typst::text::{Font, Lang}; use typst::util::Deferred; use typst::visualize::Image; use xmp_writer::{DateTime, LangId, RenditionClass, Timezone, XmpWriter}; use crate::color::ColorSpaces; use crate::extg::ExtGState; use crate::gradient::PdfGradient; use crate::image::EncodedImage; use crate::page::EncodedPage; use crate::pattern::PdfPattern; /// Export a document into a PDF file. /// /// Returns the raw bytes making up the PDF file. /// /// The `ident` parameter shall be a string that uniquely and stably identifies /// the document. It should not change between compilations of the same /// document. Its hash will be used to create a PDF document identifier (the /// identifier itself is not leaked). If `ident` is `None`, a hash of the /// document is used instead (which means that it _will_ change across /// compilations). /// /// The `timestamp`, if given, is expected to be the creation date of the /// document as a UTC datetime. It will only be used if `set document(date: ..)` /// is `auto`. #[typst_macros::time(name = "pdf")] pub fn pdf( document: &Document, ident: Option<&str>, timestamp: Option, ) -> Vec { let mut ctx = PdfContext::new(document); page::construct_pages(&mut ctx, &document.pages); font::write_fonts(&mut ctx); image::write_images(&mut ctx); gradient::write_gradients(&mut ctx); extg::write_external_graphics_states(&mut ctx); pattern::write_patterns(&mut ctx); write_named_destinations(&mut ctx); page::write_page_tree(&mut ctx); write_catalog(&mut ctx, ident, timestamp); ctx.pdf.finish() } /// Context for exporting a whole PDF document. struct PdfContext<'a> { /// The document that we're currently exporting. document: &'a Document, /// The writer we are writing the PDF into. pdf: Pdf, /// Content of exported pages. pages: Vec, /// For each font a mapping from used glyphs to their text representation. /// May contain multiple chars in case of ligatures or similar things. The /// same glyph can have a different text representation within one document, /// then we just save the first one. The resulting strings are used for the /// PDF's /ToUnicode map for glyphs that don't have an entry in the font's /// cmap. This is important for copy-paste and searching. glyph_sets: HashMap>, /// The number of glyphs for all referenced languages in the document. /// We keep track of this to determine the main document language. languages: HashMap, /// Allocator for indirect reference IDs. alloc: Ref, /// The ID of the page tree. page_tree_ref: Ref, /// The IDs of written pages. page_refs: Vec, /// The IDs of written fonts. font_refs: Vec, /// The IDs of written images. image_refs: Vec, /// The IDs of written gradients. gradient_refs: Vec, /// The IDs of written patterns. pattern_refs: Vec, /// The IDs of written external graphics states. ext_gs_refs: Vec, /// Handles color space writing. colors: ColorSpaces, /// Deduplicates fonts used across the document. font_map: Remapper, /// Deduplicates images used across the document. image_map: Remapper, /// Handles to deferred image conversions. image_deferred_map: HashMap>, /// Deduplicates gradients used across the document. gradient_map: Remapper, /// Deduplicates patterns used across the document. pattern_map: Remapper, /// Deduplicates external graphics states used across the document. extg_map: Remapper, /// A sorted list of all named destinations. dests: Vec<(Label, Ref)>, /// Maps from locations to named destinations that point to them. loc_to_dest: HashMap, } impl<'a> PdfContext<'a> { fn new(document: &'a Document) -> Self { let mut alloc = Ref::new(1); let page_tree_ref = alloc.bump(); Self { document, pdf: Pdf::new(), pages: vec![], glyph_sets: HashMap::new(), languages: HashMap::new(), alloc, page_tree_ref, page_refs: vec![], font_refs: vec![], image_refs: vec![], gradient_refs: vec![], pattern_refs: vec![], ext_gs_refs: vec![], colors: ColorSpaces::default(), font_map: Remapper::new(), image_map: Remapper::new(), image_deferred_map: HashMap::default(), gradient_map: Remapper::new(), pattern_map: Remapper::new(), extg_map: Remapper::new(), dests: vec![], loc_to_dest: HashMap::new(), } } } /// Write the document catalog. fn write_catalog(ctx: &mut PdfContext, ident: Option<&str>, timestamp: Option) { let lang = ctx .languages .iter() .max_by_key(|(&lang, &count)| (count, lang)) .map(|(&k, _)| k); let dir = if lang.map(Lang::dir) == Some(Dir::RTL) { Direction::R2L } else { Direction::L2R }; // Write the outline tree. let outline_root_id = outline::write_outline(ctx); // Write the page labels. let page_labels = page::write_page_labels(ctx); // Write the document information. let mut info = ctx.pdf.document_info(ctx.alloc.bump()); let mut xmp = XmpWriter::new(); if let Some(title) = &ctx.document.title { info.title(TextStr(title)); xmp.title([(None, title.as_str())]); } let authors = &ctx.document.author; if !authors.is_empty() { // Turns out that if the authors are given in both the document // information dictionary and the XMP metadata, Acrobat takes a little // bit of both: The first author from the document information // dictionary and the remaining authors from the XMP metadata. // // To fix this for Acrobat, we could omit the remaining authors or all // metadata from the document information catalog (it is optional) and // only write XMP. However, not all other tools (including Apple // Preview) read the XMP data. This means we do want to include all // authors in the document information dictionary. // // Thus, the only alternative is to fold all authors into a single // `` in the XMP metadata. This is, in fact, exactly what the // PDF/A spec Part 1 section 6.7.3 has to say about the matter. It's a // bit weird to not use the array (and it makes Acrobat show the author // list in quotes), but there's not much we can do about that. let joined = authors.join(", "); info.author(TextStr(&joined)); xmp.creator([joined.as_str()]); } let creator = eco_format!("Typst {}", env!("CARGO_PKG_VERSION")); info.creator(TextStr(&creator)); xmp.creator_tool(&creator); let keywords = &ctx.document.keywords; if !keywords.is_empty() { let joined = keywords.join(", "); info.keywords(TextStr(&joined)); xmp.pdf_keywords(&joined); } if let Some(date) = ctx.document.date.unwrap_or(timestamp) { let tz = ctx.document.date.is_auto(); if let Some(pdf_date) = pdf_date(date, tz) { info.creation_date(pdf_date); info.modified_date(pdf_date); } if let Some(xmp_date) = xmp_date(date, tz) { xmp.create_date(xmp_date); xmp.modify_date(xmp_date); } } info.finish(); xmp.num_pages(ctx.document.pages.len() as u32); xmp.format("application/pdf"); xmp.language(ctx.languages.keys().map(|lang| LangId(lang.as_str()))); // A unique ID for this instance of the document. Changes if anything // changes in the frames. let instance_id = hash_base64(&ctx.pdf.as_bytes()); if let Some(ident) = ident { // A unique ID for the document that stays stable across compilations. let doc_id = hash_base64(&("PDF-1.7", ident)); xmp.document_id(&doc_id); xmp.instance_id(&instance_id); ctx.pdf .set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes())); } else { // This is not spec-compliant, but some PDF readers really want an ID. let bytes = instance_id.into_bytes(); ctx.pdf.set_file_id((bytes.clone(), bytes)); } xmp.rendition_class(RenditionClass::Proof); xmp.pdf_version("1.7"); let xmp_buf = xmp.finish(None); let meta_ref = ctx.alloc.bump(); ctx.pdf .stream(meta_ref, xmp_buf.as_bytes()) .pair(Name(b"Type"), Name(b"Metadata")) .pair(Name(b"Subtype"), Name(b"XML")); // Write the document catalog. let mut catalog = ctx.pdf.catalog(ctx.alloc.bump()); catalog.pages(ctx.page_tree_ref); catalog.viewer_preferences().direction(dir); catalog.metadata(meta_ref); // Write the named destination tree. let mut name_dict = catalog.names(); let mut dests_name_tree = name_dict.destinations(); let mut names = dests_name_tree.names(); for &(name, dest_ref, ..) in &ctx.dests { names.insert(Str(name.as_str().as_bytes()), dest_ref); } names.finish(); dests_name_tree.finish(); name_dict.finish(); // Insert the page labels. if !page_labels.is_empty() { let mut num_tree = catalog.page_labels(); let mut entries = num_tree.nums(); for (n, r) in &page_labels { entries.insert(n.get() as i32 - 1, *r); } } if let Some(outline_root_id) = outline_root_id { catalog.outlines(outline_root_id); } if let Some(lang) = lang { catalog.lang(TextStr(lang.as_str())); } catalog.finish(); } /// Fills in the map and vector for named destinations and writes the indirect /// destination objects. fn write_named_destinations(ctx: &mut PdfContext) { let mut seen = HashSet::new(); // Find all headings that have a label and are the first among other // headings with the same label. let mut matches: Vec<_> = ctx .document .introspector .query(&HeadingElem::elem().select()) .iter() .filter_map(|elem| elem.location().zip(elem.label())) .filter(|&(_, label)| seen.insert(label)) .collect(); // Named destinations must be sorted by key. matches.sort_by_key(|&(_, label)| label); for (loc, label) in matches { let pos = ctx.document.introspector.position(loc); let index = pos.page.get() - 1; let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero()); if let Some(page) = ctx.pages.get(index) { let dest_ref = ctx.alloc.bump(); let x = pos.point.x.to_f32(); let y = (page.size.y - y).to_f32(); ctx.dests.push((label, dest_ref)); ctx.loc_to_dest.insert(loc, label); ctx.pdf .indirect(dest_ref) .start::() .page(page.id) .xyz(x, y, None); } } } /// Compress data with the DEFLATE algorithm. fn deflate(data: &[u8]) -> Vec { const COMPRESSION_LEVEL: u8 = 6; miniz_oxide::deflate::compress_to_vec_zlib(data, COMPRESSION_LEVEL) } /// Memoized version of [`deflate`] specialized for a page's content stream. #[comemo::memoize] fn deflate_memoized(content: &[u8]) -> Arc> { Arc::new(deflate(content)) } /// Memoized and deferred version of [`deflate`] specialized for a page's content /// stream. #[comemo::memoize] fn deflate_deferred(content: Vec) -> Deferred> { Deferred::new(move || deflate(&content)) } /// Create a base64-encoded hash of the value. fn hash_base64(value: &T) -> String { base64::engine::general_purpose::STANDARD .encode(typst::util::hash128(value).to_be_bytes()) } /// Converts a datetime to a pdf-writer date. fn pdf_date(datetime: Datetime, tz: bool) -> Option { let year = datetime.year().filter(|&y| y >= 0)? as u16; let mut pdf_date = pdf_writer::Date::new(year); if let Some(month) = datetime.month() { pdf_date = pdf_date.month(month); } if let Some(day) = datetime.day() { pdf_date = pdf_date.day(day); } if let Some(h) = datetime.hour() { pdf_date = pdf_date.hour(h); } if let Some(m) = datetime.minute() { pdf_date = pdf_date.minute(m); } if let Some(s) = datetime.second() { pdf_date = pdf_date.second(s); } if tz { pdf_date = pdf_date.utc_offset_hour(0).utc_offset_minute(0); } Some(pdf_date) } /// Converts a datetime to an xmp-writer datetime. fn xmp_date(datetime: Datetime, tz: bool) -> Option { let year = datetime.year().filter(|&y| y >= 0)? as u16; Some(DateTime { year, month: datetime.month(), day: datetime.day(), hour: datetime.hour(), minute: datetime.minute(), second: datetime.second(), timezone: if tz { Some(Timezone::Utc) } else { None }, }) } /// Assigns new, consecutive PDF-internal indices to items. struct Remapper { /// Forwards from the items to the pdf indices. to_pdf: HashMap, /// Backwards from the pdf indices to the items. to_items: Vec, } impl Remapper where T: Eq + Hash + Clone, { fn new() -> Self { Self { to_pdf: HashMap::new(), to_items: vec![] } } fn insert(&mut self, item: T) -> usize { let to_layout = &mut self.to_items; *self.to_pdf.entry(item.clone()).or_insert_with(|| { let pdf_index = to_layout.len(); to_layout.push(item); pdf_index }) } fn pdf_indices<'a>( &'a self, refs: &'a [Ref], ) -> impl Iterator + 'a { refs.iter().copied().zip(0..self.to_pdf.len()) } fn items(&self) -> impl Iterator + '_ { self.to_items.iter() } } /// Additional methods for [`Abs`]. trait AbsExt { /// Convert an to a number of points. fn to_f32(self) -> f32; } impl AbsExt for Abs { fn to_f32(self) -> f32 { self.to_pt() as f32 } } /// Additional methods for [`Em`]. trait EmExt { /// Convert an em length to a number of PDF font units. fn to_font_units(self) -> f32; } impl EmExt for Em { fn to_font_units(self) -> f32 { 1000.0 * self.get() as f32 } } /// Convert to an array of floats. fn transform_to_array(ts: Transform) -> [f32; 6] { [ ts.sx.get() as f32, ts.ky.get() as f32, ts.kx.get() as f32, ts.sy.get() as f32, ts.tx.to_f32(), ts.ty.to_f32(), ] }