mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
485 lines
15 KiB
Rust
485 lines
15 KiB
Rust
//! Exporting into PDF documents.
|
|
|
|
mod color;
|
|
mod extg;
|
|
mod font;
|
|
mod gradient;
|
|
mod image;
|
|
mod outline;
|
|
mod page;
|
|
mod pattern;
|
|
|
|
use std::cmp::Eq;
|
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
|
use std::hash::Hash;
|
|
use std::sync::Arc;
|
|
|
|
use base64::Engine;
|
|
use ecow::{eco_format, EcoString};
|
|
use pdf_writer::types::Direction;
|
|
use pdf_writer::writers::Destination;
|
|
use pdf_writer::{Finish, Name, Pdf, Ref, Str, TextStr};
|
|
use typst::foundations::{Datetime, Label, NativeElement};
|
|
use typst::introspection::Location;
|
|
use typst::layout::{Abs, Dir, Em, Transform};
|
|
use typst::model::{Document, HeadingElem};
|
|
use typst::text::{Font, Lang};
|
|
use typst::util::Deferred;
|
|
use typst::visualize::Image;
|
|
use xmp_writer::{DateTime, LangId, RenditionClass, Timezone, XmpWriter};
|
|
|
|
use crate::color::ColorSpaces;
|
|
use crate::extg::ExtGState;
|
|
use crate::gradient::PdfGradient;
|
|
use crate::image::EncodedImage;
|
|
use crate::page::EncodedPage;
|
|
use crate::pattern::PdfPattern;
|
|
|
|
/// Export a document into a PDF file.
|
|
///
|
|
/// Returns the raw bytes making up the PDF file.
|
|
///
|
|
/// The `ident` parameter shall be a string that uniquely and stably identifies
|
|
/// the document. It should not change between compilations of the same
|
|
/// document. Its hash will be used to create a PDF document identifier (the
|
|
/// identifier itself is not leaked). If `ident` is `None`, a hash of the
|
|
/// document is used instead (which means that it _will_ change across
|
|
/// compilations).
|
|
///
|
|
/// The `timestamp`, if given, is expected to be the creation date of the
|
|
/// document as a UTC datetime. It will only be used if `set document(date: ..)`
|
|
/// is `auto`.
|
|
#[typst_macros::time(name = "pdf")]
|
|
pub fn pdf(
|
|
document: &Document,
|
|
ident: Option<&str>,
|
|
timestamp: Option<Datetime>,
|
|
) -> Vec<u8> {
|
|
let mut ctx = PdfContext::new(document);
|
|
page::construct_pages(&mut ctx, &document.pages);
|
|
font::write_fonts(&mut ctx);
|
|
image::write_images(&mut ctx);
|
|
gradient::write_gradients(&mut ctx);
|
|
extg::write_external_graphics_states(&mut ctx);
|
|
pattern::write_patterns(&mut ctx);
|
|
write_named_destinations(&mut ctx);
|
|
page::write_page_tree(&mut ctx);
|
|
write_catalog(&mut ctx, ident, timestamp);
|
|
ctx.pdf.finish()
|
|
}
|
|
|
|
/// Context for exporting a whole PDF document.
|
|
struct PdfContext<'a> {
|
|
/// The document that we're currently exporting.
|
|
document: &'a Document,
|
|
/// The writer we are writing the PDF into.
|
|
pdf: Pdf,
|
|
/// Content of exported pages.
|
|
pages: Vec<EncodedPage>,
|
|
/// For each font a mapping from used glyphs to their text representation.
|
|
/// May contain multiple chars in case of ligatures or similar things. The
|
|
/// same glyph can have a different text representation within one document,
|
|
/// then we just save the first one. The resulting strings are used for the
|
|
/// PDF's /ToUnicode map for glyphs that don't have an entry in the font's
|
|
/// cmap. This is important for copy-paste and searching.
|
|
glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
|
|
/// The number of glyphs for all referenced languages in the document.
|
|
/// We keep track of this to determine the main document language.
|
|
languages: HashMap<Lang, usize>,
|
|
|
|
/// Allocator for indirect reference IDs.
|
|
alloc: Ref,
|
|
/// The ID of the page tree.
|
|
page_tree_ref: Ref,
|
|
/// The IDs of written pages.
|
|
page_refs: Vec<Ref>,
|
|
/// The IDs of written fonts.
|
|
font_refs: Vec<Ref>,
|
|
/// The IDs of written images.
|
|
image_refs: Vec<Ref>,
|
|
/// The IDs of written gradients.
|
|
gradient_refs: Vec<Ref>,
|
|
/// The IDs of written patterns.
|
|
pattern_refs: Vec<Ref>,
|
|
/// The IDs of written external graphics states.
|
|
ext_gs_refs: Vec<Ref>,
|
|
/// Handles color space writing.
|
|
colors: ColorSpaces,
|
|
|
|
/// Deduplicates fonts used across the document.
|
|
font_map: Remapper<Font>,
|
|
/// Deduplicates images used across the document.
|
|
image_map: Remapper<Image>,
|
|
/// Handles to deferred image conversions.
|
|
image_deferred_map: HashMap<usize, Deferred<EncodedImage>>,
|
|
/// Deduplicates gradients used across the document.
|
|
gradient_map: Remapper<PdfGradient>,
|
|
/// Deduplicates patterns used across the document.
|
|
pattern_map: Remapper<PdfPattern>,
|
|
/// Deduplicates external graphics states used across the document.
|
|
extg_map: Remapper<ExtGState>,
|
|
|
|
/// A sorted list of all named destinations.
|
|
dests: Vec<(Label, Ref)>,
|
|
/// Maps from locations to named destinations that point to them.
|
|
loc_to_dest: HashMap<Location, Label>,
|
|
}
|
|
|
|
impl<'a> PdfContext<'a> {
|
|
fn new(document: &'a Document) -> Self {
|
|
let mut alloc = Ref::new(1);
|
|
let page_tree_ref = alloc.bump();
|
|
Self {
|
|
document,
|
|
pdf: Pdf::new(),
|
|
pages: vec![],
|
|
glyph_sets: HashMap::new(),
|
|
languages: HashMap::new(),
|
|
alloc,
|
|
page_tree_ref,
|
|
page_refs: vec![],
|
|
font_refs: vec![],
|
|
image_refs: vec![],
|
|
gradient_refs: vec![],
|
|
pattern_refs: vec![],
|
|
ext_gs_refs: vec![],
|
|
colors: ColorSpaces::default(),
|
|
font_map: Remapper::new(),
|
|
image_map: Remapper::new(),
|
|
image_deferred_map: HashMap::default(),
|
|
gradient_map: Remapper::new(),
|
|
pattern_map: Remapper::new(),
|
|
extg_map: Remapper::new(),
|
|
dests: vec![],
|
|
loc_to_dest: HashMap::new(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Write the document catalog.
|
|
fn write_catalog(ctx: &mut PdfContext, ident: Option<&str>, timestamp: Option<Datetime>) {
|
|
let lang = ctx
|
|
.languages
|
|
.iter()
|
|
.max_by_key(|(&lang, &count)| (count, lang))
|
|
.map(|(&k, _)| k);
|
|
|
|
let dir = if lang.map(Lang::dir) == Some(Dir::RTL) {
|
|
Direction::R2L
|
|
} else {
|
|
Direction::L2R
|
|
};
|
|
|
|
// Write the outline tree.
|
|
let outline_root_id = outline::write_outline(ctx);
|
|
|
|
// Write the page labels.
|
|
let page_labels = page::write_page_labels(ctx);
|
|
|
|
// Write the document information.
|
|
let mut info = ctx.pdf.document_info(ctx.alloc.bump());
|
|
let mut xmp = XmpWriter::new();
|
|
if let Some(title) = &ctx.document.title {
|
|
info.title(TextStr(title));
|
|
xmp.title([(None, title.as_str())]);
|
|
}
|
|
|
|
let authors = &ctx.document.author;
|
|
if !authors.is_empty() {
|
|
// Turns out that if the authors are given in both the document
|
|
// information dictionary and the XMP metadata, Acrobat takes a little
|
|
// bit of both: The first author from the document information
|
|
// dictionary and the remaining authors from the XMP metadata.
|
|
//
|
|
// To fix this for Acrobat, we could omit the remaining authors or all
|
|
// metadata from the document information catalog (it is optional) and
|
|
// only write XMP. However, not all other tools (including Apple
|
|
// Preview) read the XMP data. This means we do want to include all
|
|
// authors in the document information dictionary.
|
|
//
|
|
// Thus, the only alternative is to fold all authors into a single
|
|
// `<rdf:li>` in the XMP metadata. This is, in fact, exactly what the
|
|
// PDF/A spec Part 1 section 6.7.3 has to say about the matter. It's a
|
|
// bit weird to not use the array (and it makes Acrobat show the author
|
|
// list in quotes), but there's not much we can do about that.
|
|
let joined = authors.join(", ");
|
|
info.author(TextStr(&joined));
|
|
xmp.creator([joined.as_str()]);
|
|
}
|
|
|
|
let creator = eco_format!("Typst {}", env!("CARGO_PKG_VERSION"));
|
|
info.creator(TextStr(&creator));
|
|
xmp.creator_tool(&creator);
|
|
|
|
let keywords = &ctx.document.keywords;
|
|
if !keywords.is_empty() {
|
|
let joined = keywords.join(", ");
|
|
info.keywords(TextStr(&joined));
|
|
xmp.pdf_keywords(&joined);
|
|
}
|
|
|
|
if let Some(date) = ctx.document.date.unwrap_or(timestamp) {
|
|
let tz = ctx.document.date.is_auto();
|
|
if let Some(pdf_date) = pdf_date(date, tz) {
|
|
info.creation_date(pdf_date);
|
|
info.modified_date(pdf_date);
|
|
}
|
|
if let Some(xmp_date) = xmp_date(date, tz) {
|
|
xmp.create_date(xmp_date);
|
|
xmp.modify_date(xmp_date);
|
|
}
|
|
}
|
|
|
|
info.finish();
|
|
xmp.num_pages(ctx.document.pages.len() as u32);
|
|
xmp.format("application/pdf");
|
|
xmp.language(ctx.languages.keys().map(|lang| LangId(lang.as_str())));
|
|
|
|
// A unique ID for this instance of the document. Changes if anything
|
|
// changes in the frames.
|
|
let instance_id = hash_base64(&ctx.pdf.as_bytes());
|
|
|
|
if let Some(ident) = ident {
|
|
// A unique ID for the document that stays stable across compilations.
|
|
let doc_id = hash_base64(&("PDF-1.7", ident));
|
|
xmp.document_id(&doc_id);
|
|
xmp.instance_id(&instance_id);
|
|
ctx.pdf
|
|
.set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes()));
|
|
} else {
|
|
// This is not spec-compliant, but some PDF readers really want an ID.
|
|
let bytes = instance_id.into_bytes();
|
|
ctx.pdf.set_file_id((bytes.clone(), bytes));
|
|
}
|
|
|
|
xmp.rendition_class(RenditionClass::Proof);
|
|
xmp.pdf_version("1.7");
|
|
|
|
let xmp_buf = xmp.finish(None);
|
|
let meta_ref = ctx.alloc.bump();
|
|
ctx.pdf
|
|
.stream(meta_ref, xmp_buf.as_bytes())
|
|
.pair(Name(b"Type"), Name(b"Metadata"))
|
|
.pair(Name(b"Subtype"), Name(b"XML"));
|
|
|
|
// Write the document catalog.
|
|
let mut catalog = ctx.pdf.catalog(ctx.alloc.bump());
|
|
catalog.pages(ctx.page_tree_ref);
|
|
catalog.viewer_preferences().direction(dir);
|
|
catalog.metadata(meta_ref);
|
|
|
|
// Write the named destination tree.
|
|
let mut name_dict = catalog.names();
|
|
let mut dests_name_tree = name_dict.destinations();
|
|
let mut names = dests_name_tree.names();
|
|
for &(name, dest_ref, ..) in &ctx.dests {
|
|
names.insert(Str(name.as_str().as_bytes()), dest_ref);
|
|
}
|
|
names.finish();
|
|
dests_name_tree.finish();
|
|
name_dict.finish();
|
|
|
|
// Insert the page labels.
|
|
if !page_labels.is_empty() {
|
|
let mut num_tree = catalog.page_labels();
|
|
let mut entries = num_tree.nums();
|
|
for (n, r) in &page_labels {
|
|
entries.insert(n.get() as i32 - 1, *r);
|
|
}
|
|
}
|
|
|
|
if let Some(outline_root_id) = outline_root_id {
|
|
catalog.outlines(outline_root_id);
|
|
}
|
|
|
|
if let Some(lang) = lang {
|
|
catalog.lang(TextStr(lang.as_str()));
|
|
}
|
|
|
|
catalog.finish();
|
|
}
|
|
|
|
/// Fills in the map and vector for named destinations and writes the indirect
|
|
/// destination objects.
|
|
fn write_named_destinations(ctx: &mut PdfContext) {
|
|
let mut seen = HashSet::new();
|
|
|
|
// Find all headings that have a label and are the first among other
|
|
// headings with the same label.
|
|
let mut matches: Vec<_> = ctx
|
|
.document
|
|
.introspector
|
|
.query(&HeadingElem::elem().select())
|
|
.iter()
|
|
.filter_map(|elem| elem.location().zip(elem.label()))
|
|
.filter(|&(_, label)| seen.insert(label))
|
|
.collect();
|
|
|
|
// Named destinations must be sorted by key.
|
|
matches.sort_by_key(|&(_, label)| label);
|
|
|
|
for (loc, label) in matches {
|
|
let pos = ctx.document.introspector.position(loc);
|
|
let index = pos.page.get() - 1;
|
|
let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero());
|
|
|
|
if let Some(page) = ctx.pages.get(index) {
|
|
let dest_ref = ctx.alloc.bump();
|
|
let x = pos.point.x.to_f32();
|
|
let y = (page.size.y - y).to_f32();
|
|
ctx.dests.push((label, dest_ref));
|
|
ctx.loc_to_dest.insert(loc, label);
|
|
ctx.pdf
|
|
.indirect(dest_ref)
|
|
.start::<Destination>()
|
|
.page(page.id)
|
|
.xyz(x, y, None);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Compress data with the DEFLATE algorithm.
|
|
fn deflate(data: &[u8]) -> Vec<u8> {
|
|
const COMPRESSION_LEVEL: u8 = 6;
|
|
miniz_oxide::deflate::compress_to_vec_zlib(data, COMPRESSION_LEVEL)
|
|
}
|
|
|
|
/// Memoized version of [`deflate`] specialized for a page's content stream.
|
|
#[comemo::memoize]
|
|
fn deflate_memoized(content: &[u8]) -> Arc<Vec<u8>> {
|
|
Arc::new(deflate(content))
|
|
}
|
|
|
|
/// Memoized and deferred version of [`deflate`] specialized for a page's content
|
|
/// stream.
|
|
#[comemo::memoize]
|
|
fn deflate_deferred(content: Vec<u8>) -> Deferred<Vec<u8>> {
|
|
Deferred::new(move || deflate(&content))
|
|
}
|
|
|
|
/// Create a base64-encoded hash of the value.
|
|
fn hash_base64<T: Hash>(value: &T) -> String {
|
|
base64::engine::general_purpose::STANDARD
|
|
.encode(typst::util::hash128(value).to_be_bytes())
|
|
}
|
|
|
|
/// Converts a datetime to a pdf-writer date.
|
|
fn pdf_date(datetime: Datetime, tz: bool) -> Option<pdf_writer::Date> {
|
|
let year = datetime.year().filter(|&y| y >= 0)? as u16;
|
|
|
|
let mut pdf_date = pdf_writer::Date::new(year);
|
|
|
|
if let Some(month) = datetime.month() {
|
|
pdf_date = pdf_date.month(month);
|
|
}
|
|
|
|
if let Some(day) = datetime.day() {
|
|
pdf_date = pdf_date.day(day);
|
|
}
|
|
|
|
if let Some(h) = datetime.hour() {
|
|
pdf_date = pdf_date.hour(h);
|
|
}
|
|
|
|
if let Some(m) = datetime.minute() {
|
|
pdf_date = pdf_date.minute(m);
|
|
}
|
|
|
|
if let Some(s) = datetime.second() {
|
|
pdf_date = pdf_date.second(s);
|
|
}
|
|
|
|
if tz {
|
|
pdf_date = pdf_date.utc_offset_hour(0).utc_offset_minute(0);
|
|
}
|
|
|
|
Some(pdf_date)
|
|
}
|
|
|
|
/// Converts a datetime to an xmp-writer datetime.
|
|
fn xmp_date(datetime: Datetime, tz: bool) -> Option<xmp_writer::DateTime> {
|
|
let year = datetime.year().filter(|&y| y >= 0)? as u16;
|
|
Some(DateTime {
|
|
year,
|
|
month: datetime.month(),
|
|
day: datetime.day(),
|
|
hour: datetime.hour(),
|
|
minute: datetime.minute(),
|
|
second: datetime.second(),
|
|
timezone: if tz { Some(Timezone::Utc) } else { None },
|
|
})
|
|
}
|
|
|
|
/// Assigns new, consecutive PDF-internal indices to items.
|
|
struct Remapper<T> {
|
|
/// Forwards from the items to the pdf indices.
|
|
to_pdf: HashMap<T, usize>,
|
|
/// Backwards from the pdf indices to the items.
|
|
to_items: Vec<T>,
|
|
}
|
|
|
|
impl<T> Remapper<T>
|
|
where
|
|
T: Eq + Hash + Clone,
|
|
{
|
|
fn new() -> Self {
|
|
Self { to_pdf: HashMap::new(), to_items: vec![] }
|
|
}
|
|
|
|
fn insert(&mut self, item: T) -> usize {
|
|
let to_layout = &mut self.to_items;
|
|
*self.to_pdf.entry(item.clone()).or_insert_with(|| {
|
|
let pdf_index = to_layout.len();
|
|
to_layout.push(item);
|
|
pdf_index
|
|
})
|
|
}
|
|
|
|
fn pdf_indices<'a>(
|
|
&'a self,
|
|
refs: &'a [Ref],
|
|
) -> impl Iterator<Item = (Ref, usize)> + 'a {
|
|
refs.iter().copied().zip(0..self.to_pdf.len())
|
|
}
|
|
|
|
fn items(&self) -> impl Iterator<Item = &T> + '_ {
|
|
self.to_items.iter()
|
|
}
|
|
}
|
|
|
|
/// Additional methods for [`Abs`].
|
|
trait AbsExt {
|
|
/// Convert an to a number of points.
|
|
fn to_f32(self) -> f32;
|
|
}
|
|
|
|
impl AbsExt for Abs {
|
|
fn to_f32(self) -> f32 {
|
|
self.to_pt() as f32
|
|
}
|
|
}
|
|
|
|
/// Additional methods for [`Em`].
|
|
trait EmExt {
|
|
/// Convert an em length to a number of PDF font units.
|
|
fn to_font_units(self) -> f32;
|
|
}
|
|
|
|
impl EmExt for Em {
|
|
fn to_font_units(self) -> f32 {
|
|
1000.0 * self.get() as f32
|
|
}
|
|
}
|
|
|
|
/// Convert to an array of floats.
|
|
fn transform_to_array(ts: Transform) -> [f32; 6] {
|
|
[
|
|
ts.sx.get() as f32,
|
|
ts.ky.get() as f32,
|
|
ts.kx.get() as f32,
|
|
ts.sy.get() as f32,
|
|
ts.tx.to_f32(),
|
|
ts.ty.to_f32(),
|
|
]
|
|
}
|