diff --git a/crates/typst-pdf/src/catalog.rs b/crates/typst-pdf/src/catalog.rs new file mode 100644 index 000000000..7d52cc58d --- /dev/null +++ b/crates/typst-pdf/src/catalog.rs @@ -0,0 +1,278 @@ +use std::num::NonZeroUsize; + +use ecow::eco_format; +use pdf_writer::{ + types::Direction, writers::PageLabel, Finish, Name, Pdf, Ref, Str, TextStr, +}; +use xmp_writer::{DateTime, LangId, RenditionClass, Timezone, XmpWriter}; + +use typst::foundations::{Datetime, Smart}; +use typst::layout::Dir; +use typst::text::Lang; + +use crate::WithEverything; +use crate::{hash_base64, outline, page::PdfPageLabel}; + +/// Write the document catalog. +pub fn write_catalog( + ctx: WithEverything, + ident: Smart<&str>, + timestamp: Option, + pdf: &mut Pdf, + alloc: &mut Ref, +) { + let lang = ctx + .resources + .languages + .iter() + .max_by_key(|(_, &count)| count) + .map(|(&l, _)| l); + + let dir = if lang.map(Lang::dir) == Some(Dir::RTL) { + Direction::R2L + } else { + Direction::L2R + }; + + // Write the outline tree. + let outline_root_id = outline::write_outline(pdf, alloc, &ctx); + + // Write the page labels. + let page_labels = write_page_labels(pdf, alloc, &ctx); + + // Write the document information. + let info_ref = alloc.bump(); + let mut info = pdf.document_info(info_ref); + let mut xmp = XmpWriter::new(); + if let Some(title) = &ctx.document.title { + info.title(TextStr(title)); + xmp.title([(None, title.as_str())]); + } + + let authors = &ctx.document.author; + if !authors.is_empty() { + // Turns out that if the authors are given in both the document + // information dictionary and the XMP metadata, Acrobat takes a little + // bit of both: The first author from the document information + // dictionary and the remaining authors from the XMP metadata. + // + // To fix this for Acrobat, we could omit the remaining authors or all + // metadata from the document information catalog (it is optional) and + // only write XMP. However, not all other tools (including Apple + // Preview) read the XMP data. This means we do want to include all + // authors in the document information dictionary. + // + // Thus, the only alternative is to fold all authors into a single + // `` in the XMP metadata. This is, in fact, exactly what the + // PDF/A spec Part 1 section 6.7.3 has to say about the matter. It's a + // bit weird to not use the array (and it makes Acrobat show the author + // list in quotes), but there's not much we can do about that. + let joined = authors.join(", "); + info.author(TextStr(&joined)); + xmp.creator([joined.as_str()]); + } + + let creator = eco_format!("Typst {}", env!("CARGO_PKG_VERSION")); + info.creator(TextStr(&creator)); + xmp.creator_tool(&creator); + + let keywords = &ctx.document.keywords; + if !keywords.is_empty() { + let joined = keywords.join(", "); + info.keywords(TextStr(&joined)); + xmp.pdf_keywords(&joined); + } + + if let Some(date) = ctx.document.date.unwrap_or(timestamp) { + let tz = ctx.document.date.is_auto(); + if let Some(pdf_date) = pdf_date(date, tz) { + info.creation_date(pdf_date); + info.modified_date(pdf_date); + } + if let Some(xmp_date) = xmp_date(date, tz) { + xmp.create_date(xmp_date); + xmp.modify_date(xmp_date); + } + } + + info.finish(); + xmp.num_pages(ctx.document.pages.len() as u32); + xmp.format("application/pdf"); + xmp.language(ctx.resources.languages.keys().map(|lang| LangId(lang.as_str()))); + + // A unique ID for this instance of the document. Changes if anything + // changes in the frames. 
+ let instance_id = hash_base64(&pdf.as_bytes()); + + // Determine the document's ID. It should be as stable as possible. + const PDF_VERSION: &str = "PDF-1.7"; + let doc_id = if let Smart::Custom(ident) = ident { + // We were provided with a stable ID. Yay! + hash_base64(&(PDF_VERSION, ident)) + } else if ctx.document.title.is_some() && !ctx.document.author.is_empty() { + // If not provided from the outside, but title and author were given, we + // compute a hash of them, which should be reasonably stable and unique. + hash_base64(&(PDF_VERSION, &ctx.document.title, &ctx.document.author)) + } else { + // The user provided no usable metadata which we can use as an `/ID`. + instance_id.clone() + }; + + // Write IDs. + xmp.document_id(&doc_id); + xmp.instance_id(&instance_id); + pdf.set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes())); + + xmp.rendition_class(RenditionClass::Proof); + xmp.pdf_version("1.7"); + + let xmp_buf = xmp.finish(None); + let meta_ref = alloc.bump(); + pdf.stream(meta_ref, xmp_buf.as_bytes()) + .pair(Name(b"Type"), Name(b"Metadata")) + .pair(Name(b"Subtype"), Name(b"XML")); + + // Write the document catalog. + let catalog_ref = alloc.bump(); + let mut catalog = pdf.catalog(catalog_ref); + catalog.pages(ctx.page_tree_ref); + catalog.viewer_preferences().direction(dir); + catalog.metadata(meta_ref); + + // Write the named destination tree. + let mut name_dict = catalog.names(); + let mut dests_name_tree = name_dict.destinations(); + let mut names = dests_name_tree.names(); + for &(name, dest_ref, ..) in &ctx.references.named_destinations.dests { + names.insert(Str(name.as_str().as_bytes()), dest_ref); + } + names.finish(); + dests_name_tree.finish(); + name_dict.finish(); + + // Insert the page labels. + if !page_labels.is_empty() { + let mut num_tree = catalog.page_labels(); + let mut entries = num_tree.nums(); + for (n, r) in &page_labels { + entries.insert(n.get() as i32 - 1, *r); + } + } + + if let Some(outline_root_id) = outline_root_id { + catalog.outlines(outline_root_id); + } + + if let Some(lang) = lang { + catalog.lang(TextStr(lang.as_str())); + } + + catalog.finish(); +} + +/// Write the page labels. +pub(crate) fn write_page_labels( + chunk: &mut Pdf, + alloc: &mut Ref, + ctx: &WithEverything, +) -> Vec<(NonZeroUsize, Ref)> { + // If no exported page is labeled, we can skip writing the page labels. + if !ctx.pages.iter().filter_map(Option::as_ref).any(|p| { + p.label + .as_ref() + .is_some_and(|l| l.prefix.is_some() || l.style.is_some()) + }) { + return Vec::new(); + } + + let mut result = vec![]; + let empty_label = PdfPageLabel::default(); + let mut prev: Option<&PdfPageLabel> = None; + + // Skip non-exported pages for numbering. + for (i, page) in ctx.pages.iter().filter_map(Option::as_ref).enumerate() { + let nr = NonZeroUsize::new(1 + i).unwrap(); + // If there are pages with empty labels between labeled pages, we must + // write empty PageLabel entries. + let label = page.label.as_ref().unwrap_or(&empty_label); + + if let Some(pre) = prev { + if label.prefix == pre.prefix + && label.style == pre.style + && label.offset == pre.offset.map(|n| n.saturating_add(1)) + { + prev = Some(label); + continue; + } + } + + let id = alloc.bump(); + let mut entry = chunk.indirect(id).start::<PageLabel>(); + + // Only add what is actually provided. Don't add an empty prefix string + // if it wasn't given, for example.
+ if let Some(prefix) = &label.prefix { + entry.prefix(TextStr(prefix)); + } + + if let Some(style) = label.style { + entry.style(style.to_pdf_numbering_style()); + } + + if let Some(offset) = label.offset { + entry.offset(offset.get() as i32); + } + + result.push((nr, id)); + prev = Some(label); + } + + result +} + +/// Converts a datetime to a pdf-writer date. +fn pdf_date(datetime: Datetime, tz: bool) -> Option<pdf_writer::Date> { + let year = datetime.year().filter(|&y| y >= 0)? as u16; + + let mut pdf_date = pdf_writer::Date::new(year); + + if let Some(month) = datetime.month() { + pdf_date = pdf_date.month(month); + } + + if let Some(day) = datetime.day() { + pdf_date = pdf_date.day(day); + } + + if let Some(h) = datetime.hour() { + pdf_date = pdf_date.hour(h); + } + + if let Some(m) = datetime.minute() { + pdf_date = pdf_date.minute(m); + } + + if let Some(s) = datetime.second() { + pdf_date = pdf_date.second(s); + } + + if tz { + pdf_date = pdf_date.utc_offset_hour(0).utc_offset_minute(0); + } + + Some(pdf_date) +} + +/// Converts a datetime to an xmp-writer datetime. +fn xmp_date(datetime: Datetime, tz: bool) -> Option<DateTime> { + let year = datetime.year().filter(|&y| y >= 0)? as u16; + Some(DateTime { + year, + month: datetime.month(), + day: datetime.day(), + hour: datetime.hour(), + minute: datetime.minute(), + second: datetime.second(), + timezone: if tz { Some(Timezone::Utc) } else { None }, + }) +} diff --git a/crates/typst-pdf/src/color.rs b/crates/typst-pdf/src/color.rs index 4c35d9a23..dbd07d636 100644 --- a/crates/typst-pdf/src/color.rs +++ b/crates/typst-pdf/src/color.rs @@ -1,10 +1,8 @@ use once_cell::sync::Lazy; -use pdf_writer::types::DeviceNSubtype; -use pdf_writer::{writers, Chunk, Dict, Filter, Name, Ref}; +use pdf_writer::{types::DeviceNSubtype, writers, Chunk, Dict, Filter, Name, Ref}; use typst::visualize::{Color, ColorSpace, Paint}; -use crate::deflate; -use crate::page::{PageContext, Transforms}; +use crate::{content, deflate, PdfChunk, Renumber, WithResources}; // The names of the color spaces. pub const SRGB: Name<'static> = Name(b"srgb"); @@ -30,118 +28,166 @@ static OKLAB_DEFLATED: Lazy<Vec<u8>> = /// The color spaces present in the PDF document #[derive(Default)] pub struct ColorSpaces { - oklab: Option<Ref>, - srgb: Option<Ref>, - d65_gray: Option<Ref>, + use_oklab: bool, + use_srgb: bool, + use_d65_gray: bool, use_linear_rgb: bool, } impl ColorSpaces { - /// Get a reference to the oklab color space. - /// - /// # Warning - /// The A and B components of the color must be offset by +0.4 before being - /// encoded into the PDF file. - pub fn oklab(&mut self, alloc: &mut Ref) -> Ref { - *self.oklab.get_or_insert_with(|| alloc.bump()) - } - - /// Get a reference to the srgb color space. - pub fn srgb(&mut self, alloc: &mut Ref) -> Ref { - *self.srgb.get_or_insert_with(|| alloc.bump()) - } - - /// Get a reference to the gray color space. - pub fn d65_gray(&mut self, alloc: &mut Ref) -> Ref { - *self.d65_gray.get_or_insert_with(|| alloc.bump()) - } - - /// Mark linear RGB as used. - pub fn linear_rgb(&mut self) { - self.use_linear_rgb = true; - } - - /// Write the color space on usage. - pub fn write( - &mut self, - color_space: ColorSpace, - writer: writers::ColorSpace, - alloc: &mut Ref, - ) { + /// Mark a color space as used.
+ pub fn mark_as_used(&mut self, color_space: ColorSpace) { match color_space { - ColorSpace::Oklab | ColorSpace::Hsl | ColorSpace::Hsv => { - let mut oklab = writer.device_n([OKLAB_L, OKLAB_A, OKLAB_B]); - self.write(ColorSpace::LinearRgb, oklab.alternate_color_space(), alloc); - oklab.tint_ref(self.oklab(alloc)); - oklab.attrs().subtype(DeviceNSubtype::DeviceN); + ColorSpace::Oklch | ColorSpace::Oklab | ColorSpace::Hsl | ColorSpace::Hsv => { + self.use_oklab = true; + self.use_linear_rgb = true; + } + ColorSpace::Srgb => { + self.use_srgb = true; + } + ColorSpace::D65Gray => { + self.use_d65_gray = true; } - ColorSpace::Oklch => self.write(ColorSpace::Oklab, writer, alloc), - ColorSpace::Srgb => writer.icc_based(self.srgb(alloc)), - ColorSpace::D65Gray => writer.icc_based(self.d65_gray(alloc)), ColorSpace::LinearRgb => { - writer.cal_rgb( - [0.9505, 1.0, 1.0888], - None, - Some([1.0, 1.0, 1.0]), - Some([ - 0.4124, 0.2126, 0.0193, 0.3576, 0.715, 0.1192, 0.1805, 0.0722, - 0.9505, - ]), - ); + self.use_linear_rgb = true; } - ColorSpace::Cmyk => writer.device_cmyk(), + ColorSpace::Cmyk => {} } } - // Write the color spaces to the PDF file. - pub fn write_color_spaces(&mut self, mut spaces: Dict, alloc: &mut Ref) { - if self.oklab.is_some() { - self.write(ColorSpace::Oklab, spaces.insert(OKLAB).start(), alloc); + /// Write the color spaces to the PDF file. + pub fn write_color_spaces(&self, mut spaces: Dict, refs: &ColorFunctionRefs) { + if self.use_oklab { + write(ColorSpace::Oklab, spaces.insert(OKLAB).start(), refs); } - if self.srgb.is_some() { - self.write(ColorSpace::Srgb, spaces.insert(SRGB).start(), alloc); + if self.use_srgb { + write(ColorSpace::Srgb, spaces.insert(SRGB).start(), refs); } - if self.d65_gray.is_some() { - self.write(ColorSpace::D65Gray, spaces.insert(D65_GRAY).start(), alloc); + if self.use_d65_gray { + write(ColorSpace::D65Gray, spaces.insert(D65_GRAY).start(), refs); } if self.use_linear_rgb { - self.write(ColorSpace::LinearRgb, spaces.insert(LINEAR_SRGB).start(), alloc); + write(ColorSpace::LinearRgb, spaces.insert(LINEAR_SRGB).start(), refs); } } /// Write the necessary color spaces functions and ICC profiles to the /// PDF file. - pub fn write_functions(&self, chunk: &mut Chunk) { + pub fn write_functions(&self, chunk: &mut Chunk, refs: &ColorFunctionRefs) { // Write the Oklab function & color space. - if let Some(oklab) = self.oklab { + if self.use_oklab { chunk - .post_script_function(oklab, &OKLAB_DEFLATED) + .post_script_function(refs.oklab.unwrap(), &OKLAB_DEFLATED) .domain([0.0, 1.0, 0.0, 1.0, 0.0, 1.0]) .range([0.0, 1.0, 0.0, 1.0, 0.0, 1.0]) .filter(Filter::FlateDecode); } // Write the sRGB color space. - if let Some(srgb) = self.srgb { + if self.use_srgb { chunk - .icc_profile(srgb, &SRGB_ICC_DEFLATED) + .icc_profile(refs.srgb.unwrap(), &SRGB_ICC_DEFLATED) .n(3) .range([0.0, 1.0, 0.0, 1.0, 0.0, 1.0]) .filter(Filter::FlateDecode); } // Write the gray color space. - if let Some(gray) = self.d65_gray { + if self.use_d65_gray { chunk - .icc_profile(gray, &GRAY_ICC_DEFLATED) + .icc_profile(refs.d65_gray.unwrap(), &GRAY_ICC_DEFLATED) .n(1) .range([0.0, 1.0]) .filter(Filter::FlateDecode); } } + + /// Merge two color space usage information together: a given color space is + /// considered to be used if it is used on either side. 
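The new model separates recording which color spaces a resource dictionary needs from allocating the PDF objects that back them. Below is a minimal sketch of how this is meant to be driven, assuming only the crate-internal `ColorSpaces` API introduced in this patch (the variable names are illustrative, not part of the patch):

```rust
use typst::visualize::ColorSpace;

fn track_color_spaces() -> ColorSpaces {
    // `mark_as_used` only records usage; no `Ref` is allocated here.
    let mut page = ColorSpaces::default();
    page.mark_as_used(ColorSpace::Oklch); // implies Oklab + linear RGB

    let mut pattern = ColorSpaces::default();
    pattern.mark_as_used(ColorSpace::Srgb);

    // Usage gathered from separate resource dictionaries is folded together
    // before `alloc_color_functions_refs` hands out the shared `Ref`s.
    page.merge(&pattern);
    page
}
```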
+ pub fn merge(&mut self, other: &Self) { + self.use_d65_gray |= other.use_d65_gray; + self.use_linear_rgb |= other.use_linear_rgb; + self.use_oklab |= other.use_oklab; + self.use_srgb |= other.use_srgb; + } +} + +/// Write the color space. +pub fn write( + color_space: ColorSpace, + writer: writers::ColorSpace, + refs: &ColorFunctionRefs, +) { + match color_space { + ColorSpace::Oklab | ColorSpace::Hsl | ColorSpace::Hsv => { + let mut oklab = writer.device_n([OKLAB_L, OKLAB_A, OKLAB_B]); + write(ColorSpace::LinearRgb, oklab.alternate_color_space(), refs); + oklab.tint_ref(refs.oklab.unwrap()); + oklab.attrs().subtype(DeviceNSubtype::DeviceN); + } + ColorSpace::Oklch => write(ColorSpace::Oklab, writer, refs), + ColorSpace::Srgb => writer.icc_based(refs.srgb.unwrap()), + ColorSpace::D65Gray => writer.icc_based(refs.d65_gray.unwrap()), + ColorSpace::LinearRgb => { + writer.cal_rgb( + [0.9505, 1.0, 1.0888], + None, + Some([1.0, 1.0, 1.0]), + Some([ + 0.4124, 0.2126, 0.0193, 0.3576, 0.715, 0.1192, 0.1805, 0.0722, 0.9505, + ]), + ); + } + ColorSpace::Cmyk => writer.device_cmyk(), + } +} + +/// Global references for color conversion functions. +/// +/// These functions are only written once (at most; they are not written if not +/// needed) in the final document, and are shared by all color space +/// dictionaries. +pub struct ColorFunctionRefs { + oklab: Option<Ref>, + srgb: Option<Ref>, + d65_gray: Option<Ref>, +} + +impl Renumber for ColorFunctionRefs { + fn renumber(&mut self, offset: i32) { + if let Some(r) = &mut self.oklab { + r.renumber(offset); + } + if let Some(r) = &mut self.srgb { + r.renumber(offset); + } + if let Some(r) = &mut self.d65_gray { + r.renumber(offset); + } + } +} + +/// Allocate all necessary [`ColorFunctionRefs`]. +pub fn alloc_color_functions_refs( + context: &WithResources, +) -> (PdfChunk, ColorFunctionRefs) { + let mut chunk = PdfChunk::new(); + let mut used_color_spaces = ColorSpaces::default(); + + context.resources.traverse(&mut |r| { + used_color_spaces.merge(&r.colors); + }); + + let refs = ColorFunctionRefs { + oklab: if used_color_spaces.use_oklab { Some(chunk.alloc()) } else { None }, + srgb: if used_color_spaces.use_srgb { Some(chunk.alloc()) } else { None }, + d65_gray: if used_color_spaces.use_d65_gray { Some(chunk.alloc()) } else { None }, + }; + + (chunk, refs) } /// This function removes comments, line spaces and carriage returns from a @@ -202,14 +248,29 @@ impl ColorEncode for ColorSpace { /// Encodes a paint into either a fill or stroke color. pub(super) trait PaintEncode { /// Set the paint as the fill color. - fn set_as_fill(&self, ctx: &mut PageContext, on_text: bool, transforms: Transforms); + fn set_as_fill( + &self, + ctx: &mut content::Builder, + on_text: bool, + transforms: content::Transforms, + ); /// Set the paint as the stroke color.
- fn set_as_stroke(&self, ctx: &mut PageContext, on_text: bool, transforms: Transforms); + fn set_as_stroke( + &self, + ctx: &mut content::Builder, + on_text: bool, + transforms: content::Transforms, + ); } impl PaintEncode for Paint { - fn set_as_fill(&self, ctx: &mut PageContext, on_text: bool, transforms: Transforms) { + fn set_as_fill( + &self, + ctx: &mut content::Builder, + on_text: bool, + transforms: content::Transforms, + ) { match self { Self::Solid(c) => c.set_as_fill(ctx, on_text, transforms), Self::Gradient(gradient) => gradient.set_as_fill(ctx, on_text, transforms), @@ -219,9 +280,9 @@ impl PaintEncode for Paint { fn set_as_stroke( &self, - ctx: &mut PageContext, + ctx: &mut content::Builder, on_text: bool, - transforms: Transforms, + transforms: content::Transforms, ) { match self { Self::Solid(c) => c.set_as_stroke(ctx, on_text, transforms), @@ -232,10 +293,10 @@ impl PaintEncode for Paint { } impl PaintEncode for Color { - fn set_as_fill(&self, ctx: &mut PageContext, _: bool, _: Transforms) { + fn set_as_fill(&self, ctx: &mut content::Builder, _: bool, _: content::Transforms) { match self { Color::Luma(_) => { - ctx.parent.colors.d65_gray(&mut ctx.parent.alloc); + ctx.resources.colors.mark_as_used(ColorSpace::D65Gray); ctx.set_fill_color_space(D65_GRAY); let [l, _, _, _] = ColorSpace::D65Gray.encode(*self); @@ -243,21 +304,21 @@ impl PaintEncode for Color { } // Oklch is converted to Oklab. Color::Oklab(_) | Color::Oklch(_) | Color::Hsl(_) | Color::Hsv(_) => { - ctx.parent.colors.oklab(&mut ctx.parent.alloc); + ctx.resources.colors.mark_as_used(ColorSpace::Oklab); ctx.set_fill_color_space(OKLAB); let [l, a, b, _] = ColorSpace::Oklab.encode(*self); ctx.content.set_fill_color([l, a, b]); } Color::LinearRgb(_) => { - ctx.parent.colors.linear_rgb(); + ctx.resources.colors.mark_as_used(ColorSpace::LinearRgb); ctx.set_fill_color_space(LINEAR_SRGB); let [r, g, b, _] = ColorSpace::LinearRgb.encode(*self); ctx.content.set_fill_color([r, g, b]); } Color::Rgb(_) => { - ctx.parent.colors.srgb(&mut ctx.parent.alloc); + ctx.resources.colors.mark_as_used(ColorSpace::Srgb); ctx.set_fill_color_space(SRGB); let [r, g, b, _] = ColorSpace::Srgb.encode(*self); @@ -272,10 +333,10 @@ impl PaintEncode for Color { } } - fn set_as_stroke(&self, ctx: &mut PageContext, _: bool, _: Transforms) { + fn set_as_stroke(&self, ctx: &mut content::Builder, _: bool, _: content::Transforms) { match self { Color::Luma(_) => { - ctx.parent.colors.d65_gray(&mut ctx.parent.alloc); + ctx.resources.colors.mark_as_used(ColorSpace::D65Gray); ctx.set_stroke_color_space(D65_GRAY); let [l, _, _, _] = ColorSpace::D65Gray.encode(*self); @@ -283,21 +344,21 @@ impl PaintEncode for Color { } // Oklch is converted to Oklab. 
Color::Oklab(_) | Color::Oklch(_) | Color::Hsl(_) | Color::Hsv(_) => { - ctx.parent.colors.oklab(&mut ctx.parent.alloc); + ctx.resources.colors.mark_as_used(ColorSpace::Oklab); ctx.set_stroke_color_space(OKLAB); let [l, a, b, _] = ColorSpace::Oklab.encode(*self); ctx.content.set_stroke_color([l, a, b]); } Color::LinearRgb(_) => { - ctx.parent.colors.linear_rgb(); + ctx.resources.colors.mark_as_used(ColorSpace::LinearRgb); ctx.set_stroke_color_space(LINEAR_SRGB); let [r, g, b, _] = ColorSpace::LinearRgb.encode(*self); ctx.content.set_stroke_color([r, g, b]); } Color::Rgb(_) => { - ctx.parent.colors.srgb(&mut ctx.parent.alloc); + ctx.resources.colors.mark_as_used(ColorSpace::Srgb); ctx.set_stroke_color_space(SRGB); let [r, g, b, _] = ColorSpace::Srgb.encode(*self); diff --git a/crates/typst-pdf/src/color_font.rs b/crates/typst-pdf/src/color_font.rs new file mode 100644 index 000000000..641fe1560 --- /dev/null +++ b/crates/typst-pdf/src/color_font.rs @@ -0,0 +1,312 @@ +//! OpenType fonts generally define monochrome glyphs, but they can also define +//! glyphs with colors. This is how emojis are generally implemented for +//! example. +//! +//! There are various standards to represent color glyphs, but PDF readers don't +//! support any of them natively, so Typst has to handle them manually. + +use std::collections::HashMap; + +use ecow::eco_format; +use indexmap::IndexMap; +use pdf_writer::Filter; +use pdf_writer::{types::UnicodeCmap, Finish, Name, Rect, Ref}; +use ttf_parser::name_id; + +use typst::layout::Em; +use typst::text::{color::frame_for_glyph, Font}; + +use crate::resources::{Resources, ResourcesRefs}; +use crate::WithGlobalRefs; +use crate::{ + content, + font::{subset_tag, write_font_descriptor, CMAP_NAME, SYSTEM_INFO}, + EmExt, PdfChunk, +}; + +/// Write color fonts in the PDF document. +/// +/// They are written as Type3 fonts, which map glyph IDs to arbitrary PDF +/// instructions. +pub fn write_color_fonts( + context: &WithGlobalRefs, +) -> (PdfChunk, HashMap) { + let mut out = HashMap::new(); + let mut chunk = PdfChunk::new(); + context.resources.traverse(&mut |resources: &Resources| { + let Some(color_fonts) = &resources.color_fonts else { + return; + }; + + for (color_font, font_slice) in color_fonts.iter() { + if out.contains_key(&font_slice) { + continue; + } + + // Allocate some IDs. + let subfont_id = chunk.alloc(); + let cmap_ref = chunk.alloc(); + let descriptor_ref = chunk.alloc(); + let widths_ref = chunk.alloc(); + + // And a map between glyph IDs and the instructions to draw this + // glyph. + let mut glyphs_to_instructions = Vec::new(); + + let start = font_slice.subfont * 256; + let end = (start + 256).min(color_font.glyphs.len()); + let glyph_count = end - start; + let subset = &color_font.glyphs[start..end]; + let mut widths = Vec::new(); + let mut gids = Vec::new(); + + let scale_factor = font_slice.font.ttf().units_per_em() as f32; + + // Write the instructions for each glyph. + for color_glyph in subset { + let instructions_stream_ref = chunk.alloc(); + let width = font_slice + .font + .advance(color_glyph.gid) + .unwrap_or(Em::new(0.0)) + .to_font_units(); + widths.push(width); + chunk + .stream( + instructions_stream_ref, + color_glyph.instructions.content.wait(), + ) + .filter(Filter::FlateDecode); + + // Use this stream as instructions to draw the glyph. + glyphs_to_instructions.push(instructions_stream_ref); + gids.push(color_glyph.gid); + } + + // Write the Type3 font object. 
+ let mut pdf_font = chunk.type3_font(subfont_id); + pdf_font.pair(Name(b"Resources"), color_fonts.resources.reference); + pdf_font.bbox(color_font.bbox); + pdf_font.matrix([1.0 / scale_factor, 0.0, 0.0, 1.0 / scale_factor, 0.0, 0.0]); + pdf_font.first_char(0); + pdf_font.last_char((glyph_count - 1) as u8); + pdf_font.pair(Name(b"Widths"), widths_ref); + pdf_font.to_unicode(cmap_ref); + pdf_font.font_descriptor(descriptor_ref); + + // Write the /CharProcs dictionary, which maps glyph names to + // drawing instructions. + let mut char_procs = pdf_font.char_procs(); + for (gid, instructions_ref) in glyphs_to_instructions.iter().enumerate() { + char_procs + .pair(Name(eco_format!("glyph{gid}").as_bytes()), *instructions_ref); + } + char_procs.finish(); + + // Write the /Encoding dictionary. + let names = (0..glyph_count) + .map(|gid| eco_format!("glyph{gid}")) + .collect::<Vec<_>>(); + pdf_font + .encoding_custom() + .differences() + .consecutive(0, names.iter().map(|name| Name(name.as_bytes()))); + pdf_font.finish(); + + // Encode a CMAP to make it possible to search or copy glyphs. + let glyph_set = resources.glyph_sets.get(&font_slice.font).unwrap(); + let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO); + for (index, glyph) in subset.iter().enumerate() { + let Some(text) = glyph_set.get(&glyph.gid) else { + continue; + }; + + if !text.is_empty() { + cmap.pair_with_multiple(index as u8, text.chars()); + } + } + chunk.cmap(cmap_ref, &cmap.finish()); + + // Write the font descriptor. + gids.sort(); + let subset_tag = subset_tag(&gids); + let postscript_name = font_slice + .font + .find_name(name_id::POST_SCRIPT_NAME) + .unwrap_or_else(|| "unknown".to_string()); + let base_font = eco_format!("{subset_tag}+{postscript_name}"); + write_font_descriptor( + &mut chunk, + descriptor_ref, + &font_slice.font, + &base_font, + ); + + // Write the widths array + chunk.indirect(widths_ref).array().items(widths); + + out.insert(font_slice, subfont_id); + } + }); + + (chunk, out) +} + +/// A mapping between `Font`s and all the corresponding `ColorFont`s. +/// +/// This mapping is one-to-many because there can only be 256 glyphs in a Type 3 +/// font, and fonts generally have more color glyphs than that. +pub struct ColorFontMap<R> { + /// The mapping itself. + map: IndexMap<Font, ColorFont>, + /// The resources required to render the fonts in this map. + /// + /// For example, this can be the images for glyphs based on bitmaps or SVG. + pub resources: Resources<R>, + /// The number of font slices (groups of 256 color glyphs), across all color + /// fonts. + total_slice_count: usize, +} + +/// A collection of Type3 fonts belonging to the same TTF font. +pub struct ColorFont { + /// The IDs of each sub-slice of this font. They are the numbers after "Cf" + /// in the Resources dictionaries. + slice_ids: Vec<usize>, + /// The list of all color glyphs in this family. + /// + /// The index in this vector modulo 256 corresponds to the index in one of + /// the Type3 fonts in `slice_ids` (the `n`-th in the vector, where `n` is the + /// quotient of the index divided by 256). + pub glyphs: Vec<ColorGlyph>, + /// The global bounding box of the font. + pub bbox: Rect, + /// A mapping between glyph IDs and character indices in the `glyphs` + /// vector. + glyph_indices: HashMap<u16, usize>, +} + +/// A single color glyph. +pub struct ColorGlyph { + /// The ID of the glyph. + pub gid: u16, + /// Instructions to draw the glyph.
+ pub instructions: content::Encoded, +} + +impl ColorFontMap<()> { + /// Creates a new empty mapping + pub fn new() -> Self { + Self { + map: IndexMap::new(), + total_slice_count: 0, + resources: Resources::default(), + } + } + + /// For a given glyph in a TTF font, give the ID of the Type3 font and the + /// index of the glyph inside of this Type3 font. + /// + /// If this is the first occurrence of this glyph in this font, it will + /// start its encoding and add it to the list of known glyphs. + pub fn get(&mut self, font: &Font, gid: u16) -> (usize, u8) { + let color_font = self.map.entry(font.clone()).or_insert_with(|| { + let global_bbox = font.ttf().global_bounding_box(); + let bbox = Rect::new( + font.to_em(global_bbox.x_min).to_font_units(), + font.to_em(global_bbox.y_min).to_font_units(), + font.to_em(global_bbox.x_max).to_font_units(), + font.to_em(global_bbox.y_max).to_font_units(), + ); + ColorFont { + bbox, + slice_ids: Vec::new(), + glyphs: Vec::new(), + glyph_indices: HashMap::new(), + } + }); + + if let Some(index_of_glyph) = color_font.glyph_indices.get(&gid) { + // If we already know this glyph, return it. + (color_font.slice_ids[index_of_glyph / 256], *index_of_glyph as u8) + } else { + // Otherwise, allocate a new ColorGlyph in the font, and a new Type3 font + // if needed + let index = color_font.glyphs.len(); + if index % 256 == 0 { + color_font.slice_ids.push(self.total_slice_count); + self.total_slice_count += 1; + } + + let frame = frame_for_glyph(font, gid); + let width = font.advance(gid).unwrap_or(Em::new(0.0)).to_font_units(); + let instructions = content::build(&mut self.resources, &frame, Some(width)); + color_font.glyphs.push(ColorGlyph { gid, instructions }); + color_font.glyph_indices.insert(gid, index); + + (color_font.slice_ids[index / 256], index as u8) + } + } + + /// Assign references to the resource dictionary used by this set of color + /// fonts. + pub fn with_refs(self, refs: &ResourcesRefs) -> ColorFontMap<Ref> { + ColorFontMap { + map: self.map, + resources: self.resources.with_refs(refs), + total_slice_count: self.total_slice_count, + } + } +} + +impl<R> ColorFontMap<R> { + /// Iterate over all Type3 fonts. + /// + /// Each item of this iterator maps to a Type3 font: it contains + /// at most 256 glyphs. The same TTF font can yield multiple Type3 fonts. + pub fn iter(&self) -> ColorFontMapIter<'_, R> { + ColorFontMapIter { map: self, font_index: 0, slice_index: 0 } + } +} + +/// Iterator over a [`ColorFontMap`]. +/// +/// See [`ColorFontMap::iter`]. +pub struct ColorFontMapIter<'a, R> { + /// The map over which to iterate + map: &'a ColorFontMap<R>, + /// The index of the TTF font on which we currently iterate + font_index: usize, + /// The sub-font (slice of at most 256 glyphs) at which we currently are. + slice_index: usize, +} + +impl<'a, R> Iterator for ColorFontMapIter<'a, R> { + type Item = (&'a ColorFont, ColorFontSlice); + + fn next(&mut self) -> Option<Self::Item> { + let (font, color_font) = self.map.map.get_index(self.font_index)?; + let slice_count = (color_font.glyphs.len() / 256) + 1; + + if self.slice_index >= slice_count { + self.font_index += 1; + self.slice_index = 0; + return self.next(); + } + + let slice = ColorFontSlice { font: font.clone(), subfont: self.slice_index }; + self.slice_index += 1; + Some((color_font, slice)) + } +} + +/// A set of at most 256 glyphs (a limit imposed on Type3 fonts by the PDF +/// specification) that represents a part of a TTF font.
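The slicing arithmetic behind `ColorFontMap::get` and `ColorFontSlice` can be summarized in a small standalone sketch (illustrative only; in the real code the slice numbers used in the `Cf{n}` resource names are additionally allocated globally, across all TTF fonts):

```rust
/// The i-th color glyph of a given TTF font goes into that font's
/// (i / 256)-th slice and is shown with the single-byte code i % 256.
fn slice_position(index_in_font: usize) -> (usize, u8) {
    (index_in_font / 256, (index_in_font % 256) as u8)
}

fn main() {
    assert_eq!(slice_position(0), (0, 0));
    assert_eq!(slice_position(300), (1, 44));
}
```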
+#[derive(PartialEq, Eq, Hash, Debug, Clone)] +pub struct ColorFontSlice { + /// The original TTF font. + pub font: Font, + /// The index of the Type3 font, among all those that are necessary to + /// represent the subset of the TTF font we are interested in. + pub subfont: usize, +} diff --git a/crates/typst-pdf/src/content.rs b/crates/typst-pdf/src/content.rs new file mode 100644 index 000000000..c5327c188 --- /dev/null +++ b/crates/typst-pdf/src/content.rs @@ -0,0 +1,712 @@ +//! Generic writer for PDF content. +//! +//! It is used to write page contents, color glyph instructions, and patterns. +//! +//! See also [`pdf_writer::Content`]. + +use ecow::eco_format; +use pdf_writer::{ + types::{ColorSpaceOperand, LineCapStyle, LineJoinStyle, TextRenderingMode}, + Content, Finish, Name, Rect, Str, +}; +use typst::layout::{ + Abs, Em, Frame, FrameItem, GroupItem, Point, Ratio, Size, Transform, +}; +use typst::model::Destination; +use typst::text::{color::is_color_glyph, Font, TextItem, TextItemView}; +use typst::utils::{Deferred, Numeric, SliceExt}; +use typst::visualize::{ + FixedStroke, Geometry, Image, LineCap, LineJoin, Paint, Path, PathItem, Shape, +}; + +use crate::color_font::ColorFontMap; +use crate::extg::ExtGState; +use crate::image::deferred_image; +use crate::{color::PaintEncode, resources::Resources}; +use crate::{deflate_deferred, AbsExt, EmExt}; + +/// Encode a [`Frame`] into a content stream. +/// +/// The resources that were used in the stream will be added to `resources`. +/// +/// `color_glyph_width` should be `None` unless the `Frame` represents a [color +/// glyph]. +/// +/// [color glyph]: `crate::color_font` +pub fn build( + resources: &mut Resources<()>, + frame: &Frame, + color_glyph_width: Option<f32>, +) -> Encoded { + let size = frame.size(); + let mut ctx = Builder::new(resources, size); + + if let Some(width) = color_glyph_width { + ctx.content.start_color_glyph(width); + } + + // Make the coordinate system start at the top-left. + ctx.transform( + // Make the Y axis go upwards + Transform::scale(Ratio::one(), -Ratio::one()) + // Also move the origin to the top left corner + .post_concat(Transform::translate(Abs::zero(), size.y)), + ); + + // Encode the frame into the content stream. + write_frame(&mut ctx, frame); + + Encoded { + size, + content: deflate_deferred(ctx.content.finish()), + uses_opacities: ctx.uses_opacities, + links: ctx.links, + } +} + +/// An encoded content stream. +pub struct Encoded { + /// The dimensions of the content. + pub size: Size, + /// The actual content stream. + pub content: Deferred<Vec<u8>>, + /// Whether the content uses opacities. + pub uses_opacities: bool, + /// Links in the PDF coordinate system. + pub links: Vec<(Destination, Rect)>, +} + +/// An exporter for a single PDF content stream. +/// +/// Content streams are a series of PDF commands. They can reference external +/// objects only through resources. +/// +/// Content streams can be used for page contents, but also to describe color +/// glyphs and patterns. +pub struct Builder<'a, R = ()> { + /// A list of all resources that are used in the content stream. + pub(crate) resources: &'a mut Resources<R>, + /// The PDF content stream that is being built. + pub content: Content, + /// Current graphic state. + state: State, + /// Stack of saved graphic states. + saves: Vec<State>, + /// Whether any stroke or fill was not totally opaque. + uses_opacities: bool, + /// All clickable links that are present in this content.
+ links: Vec<(Destination, Rect)>, +} + +impl<'a, R> Builder<'a, R> { + /// Create a new content builder. + pub fn new(resources: &'a mut Resources<R>, size: Size) -> Self { + Builder { + resources, + uses_opacities: false, + content: Content::new(), + state: State::new(size), + saves: vec![], + links: vec![], + } + } +} + +/// A simulated graphics state used to deduplicate graphics state changes and +/// keep track of the current transformation matrix for link annotations. +#[derive(Debug, Clone)] +struct State { + /// The transform of the current item. + transform: Transform, + /// The transform of the first hard frame in the hierarchy. + container_transform: Transform, + /// The size of the first hard frame in the hierarchy. + size: Size, + /// The current font. + font: Option<(Font, Abs)>, + /// The current fill paint. + fill: Option<Paint>, + /// The color space of the current fill paint. + fill_space: Option<Name<'static>>, + /// The current external graphics state. + external_graphics_state: Option<ExtGState>, + /// The current stroke paint. + stroke: Option<FixedStroke>, + /// The color space of the current stroke paint. + stroke_space: Option<Name<'static>>, + /// The current text rendering mode. + text_rendering_mode: TextRenderingMode, +} + +impl State { + /// Creates a new, clean state for a given `size`. + pub fn new(size: Size) -> Self { + Self { + transform: Transform::identity(), + container_transform: Transform::identity(), + size, + font: None, + fill: None, + fill_space: None, + external_graphics_state: None, + stroke: None, + stroke_space: None, + text_rendering_mode: TextRenderingMode::Fill, + } + } + + /// Creates the [`Transforms`] structure for the current item. + pub fn transforms(&self, size: Size, pos: Point) -> Transforms { + Transforms { + transform: self.transform.pre_concat(Transform::translate(pos.x, pos.y)), + container_transform: self.container_transform, + container_size: self.size, + size, + } + } +} + +/// Subset of the state used to calculate the transform of gradients and patterns. +#[derive(Debug, Clone, Copy)] +pub(super) struct Transforms { + /// The transform of the current item. + pub transform: Transform, + /// The transform of the first hard frame in the hierarchy. + pub container_transform: Transform, + /// The size of the first hard frame in the hierarchy. + pub container_size: Size, + /// The size of the item.
+ pub size: Size, +} + +impl Builder<'_, ()> { + fn save_state(&mut self) { + self.saves.push(self.state.clone()); + self.content.save_state(); + } + + fn restore_state(&mut self) { + self.content.restore_state(); + self.state = self.saves.pop().expect("missing state save"); + } + + fn set_external_graphics_state(&mut self, graphics_state: &ExtGState) { + let current_state = self.state.external_graphics_state.as_ref(); + if current_state != Some(graphics_state) { + let index = self.resources.ext_gs.insert(*graphics_state); + let name = eco_format!("Gs{index}"); + self.content.set_parameters(Name(name.as_bytes())); + + if graphics_state.uses_opacities() { + self.uses_opacities = true; + } + } + } + + fn set_opacities(&mut self, stroke: Option<&FixedStroke>, fill: Option<&Paint>) { + let stroke_opacity = stroke + .map(|stroke| { + let color = match &stroke.paint { + Paint::Solid(color) => *color, + Paint::Gradient(_) | Paint::Pattern(_) => return 255, + }; + + color.alpha().map_or(255, |v| (v * 255.0).round() as u8) + }) + .unwrap_or(255); + let fill_opacity = fill + .map(|paint| { + let color = match paint { + Paint::Solid(color) => *color, + Paint::Gradient(_) | Paint::Pattern(_) => return 255, + }; + + color.alpha().map_or(255, |v| (v * 255.0).round() as u8) + }) + .unwrap_or(255); + self.set_external_graphics_state(&ExtGState { stroke_opacity, fill_opacity }); + } + + pub fn transform(&mut self, transform: Transform) { + let Transform { sx, ky, kx, sy, tx, ty } = transform; + self.state.transform = self.state.transform.pre_concat(transform); + if self.state.container_transform.is_identity() { + self.state.container_transform = self.state.transform; + } + self.content.transform([ + sx.get() as _, + ky.get() as _, + kx.get() as _, + sy.get() as _, + tx.to_f32(), + ty.to_f32(), + ]); + } + + fn group_transform(&mut self, transform: Transform) { + self.state.container_transform = + self.state.container_transform.pre_concat(transform); + } + + fn set_font(&mut self, font: &Font, size: Abs) { + if self.state.font.as_ref().map(|(f, s)| (f, *s)) != Some((font, size)) { + let index = self.resources.fonts.insert(font.clone()); + let name = eco_format!("F{index}"); + self.content.set_font(Name(name.as_bytes()), size.to_f32()); + self.state.font = Some((font.clone(), size)); + } + } + + fn size(&mut self, size: Size) { + self.state.size = size; + } + + fn set_fill(&mut self, fill: &Paint, on_text: bool, transforms: Transforms) { + if self.state.fill.as_ref() != Some(fill) + || matches!(self.state.fill, Some(Paint::Gradient(_))) + { + fill.set_as_fill(self, on_text, transforms); + self.state.fill = Some(fill.clone()); + } + } + + pub fn set_fill_color_space(&mut self, space: Name<'static>) { + if self.state.fill_space != Some(space) { + self.content.set_fill_color_space(ColorSpaceOperand::Named(space)); + self.state.fill_space = Some(space); + } + } + + pub fn reset_fill_color_space(&mut self) { + self.state.fill_space = None; + } + + fn set_stroke( + &mut self, + stroke: &FixedStroke, + on_text: bool, + transforms: Transforms, + ) { + if self.state.stroke.as_ref() != Some(stroke) + || matches!( + self.state.stroke.as_ref().map(|s| &s.paint), + Some(Paint::Gradient(_)) + ) + { + let FixedStroke { paint, thickness, cap, join, dash, miter_limit } = stroke; + paint.set_as_stroke(self, on_text, transforms); + + self.content.set_line_width(thickness.to_f32()); + if self.state.stroke.as_ref().map(|s| &s.cap) != Some(cap) { + self.content.set_line_cap(to_pdf_line_cap(*cap)); + } + if 
self.state.stroke.as_ref().map(|s| &s.join) != Some(join) { + self.content.set_line_join(to_pdf_line_join(*join)); + } + if self.state.stroke.as_ref().map(|s| &s.dash) != Some(dash) { + if let Some(pattern) = dash { + self.content.set_dash_pattern( + pattern.array.iter().map(|l| l.to_f32()), + pattern.phase.to_f32(), + ); + } else { + self.content.set_dash_pattern([], 0.0); + } + } + if self.state.stroke.as_ref().map(|s| &s.miter_limit) != Some(miter_limit) { + self.content.set_miter_limit(miter_limit.get() as f32); + } + self.state.stroke = Some(stroke.clone()); + } + } + + pub fn set_stroke_color_space(&mut self, space: Name<'static>) { + if self.state.stroke_space != Some(space) { + self.content.set_stroke_color_space(ColorSpaceOperand::Named(space)); + self.state.stroke_space = Some(space); + } + } + + pub fn reset_stroke_color_space(&mut self) { + self.state.stroke_space = None; + } + + fn set_text_rendering_mode(&mut self, mode: TextRenderingMode) { + if self.state.text_rendering_mode != mode { + self.content.set_text_rendering_mode(mode); + self.state.text_rendering_mode = mode; + } + } +} + +/// Encode a frame into the content stream. +pub(crate) fn write_frame(ctx: &mut Builder, frame: &Frame) { + for &(pos, ref item) in frame.items() { + let x = pos.x.to_f32(); + let y = pos.y.to_f32(); + match item { + FrameItem::Group(group) => write_group(ctx, pos, group), + FrameItem::Text(text) => write_text(ctx, pos, text), + FrameItem::Shape(shape, _) => write_shape(ctx, pos, shape), + FrameItem::Image(image, size, _) => write_image(ctx, x, y, image, *size), + FrameItem::Link(dest, size) => write_link(ctx, pos, dest, *size), + FrameItem::Tag(_) => {} + } + } +} + +/// Encode a group into the content stream. +fn write_group(ctx: &mut Builder, pos: Point, group: &GroupItem) { + let translation = Transform::translate(pos.x, pos.y); + + ctx.save_state(); + + if group.frame.kind().is_hard() { + ctx.group_transform( + ctx.state + .transform + .post_concat(ctx.state.container_transform.invert().unwrap()) + .pre_concat(translation) + .pre_concat(group.transform), + ); + ctx.size(group.frame.size()); + } + + ctx.transform(translation.pre_concat(group.transform)); + if let Some(clip_path) = &group.clip_path { + write_path(ctx, 0.0, 0.0, clip_path); + ctx.content.clip_nonzero(); + ctx.content.end_path(); + } + + write_frame(ctx, &group.frame); + ctx.restore_state(); +} + +/// Encode a text run into the content stream. 
+fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) { + let ttf = text.font.ttf(); + let tables = ttf.tables(); + + // If the text run contains either only color glyphs (used for emojis for + // example) or normal text we can render it directly + let has_color_glyphs = tables.sbix.is_some() + || tables.cbdt.is_some() + || tables.svg.is_some() + || tables.colr.is_some(); + if !has_color_glyphs { + write_normal_text(ctx, pos, TextItemView::all_of(text)); + return; + } + + let color_glyph_count = + text.glyphs.iter().filter(|g| is_color_glyph(&text.font, g)).count(); + + if color_glyph_count == text.glyphs.len() { + write_color_glyphs(ctx, pos, TextItemView::all_of(text)); + } else if color_glyph_count == 0 { + write_normal_text(ctx, pos, TextItemView::all_of(text)); + } else { + // Otherwise we need to split it in smaller text runs + let mut offset = 0; + let mut position_in_run = Abs::zero(); + for (color, sub_run) in + text.glyphs.group_by_key(|g| is_color_glyph(&text.font, g)) + { + let end = offset + sub_run.len(); + + // Build a sub text-run + let text_item_view = TextItemView::from_glyph_range(text, offset..end); + + // Adjust the position of the run on the line + let pos = pos + Point::new(position_in_run, Abs::zero()); + position_in_run += text_item_view.width(); + offset = end; + // Actually write the sub text-run + if color { + write_color_glyphs(ctx, pos, text_item_view); + } else { + write_normal_text(ctx, pos, text_item_view); + } + } + } +} + +/// Encodes a text run (without any color glyph) into the content stream. +fn write_normal_text(ctx: &mut Builder, pos: Point, text: TextItemView) { + let x = pos.x.to_f32(); + let y = pos.y.to_f32(); + + *ctx.resources.languages.entry(text.item.lang).or_insert(0) += text.glyph_range.len(); + + let glyph_set = ctx.resources.glyph_sets.entry(text.item.font.clone()).or_default(); + for g in text.glyphs() { + let t = text.text(); + let segment = &t[g.range()]; + glyph_set.entry(g.id).or_insert_with(|| segment.into()); + } + + let fill_transform = ctx.state.transforms(Size::zero(), pos); + ctx.set_fill(&text.item.fill, true, fill_transform); + + let stroke = text.item.stroke.as_ref().and_then(|stroke| { + if stroke.thickness.to_f32() > 0.0 { + Some(stroke) + } else { + None + } + }); + + if let Some(stroke) = stroke { + ctx.set_stroke(stroke, true, fill_transform); + ctx.set_text_rendering_mode(TextRenderingMode::FillStroke); + } else { + ctx.set_text_rendering_mode(TextRenderingMode::Fill); + } + + ctx.set_font(&text.item.font, text.item.size); + ctx.set_opacities(text.item.stroke.as_ref(), Some(&text.item.fill)); + ctx.content.begin_text(); + + // Position the text. + ctx.content.set_text_matrix([1.0, 0.0, 0.0, -1.0, x, y]); + + let mut positioned = ctx.content.show_positioned(); + let mut items = positioned.items(); + let mut adjustment = Em::zero(); + let mut encoded = vec![]; + + // Write the glyphs with kerning adjustments. 
+ for glyph in text.glyphs() { + adjustment += glyph.x_offset; + + if !adjustment.is_zero() { + if !encoded.is_empty() { + items.show(Str(&encoded)); + encoded.clear(); + } + + items.adjust(-adjustment.to_font_units()); + adjustment = Em::zero(); + } + + let cid = crate::font::glyph_cid(&text.item.font, glyph.id); + encoded.push((cid >> 8) as u8); + encoded.push((cid & 0xff) as u8); + + if let Some(advance) = text.item.font.advance(glyph.id) { + adjustment += glyph.x_advance - advance; + } + + adjustment -= glyph.x_offset; + } + + if !encoded.is_empty() { + items.show(Str(&encoded)); + } + + items.finish(); + positioned.finish(); + ctx.content.end_text(); +} + +/// Encodes a text run made only of color glyphs into the content stream +fn write_color_glyphs(ctx: &mut Builder, pos: Point, text: TextItemView) { + let x = pos.x.to_f32(); + let y = pos.y.to_f32(); + + let mut last_font = None; + + ctx.content.begin_text(); + ctx.content.set_text_matrix([1.0, 0.0, 0.0, -1.0, x, y]); + // So that the next call to ctx.set_font() will change the font to one that + // displays regular glyphs and not color glyphs. + ctx.state.font = None; + + let glyph_set = ctx.resources.glyph_sets.entry(text.item.font.clone()).or_default(); + + for glyph in text.glyphs() { + // Retrieve the Type3 font reference and the glyph index in the font. + let color_fonts = ctx + .resources + .color_fonts + .get_or_insert_with(|| Box::new(ColorFontMap::new())); + let (font, index) = color_fonts.get(&text.item.font, glyph.id); + + if last_font != Some(font) { + ctx.content.set_font( + Name(eco_format!("Cf{}", font).as_bytes()), + text.item.size.to_f32(), + ); + last_font = Some(font); + } + + ctx.content.show(Str(&[index])); + + glyph_set + .entry(glyph.id) + .or_insert_with(|| text.text()[glyph.range()].into()); + } + ctx.content.end_text(); +} + +/// Encode a geometrical shape into the content stream. +fn write_shape(ctx: &mut Builder, pos: Point, shape: &Shape) { + let x = pos.x.to_f32(); + let y = pos.y.to_f32(); + + let stroke = shape.stroke.as_ref().and_then(|stroke| { + if stroke.thickness.to_f32() > 0.0 { + Some(stroke) + } else { + None + } + }); + + if shape.fill.is_none() && stroke.is_none() { + return; + } + + if let Some(fill) = &shape.fill { + ctx.set_fill(fill, false, ctx.state.transforms(shape.geometry.bbox_size(), pos)); + } + + if let Some(stroke) = stroke { + ctx.set_stroke( + stroke, + false, + ctx.state.transforms(shape.geometry.bbox_size(), pos), + ); + } + + ctx.set_opacities(stroke, shape.fill.as_ref()); + + match shape.geometry { + Geometry::Line(target) => { + let dx = target.x.to_f32(); + let dy = target.y.to_f32(); + ctx.content.move_to(x, y); + ctx.content.line_to(x + dx, y + dy); + } + Geometry::Rect(size) => { + let w = size.x.to_f32(); + let h = size.y.to_f32(); + if w.abs() > f32::EPSILON && h.abs() > f32::EPSILON { + ctx.content.rect(x, y, w, h); + } + } + Geometry::Path(ref path) => { + write_path(ctx, x, y, path); + } + } + + match (&shape.fill, stroke) { + (None, None) => unreachable!(), + (Some(_), None) => ctx.content.fill_nonzero(), + (None, Some(_)) => ctx.content.stroke(), + (Some(_), Some(_)) => ctx.content.fill_nonzero_and_stroke(), + }; +} + +/// Encode a bezier path into the content stream. 
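Stepping back to the kerning loop in `write_normal_text` above: the bookkeeping can be modeled with plain numbers. A standalone sketch, using `f64` em values instead of the crate's `Em` type (the input tuples are illustrative):

```rust
/// For each glyph, returns the TJ adjustment (in thousandths of an em)
/// emitted before showing it, mirroring the loop in `write_normal_text`.
/// Each input tuple is (x_offset, x_advance, intrinsic_advance) in em.
fn tj_adjustments(glyphs: &[(f64, f64, f64)]) -> Vec<f64> {
    let mut out = Vec::new();
    let mut adjustment = 0.0;
    for &(x_offset, x_advance, intrinsic) in glyphs {
        // Shift this glyph by its offset. A positive TJ value moves the pen
        // to the left, hence the negation (the real loop skips zero
        // adjustments and batches the glyph bytes shown in between).
        adjustment += x_offset;
        out.push(-adjustment * 1000.0);
        adjustment = 0.0;
        // The glyph is shown here and the pen advances by the font's
        // intrinsic advance, so only the difference to Typst's advance
        // carries over to the next glyph; the one-off offset is undone.
        adjustment += x_advance - intrinsic;
        adjustment -= x_offset;
    }
    out
}
```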
+fn write_path(ctx: &mut Builder, x: f32, y: f32, path: &Path) { + for elem in &path.0 { + match elem { + PathItem::MoveTo(p) => { + ctx.content.move_to(x + p.x.to_f32(), y + p.y.to_f32()) + } + PathItem::LineTo(p) => { + ctx.content.line_to(x + p.x.to_f32(), y + p.y.to_f32()) + } + PathItem::CubicTo(p1, p2, p3) => ctx.content.cubic_to( + x + p1.x.to_f32(), + y + p1.y.to_f32(), + x + p2.x.to_f32(), + y + p2.y.to_f32(), + x + p3.x.to_f32(), + y + p3.y.to_f32(), + ), + PathItem::ClosePath => ctx.content.close_path(), + }; + } +} + +/// Encode a vector or raster image into the content stream. +fn write_image(ctx: &mut Builder, x: f32, y: f32, image: &Image, size: Size) { + let index = ctx.resources.images.insert(image.clone()); + ctx.resources.deferred_images.entry(index).or_insert_with(|| { + let (image, color_space) = deferred_image(image.clone()); + if let Some(color_space) = color_space { + ctx.resources.colors.mark_as_used(color_space); + } + image + }); + + let name = eco_format!("Im{index}"); + let w = size.x.to_f32(); + let h = size.y.to_f32(); + ctx.content.save_state(); + ctx.content.transform([w, 0.0, 0.0, -h, x, y + h]); + + if let Some(alt) = image.alt() { + let mut image_span = + ctx.content.begin_marked_content_with_properties(Name(b"Span")); + let mut image_alt = image_span.properties(); + image_alt.pair(Name(b"Alt"), pdf_writer::Str(alt.as_bytes())); + image_alt.finish(); + image_span.finish(); + + ctx.content.x_object(Name(name.as_bytes())); + ctx.content.end_marked_content(); + } else { + ctx.content.x_object(Name(name.as_bytes())); + } + + ctx.content.restore_state(); +} + +/// Save a link for later writing in the annotations dictionary. +fn write_link(ctx: &mut Builder, pos: Point, dest: &Destination, size: Size) { + let mut min_x = Abs::inf(); + let mut min_y = Abs::inf(); + let mut max_x = -Abs::inf(); + let mut max_y = -Abs::inf(); + + // Compute the bounding box of the transformed link. + for point in [ + pos, + pos + Point::with_x(size.x), + pos + Point::with_y(size.y), + pos + size.to_point(), + ] { + let t = point.transform(ctx.state.transform); + min_x.set_min(t.x); + min_y.set_min(t.y); + max_x.set_max(t.x); + max_y.set_max(t.y); + } + + let x1 = min_x.to_f32(); + let x2 = max_x.to_f32(); + let y1 = max_y.to_f32(); + let y2 = min_y.to_f32(); + let rect = Rect::new(x1, y1, x2, y2); + + ctx.links.push((dest.clone(), rect)); +} + +fn to_pdf_line_cap(cap: LineCap) -> LineCapStyle { + match cap { + LineCap::Butt => LineCapStyle::ButtCap, + LineCap::Round => LineCapStyle::RoundCap, + LineCap::Square => LineCapStyle::ProjectingSquareCap, + } +} + +fn to_pdf_line_join(join: LineJoin) -> LineJoinStyle { + match join { + LineJoin::Miter => LineJoinStyle::MiterJoin, + LineJoin::Round => LineJoinStyle::RoundJoin, + LineJoin::Bevel => LineJoinStyle::BevelJoin, + } +} diff --git a/crates/typst-pdf/src/extg.rs b/crates/typst-pdf/src/extg.rs index f3ad3815c..47d89b40f 100644 --- a/crates/typst-pdf/src/extg.rs +++ b/crates/typst-pdf/src/extg.rs @@ -1,4 +1,8 @@ -use crate::PdfContext; +use std::collections::HashMap; + +use pdf_writer::Ref; + +use crate::{PdfChunk, WithGlobalRefs}; /// A PDF external graphics state. #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] @@ -22,13 +26,25 @@ impl ExtGState { } /// Embed all used external graphics states into the PDF. 
-pub(crate) fn write_external_graphics_states(ctx: &mut PdfContext) { - for external_gs in ctx.extg_map.items() { - let id = ctx.alloc.bump(); - ctx.ext_gs_refs.push(id); - ctx.pdf - .ext_graphics(id) - .non_stroking_alpha(external_gs.fill_opacity as f32 / 255.0) - .stroking_alpha(external_gs.stroke_opacity as f32 / 255.0); - } +pub fn write_graphic_states( + context: &WithGlobalRefs, +) -> (PdfChunk, HashMap) { + let mut chunk = PdfChunk::new(); + let mut out = HashMap::new(); + context.resources.traverse(&mut |resources| { + for external_gs in resources.ext_gs.items() { + if out.contains_key(external_gs) { + continue; + } + + let id = chunk.alloc(); + out.insert(*external_gs, id); + chunk + .ext_graphics(id) + .non_stroking_alpha(external_gs.fill_opacity as f32 / 255.0) + .stroking_alpha(external_gs.stroke_opacity as f32 / 255.0); + } + }); + + (chunk, out) } diff --git a/crates/typst-pdf/src/font.rs b/crates/typst-pdf/src/font.rs index 5d32e5d99..22c3d22fe 100644 --- a/crates/typst-pdf/src/font.rs +++ b/crates/typst-pdf/src/font.rs @@ -1,24 +1,24 @@ -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use std::hash::Hash; use std::sync::Arc; use ecow::{eco_format, EcoString}; -use pdf_writer::types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap}; -use pdf_writer::writers::FontDescriptor; -use pdf_writer::{Filter, Finish, Name, Rect, Str}; +use pdf_writer::{ + types::{CidFontType, FontFlags, SystemInfo, UnicodeCmap}, + writers::FontDescriptor, + Chunk, Filter, Finish, Name, Rect, Ref, Str, +}; use ttf_parser::{name_id, GlyphId, Tag}; -use typst::layout::{Abs, Em, Ratio, Transform}; use typst::text::Font; use typst::utils::SliceExt; use unicode_properties::{GeneralCategory, UnicodeGeneralCategory}; -use crate::page::{write_frame, PageContext}; -use crate::{deflate, AbsExt, EmExt, PdfContext}; +use crate::{deflate, EmExt, PdfChunk, WithGlobalRefs}; const CFF: Tag = Tag::from_bytes(b"CFF "); const CFF2: Tag = Tag::from_bytes(b"CFF2"); -const CMAP_NAME: Name = Name(b"Custom"); -const SYSTEM_INFO: SystemInfo = SystemInfo { +pub(crate) const CMAP_NAME: Name = Name(b"Custom"); +pub(crate) const SYSTEM_INFO: SystemInfo = SystemInfo { registry: Str(b"Adobe"), ordering: Str(b"Identity"), supplement: 0, @@ -26,230 +26,127 @@ const SYSTEM_INFO: SystemInfo = SystemInfo { /// Embed all used fonts into the PDF. #[typst_macros::time(name = "write fonts")] -pub(crate) fn write_fonts(ctx: &mut PdfContext) { - write_color_fonts(ctx); - - for font in ctx.font_map.items() { - let type0_ref = ctx.alloc.bump(); - let cid_ref = ctx.alloc.bump(); - let descriptor_ref = ctx.alloc.bump(); - let cmap_ref = ctx.alloc.bump(); - let data_ref = ctx.alloc.bump(); - ctx.font_refs.push(type0_ref); - - let glyph_set = ctx.glyph_sets.get_mut(font).unwrap(); - let ttf = font.ttf(); - - // Do we have a TrueType or CFF font? - // - // FIXME: CFF2 must be handled differently and requires PDF 2.0 - // (or we have to convert it to CFF). - let is_cff = ttf - .raw_face() - .table(CFF) - .or_else(|| ttf.raw_face().table(CFF2)) - .is_some(); - - let postscript_name = font - .find_name(name_id::POST_SCRIPT_NAME) - .unwrap_or_else(|| "unknown".to_string()); - - let subset_tag = subset_tag(glyph_set); - let base_font = eco_format!("{subset_tag}+{postscript_name}"); - let base_font_type0 = if is_cff { - eco_format!("{base_font}-Identity-H") - } else { - base_font.clone() - }; - - // Write the base font object referencing the CID font. 
- ctx.pdf - .type0_font(type0_ref) - .base_font(Name(base_font_type0.as_bytes())) - .encoding_predefined(Name(b"Identity-H")) - .descendant_font(cid_ref) - .to_unicode(cmap_ref); - - // Write the CID font referencing the font descriptor. - let mut cid = ctx.pdf.cid_font(cid_ref); - cid.subtype(if is_cff { CidFontType::Type0 } else { CidFontType::Type2 }); - cid.base_font(Name(base_font.as_bytes())); - cid.system_info(SYSTEM_INFO); - cid.font_descriptor(descriptor_ref); - cid.default_width(0.0); - if !is_cff { - cid.cid_to_gid_map_predefined(Name(b"Identity")); - } - - // Extract the widths of all glyphs. - let mut widths = vec![]; - for gid in std::iter::once(0).chain(glyph_set.keys().copied()) { - let width = ttf.glyph_hor_advance(GlyphId(gid)).unwrap_or(0); - let units = font.to_em(width).to_font_units(); - let cid = glyph_cid(font, gid); - if usize::from(cid) >= widths.len() { - widths.resize(usize::from(cid) + 1, 0.0); - widths[usize::from(cid)] = units; - } - } - - // Write all non-zero glyph widths. - let mut first = 0; - let mut width_writer = cid.widths(); - for (w, group) in widths.group_by_key(|&w| w) { - let end = first + group.len(); - if w != 0.0 { - let last = end - 1; - width_writer.same(first as u16, last as u16, w); - } - first = end; - } - - width_writer.finish(); - cid.finish(); - - // Write the /ToUnicode character map, which maps glyph ids back to - // unicode codepoints to enable copying out of the PDF. - let cmap = create_cmap(font, glyph_set); - ctx.pdf.cmap(cmap_ref, &cmap.finish()); - - // Subset and write the font's bytes. - let glyphs: Vec<_> = glyph_set.keys().copied().collect(); - let data = subset_font(font, &glyphs); - - let mut stream = ctx.pdf.stream(data_ref, &data); - stream.filter(Filter::FlateDecode); - if is_cff { - stream.pair(Name(b"Subtype"), Name(b"CIDFontType0C")); - } - - stream.finish(); - - let mut font_descriptor = - write_font_descriptor(&mut ctx.pdf, descriptor_ref, font, &base_font); - if is_cff { - font_descriptor.font_file3(data_ref); - } else { - font_descriptor.font_file2(data_ref); - } - } -} - -/// Writes color fonts as Type3 fonts -fn write_color_fonts(ctx: &mut PdfContext) { - let color_font_map = ctx.color_font_map.take_map(); - for (font, color_font) in color_font_map { - // For each Type3 font that is part of this family… - for (font_index, subfont_id) in color_font.refs.iter().enumerate() { - // Allocate some IDs. - let cmap_ref = ctx.alloc.bump(); - let descriptor_ref = ctx.alloc.bump(); - let widths_ref = ctx.alloc.bump(); - // And a map between glyph IDs and the instructions to draw this - // glyph. - let mut glyphs_to_instructions = Vec::new(); - - let start = font_index * 256; - let end = (start + 256).min(color_font.glyphs.len()); - let glyph_count = end - start; - let subset = &color_font.glyphs[start..end]; - let mut widths = Vec::new(); - let mut gids = Vec::new(); - - let scale_factor = font.ttf().units_per_em() as f32; - - // Write the instructions for each glyph. - for color_glyph in subset { - let instructions_stream_ref = ctx.alloc.bump(); - let width = - font.advance(color_glyph.gid).unwrap_or(Em::new(0.0)).to_font_units(); - widths.push(width); - // Create a fake page context for `write_frame`. We are only - // interested in the contents of the page. 
- let size = color_glyph.frame.size(); - let mut page_ctx = PageContext::new(ctx, size); - page_ctx.bottom = size.y.to_f32(); - page_ctx.content.start_color_glyph(width); - page_ctx.transform( - // Make the Y axis go upwards, while preserving aspect ratio - Transform::scale(Ratio::one(), -size.aspect_ratio()) - // Also move the origin to the top left corner - .post_concat(Transform::translate(Abs::zero(), size.y)), - ); - write_frame(&mut page_ctx, &color_glyph.frame); - - // Retrieve the stream of the page and write it. - let stream = page_ctx.content.finish(); - ctx.pdf.stream(instructions_stream_ref, &stream); - - // Use this stream as instructions to draw the glyph. - glyphs_to_instructions.push(instructions_stream_ref); - gids.push(color_glyph.gid); +pub fn write_fonts(context: &WithGlobalRefs) -> (PdfChunk, HashMap) { + let mut chunk = PdfChunk::new(); + let mut out = HashMap::new(); + context.resources.traverse(&mut |resources| { + for font in resources.fonts.items() { + if out.contains_key(font) { + continue; } - // Write the Type3 font object. - let mut pdf_font = ctx.pdf.type3_font(*subfont_id); - pdf_font.pair(Name(b"Resources"), ctx.type3_font_resources_ref); - pdf_font.bbox(color_font.bbox); - pdf_font.matrix([1.0 / scale_factor, 0.0, 0.0, 1.0 / scale_factor, 0.0, 0.0]); - pdf_font.first_char(0); - pdf_font.last_char((glyph_count - 1) as u8); - pdf_font.pair(Name(b"Widths"), widths_ref); - pdf_font.to_unicode(cmap_ref); - pdf_font.font_descriptor(descriptor_ref); + let type0_ref = chunk.alloc(); + let cid_ref = chunk.alloc(); + let descriptor_ref = chunk.alloc(); + let cmap_ref = chunk.alloc(); + let data_ref = chunk.alloc(); + out.insert(font.clone(), type0_ref); - // Write the /CharProcs dictionary, that maps glyph names to - // drawing instructions. - let mut char_procs = pdf_font.char_procs(); - for (gid, instructions_ref) in glyphs_to_instructions.iter().enumerate() { - char_procs - .pair(Name(eco_format!("glyph{gid}").as_bytes()), *instructions_ref); - } - char_procs.finish(); + let glyph_set = resources.glyph_sets.get(font).unwrap(); + let ttf = font.ttf(); - // Write the /Encoding dictionary. - let names = (0..glyph_count) - .map(|gid| eco_format!("glyph{gid}")) - .collect::>(); - pdf_font - .encoding_custom() - .differences() - .consecutive(0, names.iter().map(|name| Name(name.as_bytes()))); - pdf_font.finish(); + // Do we have a TrueType or CFF font? + // + // FIXME: CFF2 must be handled differently and requires PDF 2.0 + // (or we have to convert it to CFF). + let is_cff = ttf + .raw_face() + .table(CFF) + .or_else(|| ttf.raw_face().table(CFF2)) + .is_some(); - // Encode a CMAP to make it possible to search or copy glyphs. - let glyph_set = ctx.glyph_sets.get_mut(&font).unwrap(); - let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO); - for (index, glyph) in subset.iter().enumerate() { - let Some(text) = glyph_set.get(&glyph.gid) else { - continue; - }; - - if !text.is_empty() { - cmap.pair_with_multiple(index as u8, text.chars()); - } - } - ctx.pdf.cmap(cmap_ref, &cmap.finish()); - - // Write the font descriptor. 
- gids.sort(); - let subset_tag = subset_tag(&gids); let postscript_name = font .find_name(name_id::POST_SCRIPT_NAME) .unwrap_or_else(|| "unknown".to_string()); - let base_font = eco_format!("{subset_tag}+{postscript_name}"); - write_font_descriptor(&mut ctx.pdf, descriptor_ref, &font, &base_font); - // Write the widths array - ctx.pdf.indirect(widths_ref).array().items(widths); + let subset_tag = subset_tag(glyph_set); + let base_font = eco_format!("{subset_tag}+{postscript_name}"); + let base_font_type0 = if is_cff { + eco_format!("{base_font}-Identity-H") + } else { + base_font.clone() + }; + + // Write the base font object referencing the CID font. + chunk + .type0_font(type0_ref) + .base_font(Name(base_font_type0.as_bytes())) + .encoding_predefined(Name(b"Identity-H")) + .descendant_font(cid_ref) + .to_unicode(cmap_ref); + + // Write the CID font referencing the font descriptor. + let mut cid = chunk.cid_font(cid_ref); + cid.subtype(if is_cff { CidFontType::Type0 } else { CidFontType::Type2 }); + cid.base_font(Name(base_font.as_bytes())); + cid.system_info(SYSTEM_INFO); + cid.font_descriptor(descriptor_ref); + cid.default_width(0.0); + if !is_cff { + cid.cid_to_gid_map_predefined(Name(b"Identity")); + } + + // Extract the widths of all glyphs. + let mut widths = vec![]; + for gid in std::iter::once(0).chain(glyph_set.keys().copied()) { + let width = ttf.glyph_hor_advance(GlyphId(gid)).unwrap_or(0); + let units = font.to_em(width).to_font_units(); + let cid = glyph_cid(font, gid); + if usize::from(cid) >= widths.len() { + widths.resize(usize::from(cid) + 1, 0.0); + widths[usize::from(cid)] = units; + } + } + + // Write all non-zero glyph widths. + let mut first = 0; + let mut width_writer = cid.widths(); + for (w, group) in widths.group_by_key(|&w| w) { + let end = first + group.len(); + if w != 0.0 { + let last = end - 1; + width_writer.same(first as u16, last as u16, w); + } + first = end; + } + + width_writer.finish(); + cid.finish(); + + // Write the /ToUnicode character map, which maps glyph ids back to + // unicode codepoints to enable copying out of the PDF. + let cmap = create_cmap(font, glyph_set); + chunk.cmap(cmap_ref, &cmap.finish()); + + // Subset and write the font's bytes. + let glyphs: Vec<_> = glyph_set.keys().copied().collect(); + let data = subset_font(font, &glyphs); + + let mut stream = chunk.stream(data_ref, &data); + stream.filter(Filter::FlateDecode); + if is_cff { + stream.pair(Name(b"Subtype"), Name(b"CIDFontType0C")); + } + + stream.finish(); + + let mut font_descriptor = + write_font_descriptor(&mut chunk, descriptor_ref, font, &base_font); + if is_cff { + font_descriptor.font_file3(data_ref); + } else { + font_descriptor.font_file2(data_ref); + } } - } + }); + + (chunk, out) } /// Writes a FontDescriptor dictionary. -fn write_font_descriptor<'a>( - pdf: &'a mut pdf_writer::Pdf, - descriptor_ref: pdf_writer::Ref, +pub fn write_font_descriptor<'a>( + pdf: &'a mut Chunk, + descriptor_ref: Ref, font: &'a Font, base_font: &EcoString, ) -> FontDescriptor<'a> { @@ -317,7 +214,7 @@ fn subset_font(font: &Font, glyphs: &[u16]) -> Arc> { } /// Produce a unique 6 letter tag for a glyph set. -fn subset_tag(glyphs: &T) -> EcoString { +pub(crate) fn subset_tag(glyphs: &T) -> EcoString { const LEN: usize = 6; const BASE: u128 = 26; let mut hash = typst::utils::hash128(&glyphs); @@ -329,33 +226,40 @@ fn subset_tag(glyphs: &T) -> EcoString { std::str::from_utf8(&letter).unwrap().into() } -/// Create a /ToUnicode CMap. 
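(Reviewer aside, not part of the patch: the now-shared `subset_tag` helper above derives a 6-letter, base-26 tag from a 128-bit hash of the glyph set. A self-contained sketch of the same scheme, with std's `DefaultHasher` standing in for `typst::utils::hash128` and a made-up font name in the usage line:)

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Sketch: a 6-letter, base-26 tag derived from a hash of the glyph set.
/// `DefaultHasher` is a stand-in for `typst::utils::hash128`.
fn subset_tag_sketch<T: Hash>(glyphs: &T) -> String {
    const LEN: usize = 6;
    const BASE: u64 = 26;
    let mut hasher = DefaultHasher::new();
    glyphs.hash(&mut hasher);
    let mut hash = hasher.finish();
    let mut letters = [b'A'; LEN];
    for l in letters.iter_mut() {
        *l = b'A' + (hash % BASE) as u8;
        hash /= BASE;
    }
    String::from_utf8(letters.to_vec()).unwrap()
}

fn main() {
    // Same glyph set, same tag; different sets almost always differ.
    let tag = subset_tag_sketch(&vec![10u16, 42, 97]);
    assert_eq!(tag.len(), 6);
    // The tag prefixes the base font name, as in `{subset_tag}+{postscript_name}`.
    println!("{tag}+NotoSans-Regular"); // "NotoSans-Regular" is just an example name
}
```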
-fn create_cmap(font: &Font, glyph_set: &mut BTreeMap) -> UnicodeCmap { - let ttf = font.ttf(); +/// For glyphs that have codepoints mapping to them in the font's cmap table, we +/// prefer them over pre-existing text mappings from the document. Only things +/// that don't have a corresponding codepoint (or only a private-use one) like +/// the "Th" in Linux Libertine get the text of their first occurrences in the +/// document instead. +/// +/// This function replaces as much copepoints from the document with ones from +/// the cmap table as possible. +pub fn improve_glyph_sets(glyph_sets: &mut HashMap>) { + for (font, glyph_set) in glyph_sets { + let ttf = font.ttf(); - // For glyphs that have codepoints mapping to them in the font's cmap table, - // we prefer them over pre-existing text mappings from the document. Only - // things that don't have a corresponding codepoint (or only a private-use - // one) like the "Th" in Linux Libertine get the text of their first - // occurrences in the document instead. - for subtable in ttf.tables().cmap.into_iter().flat_map(|table| table.subtables) { - if !subtable.is_unicode() { - continue; + for subtable in ttf.tables().cmap.into_iter().flat_map(|table| table.subtables) { + if !subtable.is_unicode() { + continue; + } + + subtable.codepoints(|n| { + let Some(c) = std::char::from_u32(n) else { return }; + if c.general_category() == GeneralCategory::PrivateUse { + return; + } + + let Some(GlyphId(g)) = ttf.glyph_index(c) else { return }; + if glyph_set.contains_key(&g) { + glyph_set.insert(g, c.into()); + } + }); } - - subtable.codepoints(|n| { - let Some(c) = std::char::from_u32(n) else { return }; - if c.general_category() == GeneralCategory::PrivateUse { - return; - } - - let Some(GlyphId(g)) = ttf.glyph_index(c) else { return }; - if glyph_set.contains_key(&g) { - glyph_set.insert(g, c.into()); - } - }); } +} +/// Create a /ToUnicode CMap. +fn create_cmap(font: &Font, glyph_set: &BTreeMap) -> UnicodeCmap { // Produce a reverse mapping from glyphs' CIDs to unicode strings. let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO); for (&g, text) in glyph_set.iter() { diff --git a/crates/typst-pdf/src/gradient.rs b/crates/typst-pdf/src/gradient.rs index 576c254e7..de77df336 100644 --- a/crates/typst-pdf/src/gradient.rs +++ b/crates/typst-pdf/src/gradient.rs @@ -1,19 +1,23 @@ +use std::collections::HashMap; use std::f32::consts::{PI, TAU}; use std::sync::Arc; use ecow::eco_format; -use pdf_writer::types::{ColorSpaceOperand, FunctionShadingType}; -use pdf_writer::writers::StreamShadingType; -use pdf_writer::{Filter, Finish, Name, Ref}; +use pdf_writer::{ + types::{ColorSpaceOperand, FunctionShadingType}, + writers::StreamShadingType, + Filter, Finish, Name, Ref, +}; + use typst::layout::{Abs, Angle, Point, Quadrant, Ratio, Transform}; use typst::utils::Numeric; use typst::visualize::{ Color, ColorSpace, Gradient, RatioOrAngle, RelativeTo, WeightedColor, }; -use crate::color::{ColorSpaceExt, PaintEncode, QuantizedColor}; -use crate::page::{PageContext, PageResource, ResourceKind, Transforms}; -use crate::{deflate, transform_to_array, AbsExt, PdfContext}; +use crate::color::{self, ColorSpaceExt, PaintEncode, QuantizedColor}; +use crate::{content, WithGlobalRefs}; +use crate::{deflate, transform_to_array, AbsExt, PdfChunk}; /// A unique-transform-aspect-ratio combination that will be encoded into the /// PDF. @@ -32,122 +36,144 @@ pub struct PdfGradient { /// Writes the actual gradients (shading patterns) to the PDF. 
/// This is performed once after writing all pages. -pub(crate) fn write_gradients(ctx: &mut PdfContext) { - for PdfGradient { transform, aspect_ratio, gradient, angle } in - ctx.gradient_map.items().cloned().collect::>() - { - let shading = ctx.alloc.bump(); - ctx.gradient_refs.push(shading); - - let color_space = if gradient.space().hue_index().is_some() { - ColorSpace::Oklab - } else { - gradient.space() - }; - - let mut shading_pattern = match &gradient { - Gradient::Linear(_) => { - let shading_function = shading_function(ctx, &gradient, color_space); - let mut shading_pattern = ctx.pdf.shading_pattern(shading); - let mut shading = shading_pattern.function_shading(); - shading.shading_type(FunctionShadingType::Axial); - - ctx.colors.write(color_space, shading.color_space(), &mut ctx.alloc); - - let (mut sin, mut cos) = (angle.sin(), angle.cos()); - - // Scale to edges of unit square. - let factor = cos.abs() + sin.abs(); - sin *= factor; - cos *= factor; - - let (x1, y1, x2, y2): (f64, f64, f64, f64) = match angle.quadrant() { - Quadrant::First => (0.0, 0.0, cos, sin), - Quadrant::Second => (1.0, 0.0, cos + 1.0, sin), - Quadrant::Third => (1.0, 1.0, cos + 1.0, sin + 1.0), - Quadrant::Fourth => (0.0, 1.0, cos, sin + 1.0), - }; - - shading - .anti_alias(gradient.anti_alias()) - .function(shading_function) - .coords([x1 as f32, y1 as f32, x2 as f32, y2 as f32]) - .extend([true; 2]); - - shading.finish(); - - shading_pattern +pub fn write_gradients( + context: &WithGlobalRefs, +) -> (PdfChunk, HashMap) { + let mut chunk = PdfChunk::new(); + let mut out = HashMap::new(); + context.resources.traverse(&mut |resources| { + for pdf_gradient in resources.gradients.items() { + if out.contains_key(pdf_gradient) { + continue; } - Gradient::Radial(radial) => { - let shading_function = shading_function(ctx, &gradient, color_space); - let mut shading_pattern = ctx.pdf.shading_pattern(shading); - let mut shading = shading_pattern.function_shading(); - shading.shading_type(FunctionShadingType::Radial); - ctx.colors.write(color_space, shading.color_space(), &mut ctx.alloc); + let shading = chunk.alloc(); + out.insert(pdf_gradient.clone(), shading); - shading - .anti_alias(gradient.anti_alias()) - .function(shading_function) - .coords([ - radial.focal_center.x.get() as f32, - radial.focal_center.y.get() as f32, - radial.focal_radius.get() as f32, - radial.center.x.get() as f32, - radial.center.y.get() as f32, - radial.radius.get() as f32, - ]) - .extend([true; 2]); + let PdfGradient { transform, aspect_ratio, gradient, angle } = pdf_gradient; - shading.finish(); + let color_space = if gradient.space().hue_index().is_some() { + ColorSpace::Oklab + } else { + gradient.space() + }; - shading_pattern - } - Gradient::Conic(_) => { - let vertices = compute_vertex_stream(&gradient, aspect_ratio); + let mut shading_pattern = match &gradient { + Gradient::Linear(_) => { + let shading_function = + shading_function(gradient, &mut chunk, color_space); + let mut shading_pattern = chunk.chunk.shading_pattern(shading); + let mut shading = shading_pattern.function_shading(); + shading.shading_type(FunctionShadingType::Axial); - let stream_shading_id = ctx.alloc.bump(); - let mut stream_shading = - ctx.pdf.stream_shading(stream_shading_id, &vertices); + color::write( + color_space, + shading.color_space(), + &context.globals.color_functions, + ); - ctx.colors.write( - color_space, - stream_shading.color_space(), - &mut ctx.alloc, - ); + let (mut sin, mut cos) = (angle.sin(), angle.cos()); - let range = 
color_space.range(); - stream_shading - .bits_per_coordinate(16) - .bits_per_component(16) - .bits_per_flag(8) - .shading_type(StreamShadingType::CoonsPatch) - .decode([ - 0.0, 1.0, 0.0, 1.0, range[0], range[1], range[2], range[3], - range[4], range[5], - ]) - .anti_alias(gradient.anti_alias()) - .filter(Filter::FlateDecode); + // Scale to edges of unit square. + let factor = cos.abs() + sin.abs(); + sin *= factor; + cos *= factor; - stream_shading.finish(); + let (x1, y1, x2, y2): (f64, f64, f64, f64) = match angle.quadrant() { + Quadrant::First => (0.0, 0.0, cos, sin), + Quadrant::Second => (1.0, 0.0, cos + 1.0, sin), + Quadrant::Third => (1.0, 1.0, cos + 1.0, sin + 1.0), + Quadrant::Fourth => (0.0, 1.0, cos, sin + 1.0), + }; - let mut shading_pattern = ctx.pdf.shading_pattern(shading); - shading_pattern.shading_ref(stream_shading_id); - shading_pattern - } - }; + shading + .anti_alias(gradient.anti_alias()) + .function(shading_function) + .coords([x1 as f32, y1 as f32, x2 as f32, y2 as f32]) + .extend([true; 2]); - shading_pattern.matrix(transform_to_array(transform)); - } + shading.finish(); + + shading_pattern + } + Gradient::Radial(radial) => { + let shading_function = + shading_function(gradient, &mut chunk, color_space_of(gradient)); + let mut shading_pattern = chunk.chunk.shading_pattern(shading); + let mut shading = shading_pattern.function_shading(); + shading.shading_type(FunctionShadingType::Radial); + + color::write( + color_space, + shading.color_space(), + &context.globals.color_functions, + ); + + shading + .anti_alias(gradient.anti_alias()) + .function(shading_function) + .coords([ + radial.focal_center.x.get() as f32, + radial.focal_center.y.get() as f32, + radial.focal_radius.get() as f32, + radial.center.x.get() as f32, + radial.center.y.get() as f32, + radial.radius.get() as f32, + ]) + .extend([true; 2]); + + shading.finish(); + + shading_pattern + } + Gradient::Conic(_) => { + let vertices = compute_vertex_stream(gradient, *aspect_ratio); + + let stream_shading_id = chunk.alloc(); + let mut stream_shading = + chunk.chunk.stream_shading(stream_shading_id, &vertices); + + color::write( + color_space, + stream_shading.color_space(), + &context.globals.color_functions, + ); + + let range = color_space.range(); + stream_shading + .bits_per_coordinate(16) + .bits_per_component(16) + .bits_per_flag(8) + .shading_type(StreamShadingType::CoonsPatch) + .decode([ + 0.0, 1.0, 0.0, 1.0, range[0], range[1], range[2], range[3], + range[4], range[5], + ]) + .anti_alias(gradient.anti_alias()) + .filter(Filter::FlateDecode); + + stream_shading.finish(); + + let mut shading_pattern = chunk.shading_pattern(shading); + shading_pattern.shading_ref(stream_shading_id); + shading_pattern + } + }; + + shading_pattern.matrix(transform_to_array(*transform)); + } + }); + + (chunk, out) } /// Writes an expotential or stitched function that expresses the gradient. 
fn shading_function( - ctx: &mut PdfContext, gradient: &Gradient, + chunk: &mut PdfChunk, color_space: ColorSpace, ) -> Ref { - let function = ctx.alloc.bump(); + let function = chunk.alloc(); let mut functions = vec![]; let mut bounds = vec![]; let mut encode = vec![]; @@ -166,7 +192,7 @@ fn shading_function( let real_t = first.1.get() * (1.0 - t) + second.1.get() * t; let c = gradient.sample(RatioOrAngle::Ratio(Ratio::new(real_t))); - functions.push(single_gradient(ctx, last_c, c, color_space)); + functions.push(single_gradient(chunk, last_c, c, color_space)); bounds.push(real_t as f32); encode.extend([0.0, 1.0]); last_c = c; @@ -174,7 +200,7 @@ fn shading_function( } bounds.push(second.1.get() as f32); - functions.push(single_gradient(ctx, first.0, second.0, color_space)); + functions.push(single_gradient(chunk, first.0, second.0, color_space)); encode.extend([0.0, 1.0]); } @@ -187,7 +213,7 @@ fn shading_function( bounds.pop(); // Create the stitching function. - ctx.pdf + chunk .stitching_function(function) .domain([0.0, 1.0]) .range(color_space.range()) @@ -201,14 +227,13 @@ fn shading_function( /// Writes an expontential function that expresses a single segment (between two /// stops) of a gradient. fn single_gradient( - ctx: &mut PdfContext, + chunk: &mut PdfChunk, first_color: Color, second_color: Color, color_space: ColorSpace, ) -> Ref { - let reference = ctx.alloc.bump(); - - ctx.pdf + let reference = chunk.alloc(); + chunk .exponential_function(reference) .range(color_space.range()) .c0(color_space.convert(first_color)) @@ -220,7 +245,12 @@ fn single_gradient( } impl PaintEncode for Gradient { - fn set_as_fill(&self, ctx: &mut PageContext, on_text: bool, transforms: Transforms) { + fn set_as_fill( + &self, + ctx: &mut content::Builder, + on_text: bool, + transforms: content::Transforms, + ) { ctx.reset_fill_color_space(); let index = register_gradient(ctx, self, on_text, transforms); @@ -229,15 +259,13 @@ impl PaintEncode for Gradient { ctx.content.set_fill_color_space(ColorSpaceOperand::Pattern); ctx.content.set_fill_pattern(None, name); - ctx.resources - .insert(PageResource::new(ResourceKind::Gradient, id), index); } fn set_as_stroke( &self, - ctx: &mut PageContext, + ctx: &mut content::Builder, on_text: bool, - transforms: Transforms, + transforms: content::Transforms, ) { ctx.reset_stroke_color_space(); @@ -247,17 +275,15 @@ impl PaintEncode for Gradient { ctx.content.set_stroke_color_space(ColorSpaceOperand::Pattern); ctx.content.set_stroke_pattern(None, name); - ctx.resources - .insert(PageResource::new(ResourceKind::Gradient, id), index); } } /// Deduplicates a gradient to a named PDF resource. fn register_gradient( - ctx: &mut PageContext, + ctx: &mut content::Builder, gradient: &Gradient, on_text: bool, - mut transforms: Transforms, + mut transforms: content::Transforms, ) -> usize { // Edge cases for strokes. 
if transforms.size.x.is_zero() { @@ -307,7 +333,9 @@ fn register_gradient( angle: Gradient::correct_aspect_ratio(rotation, size.aspect_ratio()), }; - ctx.parent.gradient_map.insert(pdf_gradient) + ctx.resources.colors.mark_as_used(color_space_of(gradient)); + + ctx.resources.gradients.insert(pdf_gradient) } /// Writes a single Coons Patch as defined in the PDF specification @@ -466,3 +494,11 @@ fn compute_vertex_stream(gradient: &Gradient, aspect_ratio: Ratio) -> Arc ColorSpace { + if gradient.space().hue_index().is_some() { + ColorSpace::Oklab + } else { + gradient.space() + } +} diff --git a/crates/typst-pdf/src/image.rs b/crates/typst-pdf/src/image.rs index 7d108d6d9..9da7158ca 100644 --- a/crates/typst-pdf/src/image.rs +++ b/crates/typst-pdf/src/image.rs @@ -8,14 +8,119 @@ use typst::visualize::{ ColorSpace, Image, ImageKind, RasterFormat, RasterImage, SvgImage, }; -use crate::{deflate, PdfContext}; +use crate::{color, deflate, PdfChunk, WithGlobalRefs}; + +/// Embed all used images into the PDF. +#[typst_macros::time(name = "write images")] +pub fn write_images(context: &WithGlobalRefs) -> (PdfChunk, HashMap) { + let mut chunk = PdfChunk::new(); + let mut out = HashMap::new(); + context.resources.traverse(&mut |resources| { + for (i, image) in resources.images.items().enumerate() { + if out.contains_key(image) { + continue; + } + + let handle = resources.deferred_images.get(&i).unwrap(); + match handle.wait() { + EncodedImage::Raster { + data, + filter, + has_color, + width, + height, + icc, + alpha, + } => { + let image_ref = chunk.alloc(); + out.insert(image.clone(), image_ref); + + let mut image = chunk.chunk.image_xobject(image_ref, data); + image.filter(*filter); + image.width(*width as i32); + image.height(*height as i32); + image.bits_per_component(8); + + let mut icc_ref = None; + let space = image.color_space(); + if icc.is_some() { + let id = chunk.alloc.bump(); + space.icc_based(id); + icc_ref = Some(id); + } else if *has_color { + color::write( + ColorSpace::Srgb, + space, + &context.globals.color_functions, + ); + } else { + color::write( + ColorSpace::D65Gray, + space, + &context.globals.color_functions, + ); + } + + // Add a second gray-scale image containing the alpha values if + // this image has an alpha channel. + if let Some((alpha_data, alpha_filter)) = alpha { + let mask_ref = chunk.alloc.bump(); + image.s_mask(mask_ref); + image.finish(); + + let mut mask = chunk.image_xobject(mask_ref, alpha_data); + mask.filter(*alpha_filter); + mask.width(*width as i32); + mask.height(*height as i32); + mask.color_space().device_gray(); + mask.bits_per_component(8); + } else { + image.finish(); + } + + if let (Some(icc), Some(icc_ref)) = (icc, icc_ref) { + let mut stream = chunk.icc_profile(icc_ref, icc); + stream.filter(Filter::FlateDecode); + if *has_color { + stream.n(3); + stream.alternate().srgb(); + } else { + stream.n(1); + stream.alternate().d65_gray(); + } + } + } + EncodedImage::Svg(svg_chunk) => { + let mut map = HashMap::new(); + svg_chunk.renumber_into(&mut chunk.chunk, |old| { + *map.entry(old).or_insert_with(|| chunk.alloc.bump()) + }); + out.insert(image.clone(), map[&Ref::new(1)]); + } + } + } + }); + + (chunk, out) +} /// Creates a new PDF image from the given image. /// /// Also starts the deferred encoding of the image. 
#[comemo::memoize] -pub fn deferred_image(image: Image) -> Deferred { - Deferred::new(move || match image.kind() { +pub fn deferred_image(image: Image) -> (Deferred, Option) { + let color_space = match image.kind() { + ImageKind::Raster(raster) if raster.icc().is_none() => { + if raster.dynamic().color().channel_count() > 2 { + Some(ColorSpace::Srgb) + } else { + Some(ColorSpace::D65Gray) + } + } + _ => None, + }; + + let deferred = Deferred::new(move || match image.kind() { ImageKind::Raster(raster) => { let raster = raster.clone(); let (width, height) = (raster.width(), raster.height()); @@ -28,83 +133,9 @@ pub fn deferred_image(image: Image) -> Deferred { EncodedImage::Raster { data, filter, has_color, width, height, icc, alpha } } ImageKind::Svg(svg) => EncodedImage::Svg(encode_svg(svg)), - }) -} + }); -/// Embed all used images into the PDF. -#[typst_macros::time(name = "write images")] -pub(crate) fn write_images(ctx: &mut PdfContext) { - for (i, _) in ctx.image_map.items().enumerate() { - let handle = ctx.image_deferred_map.get(&i).unwrap(); - match handle.wait() { - EncodedImage::Raster { - data, - filter, - has_color, - width, - height, - icc, - alpha, - } => { - let image_ref = ctx.alloc.bump(); - ctx.image_refs.push(image_ref); - - let mut image = ctx.pdf.image_xobject(image_ref, data); - image.filter(*filter); - image.width(*width as i32); - image.height(*height as i32); - image.bits_per_component(8); - - let mut icc_ref = None; - let space = image.color_space(); - if icc.is_some() { - let id = ctx.alloc.bump(); - space.icc_based(id); - icc_ref = Some(id); - } else if *has_color { - ctx.colors.write(ColorSpace::Srgb, space, &mut ctx.alloc); - } else { - ctx.colors.write(ColorSpace::D65Gray, space, &mut ctx.alloc); - } - - // Add a second gray-scale image containing the alpha values if - // this image has an alpha channel. - if let Some((alpha_data, alpha_filter)) = alpha { - let mask_ref = ctx.alloc.bump(); - image.s_mask(mask_ref); - image.finish(); - - let mut mask = ctx.pdf.image_xobject(mask_ref, alpha_data); - mask.filter(*alpha_filter); - mask.width(*width as i32); - mask.height(*height as i32); - mask.color_space().device_gray(); - mask.bits_per_component(8); - } else { - image.finish(); - } - - if let (Some(icc), Some(icc_ref)) = (icc, icc_ref) { - let mut stream = ctx.pdf.icc_profile(icc_ref, icc); - stream.filter(Filter::FlateDecode); - if *has_color { - stream.n(3); - stream.alternate().srgb(); - } else { - stream.n(1); - stream.alternate().d65_gray(); - } - } - } - EncodedImage::Svg(chunk) => { - let mut map = HashMap::new(); - chunk.renumber_into(&mut ctx.pdf, |old| { - *map.entry(old).or_insert_with(|| ctx.alloc.bump()) - }); - ctx.image_refs.push(map[&Ref::new(1)]); - } - } - } + (deferred, color_space) } /// Encode an image with a suitable filter and return the data, filter and diff --git a/crates/typst-pdf/src/lib.rs b/crates/typst-pdf/src/lib.rs index 61906b1ff..9af830bd3 100644 --- a/crates/typst-pdf/src/lib.rs +++ b/crates/typst-pdf/src/lib.rs @@ -1,40 +1,45 @@ //! Exporting of Typst documents into PDFs. 
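(Reviewer aside, not part of the patch: the rewritten lib.rs below drives export through `PdfBuilder::phase`/`run`, where each phase reads the previous, read-only state and its output is folded into the next state. A minimal, self-contained sketch of that typestate-style chaining with toy states, independent of the real `WithDocument`/`WithResources` types:)

```rust
use std::collections::HashMap;

/// Minimal sketch of the typestate-style builder: each phase consumes the
/// previous state together with the phase's output.
struct Builder<S> {
    state: S,
}

impl<S> Builder<S> {
    fn phase<O, NS: From<(S, O)>>(self, run: impl FnOnce(&S) -> O) -> Builder<NS> {
        let output = run(&self.state);
        Builder { state: NS::from((self.state, output)) }
    }
}

// Two toy states standing in for the real phase states.
struct WithText {
    text: String,
}

struct WithCounts {
    text: String,
    counts: HashMap<char, usize>,
}

impl From<(WithText, HashMap<char, usize>)> for WithCounts {
    fn from((prev, counts): (WithText, HashMap<char, usize>)) -> Self {
        Self { text: prev.text, counts }
    }
}

fn main() {
    let done: Builder<WithCounts> = Builder { state: WithText { text: "typst".into() } }
        .phase(|s: &WithText| {
            let mut counts: HashMap<char, usize> = HashMap::new();
            for c in s.text.chars() {
                *counts.entry(c).or_insert(0) += 1;
            }
            counts
        });
    // Later phases can only read `text` and `counts`; earlier data cannot be mutated.
    assert_eq!(done.state.counts[&'t'], 2);
    assert_eq!(done.state.text, "typst");
}
```

The type parameter plays the role that `S` plays in `PdfBuilder<S>`: a phase that needs data from an earlier phase simply cannot be written until a `From` conversion for that state exists.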
+mod catalog; mod color; +mod color_font; +mod content; mod extg; mod font; mod gradient; mod image; +mod named_destination; mod outline; mod page; mod pattern; +mod resources; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::HashMap; use std::hash::Hash; -use std::sync::Arc; +use std::ops::{Deref, DerefMut}; use base64::Engine; -use ecow::{eco_format, EcoString}; -use indexmap::IndexMap; -use pdf_writer::types::Direction; -use pdf_writer::writers::Destination; -use pdf_writer::{Finish, Name, Pdf, Rect, Ref, Str, TextStr}; -use typst::foundations::{Datetime, Label, NativeElement, Smart}; -use typst::introspection::Location; -use typst::layout::{Abs, Dir, Em, Frame, PageRanges, Transform}; -use typst::model::{Document, HeadingElem}; -use typst::text::color::frame_for_glyph; -use typst::text::{Font, Lang}; +use pdf_writer::{Chunk, Pdf, Ref}; +use typst::foundations::{Datetime, Smart}; +use typst::layout::{Abs, Em, PageRanges, Transform}; +use typst::model::Document; +use typst::text::Font; use typst::utils::Deferred; use typst::visualize::Image; -use xmp_writer::{DateTime, LangId, RenditionClass, Timezone, XmpWriter}; -use crate::color::ColorSpaces; -use crate::extg::ExtGState; -use crate::gradient::PdfGradient; -use crate::image::EncodedImage; -use crate::page::EncodedPage; -use crate::pattern::PdfPattern; +use crate::catalog::write_catalog; +use crate::color::{alloc_color_functions_refs, ColorFunctionRefs}; +use crate::color_font::{write_color_fonts, ColorFontSlice}; +use crate::extg::{write_graphic_states, ExtGState}; +use crate::font::write_fonts; +use crate::gradient::{write_gradients, PdfGradient}; +use crate::image::write_images; +use crate::named_destination::{write_named_destinations, NamedDestinations}; +use crate::page::{alloc_page_refs, traverse_pages, write_page_tree, EncodedPage}; +use crate::pattern::{write_patterns, PdfPattern}; +use crate::resources::{ + alloc_resources_refs, write_resource_dictionaries, Resources, ResourcesRefs, +}; /// Export a document into a PDF file. /// @@ -65,311 +70,389 @@ pub fn pdf( timestamp: Option, page_ranges: Option, ) -> Vec { - let mut ctx = PdfContext::new(document, page_ranges); - page::construct_pages(&mut ctx, &document.pages); - font::write_fonts(&mut ctx); - image::write_images(&mut ctx); - gradient::write_gradients(&mut ctx); - extg::write_external_graphics_states(&mut ctx); - pattern::write_patterns(&mut ctx); - write_named_destinations(&mut ctx); - page::write_page_tree(&mut ctx); - page::write_global_resources(&mut ctx); - write_catalog(&mut ctx, ident, timestamp); - ctx.pdf.finish() + PdfBuilder::new(document, page_ranges) + .phase(|builder| builder.run(traverse_pages)) + .phase(|builder| GlobalRefs { + color_functions: builder.run(alloc_color_functions_refs), + pages: builder.run(alloc_page_refs), + resources: builder.run(alloc_resources_refs), + }) + .phase(|builder| References { + named_destinations: builder.run(write_named_destinations), + fonts: builder.run(write_fonts), + color_fonts: builder.run(write_color_fonts), + images: builder.run(write_images), + gradients: builder.run(write_gradients), + patterns: builder.run(write_patterns), + ext_gs: builder.run(write_graphic_states), + }) + .phase(|builder| builder.run(write_page_tree)) + .phase(|builder| builder.run(write_resource_dictionaries)) + .export_with(ident, timestamp, write_catalog) } -/// Context for exporting a whole PDF document. -struct PdfContext<'a> { - /// The document that we're currently exporting. 
- document: &'a Document, - /// The writer we are writing the PDF into. +/// A struct to build a PDF following a fixed succession of phases. +/// +/// This type uses generics to represent its current state. `S` (for "state") is +/// all data that was produced by the previous phases, that is now read-only. +/// +/// Phase after phase, this state will be transformed. Each phase corresponds to +/// a call to the [eponymous function](`PdfBuilder::phase`) and produces a new +/// part of the state, that will be aggregated with all other information, for +/// consumption during the next phase. +/// +/// In other words: this struct follows the **typestate pattern**. This prevents +/// you from using data that is not yet available, at the type level. +/// +/// Each phase consists of processes, that can read the state of the previous +/// phases, and construct a part of the new state. +/// +/// A final step, that has direct access to the global reference allocator and +/// PDF document, can be run with [`PdfBuilder::export_with`]. +struct PdfBuilder { + /// The context that has been accumulated so far. + state: S, + /// A global bump allocator. + alloc: Ref, + /// The PDF document that is being written. pdf: Pdf, - /// Content of exported pages. - pages: Vec>, +} + +/// The initial state: we are exploring the document, collecting all resources +/// that will be necessary later. The content of the pages is also built during +/// this phase. +struct WithDocument<'a> { + /// The Typst document that is exported. + document: &'a Document, /// Page ranges to export. /// When `None`, all pages are exported. exported_pages: Option, - /// For each font a mapping from used glyphs to their text representation. - /// May contain multiple chars in case of ligatures or similar things. The - /// same glyph can have a different text representation within one document, - /// then we just save the first one. The resulting strings are used for the - /// PDF's /ToUnicode map for glyphs that don't have an entry in the font's - /// cmap. This is important for copy-paste and searching. - glyph_sets: HashMap>, - /// The number of glyphs for all referenced languages in the document. - /// We keep track of this to determine the main document language. - /// BTreeMap is used to write sorted list of languages to metadata. - languages: BTreeMap, +} - /// Allocator for indirect reference IDs. - alloc: Ref, - /// The ID of the page tree. - page_tree_ref: Ref, - /// The ID of the globally shared Resources dictionary. - global_resources_ref: Ref, - /// The ID of the resource dictionary shared by Type3 fonts. +/// At this point, resources were listed, but they don't have any reference +/// associated with them. +/// +/// This phase allocates some global references. +struct WithResources<'a> { + document: &'a Document, + exported_pages: Option, + /// The content of the pages encoded as PDF content streams. /// - /// Type3 fonts cannot use the global resources, as it would create some - /// kind of infinite recursion (they are themselves present in that - /// dictionary), which Acrobat doesn't appreciate (it fails to parse the - /// font) even if the specification seems to allow it. - type3_font_resources_ref: Ref, - /// The IDs of written fonts. - font_refs: Vec, - /// The IDs of written images. - image_refs: Vec, - /// The IDs of written gradients. - gradient_refs: Vec, - /// The IDs of written patterns. - pattern_refs: Vec, - /// The IDs of written external graphics states. - ext_gs_refs: Vec, - /// Handles color space writing. 
- colors: ColorSpaces, - - /// Deduplicates fonts used across the document. - font_map: Remapper, - /// Deduplicates images used across the document. - image_map: Remapper, - /// Handles to deferred image conversions. - image_deferred_map: HashMap>, - /// Deduplicates gradients used across the document. - gradient_map: Remapper, - /// Deduplicates patterns used across the document. - pattern_map: Remapper, - /// Deduplicates external graphics states used across the document. - extg_map: Remapper, - /// Deduplicates color glyphs. - color_font_map: ColorFontMap, - - /// A sorted list of all named destinations. - dests: Vec<(Label, Ref)>, - /// Maps from locations to named destinations that point to them. - loc_to_dest: HashMap, + /// The pages are at the index corresponding to their page number, but they + /// may be `None` if they are not in the range specified by + /// `exported_pages`. + pages: Vec>, + /// The PDF resources that are used in the content of the pages. + resources: Resources<()>, } -impl<'a> PdfContext<'a> { - fn new(document: &'a Document, page_ranges: Option) -> Self { - let mut alloc = Ref::new(1); - let page_tree_ref = alloc.bump(); - let global_resources_ref = alloc.bump(); - let type3_font_resources_ref = alloc.bump(); +/// Global references. +struct GlobalRefs { + /// References for color conversion functions. + color_functions: ColorFunctionRefs, + /// Reference for pages. + /// + /// Items of this vector are `None` if the corresponding page is not + /// exported. + pages: Vec>, + /// References for the resource dictionaries. + resources: ResourcesRefs, +} + +impl<'a> From<(WithDocument<'a>, (Vec>, Resources<()>))> + for WithResources<'a> +{ + fn from( + (previous, (pages, resources)): ( + WithDocument<'a>, + (Vec>, Resources<()>), + ), + ) -> Self { Self { - document, - pdf: Pdf::new(), - pages: vec![], - exported_pages: page_ranges, - glyph_sets: HashMap::new(), - languages: BTreeMap::new(), - alloc, + document: previous.document, + exported_pages: previous.exported_pages, + pages, + resources, + } + } +} + +/// At this point, the resources have been collected, and global references have +/// been allocated. +/// +/// We are now writing objects corresponding to resources, and giving them references, +/// that will be collected in [`References`]. +struct WithGlobalRefs<'a> { + document: &'a Document, + exported_pages: Option, + pages: Vec>, + /// Resources are the same as in previous phases, but each dictionary now has a reference. + resources: Resources, + /// Global references that were just allocated. + globals: GlobalRefs, +} + +impl<'a> From<(WithResources<'a>, GlobalRefs)> for WithGlobalRefs<'a> { + fn from((previous, globals): (WithResources<'a>, GlobalRefs)) -> Self { + Self { + document: previous.document, + exported_pages: previous.exported_pages, + pages: previous.pages, + resources: previous.resources.with_refs(&globals.resources), + globals, + } + } +} + +/// The references that have been assigned to each object. +struct References { + /// List of named destinations, each with an ID. + named_destinations: NamedDestinations, + /// The IDs of written fonts. + fonts: HashMap, + /// The IDs of written color fonts. + color_fonts: HashMap, + /// The IDs of written images. + images: HashMap, + /// The IDs of written gradients. + gradients: HashMap, + /// The IDs of written patterns. + patterns: HashMap, + /// The IDs of written external graphics states. + ext_gs: HashMap, +} + +/// At this point, the references have been assigned to all resources. 
The page +/// tree is going to be written, and given a reference. It is also at this point that +/// the page contents is actually written. +struct WithRefs<'a> { + globals: GlobalRefs, + document: &'a Document, + pages: Vec>, + exported_pages: Option, + resources: Resources, + /// References that were allocated for resources. + references: References, +} + +impl<'a> From<(WithGlobalRefs<'a>, References)> for WithRefs<'a> { + fn from((previous, references): (WithGlobalRefs<'a>, References)) -> Self { + Self { + globals: previous.globals, + exported_pages: previous.exported_pages, + document: previous.document, + pages: previous.pages, + resources: previous.resources, + references, + } + } +} + +/// In this phase, we write resource dictionaries. +/// +/// Each sub-resource gets its own isolated resource dictionary. +struct WithEverything<'a> { + globals: GlobalRefs, + document: &'a Document, + pages: Vec>, + exported_pages: Option, + resources: Resources, + references: References, + /// Reference that was allocated for the page tree. + page_tree_ref: Ref, +} + +impl<'a> From<(WithEverything<'a>, ())> for WithEverything<'a> { + fn from((this, _): (WithEverything<'a>, ())) -> Self { + this + } +} + +impl<'a> From<(WithRefs<'a>, Ref)> for WithEverything<'a> { + fn from((previous, page_tree_ref): (WithRefs<'a>, Ref)) -> Self { + Self { + exported_pages: previous.exported_pages, + globals: previous.globals, + document: previous.document, + resources: previous.resources, + references: previous.references, + pages: previous.pages, page_tree_ref, - global_resources_ref, - type3_font_resources_ref, - font_refs: vec![], - image_refs: vec![], - gradient_refs: vec![], - pattern_refs: vec![], - ext_gs_refs: vec![], - colors: ColorSpaces::default(), - font_map: Remapper::new(), - image_map: Remapper::new(), - image_deferred_map: HashMap::default(), - gradient_map: Remapper::new(), - pattern_map: Remapper::new(), - extg_map: Remapper::new(), - color_font_map: ColorFontMap::new(), - dests: vec![], - loc_to_dest: HashMap::new(), } } } -/// Write the document catalog. -fn write_catalog(ctx: &mut PdfContext, ident: Smart<&str>, timestamp: Option) { - let lang = ctx.languages.iter().max_by_key(|(_, &count)| count).map(|(&l, _)| l); - - let dir = if lang.map(Lang::dir) == Some(Dir::RTL) { - Direction::R2L - } else { - Direction::L2R - }; - - // Write the outline tree. - let outline_root_id = outline::write_outline(ctx); - - // Write the page labels. - let page_labels = page::write_page_labels(ctx); - - // Write the document information. - let mut info = ctx.pdf.document_info(ctx.alloc.bump()); - let mut xmp = XmpWriter::new(); - if let Some(title) = &ctx.document.title { - info.title(TextStr(title)); - xmp.title([(None, title.as_str())]); - } - - let authors = &ctx.document.author; - if !authors.is_empty() { - // Turns out that if the authors are given in both the document - // information dictionary and the XMP metadata, Acrobat takes a little - // bit of both: The first author from the document information - // dictionary and the remaining authors from the XMP metadata. - // - // To fix this for Acrobat, we could omit the remaining authors or all - // metadata from the document information catalog (it is optional) and - // only write XMP. However, not all other tools (including Apple - // Preview) read the XMP data. This means we do want to include all - // authors in the document information dictionary. - // - // Thus, the only alternative is to fold all authors into a single - // `` in the XMP metadata. 
This is, in fact, exactly what the - // PDF/A spec Part 1 section 6.7.3 has to say about the matter. It's a - // bit weird to not use the array (and it makes Acrobat show the author - // list in quotes), but there's not much we can do about that. - let joined = authors.join(", "); - info.author(TextStr(&joined)); - xmp.creator([joined.as_str()]); - } - - let creator = eco_format!("Typst {}", env!("CARGO_PKG_VERSION")); - info.creator(TextStr(&creator)); - xmp.creator_tool(&creator); - - let keywords = &ctx.document.keywords; - if !keywords.is_empty() { - let joined = keywords.join(", "); - info.keywords(TextStr(&joined)); - xmp.pdf_keywords(&joined); - } - - if let Some(date) = ctx.document.date.unwrap_or(timestamp) { - let tz = ctx.document.date.is_auto(); - if let Some(pdf_date) = pdf_date(date, tz) { - info.creation_date(pdf_date); - info.modified_date(pdf_date); - } - if let Some(xmp_date) = xmp_date(date, tz) { - xmp.create_date(xmp_date); - xmp.modify_date(xmp_date); +impl<'a> PdfBuilder> { + /// Start building a PDF for a Typst document. + fn new(document: &'a Document, exported_pages: Option) -> Self { + Self { + alloc: Ref::new(1), + pdf: Pdf::new(), + state: WithDocument { document, exported_pages }, } } - - info.finish(); - // Only count exported pages. - xmp.num_pages(ctx.pages.iter().filter(|page| page.is_some()).count() as u32); - xmp.format("application/pdf"); - xmp.language(ctx.languages.keys().map(|lang| LangId(lang.as_str()))); - - // A unique ID for this instance of the document. Changes if anything - // changes in the frames. - let instance_id = hash_base64(&ctx.pdf.as_bytes()); - - // Determine the document's ID. It should be as stable as possible. - const PDF_VERSION: &str = "PDF-1.7"; - let doc_id = if let Smart::Custom(ident) = ident { - // We were provided with a stable ID. Yay! - hash_base64(&(PDF_VERSION, ident)) - } else if ctx.document.title.is_some() && !ctx.document.author.is_empty() { - // If not provided from the outside, but title and author were given, we - // compute a hash of them, which should be reasonably stable and unique. - hash_base64(&(PDF_VERSION, &ctx.document.title, &ctx.document.author)) - } else { - // The user provided no usable metadata which we can use as an `/ID`. - instance_id.clone() - }; - - // Write IDs. - xmp.document_id(&doc_id); - xmp.instance_id(&instance_id); - ctx.pdf - .set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes())); - - xmp.rendition_class(RenditionClass::Proof); - xmp.pdf_version("1.7"); - - let xmp_buf = xmp.finish(None); - let meta_ref = ctx.alloc.bump(); - ctx.pdf - .stream(meta_ref, xmp_buf.as_bytes()) - .pair(Name(b"Type"), Name(b"Metadata")) - .pair(Name(b"Subtype"), Name(b"XML")); - - // Write the document catalog. - let mut catalog = ctx.pdf.catalog(ctx.alloc.bump()); - catalog.pages(ctx.page_tree_ref); - catalog.viewer_preferences().direction(dir); - catalog.metadata(meta_ref); - - // Write the named destination tree. - let mut name_dict = catalog.names(); - let mut dests_name_tree = name_dict.destinations(); - let mut names = dests_name_tree.names(); - for &(name, dest_ref, ..) in &ctx.dests { - names.insert(Str(name.as_str().as_bytes()), dest_ref); - } - names.finish(); - dests_name_tree.finish(); - name_dict.finish(); - - // Insert the page labels. 
- if !page_labels.is_empty() { - let mut num_tree = catalog.page_labels(); - let mut entries = num_tree.nums(); - for (n, r) in &page_labels { - entries.insert(n.get() as i32 - 1, *r); - } - } - - if let Some(outline_root_id) = outline_root_id { - catalog.outlines(outline_root_id); - } - - if let Some(lang) = lang { - catalog.lang(TextStr(lang.as_str())); - } - - catalog.finish(); } -/// Fills in the map and vector for named destinations and writes the indirect -/// destination objects. -fn write_named_destinations(ctx: &mut PdfContext) { - let mut seen = HashSet::new(); - - // Find all headings that have a label and are the first among other - // headings with the same label. - let mut matches: Vec<_> = ctx - .document - .introspector - .query(&HeadingElem::elem().select()) - .iter() - .filter_map(|elem| elem.location().zip(elem.label())) - .filter(|&(_, label)| seen.insert(label)) - .collect(); - - // Named destinations must be sorted by key. - matches.sort_by_key(|&(_, label)| label); - - for (loc, label) in matches { - let pos = ctx.document.introspector.position(loc); - let index = pos.page.get() - 1; - let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero()); - - // If the heading's page exists and is exported, include it. - if let Some(Some(page)) = ctx.pages.get(index) { - let dest_ref = ctx.alloc.bump(); - let x = pos.point.x.to_f32(); - let y = (page.size.y - y).to_f32(); - ctx.dests.push((label, dest_ref)); - ctx.loc_to_dest.insert(loc, label); - ctx.pdf - .indirect(dest_ref) - .start::() - .page(page.id) - .xyz(x, y, None); +impl PdfBuilder { + /// Start a new phase, and save its output in the global state. + fn phase(mut self, builder: B) -> PdfBuilder + where + // New state + NS: From<(S, O)>, + // Builder + B: Fn(&mut Self) -> O, + { + let output = builder(&mut self); + PdfBuilder { + state: NS::from((self.state, output)), + alloc: self.alloc, + pdf: self.pdf, } } + + /// Runs a step with the current state, merge its output in the PDF file, + /// and renumber any references it returned. + fn run(&mut self, process: P) -> O + where + // Process + P: Fn(&S) -> (PdfChunk, O), + // Output + O: Renumber, + { + let (chunk, mut output) = process(&self.state); + // Allocate a final reference for each temporary one + let allocated = chunk.alloc.get() - TEMPORARY_REFS_START; + let offset = TEMPORARY_REFS_START - self.alloc.get(); + + // Merge the chunk into the PDF, using the new references + chunk.renumber_into(&mut self.pdf, |mut r| { + r.renumber(offset); + + r + }); + + // Also update the references in the output + output.renumber(offset); + + self.alloc = Ref::new(self.alloc.get() + allocated); + + output + } + + /// Finalize the PDF export and returns the buffer representing the + /// document. + fn export_with
( + mut self, + ident: Smart<&str>, + timestamp: Option, + process: P, + ) -> Vec + where + P: Fn(S, Smart<&str>, Option, &mut Pdf, &mut Ref), + { + process(self.state, ident, timestamp, &mut self.pdf, &mut self.alloc); + self.pdf.finish() + } +} + +/// A reference or collection of references that can be re-numbered, +/// to become valid in a global scope. +trait Renumber { + /// Renumber this value by shifting any references it contains by `offset`. + fn renumber(&mut self, offset: i32); +} + +impl Renumber for () { + fn renumber(&mut self, _offset: i32) {} +} + +impl Renumber for Ref { + fn renumber(&mut self, offset: i32) { + if self.get() >= TEMPORARY_REFS_START { + *self = Ref::new(self.get() - offset); + } + } +} + +impl Renumber for Vec { + fn renumber(&mut self, offset: i32) { + for item in self { + item.renumber(offset); + } + } +} + +impl Renumber for HashMap { + fn renumber(&mut self, offset: i32) { + for v in self.values_mut() { + v.renumber(offset); + } + } +} + +impl Renumber for Option { + fn renumber(&mut self, offset: i32) { + if let Some(r) = self { + r.renumber(offset) + } + } +} + +impl Renumber for (T, R) { + fn renumber(&mut self, offset: i32) { + self.1.renumber(offset) + } +} + +/// A portion of a PDF file. +struct PdfChunk { + /// The actual chunk. + chunk: Chunk, + /// A local allocator. + alloc: Ref, +} + +/// Any reference below that value was already allocated before and +/// should not be rewritten. Anything above was allocated in the current +/// chunk, and should be remapped. +/// +/// This is a constant (large enough to avoid collisions) and not +/// dependant on self.alloc to allow for better memoization of steps, if +/// needed in the future. +const TEMPORARY_REFS_START: i32 = 1_000_000_000; + +/// A part of a PDF document. +impl PdfChunk { + /// Start writing a new part of the document. + fn new() -> Self { + PdfChunk { + chunk: Chunk::new(), + alloc: Ref::new(TEMPORARY_REFS_START), + } + } + + /// Allocate a reference that is valid in the context of this chunk. + /// + /// References allocated with this function should be [renumbered](`Renumber::renumber`) + /// before being used in other chunks. This is done automatically if these + /// references are stored in the global `PdfBuilder` state. + fn alloc(&mut self) -> Ref { + self.alloc.bump() + } +} + +impl Deref for PdfChunk { + type Target = Chunk; + + fn deref(&self) -> &Self::Target { + &self.chunk + } +} + +impl DerefMut for PdfChunk { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.chunk + } } /// Compress data with the DEFLATE algorithm. @@ -378,12 +461,6 @@ fn deflate(data: &[u8]) -> Vec { miniz_oxide::deflate::compress_to_vec_zlib(data, COMPRESSION_LEVEL) } -/// Memoized version of [`deflate`] specialized for a page's content stream. -#[comemo::memoize] -fn deflate_memoized(content: &[u8]) -> Arc> { - Arc::new(deflate(content)) -} - /// Memoized and deferred version of [`deflate`] specialized for a page's content /// stream. #[comemo::memoize] @@ -397,182 +474,6 @@ fn hash_base64(value: &T) -> String { .encode(typst::utils::hash128(value).to_be_bytes()) } -/// Converts a datetime to a pdf-writer date. -fn pdf_date(datetime: Datetime, tz: bool) -> Option { - let year = datetime.year().filter(|&y| y >= 0)? 
as u16; - - let mut pdf_date = pdf_writer::Date::new(year); - - if let Some(month) = datetime.month() { - pdf_date = pdf_date.month(month); - } - - if let Some(day) = datetime.day() { - pdf_date = pdf_date.day(day); - } - - if let Some(h) = datetime.hour() { - pdf_date = pdf_date.hour(h); - } - - if let Some(m) = datetime.minute() { - pdf_date = pdf_date.minute(m); - } - - if let Some(s) = datetime.second() { - pdf_date = pdf_date.second(s); - } - - if tz { - pdf_date = pdf_date.utc_offset_hour(0).utc_offset_minute(0); - } - - Some(pdf_date) -} - -/// Converts a datetime to an xmp-writer datetime. -fn xmp_date(datetime: Datetime, tz: bool) -> Option { - let year = datetime.year().filter(|&y| y >= 0)? as u16; - Some(DateTime { - year, - month: datetime.month(), - day: datetime.day(), - hour: datetime.hour(), - minute: datetime.minute(), - second: datetime.second(), - timezone: if tz { Some(Timezone::Utc) } else { None }, - }) -} - -/// Assigns new, consecutive PDF-internal indices to items. -struct Remapper { - /// Forwards from the items to the pdf indices. - to_pdf: HashMap, - /// Backwards from the pdf indices to the items. - to_items: Vec, -} - -impl Remapper -where - T: Eq + Hash + Clone, -{ - fn new() -> Self { - Self { to_pdf: HashMap::new(), to_items: vec![] } - } - - fn insert(&mut self, item: T) -> usize { - let to_layout = &mut self.to_items; - *self.to_pdf.entry(item.clone()).or_insert_with(|| { - let pdf_index = to_layout.len(); - to_layout.push(item); - pdf_index - }) - } - - fn pdf_indices<'a>( - &'a self, - refs: &'a [Ref], - ) -> impl Iterator + 'a { - refs.iter().copied().zip(0..self.to_pdf.len()) - } - - fn items(&self) -> impl Iterator + '_ { - self.to_items.iter() - } -} - -/// A mapping between `Font`s and all the corresponding `ColorFont`s. -/// -/// This mapping is one-to-many because there can only be 256 glyphs in a Type 3 -/// font, and fonts generally have more color glyphs than that. -struct ColorFontMap { - /// The mapping itself - map: IndexMap, - /// A list of all PDF indirect references to Type3 font objects. - all_refs: Vec, -} - -/// A collection of Type3 font, belonging to the same TTF font. -struct ColorFont { - /// A list of references to Type3 font objects for this font family. - refs: Vec, - /// The list of all color glyphs in this family. - /// - /// The index in this vector modulo 256 corresponds to the index in one of - /// the Type3 fonts in `refs` (the `n`-th in the vector, where `n` is the - /// quotient of the index divided by 256). - glyphs: Vec, - /// The global bounding box of the font. - bbox: Rect, - /// A mapping between glyph IDs and character indices in the `glyphs` - /// vector. - glyph_indices: HashMap, -} - -/// A single color glyph. -struct ColorGlyph { - /// The ID of the glyph. - gid: u16, - /// A frame that contains the glyph. - frame: Frame, -} - -impl ColorFontMap { - /// Creates a new empty mapping - fn new() -> Self { - Self { map: IndexMap::new(), all_refs: Vec::new() } - } - - /// Takes the contents of the mapping. - /// - /// After calling this function, the mapping will be empty. - fn take_map(&mut self) -> IndexMap { - std::mem::take(&mut self.map) - } - - /// Obtains the reference to a Type3 font, and an index in this font - /// that can be used to draw a color glyph. - /// - /// The glyphs will be de-duplicated if needed. 
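(Reviewer aside, not part of the patch: a quick check on the renumbering arithmetic introduced above in `PdfBuilder::run`, `Renumber`, and `TEMPORARY_REFS_START`. Chunk-local references start at 1_000_000_000 and are shifted down onto the next free global IDs when the chunk is merged. A standalone sketch of just that offset computation, as a toy function rather than the trait itself:)

```rust
/// Anything allocated at or after this value is chunk-local and gets shifted,
/// mirroring the constant in lib.rs.
const TEMPORARY_REFS_START: i32 = 1_000_000_000;

/// Sketch of what `Renumber::renumber` does for a single reference, given the
/// next free ID of the global allocator at merge time.
fn renumber(reference: i32, global_next_free: i32) -> i32 {
    let offset = TEMPORARY_REFS_START - global_next_free;
    if reference >= TEMPORARY_REFS_START {
        reference - offset
    } else {
        reference // already a final, globally valid reference
    }
}

fn main() {
    // The global allocator has handed out refs 1..=41, so 42 is next free.
    // The chunk allocated two local refs: 1_000_000_000 and 1_000_000_001.
    assert_eq!(renumber(1_000_000_000, 42), 42);
    assert_eq!(renumber(1_000_000_001, 42), 43);
    // A reference allocated in an earlier phase is left untouched.
    assert_eq!(renumber(7, 42), 7);
}
```

After the merge, `run` advances the global allocator by the number of locally allocated references, so the next chunk in this example would start handing out final IDs at 44.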
- fn get(&mut self, alloc: &mut Ref, font: &Font, gid: u16) -> (Ref, u8) { - let color_font = self.map.entry(font.clone()).or_insert_with(|| { - let global_bbox = font.ttf().global_bounding_box(); - let bbox = Rect::new( - font.to_em(global_bbox.x_min).to_font_units(), - font.to_em(global_bbox.y_min).to_font_units(), - font.to_em(global_bbox.x_max).to_font_units(), - font.to_em(global_bbox.y_max).to_font_units(), - ); - ColorFont { - bbox, - refs: Vec::new(), - glyphs: Vec::new(), - glyph_indices: HashMap::new(), - } - }); - - if let Some(index_of_glyph) = color_font.glyph_indices.get(&gid) { - // If we already know this glyph, return it. - (color_font.refs[index_of_glyph / 256], *index_of_glyph as u8) - } else { - // Otherwise, allocate a new ColorGlyph in the font, and a new Type3 font - // if needed - let index = color_font.glyphs.len(); - if index % 256 == 0 { - let new_ref = alloc.bump(); - self.all_refs.push(new_ref); - color_font.refs.push(new_ref); - } - - let instructions = frame_for_glyph(font, gid); - color_font.glyphs.push(ColorGlyph { gid, frame: instructions }); - color_font.glyph_indices.insert(gid, index); - - (color_font.refs[index / 256], index as u8) - } - } -} - /// Additional methods for [`Abs`]. trait AbsExt { /// Convert an to a number of points. diff --git a/crates/typst-pdf/src/named_destination.rs b/crates/typst-pdf/src/named_destination.rs new file mode 100644 index 000000000..f9729ca1c --- /dev/null +++ b/crates/typst-pdf/src/named_destination.rs @@ -0,0 +1,78 @@ +use std::collections::{HashMap, HashSet}; + +use pdf_writer::{writers::Destination, Ref}; +use typst::foundations::{Label, NativeElement}; +use typst::introspection::Location; +use typst::layout::Abs; +use typst::model::HeadingElem; + +use crate::{AbsExt, PdfChunk, Renumber, WithGlobalRefs}; + +/// A list of destinations in the PDF document (a specific point on a specific +/// page), that have a name associated with them. +/// +/// Typst creates a named destination for each heading in the document, that +/// will then be written in the document catalog. PDF readers can then display +/// them to show a clickable outline of the document. +#[derive(Default)] +pub struct NamedDestinations { + /// A map between elements and their associated labels + pub loc_to_dest: HashMap, + /// A sorted list of all named destinations. + pub dests: Vec<(Label, Ref)>, +} + +impl Renumber for NamedDestinations { + fn renumber(&mut self, offset: i32) { + for (_, reference) in &mut self.dests { + reference.renumber(offset); + } + } +} + +/// Fills in the map and vector for named destinations and writes the indirect +/// destination objects. +pub fn write_named_destinations( + context: &WithGlobalRefs, +) -> (PdfChunk, NamedDestinations) { + let mut chunk = PdfChunk::new(); + let mut out = NamedDestinations::default(); + let mut seen = HashSet::new(); + + // Find all headings that have a label and are the first among other + // headings with the same label. + let mut matches: Vec<_> = context + .document + .introspector + .query(&HeadingElem::elem().select()) + .iter() + .filter_map(|elem| elem.location().zip(elem.label())) + .filter(|&(_, label)| seen.insert(label)) + .collect(); + + // Named destinations must be sorted by key. 
+ matches.sort_by_key(|&(_, label)| label); + + for (loc, label) in matches { + let pos = context.document.introspector.position(loc); + let index = pos.page.get() - 1; + let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero()); + + if let Some((Some(page), Some(page_ref))) = + context.pages.get(index).zip(context.globals.pages.get(index)) + { + let dest_ref = chunk.alloc(); + let x = pos.point.x.to_f32(); + let y = (page.content.size.y - y).to_f32(); + out.dests.push((label, dest_ref)); + out.loc_to_dest.insert(loc, label); + chunk + .indirect(dest_ref) + .start::() + .page(*page_ref) + .xyz(x, y, None); + } + } + + (chunk, out) +} diff --git a/crates/typst-pdf/src/outline.rs b/crates/typst-pdf/src/outline.rs index e2195bb76..94d55b54b 100644 --- a/crates/typst-pdf/src/outline.rs +++ b/crates/typst-pdf/src/outline.rs @@ -1,14 +1,19 @@ use std::num::NonZeroUsize; -use pdf_writer::{Finish, Ref, TextStr}; +use pdf_writer::{Finish, Pdf, Ref, TextStr}; + use typst::foundations::{NativeElement, Packed, StyleChain}; use typst::layout::Abs; use typst::model::HeadingElem; -use crate::{AbsExt, PdfContext}; +use crate::{AbsExt, WithEverything}; /// Construct the outline for the document. -pub(crate) fn write_outline(ctx: &mut PdfContext) -> Option { +pub(crate) fn write_outline( + chunk: &mut Pdf, + alloc: &mut Ref, + ctx: &WithEverything, +) -> Option { let mut tree: Vec = vec![]; // Stores the level of the topmost skipped ancestor of the next bookmarked @@ -95,20 +100,28 @@ pub(crate) fn write_outline(ctx: &mut PdfContext) -> Option { return None; } - let root_id = ctx.alloc.bump(); - let start_ref = ctx.alloc; + let root_id = alloc.bump(); + let start_ref = *alloc; let len = tree.len(); let mut prev_ref = None; for (i, node) in tree.iter().enumerate() { - prev_ref = Some(write_outline_item(ctx, node, root_id, prev_ref, i + 1 == len)); + prev_ref = Some(write_outline_item( + ctx, + chunk, + alloc, + node, + root_id, + prev_ref, + i + 1 == len, + )); } - ctx.pdf + chunk .outline(root_id) .first(start_ref) .last(Ref::new( - ctx.alloc.get() - tree.last().map(|child| child.len() as i32).unwrap_or(1), + alloc.get() - tree.last().map(|child| child.len() as i32).unwrap_or(1), )) .count(tree.len() as i32); @@ -116,7 +129,7 @@ pub(crate) fn write_outline(ctx: &mut PdfContext) -> Option { } /// A heading in the outline panel. -#[derive(Debug, Clone)] +#[derive(Debug)] struct HeadingNode<'a> { element: &'a Packed, level: NonZeroUsize, @@ -144,16 +157,18 @@ impl<'a> HeadingNode<'a> { /// Write an outline item and all its children. fn write_outline_item( - ctx: &mut PdfContext, + ctx: &WithEverything, + chunk: &mut Pdf, + alloc: &mut Ref, node: &HeadingNode, parent_ref: Ref, prev_ref: Option, is_last: bool, ) -> Ref { - let id = ctx.alloc.bump(); + let id = alloc.bump(); let next_ref = Ref::new(id.get() + node.len() as i32); - let mut outline = ctx.pdf.outline_item(id); + let mut outline = chunk.outline_item(id); outline.parent(parent_ref); if !is_last { @@ -178,11 +193,13 @@ fn write_outline_item( let index = pos.page.get() - 1; // Don't link to non-exported pages. 
- if let Some(Some(page)) = ctx.pages.get(index) { + if let Some((Some(page), Some(page_ref))) = + ctx.pages.get(index).zip(ctx.globals.pages.get(index)) + { let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero()); - outline.dest().page(page.id).xyz( + outline.dest().page(*page_ref).xyz( pos.point.x.to_f32(), - (page.size.y - y).to_f32(), + (page.content.size.y - y).to_f32(), None, ); } @@ -193,6 +210,8 @@ fn write_outline_item( for (i, child) in node.children.iter().enumerate() { prev_ref = Some(write_outline_item( ctx, + chunk, + alloc, child, id, prev_ref, diff --git a/crates/typst-pdf/src/page.rs b/crates/typst-pdf/src/page.rs index 42c87f8e0..f796d0c8f 100644 --- a/crates/typst-pdf/src/page.rs +++ b/crates/typst-pdf/src/page.rs @@ -1,43 +1,39 @@ use std::collections::HashMap; use std::num::NonZeroUsize; -use crate::color::PaintEncode; -use crate::extg::ExtGState; -use crate::image::deferred_image; -use crate::{deflate_deferred, AbsExt, EmExt, PdfContext}; -use ecow::{eco_format, EcoString}; -use pdf_writer::types::{ - ActionType, AnnotationFlags, AnnotationType, ColorSpaceOperand, LineCapStyle, - LineJoinStyle, NumberingStyle, TextRenderingMode, -}; -use pdf_writer::writers::{PageLabel, Resources}; -use pdf_writer::{Content, Filter, Finish, Name, Rect, Ref, Str, TextStr}; -use typst::layout::{ - Abs, Em, Frame, FrameItem, GroupItem, Page, Point, Ratio, Size, Transform, +use ecow::EcoString; +use pdf_writer::{ + types::{ActionType, AnnotationFlags, AnnotationType, NumberingStyle}, + Filter, Finish, Name, Rect, Ref, Str, }; +use typst::foundations::Label; +use typst::introspection::Location; +use typst::layout::{Abs, Frame}; use typst::model::{Destination, Numbering}; -use typst::text::color::is_color_glyph; -use typst::text::{Case, Font, TextItem, TextItemView}; -use typst::utils::{Deferred, Numeric, SliceExt}; -use typst::visualize::{ - FixedStroke, Geometry, Image, LineCap, LineJoin, Paint, Path, PathItem, Shape, -}; +use typst::text::Case; + +use crate::{content, AbsExt, PdfChunk, WithDocument, WithRefs, WithResources}; +use crate::{font::improve_glyph_sets, Resources}; /// Construct page objects. #[typst_macros::time(name = "construct pages")] -pub(crate) fn construct_pages(ctx: &mut PdfContext, pages: &[Page]) { +pub fn traverse_pages( + state: &WithDocument, +) -> (PdfChunk, (Vec>, Resources<()>)) { + let mut resources = Resources::default(); + let mut pages = Vec::with_capacity(state.document.pages.len()); let mut skipped_pages = 0; - for (i, page) in pages.iter().enumerate() { - if ctx + for (i, page) in state.document.pages.iter().enumerate() { + if state .exported_pages .as_ref() .is_some_and(|ranges| !ranges.includes_page_index(i)) { // Don't export this page. - ctx.pages.push(None); + pages.push(None); skipped_pages += 1; } else { - let mut encoded = construct_page(ctx, &page.frame); + let mut encoded = construct_page(&mut resources, &page.frame); encoded.label = page .numbering .as_ref() @@ -52,161 +48,84 @@ pub(crate) fn construct_pages(ctx: &mut PdfContext, pages: &[Page]) { // the corresponding real page number in the Typst document. (skipped_pages > 0).then(|| PdfPageLabel::arabic(i + 1)) }); - ctx.pages.push(Some(encoded)); + pages.push(Some(encoded)); } } + + improve_glyph_sets(&mut resources.glyph_sets); + + (PdfChunk::new(), (pages, resources)) } /// Construct a page object. 
#[typst_macros::time(name = "construct page")] -pub(crate) fn construct_page(ctx: &mut PdfContext, frame: &Frame) -> EncodedPage { - let page_ref = ctx.alloc.bump(); +fn construct_page(out: &mut Resources<()>, frame: &Frame) -> EncodedPage { + let content = content::build(out, frame, None); - let size = frame.size(); - let mut ctx = PageContext::new(ctx, size); + EncodedPage { content, label: None } +} - // Make the coordinate system start at the top-left. - ctx.bottom = size.y.to_f32(); - ctx.transform(Transform { - sx: Ratio::one(), - ky: Ratio::zero(), - kx: Ratio::zero(), - sy: Ratio::new(-1.0), - tx: Abs::zero(), - ty: size.y, - }); - - // Encode the page into the content stream. - write_frame(&mut ctx, frame); - - EncodedPage { - size, - content: deflate_deferred(ctx.content.finish()), - id: page_ref, - uses_opacities: ctx.uses_opacities, - links: ctx.links, - label: None, - resources: ctx.resources, - } +/// Allocate a reference for each exported page. +pub fn alloc_page_refs(context: &WithResources) -> (PdfChunk, Vec>) { + let mut chunk = PdfChunk::new(); + let page_refs = context + .pages + .iter() + .map(|p| p.as_ref().map(|_| chunk.alloc())) + .collect(); + (chunk, page_refs) } /// Write the page tree. -pub(crate) fn write_page_tree(ctx: &mut PdfContext) { - let mut refs = vec![]; +pub fn write_page_tree(ctx: &WithRefs) -> (PdfChunk, Ref) { + let mut chunk = PdfChunk::new(); + let page_tree_ref = chunk.alloc.bump(); + for i in 0..ctx.pages.len() { - write_page(ctx, i, &mut refs); + let content_id = chunk.alloc.bump(); + write_page( + &mut chunk, + ctx, + content_id, + page_tree_ref, + &ctx.references.named_destinations.loc_to_dest, + i, + ); } - ctx.pdf - .pages(ctx.page_tree_ref) - .count(refs.len() as i32) - .kids(refs.iter().copied()); -} + chunk + .pages(page_tree_ref) + .count(ctx.pages.len() as i32) + .kids(ctx.globals.pages.iter().filter_map(Option::as_ref).copied()); -/// Write the global resource dictionary that will be referenced by all pages. -/// -/// We add a reference to this dictionary to each page individually instead of -/// to the root node of the page tree because using the resource inheritance -/// feature breaks PDF merging with Apple Preview. 
-pub(crate) fn write_global_resources(ctx: &mut PdfContext) { - let images_ref = ctx.alloc.bump(); - let patterns_ref = ctx.alloc.bump(); - let ext_gs_states_ref = ctx.alloc.bump(); - let color_spaces_ref = ctx.alloc.bump(); - - let mut images = ctx.pdf.indirect(images_ref).dict(); - for (image_ref, im) in ctx.image_map.pdf_indices(&ctx.image_refs) { - let name = eco_format!("Im{}", im); - images.pair(Name(name.as_bytes()), image_ref); - } - images.finish(); - - let mut patterns = ctx.pdf.indirect(patterns_ref).dict(); - for (gradient_ref, gr) in ctx.gradient_map.pdf_indices(&ctx.gradient_refs) { - let name = eco_format!("Gr{}", gr); - patterns.pair(Name(name.as_bytes()), gradient_ref); - } - - for (pattern_ref, p) in ctx.pattern_map.pdf_indices(&ctx.pattern_refs) { - let name = eco_format!("P{}", p); - patterns.pair(Name(name.as_bytes()), pattern_ref); - } - patterns.finish(); - - let mut ext_gs_states = ctx.pdf.indirect(ext_gs_states_ref).dict(); - for (gs_ref, gs) in ctx.extg_map.pdf_indices(&ctx.ext_gs_refs) { - let name = eco_format!("Gs{}", gs); - ext_gs_states.pair(Name(name.as_bytes()), gs_ref); - } - ext_gs_states.finish(); - - let color_spaces = ctx.pdf.indirect(color_spaces_ref).dict(); - ctx.colors.write_color_spaces(color_spaces, &mut ctx.alloc); - - let mut resources = ctx.pdf.indirect(ctx.global_resources_ref).start::(); - resources.pair(Name(b"XObject"), images_ref); - resources.pair(Name(b"Pattern"), patterns_ref); - resources.pair(Name(b"ExtGState"), ext_gs_states_ref); - resources.pair(Name(b"ColorSpace"), color_spaces_ref); - - let mut fonts = resources.fonts(); - for (font_ref, f) in ctx.font_map.pdf_indices(&ctx.font_refs) { - let name = eco_format!("F{}", f); - fonts.pair(Name(name.as_bytes()), font_ref); - } - - for font in &ctx.color_font_map.all_refs { - let name = eco_format!("Cf{}", font.get()); - fonts.pair(Name(name.as_bytes()), font); - } - fonts.finish(); - - resources.finish(); - - // Also write the resources for Type3 fonts, that only contains images, - // color spaces and regular fonts (COLR glyphs depend on them). - if !ctx.color_font_map.all_refs.is_empty() { - let mut resources = - ctx.pdf.indirect(ctx.type3_font_resources_ref).start::(); - resources.pair(Name(b"XObject"), images_ref); - resources.pair(Name(b"Pattern"), patterns_ref); - resources.pair(Name(b"ExtGState"), ext_gs_states_ref); - resources.pair(Name(b"ColorSpace"), color_spaces_ref); - - let mut fonts = resources.fonts(); - for (font_ref, f) in ctx.font_map.pdf_indices(&ctx.font_refs) { - let name = eco_format!("F{}", f); - fonts.pair(Name(name.as_bytes()), font_ref); - } - fonts.finish(); - - resources.finish(); - } - - // Write all of the functions used by the document. - ctx.colors.write_functions(&mut ctx.pdf); + (chunk, page_tree_ref) } /// Write a page tree node. -fn write_page(ctx: &mut PdfContext, i: usize, refs: &mut Vec) { - let Some(page) = &ctx.pages[i] else { +fn write_page( + chunk: &mut PdfChunk, + ctx: &WithRefs, + content_id: Ref, + page_tree_ref: Ref, + loc_to_dest: &HashMap, + i: usize, +) { + let Some((page, page_ref)) = ctx.pages[i].as_ref().zip(ctx.globals.pages[i]) else { // Page excluded from export. 
return; }; - let content_id = ctx.alloc.bump(); - refs.push(page.id); + let global_resources_ref = ctx.resources.reference; + let mut page_writer = chunk.page(page_ref); + page_writer.parent(page_tree_ref); - let mut page_writer = ctx.pdf.page(page.id); - page_writer.parent(ctx.page_tree_ref); - - let w = page.size.x.to_f32(); - let h = page.size.y.to_f32(); + let w = page.content.size.x.to_f32(); + let h = page.content.size.y.to_f32(); page_writer.media_box(Rect::new(0.0, 0.0, w, h)); page_writer.contents(content_id); - page_writer.pair(Name(b"Resources"), ctx.global_resources_ref); + page_writer.pair(Name(b"Resources"), global_resources_ref); - if page.uses_opacities { + if page.content.uses_opacities { page_writer .group() .transparency() @@ -217,7 +136,7 @@ fn write_page(ctx: &mut PdfContext, i: usize, refs: &mut Vec) { } let mut annotations = page_writer.annotations(); - for (dest, rect) in &page.links { + for (dest, rect) in &page.content.links { let mut annotation = annotations.push(); annotation.subtype(AnnotationType::Link).rect(*rect); annotation.border(0.0, 0.0, 0.0, None).flags(AnnotationFlags::PRINT); @@ -232,7 +151,7 @@ fn write_page(ctx: &mut PdfContext, i: usize, refs: &mut Vec) { } Destination::Position(pos) => *pos, Destination::Location(loc) => { - if let Some(key) = ctx.loc_to_dest.get(loc) { + if let Some(key) = loc_to_dest.get(loc) { annotation .action() .action_type(ActionType::GoTo) @@ -249,102 +168,45 @@ fn write_page(ctx: &mut PdfContext, i: usize, refs: &mut Vec) { let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero()); // Don't add links to non-exported pages. - if let Some(Some(page)) = ctx.pages.get(index) { + if let Some((Some(page), Some(page_ref))) = + ctx.pages.get(index).zip(ctx.globals.pages.get(index)) + { annotation .action() .action_type(ActionType::GoTo) .destination() - .page(page.id) - .xyz(pos.point.x.to_f32(), (page.size.y - y).to_f32(), None); + .page(*page_ref) + .xyz(pos.point.x.to_f32(), (page.content.size.y - y).to_f32(), None); } } annotations.finish(); page_writer.finish(); - ctx.pdf - .stream(content_id, page.content.wait()) + chunk + .stream(content_id, page.content.content.wait()) .filter(Filter::FlateDecode); } -/// Write the page labels. -/// They are numbered according to the page's final number, considering pages -/// which were removed from export, and not according to the page's real or -/// logical number in the initial Typst document. -pub(crate) fn write_page_labels(ctx: &mut PdfContext) -> Vec<(NonZeroUsize, Ref)> { - // If there is no exported page labeled, we skip the writing - if !ctx.pages.iter().filter_map(Option::as_ref).any(|p| { - p.label - .as_ref() - .is_some_and(|l| l.prefix.is_some() || l.style.is_some()) - }) { - return Vec::new(); - } - - let mut result = vec![]; - let empty_label = PdfPageLabel::default(); - let mut prev: Option<&PdfPageLabel> = None; - - // Skip non-exported pages for numbering. - for (i, page) in ctx.pages.iter().filter_map(Option::as_ref).enumerate() { - let nr = NonZeroUsize::new(1 + i).unwrap(); - // If there are pages with empty labels between labeled pages, we must - // write empty PageLabel entries. - let label = page.label.as_ref().unwrap_or(&empty_label); - - if let Some(pre) = prev { - if label.prefix == pre.prefix - && label.style == pre.style - && label.offset == pre.offset.map(|n| n.saturating_add(1)) - { - prev = Some(label); - continue; - } - } - - let id = ctx.alloc.bump(); - let mut entry = ctx.pdf.indirect(id).start::(); - - // Only add what is actually provided. 
Don't add empty prefix string if - // it wasn't given for example. - if let Some(prefix) = &label.prefix { - entry.prefix(TextStr(prefix)); - } - - if let Some(style) = label.style { - entry.style(to_pdf_numbering_style(style)); - } - - if let Some(offset) = label.offset { - entry.offset(offset.get() as i32); - } - - result.push((nr, id)); - prev = Some(label); - } - - result -} - /// Specification for a PDF page label. #[derive(Debug, Clone, PartialEq, Hash, Default)] -struct PdfPageLabel { +pub(crate) struct PdfPageLabel { /// Can be any string or none. Will always be prepended to the numbering style. - prefix: Option, + pub prefix: Option, /// Based on the numbering pattern. /// /// If `None` or numbering is a function, the field will be empty. - style: Option, + pub style: Option, /// Offset for the page label start. /// /// Describes where to start counting from when setting a style. /// (Has to be greater or equal than 1) - offset: Option, + pub offset: Option, } /// A PDF page label number style. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -enum PdfPageLabelStyle { +pub enum PdfPageLabelStyle { /// Decimal arabic numerals (1, 2, 3). Arabic, /// Lowercase roman numerals (i, ii, iii). @@ -413,708 +275,20 @@ impl PdfPageLabel { } } +impl PdfPageLabelStyle { + pub fn to_pdf_numbering_style(self) -> NumberingStyle { + match self { + PdfPageLabelStyle::Arabic => NumberingStyle::Arabic, + PdfPageLabelStyle::LowerRoman => NumberingStyle::LowerRoman, + PdfPageLabelStyle::UpperRoman => NumberingStyle::UpperRoman, + PdfPageLabelStyle::LowerAlpha => NumberingStyle::LowerAlpha, + PdfPageLabelStyle::UpperAlpha => NumberingStyle::UpperAlpha, + } + } +} + /// Data for an exported page. pub struct EncodedPage { - /// The indirect object id of the page. - pub id: Ref, - /// The page's dimensions. - pub size: Size, - /// The page's content stream. - pub content: Deferred>, - /// Whether the page uses opacities. - pub uses_opacities: bool, - /// Links in the PDF coordinate system. - pub links: Vec<(Destination, Rect)>, - /// The page's used resources - pub resources: HashMap, - /// The page's PDF label. - label: Option, -} - -/// Represents a resource being used in a PDF page by its name. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] -pub struct PageResource { - kind: ResourceKind, - name: EcoString, -} - -impl PageResource { - pub fn new(kind: ResourceKind, name: EcoString) -> Self { - Self { kind, name } - } -} - -/// A kind of resource being used in a PDF page. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] -pub enum ResourceKind { - XObject, - Font, - Gradient, - Pattern, - ExtGState, -} - -impl PageResource { - /// Returns the name of the resource. - pub fn name(&self) -> Name<'_> { - Name(self.name.as_bytes()) - } - - /// Returns whether the resource is an XObject. - pub fn is_x_object(&self) -> bool { - matches!(self.kind, ResourceKind::XObject) - } - - /// Returns whether the resource is a font. - pub fn is_font(&self) -> bool { - matches!(self.kind, ResourceKind::Font) - } - - /// Returns whether the resource is a gradient. - pub fn is_gradient(&self) -> bool { - matches!(self.kind, ResourceKind::Gradient) - } - - /// Returns whether the resource is a pattern. - pub fn is_pattern(&self) -> bool { - matches!(self.kind, ResourceKind::Pattern) - } - - /// Returns whether the resource is an external graphics state. 
- pub fn is_ext_g_state(&self) -> bool { - matches!(self.kind, ResourceKind::ExtGState) - } -} - -/// An exporter for the contents of a single PDF page. -pub struct PageContext<'a, 'b> { - pub(crate) parent: &'a mut PdfContext<'b>, - pub content: Content, - state: State, - saves: Vec, - pub bottom: f32, - uses_opacities: bool, - links: Vec<(Destination, Rect)>, - /// Keep track of the resources being used in the page. - pub resources: HashMap, -} - -impl<'a, 'b> PageContext<'a, 'b> { - pub fn new(parent: &'a mut PdfContext<'b>, size: Size) -> Self { - PageContext { - parent, - uses_opacities: false, - content: Content::new(), - state: State::new(size), - saves: vec![], - bottom: 0.0, - links: vec![], - resources: HashMap::default(), - } - } -} - -/// A simulated graphics state used to deduplicate graphics state changes and -/// keep track of the current transformation matrix for link annotations. -#[derive(Debug, Clone)] -struct State { - /// The transform of the current item. - transform: Transform, - /// The transform of first hard frame in the hierarchy. - container_transform: Transform, - /// The size of the first hard frame in the hierarchy. - size: Size, - font: Option<(Font, Abs)>, - fill: Option, - fill_space: Option>, - external_graphics_state: Option, - stroke: Option, - stroke_space: Option>, - text_rendering_mode: TextRenderingMode, -} - -impl State { - /// Creates a new, clean state for a given page `size`. - pub fn new(size: Size) -> Self { - Self { - transform: Transform::identity(), - container_transform: Transform::identity(), - size, - font: None, - fill: None, - fill_space: None, - external_graphics_state: None, - stroke: None, - stroke_space: None, - text_rendering_mode: TextRenderingMode::Fill, - } - } - - /// Creates the [`Transforms`] structure for the current item. - pub fn transforms(&self, size: Size, pos: Point) -> Transforms { - Transforms { - transform: self.transform.pre_concat(Transform::translate(pos.x, pos.y)), - container_transform: self.container_transform, - container_size: self.size, - size, - } - } -} - -/// Subset of the state used to calculate the transform of gradients and patterns. -#[derive(Debug, Clone, Copy)] -pub(super) struct Transforms { - /// The transform of the current item. - pub transform: Transform, - /// The transform of first hard frame in the hierarchy. - pub container_transform: Transform, - /// The size of the first hard frame in the hierarchy. - pub container_size: Size, - /// The size of the item. 
- pub size: Size, -} - -impl PageContext<'_, '_> { - fn save_state(&mut self) { - self.saves.push(self.state.clone()); - self.content.save_state(); - } - - fn restore_state(&mut self) { - self.content.restore_state(); - self.state = self.saves.pop().expect("missing state save"); - } - - fn set_external_graphics_state(&mut self, graphics_state: &ExtGState) { - let current_state = self.state.external_graphics_state.as_ref(); - if current_state != Some(graphics_state) { - let index = self.parent.extg_map.insert(*graphics_state); - let name = eco_format!("Gs{index}"); - self.content.set_parameters(Name(name.as_bytes())); - self.resources - .insert(PageResource::new(ResourceKind::ExtGState, name), index); - - if graphics_state.uses_opacities() { - self.uses_opacities = true; - } - } - } - - fn set_opacities(&mut self, stroke: Option<&FixedStroke>, fill: Option<&Paint>) { - let stroke_opacity = stroke - .map(|stroke| { - let color = match &stroke.paint { - Paint::Solid(color) => *color, - Paint::Gradient(_) | Paint::Pattern(_) => return 255, - }; - - color.alpha().map_or(255, |v| (v * 255.0).round() as u8) - }) - .unwrap_or(255); - let fill_opacity = fill - .map(|paint| { - let color = match paint { - Paint::Solid(color) => *color, - Paint::Gradient(_) | Paint::Pattern(_) => return 255, - }; - - color.alpha().map_or(255, |v| (v * 255.0).round() as u8) - }) - .unwrap_or(255); - self.set_external_graphics_state(&ExtGState { stroke_opacity, fill_opacity }); - } - - pub fn transform(&mut self, transform: Transform) { - let Transform { sx, ky, kx, sy, tx, ty } = transform; - self.state.transform = self.state.transform.pre_concat(transform); - if self.state.container_transform.is_identity() { - self.state.container_transform = self.state.transform; - } - self.content.transform([ - sx.get() as _, - ky.get() as _, - kx.get() as _, - sy.get() as _, - tx.to_f32(), - ty.to_f32(), - ]); - } - - fn group_transform(&mut self, transform: Transform) { - self.state.container_transform = - self.state.container_transform.pre_concat(transform); - } - - fn set_font(&mut self, font: &Font, size: Abs) { - if self.state.font.as_ref().map(|(f, s)| (f, *s)) != Some((font, size)) { - let index = self.parent.font_map.insert(font.clone()); - let name = eco_format!("F{index}"); - self.content.set_font(Name(name.as_bytes()), size.to_f32()); - self.resources - .insert(PageResource::new(ResourceKind::Font, name), index); - self.state.font = Some((font.clone(), size)); - } - } - - fn size(&mut self, size: Size) { - self.state.size = size; - } - - fn set_fill(&mut self, fill: &Paint, on_text: bool, transforms: Transforms) { - if self.state.fill.as_ref() != Some(fill) - || matches!(self.state.fill, Some(Paint::Gradient(_))) - { - fill.set_as_fill(self, on_text, transforms); - self.state.fill = Some(fill.clone()); - } - } - - pub fn set_fill_color_space(&mut self, space: Name<'static>) { - if self.state.fill_space != Some(space) { - self.content.set_fill_color_space(ColorSpaceOperand::Named(space)); - self.state.fill_space = Some(space); - } - } - - pub fn reset_fill_color_space(&mut self) { - self.state.fill_space = None; - } - - fn set_stroke( - &mut self, - stroke: &FixedStroke, - on_text: bool, - transforms: Transforms, - ) { - if self.state.stroke.as_ref() != Some(stroke) - || matches!( - self.state.stroke.as_ref().map(|s| &s.paint), - Some(Paint::Gradient(_)) - ) - { - let FixedStroke { paint, thickness, cap, join, dash, miter_limit } = stroke; - paint.set_as_stroke(self, on_text, transforms); - - 
self.content.set_line_width(thickness.to_f32()); - if self.state.stroke.as_ref().map(|s| &s.cap) != Some(cap) { - self.content.set_line_cap(to_pdf_line_cap(*cap)); - } - if self.state.stroke.as_ref().map(|s| &s.join) != Some(join) { - self.content.set_line_join(to_pdf_line_join(*join)); - } - if self.state.stroke.as_ref().map(|s| &s.dash) != Some(dash) { - if let Some(pattern) = dash { - self.content.set_dash_pattern( - pattern.array.iter().map(|l| l.to_f32()), - pattern.phase.to_f32(), - ); - } else { - self.content.set_dash_pattern([], 0.0); - } - } - if self.state.stroke.as_ref().map(|s| &s.miter_limit) != Some(miter_limit) { - self.content.set_miter_limit(miter_limit.get() as f32); - } - self.state.stroke = Some(stroke.clone()); - } - } - - pub fn set_stroke_color_space(&mut self, space: Name<'static>) { - if self.state.stroke_space != Some(space) { - self.content.set_stroke_color_space(ColorSpaceOperand::Named(space)); - self.state.stroke_space = Some(space); - } - } - - pub fn reset_stroke_color_space(&mut self) { - self.state.stroke_space = None; - } - - fn set_text_rendering_mode(&mut self, mode: TextRenderingMode) { - if self.state.text_rendering_mode != mode { - self.content.set_text_rendering_mode(mode); - self.state.text_rendering_mode = mode; - } - } -} - -/// Encode a frame into the content stream. -pub(crate) fn write_frame(ctx: &mut PageContext, frame: &Frame) { - for &(pos, ref item) in frame.items() { - let x = pos.x.to_f32(); - let y = pos.y.to_f32(); - match item { - FrameItem::Group(group) => write_group(ctx, pos, group), - FrameItem::Text(text) => write_text(ctx, pos, text), - FrameItem::Shape(shape, _) => write_shape(ctx, pos, shape), - FrameItem::Image(image, size, _) => write_image(ctx, x, y, image, *size), - FrameItem::Link(dest, size) => write_link(ctx, pos, dest, *size), - FrameItem::Tag(_) => {} - } - } -} - -/// Encode a group into the content stream. -fn write_group(ctx: &mut PageContext, pos: Point, group: &GroupItem) { - let translation = Transform::translate(pos.x, pos.y); - - ctx.save_state(); - - if group.frame.kind().is_hard() { - ctx.group_transform( - ctx.state - .transform - .post_concat(ctx.state.container_transform.invert().unwrap()) - .pre_concat(translation) - .pre_concat(group.transform), - ); - ctx.size(group.frame.size()); - } - - ctx.transform(translation.pre_concat(group.transform)); - if let Some(clip_path) = &group.clip_path { - write_path(ctx, 0.0, 0.0, clip_path); - ctx.content.clip_nonzero(); - ctx.content.end_path(); - } - - write_frame(ctx, &group.frame); - ctx.restore_state(); -} - -/// Encode a text run into the content stream. 
-fn write_text(ctx: &mut PageContext, pos: Point, text: &TextItem) { - let ttf = text.font.ttf(); - let tables = ttf.tables(); - - // If the text run contains either only color glyphs (used for emojis for - // example) or normal text we can render it directly - let has_color_glyphs = tables.sbix.is_some() - || tables.cbdt.is_some() - || tables.svg.is_some() - || tables.colr.is_some(); - if !has_color_glyphs { - write_normal_text(ctx, pos, TextItemView::all_of(text)); - return; - } - - let color_glyph_count = - text.glyphs.iter().filter(|g| is_color_glyph(&text.font, g)).count(); - - if color_glyph_count == text.glyphs.len() { - write_color_glyphs(ctx, pos, TextItemView::all_of(text)); - } else if color_glyph_count == 0 { - write_normal_text(ctx, pos, TextItemView::all_of(text)); - } else { - // Otherwise we need to split it in smaller text runs - let mut offset = 0; - let mut position_in_run = Abs::zero(); - for (color, sub_run) in - text.glyphs.group_by_key(|g| is_color_glyph(&text.font, g)) - { - let end = offset + sub_run.len(); - - // Build a sub text-run - let text_item_view = TextItemView::from_glyph_range(text, offset..end); - - // Adjust the position of the run on the line - let pos = pos + Point::new(position_in_run, Abs::zero()); - position_in_run += text_item_view.width(); - offset = end; - // Actually write the sub text-run - if color { - write_color_glyphs(ctx, pos, text_item_view); - } else { - write_normal_text(ctx, pos, text_item_view); - } - } - } -} - -// Encodes a text run (without any color glyph) into the content stream. -fn write_normal_text(ctx: &mut PageContext, pos: Point, text: TextItemView) { - let x = pos.x.to_f32(); - let y = pos.y.to_f32(); - - *ctx.parent.languages.entry(text.item.lang).or_insert(0) += text.glyph_range.len(); - - let glyph_set = ctx.parent.glyph_sets.entry(text.item.font.clone()).or_default(); - for g in text.glyphs() { - let t = text.text(); - let segment = &t[g.range()]; - glyph_set.entry(g.id).or_insert_with(|| segment.into()); - } - - let fill_transform = ctx.state.transforms(Size::zero(), pos); - ctx.set_fill(&text.item.fill, true, fill_transform); - - let stroke = text.item.stroke.as_ref().and_then(|stroke| { - if stroke.thickness.to_f32() > 0.0 { - Some(stroke) - } else { - None - } - }); - - if let Some(stroke) = stroke { - ctx.set_stroke(stroke, true, fill_transform); - ctx.set_text_rendering_mode(TextRenderingMode::FillStroke); - } else { - ctx.set_text_rendering_mode(TextRenderingMode::Fill); - } - - ctx.set_font(&text.item.font, text.item.size); - ctx.set_opacities(text.item.stroke.as_ref(), Some(&text.item.fill)); - ctx.content.begin_text(); - - // Position the text. - ctx.content.set_text_matrix([1.0, 0.0, 0.0, -1.0, x, y]); - - let mut positioned = ctx.content.show_positioned(); - let mut items = positioned.items(); - let mut adjustment = Em::zero(); - let mut encoded = vec![]; - - // Write the glyphs with kerning adjustments. 
- for glyph in text.glyphs() { - adjustment += glyph.x_offset; - - if !adjustment.is_zero() { - if !encoded.is_empty() { - items.show(Str(&encoded)); - encoded.clear(); - } - - items.adjust(-adjustment.to_font_units()); - adjustment = Em::zero(); - } - - let cid = crate::font::glyph_cid(&text.item.font, glyph.id); - encoded.push((cid >> 8) as u8); - encoded.push((cid & 0xff) as u8); - - if let Some(advance) = text.item.font.advance(glyph.id) { - adjustment += glyph.x_advance - advance; - } - - adjustment -= glyph.x_offset; - } - - if !encoded.is_empty() { - items.show(Str(&encoded)); - } - - items.finish(); - positioned.finish(); - ctx.content.end_text(); -} - -// Encodes a text run made only of color glyphs into the content stream -fn write_color_glyphs(ctx: &mut PageContext, pos: Point, text: TextItemView) { - let x = pos.x.to_f32(); - let y = pos.y.to_f32(); - - let mut last_font = None; - - ctx.content.begin_text(); - ctx.content.set_text_matrix([1.0, 0.0, 0.0, -1.0, x, y]); - // So that the next call to ctx.set_font() will change the font to one that - // displays regular glyphs and not color glyphs. - ctx.state.font = None; - - let glyph_set = ctx.parent.glyph_sets.entry(text.item.font.clone()).or_default(); - - for glyph in text.glyphs() { - // Retrieve the Type3 font reference and the glyph index in the font. - let (font, index) = ctx.parent.color_font_map.get( - &mut ctx.parent.alloc, - &text.item.font, - glyph.id, - ); - - if last_font != Some(font.get()) { - ctx.content.set_font( - Name(eco_format!("Cf{}", font.get()).as_bytes()), - text.item.size.to_f32(), - ); - last_font = Some(font.get()); - } - - ctx.content.show(Str(&[index])); - - glyph_set - .entry(glyph.id) - .or_insert_with(|| text.text()[glyph.range()].into()); - } - ctx.content.end_text(); -} - -/// Encode a geometrical shape into the content stream. -fn write_shape(ctx: &mut PageContext, pos: Point, shape: &Shape) { - let x = pos.x.to_f32(); - let y = pos.y.to_f32(); - - let stroke = shape.stroke.as_ref().and_then(|stroke| { - if stroke.thickness.to_f32() > 0.0 { - Some(stroke) - } else { - None - } - }); - - if shape.fill.is_none() && stroke.is_none() { - return; - } - - if let Some(fill) = &shape.fill { - ctx.set_fill(fill, false, ctx.state.transforms(shape.geometry.bbox_size(), pos)); - } - - if let Some(stroke) = stroke { - ctx.set_stroke( - stroke, - false, - ctx.state.transforms(shape.geometry.bbox_size(), pos), - ); - } - - ctx.set_opacities(stroke, shape.fill.as_ref()); - - match shape.geometry { - Geometry::Line(target) => { - let dx = target.x.to_f32(); - let dy = target.y.to_f32(); - ctx.content.move_to(x, y); - ctx.content.line_to(x + dx, y + dy); - } - Geometry::Rect(size) => { - let w = size.x.to_f32(); - let h = size.y.to_f32(); - if w.abs() > f32::EPSILON && h.abs() > f32::EPSILON { - ctx.content.rect(x, y, w, h); - } - } - Geometry::Path(ref path) => { - write_path(ctx, x, y, path); - } - } - - match (&shape.fill, stroke) { - (None, None) => unreachable!(), - (Some(_), None) => ctx.content.fill_nonzero(), - (None, Some(_)) => ctx.content.stroke(), - (Some(_), Some(_)) => ctx.content.fill_nonzero_and_stroke(), - }; -} - -/// Encode a bezier path into the content stream. 
-fn write_path(ctx: &mut PageContext, x: f32, y: f32, path: &Path) { - for elem in &path.0 { - match elem { - PathItem::MoveTo(p) => { - ctx.content.move_to(x + p.x.to_f32(), y + p.y.to_f32()) - } - PathItem::LineTo(p) => { - ctx.content.line_to(x + p.x.to_f32(), y + p.y.to_f32()) - } - PathItem::CubicTo(p1, p2, p3) => ctx.content.cubic_to( - x + p1.x.to_f32(), - y + p1.y.to_f32(), - x + p2.x.to_f32(), - y + p2.y.to_f32(), - x + p3.x.to_f32(), - y + p3.y.to_f32(), - ), - PathItem::ClosePath => ctx.content.close_path(), - }; - } -} - -/// Encode a vector or raster image into the content stream. -fn write_image(ctx: &mut PageContext, x: f32, y: f32, image: &Image, size: Size) { - let index = ctx.parent.image_map.insert(image.clone()); - ctx.parent - .image_deferred_map - .entry(index) - .or_insert_with(|| deferred_image(image.clone())); - - let name = eco_format!("Im{index}"); - let w = size.x.to_f32(); - let h = size.y.to_f32(); - ctx.content.save_state(); - ctx.content.transform([w, 0.0, 0.0, -h, x, y + h]); - - if let Some(alt) = image.alt() { - let mut image_span = - ctx.content.begin_marked_content_with_properties(Name(b"Span")); - let mut image_alt = image_span.properties(); - image_alt.pair(Name(b"Alt"), pdf_writer::Str(alt.as_bytes())); - image_alt.finish(); - image_span.finish(); - - ctx.content.x_object(Name(name.as_bytes())); - ctx.content.end_marked_content(); - } else { - ctx.content.x_object(Name(name.as_bytes())); - } - - ctx.resources - .insert(PageResource::new(ResourceKind::XObject, name.clone()), index); - ctx.content.restore_state(); -} - -/// Save a link for later writing in the annotations dictionary. -fn write_link(ctx: &mut PageContext, pos: Point, dest: &Destination, size: Size) { - let mut min_x = Abs::inf(); - let mut min_y = Abs::inf(); - let mut max_x = -Abs::inf(); - let mut max_y = -Abs::inf(); - - // Compute the bounding box of the transformed link. 
- for point in [ - pos, - pos + Point::with_x(size.x), - pos + Point::with_y(size.y), - pos + size.to_point(), - ] { - let t = point.transform(ctx.state.transform); - min_x.set_min(t.x); - min_y.set_min(t.y); - max_x.set_max(t.x); - max_y.set_max(t.y); - } - - let x1 = min_x.to_f32(); - let x2 = max_x.to_f32(); - let y1 = max_y.to_f32(); - let y2 = min_y.to_f32(); - let rect = Rect::new(x1, y1, x2, y2); - - ctx.links.push((dest.clone(), rect)); -} - -fn to_pdf_line_cap(cap: LineCap) -> LineCapStyle { - match cap { - LineCap::Butt => LineCapStyle::ButtCap, - LineCap::Round => LineCapStyle::RoundCap, - LineCap::Square => LineCapStyle::ProjectingSquareCap, - } -} - -fn to_pdf_line_join(join: LineJoin) -> LineJoinStyle { - match join { - LineJoin::Miter => LineJoinStyle::MiterJoin, - LineJoin::Round => LineJoinStyle::RoundJoin, - LineJoin::Bevel => LineJoinStyle::BevelJoin, - } -} - -fn to_pdf_numbering_style(style: PdfPageLabelStyle) -> NumberingStyle { - match style { - PdfPageLabelStyle::Arabic => NumberingStyle::Arabic, - PdfPageLabelStyle::LowerRoman => NumberingStyle::LowerRoman, - PdfPageLabelStyle::UpperRoman => NumberingStyle::UpperRoman, - PdfPageLabelStyle::LowerAlpha => NumberingStyle::LowerAlpha, - PdfPageLabelStyle::UpperAlpha => NumberingStyle::UpperAlpha, - } + pub content: content::Encoded, + pub label: Option, } diff --git a/crates/typst-pdf/src/pattern.rs b/crates/typst-pdf/src/pattern.rs index 7fb3d6e8e..e06c04f87 100644 --- a/crates/typst-pdf/src/pattern.rs +++ b/crates/typst-pdf/src/pattern.rs @@ -1,88 +1,72 @@ +use std::collections::HashMap; + use ecow::eco_format; -use pdf_writer::types::{ColorSpaceOperand, PaintType, TilingType}; -use pdf_writer::{Filter, Finish, Name, Rect}; +use pdf_writer::{ + types::{ColorSpaceOperand, PaintType, TilingType}, + Filter, Name, Rect, Ref, +}; + use typst::layout::{Abs, Ratio, Transform}; use typst::utils::Numeric; use typst::visualize::{Pattern, RelativeTo}; -use crate::color::PaintEncode; -use crate::page::{construct_page, PageContext, PageResource, ResourceKind, Transforms}; -use crate::{transform_to_array, PdfContext}; +use crate::{color::PaintEncode, resources::Remapper, Resources, WithGlobalRefs}; +use crate::{content, resources::ResourcesRefs}; +use crate::{transform_to_array, PdfChunk}; /// Writes the actual patterns (tiling patterns) to the PDF. /// This is performed once after writing all pages. -pub(crate) fn write_patterns(ctx: &mut PdfContext) { - for PdfPattern { transform, pattern, content, resources } in ctx.pattern_map.items() { - let tiling = ctx.alloc.bump(); - ctx.pattern_refs.push(tiling); +pub fn write_patterns(context: &WithGlobalRefs) -> (PdfChunk, HashMap) { + let mut chunk = PdfChunk::new(); + let mut out = HashMap::new(); + context.resources.traverse(&mut |resources| { + let Some(patterns) = &resources.patterns else { + return; + }; - let mut tiling_pattern = ctx.pdf.tiling_pattern(tiling, content); - tiling_pattern - .tiling_type(TilingType::ConstantSpacing) - .paint_type(PaintType::Colored) - .bbox(Rect::new( - 0.0, - 0.0, - pattern.size().x.to_pt() as _, - pattern.size().y.to_pt() as _, - )) - .x_step((pattern.size().x + pattern.spacing().x).to_pt() as _) - .y_step((pattern.size().y + pattern.spacing().y).to_pt() as _); + for pdf_pattern in patterns.remapper.items() { + let PdfPattern { transform, pattern, content, .. 
} = pdf_pattern; + if out.contains_key(pdf_pattern) { + continue; + } - let mut resources_map = tiling_pattern.resources(); + let tiling = chunk.alloc(); + out.insert(pdf_pattern.clone(), tiling); - resources_map.x_objects().pairs( - resources - .iter() - .filter(|(res, _)| res.is_x_object()) - .map(|(res, ref_)| (res.name(), ctx.image_refs[*ref_])), - ); + let mut tiling_pattern = chunk.tiling_pattern(tiling, content); + tiling_pattern + .tiling_type(TilingType::ConstantSpacing) + .paint_type(PaintType::Colored) + .bbox(Rect::new( + 0.0, + 0.0, + pattern.size().x.to_pt() as _, + pattern.size().y.to_pt() as _, + )) + .x_step((pattern.size().x + pattern.spacing().x).to_pt() as _) + .y_step((pattern.size().y + pattern.spacing().y).to_pt() as _); - resources_map.fonts().pairs( - resources - .iter() - .filter(|(res, _)| res.is_font()) - .map(|(res, ref_)| (res.name(), ctx.font_refs[*ref_])), - ); + // The actual resource dict will be written in a later step + tiling_pattern.pair(Name(b"Resources"), patterns.resources.reference); - ctx.colors - .write_color_spaces(resources_map.color_spaces(), &mut ctx.alloc); + tiling_pattern + .matrix(transform_to_array( + transform + .pre_concat(Transform::scale(Ratio::one(), -Ratio::one())) + .post_concat(Transform::translate( + Abs::zero(), + pattern.spacing().y, + )), + )) + .filter(Filter::FlateDecode); + } + }); - resources_map - .patterns() - .pairs( - resources - .iter() - .filter(|(res, _)| res.is_pattern()) - .map(|(res, ref_)| (res.name(), ctx.pattern_refs[*ref_])), - ) - .pairs( - resources - .iter() - .filter(|(res, _)| res.is_gradient()) - .map(|(res, ref_)| (res.name(), ctx.gradient_refs[*ref_])), - ); - - resources_map.ext_g_states().pairs( - resources - .iter() - .filter(|(res, _)| res.is_ext_g_state()) - .map(|(res, ref_)| (res.name(), ctx.ext_gs_refs[*ref_])), - ); - - resources_map.finish(); - tiling_pattern - .matrix(transform_to_array( - transform - .pre_concat(Transform::scale(Ratio::one(), -Ratio::one())) - .post_concat(Transform::translate(Abs::zero(), pattern.spacing().y)), - )) - .filter(Filter::FlateDecode); - } + (chunk, out) } /// A pattern and its transform. -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct PdfPattern { /// The transform to apply to the pattern. pub transform: Transform, @@ -90,17 +74,20 @@ pub struct PdfPattern { pub pattern: Pattern, /// The rendered pattern. pub content: Vec, - /// The resources used by the pattern. - pub resources: Vec<(PageResource, usize)>, } /// Registers a pattern with the PDF. fn register_pattern( - ctx: &mut PageContext, + ctx: &mut content::Builder, pattern: &Pattern, on_text: bool, - mut transforms: Transforms, + mut transforms: content::Transforms, ) -> usize { + let patterns = ctx + .resources + .patterns + .get_or_insert_with(|| Box::new(PatternRemapper::new())); + // Edge cases for strokes. if transforms.size.x.is_zero() { transforms.size.x = Abs::pt(1.0); @@ -116,22 +103,24 @@ fn register_pattern( }; // Render the body. 
- let content = construct_page(ctx.parent, pattern.frame()); + let content = content::build(&mut patterns.resources, pattern.frame(), None); - let mut pdf_pattern = PdfPattern { + let pdf_pattern = PdfPattern { transform, pattern: pattern.clone(), content: content.content.wait().clone(), - resources: content.resources.into_iter().collect(), }; - pdf_pattern.resources.sort(); - - ctx.parent.pattern_map.insert(pdf_pattern) + patterns.remapper.insert(pdf_pattern) } impl PaintEncode for Pattern { - fn set_as_fill(&self, ctx: &mut PageContext, on_text: bool, transforms: Transforms) { + fn set_as_fill( + &self, + ctx: &mut content::Builder, + on_text: bool, + transforms: content::Transforms, + ) { ctx.reset_fill_color_space(); let index = register_pattern(ctx, self, on_text, transforms); @@ -140,15 +129,13 @@ impl PaintEncode for Pattern { ctx.content.set_fill_color_space(ColorSpaceOperand::Pattern); ctx.content.set_fill_pattern(None, name); - ctx.resources - .insert(PageResource::new(ResourceKind::Pattern, id), index); } fn set_as_stroke( &self, - ctx: &mut PageContext, + ctx: &mut content::Builder, on_text: bool, - transforms: Transforms, + transforms: content::Transforms, ) { ctx.reset_stroke_color_space(); @@ -158,7 +145,30 @@ impl PaintEncode for Pattern { ctx.content.set_stroke_color_space(ColorSpaceOperand::Pattern); ctx.content.set_stroke_pattern(None, name); - ctx.resources - .insert(PageResource::new(ResourceKind::Pattern, id), index); + } +} + +/// De-duplicate patterns and the resources they require to be drawn. +pub struct PatternRemapper { + /// Pattern de-duplicator. + pub remapper: Remapper, + /// PDF resources that are used by these patterns. + pub resources: Resources, +} + +impl PatternRemapper<()> { + pub fn new() -> Self { + Self { + remapper: Remapper::new("P"), + resources: Resources::default(), + } + } + + /// Allocate a reference to the resource dictionary of these patterns. + pub fn with_refs(self, refs: &ResourcesRefs) -> PatternRemapper { + PatternRemapper { + remapper: self.remapper, + resources: self.resources.with_refs(refs), + } } } diff --git a/crates/typst-pdf/src/resources.rs b/crates/typst-pdf/src/resources.rs new file mode 100644 index 000000000..a0a7c71d6 --- /dev/null +++ b/crates/typst-pdf/src/resources.rs @@ -0,0 +1,325 @@ +//! PDF resources. +//! +//! Resources are defined in dictionaries. They map identifiers such as `Im0` to +//! a PDF reference. Each [content stream] is associated with a resource dictionary. +//! The identifiers defined in the resources can then be used in content streams. +//! +//! [content stream]: `crate::content` + +use std::collections::{BTreeMap, HashMap}; +use std::hash::Hash; + +use ecow::{eco_format, EcoString}; +use pdf_writer::{Dict, Finish, Name, Ref}; +use typst::text::Lang; +use typst::{text::Font, utils::Deferred, visualize::Image}; + +use crate::{ + color::ColorSpaces, color_font::ColorFontMap, extg::ExtGState, gradient::PdfGradient, + image::EncodedImage, pattern::PatternRemapper, PdfChunk, Renumber, WithEverything, + WithResources, +}; + +/// All the resources that have been collected when traversing the document. +/// +/// This does not allocate references to resources, only track what was used +/// and deduplicate what can be deduplicated. +/// +/// You may notice that this structure is a tree: [`PatternRemapper`] and +/// [`ColorFontMap`] (that are present in the fields of [`Resources`]), +/// themselves contain [`Resources`] (that will be called "sub-resources" from +/// now on). 
Because color glyphs and patterns are defined using content
+/// streams, just like pages, they can refer to resources too, which are tracked
+/// by the respective sub-resources.
+///
+/// Each instance of this structure will become a `/Resources` dictionary in
+/// the final PDF. It is not possible to use a single shared dictionary for all
+/// pages, patterns and color fonts, because if a resource is listed in its own
+/// `/Resources` dictionary, some PDF readers will fail to open the document.
+///
+/// Because we need to lazily initialize sub-resources (we don't know how deep
+/// the tree will be before reading the document), and because this happens in a
+/// context where no PDF reference allocator is available, `Resources` is
+/// originally created with the type parameter `R = ()`. The reference for each
+/// dictionary will only be allocated in the next phase, once we know the shape
+/// of the tree, at which point `R` becomes `Ref`. No other value of `R` should
+/// ever exist.
+pub struct Resources<R = Ref> {
+    /// The global reference to this resource dictionary, or `()` if it has not
+    /// been allocated yet.
+    pub reference: R,
+
+    /// Handles color space writing.
+    pub colors: ColorSpaces,
+
+    /// Deduplicates fonts used across the document.
+    pub fonts: Remapper<Font>,
+    /// Deduplicates images used across the document.
+    pub images: Remapper<Image>,
+    /// Handles to deferred image conversions.
+    pub deferred_images: HashMap<usize, Deferred<EncodedImage>>,
+    /// Deduplicates gradients used across the document.
+    pub gradients: Remapper<PdfGradient>,
+    /// Deduplicates patterns used across the document.
+    pub patterns: Option<Box<PatternRemapper<R>>>,
+    /// Deduplicates external graphics states used across the document.
+    pub ext_gs: Remapper<ExtGState>,
+    /// Deduplicates color glyphs.
+    pub color_fonts: Option<Box<ColorFontMap<R>>>,
+
+    // The fields below do not correspond to actual resources that will be
+    // written in a dictionary, but are metadata about resources that can't
+    // really live anywhere else.
+    /// The number of glyphs for all referenced languages in the content stream.
+    /// We keep track of this to determine the main document language.
+    /// BTreeMap is used to write a sorted list of languages to the metadata.
+    pub languages: BTreeMap<Lang, usize>,
+
+    /// For each font, a mapping from used glyphs to their text representation.
+    /// May contain multiple chars in case of ligatures or similar things. The
+    /// same glyph can have a different text representation within one document;
+    /// in that case we just save the first one. The resulting strings are used
+    /// for the PDF's /ToUnicode map for glyphs that don't have an entry in the
+    /// font's cmap. This is important for copy-paste and searching.
+    pub glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
+}
+
+impl<R: Renumber> Renumber for Resources<R> {
+    fn renumber(&mut self, offset: i32) {
+        self.reference.renumber(offset);
+
+        if let Some(color_fonts) = &mut self.color_fonts {
+            color_fonts.resources.renumber(offset);
+        }
+
+        if let Some(patterns) = &mut self.patterns {
+            patterns.resources.renumber(offset);
+        }
+    }
+}
+
+impl Default for Resources<()> {
+    fn default() -> Self {
+        Resources {
+            reference: (),
+            colors: ColorSpaces::default(),
+            fonts: Remapper::new("F"),
+            images: Remapper::new("Im"),
+            deferred_images: HashMap::new(),
+            gradients: Remapper::new("Gr"),
+            patterns: None,
+            ext_gs: Remapper::new("Gs"),
+            color_fonts: None,
+            languages: BTreeMap::new(),
+            glyph_sets: HashMap::new(),
+        }
+    }
+}
+
+impl Resources<()> {
+    /// Associate a reference with this resource dictionary (and do so
+    /// recursively for sub-resources).
+    pub fn with_refs(self, refs: &ResourcesRefs) -> Resources<Ref> {
+        Resources {
+            reference: refs.reference,
+            colors: self.colors,
+            fonts: self.fonts,
+            images: self.images,
+            deferred_images: self.deferred_images,
+            gradients: self.gradients,
+            patterns: self
+                .patterns
+                .zip(refs.patterns.as_ref())
+                .map(|(p, r)| Box::new(p.with_refs(r))),
+            ext_gs: self.ext_gs,
+            color_fonts: self
+                .color_fonts
+                .zip(refs.color_fonts.as_ref())
+                .map(|(c, r)| Box::new(c.with_refs(r))),
+            languages: self.languages,
+            glyph_sets: self.glyph_sets,
+        }
+    }
+}
+
+impl<R> Resources<R> {
+    /// Run a function on this resource dictionary and all
+    /// of its sub-resources.
+    pub fn traverse<P>
(&self, process: &mut P) + where + P: FnMut(&Self), + { + process(self); + if let Some(color_fonts) = &self.color_fonts { + color_fonts.resources.traverse(process) + } + if let Some(patterns) = &self.patterns { + patterns.resources.traverse(process) + } + } +} + +/// References for a resource tree. +/// +/// This structure is a tree too, that should have the same structure as the +/// corresponding `Resources`. +pub struct ResourcesRefs { + pub reference: Ref, + pub color_fonts: Option>, + pub patterns: Option>, +} + +impl Renumber for ResourcesRefs { + fn renumber(&mut self, offset: i32) { + self.reference.renumber(offset); + if let Some(color_fonts) = &mut self.color_fonts { + color_fonts.renumber(offset); + } + if let Some(patterns) = &mut self.patterns { + patterns.renumber(offset); + } + } +} + +/// Allocate references for all resource dictionaries. +pub fn alloc_resources_refs(context: &WithResources) -> (PdfChunk, ResourcesRefs) { + let mut chunk = PdfChunk::new(); + /// Recursively explore resource dictionaries and assign them references. + fn refs_for(resources: &Resources<()>, chunk: &mut PdfChunk) -> ResourcesRefs { + ResourcesRefs { + reference: chunk.alloc(), + color_fonts: resources + .color_fonts + .as_ref() + .map(|c| Box::new(refs_for(&c.resources, chunk))), + patterns: resources + .patterns + .as_ref() + .map(|p| Box::new(refs_for(&p.resources, chunk))), + } + } + + let refs = refs_for(&context.resources, &mut chunk); + (chunk, refs) +} + +/// Write the resource dictionaries that will be referenced by all pages. +/// +/// We add a reference to this dictionary to each page individually instead of +/// to the root node of the page tree because using the resource inheritance +/// feature breaks PDF merging with Apple Preview. +/// +/// Also write resource dictionaries for Type3 fonts and patterns. +pub fn write_resource_dictionaries(ctx: &WithEverything) -> (PdfChunk, ()) { + let mut chunk = PdfChunk::new(); + let mut used_color_spaces = ColorSpaces::default(); + + ctx.resources.traverse(&mut |resources| { + used_color_spaces.merge(&resources.colors); + + let images_ref = chunk.alloc.bump(); + let patterns_ref = chunk.alloc.bump(); + let ext_gs_states_ref = chunk.alloc.bump(); + let color_spaces_ref = chunk.alloc.bump(); + + let mut color_font_slices = Vec::new(); + let mut color_font_numbers = HashMap::new(); + if let Some(color_fonts) = &resources.color_fonts { + for (_, font_slice) in color_fonts.iter() { + color_font_numbers.insert(font_slice.clone(), color_font_slices.len()); + color_font_slices.push(font_slice); + } + } + let color_font_remapper = Remapper { + prefix: "Cf", + to_pdf: color_font_numbers, + to_items: color_font_slices, + }; + + resources + .images + .write(&ctx.references.images, &mut chunk.indirect(images_ref).dict()); + + let mut patterns_dict = chunk.indirect(patterns_ref).dict(); + resources + .gradients + .write(&ctx.references.gradients, &mut patterns_dict); + if let Some(p) = &resources.patterns { + p.remapper.write(&ctx.references.patterns, &mut patterns_dict); + } + patterns_dict.finish(); + + resources + .ext_gs + .write(&ctx.references.ext_gs, &mut chunk.indirect(ext_gs_states_ref).dict()); + + let mut res_dict = chunk + .indirect(resources.reference) + .start::(); + res_dict.pair(Name(b"XObject"), images_ref); + res_dict.pair(Name(b"Pattern"), patterns_ref); + res_dict.pair(Name(b"ExtGState"), ext_gs_states_ref); + res_dict.pair(Name(b"ColorSpace"), color_spaces_ref); + + // TODO: can't this be an indirect reference too? 
+        let mut fonts_dict = res_dict.fonts();
+        resources.fonts.write(&ctx.references.fonts, &mut fonts_dict);
+        color_font_remapper.write(&ctx.references.color_fonts, &mut fonts_dict);
+        fonts_dict.finish();
+
+        res_dict.finish();
+
+        let color_spaces = chunk.indirect(color_spaces_ref).dict();
+        resources
+            .colors
+            .write_color_spaces(color_spaces, &ctx.globals.color_functions);
+    });
+
+    used_color_spaces.write_functions(&mut chunk, &ctx.globals.color_functions);
+
+    (chunk, ())
+}
+
+/// Assigns new, consecutive PDF-internal indices to items.
+pub struct Remapper<T> {
+    /// The prefix to use when naming these resources.
+    prefix: &'static str,
+    /// Forwards from the items to the pdf indices.
+    to_pdf: HashMap<T, usize>,
+    /// Backwards from the pdf indices to the items.
+    to_items: Vec<T>,
+}
+
+impl<T> Remapper<T>
+where
+    T: Eq + Hash + Clone,
+{
+    /// Create an empty mapping.
+    pub fn new(prefix: &'static str) -> Self {
+        Self { prefix, to_pdf: HashMap::new(), to_items: vec![] }
+    }
+
+    /// Insert an item in the mapping if it was not already present.
+    pub fn insert(&mut self, item: T) -> usize {
+        let to_layout = &mut self.to_items;
+        *self.to_pdf.entry(item.clone()).or_insert_with(|| {
+            let pdf_index = to_layout.len();
+            to_layout.push(item);
+            pdf_index
+        })
+    }
+
+    /// All items in this mapping.
+    pub fn items(&self) -> impl Iterator<Item = &T> + '_ {
+        self.to_items.iter()
+    }
+
+    /// Write this list of items in a Resource dictionary.
+    fn write(&self, mapping: &HashMap<T, Ref>, dict: &mut Dict) {
+        for (number, item) in self.items().enumerate() {
+            let name = eco_format!("{}{}", self.prefix, number);
+            let reference = mapping[item];
+            dict.pair(Name(name.as_bytes()), reference);
+        }
+    }
+}
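
For illustration (not part of the patch): a minimal sketch of how the `Remapper` above assigns names, using plain `&str` keys as a stand-in for the real resource types (fonts, images, gradients, patterns, graphics states). Items are deduplicated and numbered in insertion order; `write` later pairs each `<prefix><index>` name with the `Ref` allocated for that item.

    // Hypothetical usage of the `Remapper` defined above.
    let mut images: Remapper<&str> = Remapper::new("Im");
    assert_eq!(images.insert("photo"), 0); // will be named /Im0
    assert_eq!(images.insert("logo"), 1); // will be named /Im1
    assert_eq!(images.insert("photo"), 0); // deduplicated, still index 0
    assert_eq!(images.items().count(), 2);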