diff --git a/Cargo.lock b/Cargo.lock index 72f757cdc..3e99ea814 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2255,15 +2255,13 @@ dependencies = [ [[package]] name = "subsetter" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09eab8a83bff89ba2200bd4c59be45c7c787f988431b936099a5a266c957f2f9" +version = "0.11.0" +source = "git+https://github.com/typst/subsetter?rev=4e0058b#4e0058b4b9a0948a5f79894111948d95e59ba350" [[package]] name = "svg2pdf" version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31565956eb1dc398c0d9776ee1d1bac4e34759af63dcbe0520df32313a5b53b" +source = "git+https://github.com/typst/svg2pdf?rev=39f8ad3#39f8ad3b35e14cfcabf3d5d916899f7ac78790f7" dependencies = [ "fontdb", "image 0.25.1", diff --git a/Cargo.toml b/Cargo.toml index 367e835a4..ee50b6667 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -98,8 +98,8 @@ shell-escape = "0.1.5" siphasher = "1" smallvec = { version = "1.11.1", features = ["union", "const_generics", "const_new"] } stacker = "0.1.15" -subsetter = "0.1.1" -svg2pdf = "0.11.0" +subsetter = { git = "https://github.com/typst/subsetter", rev = "4e0058b" } +svg2pdf = { git = "https://github.com/typst/svg2pdf", rev = "39f8ad3" } syn = { version = "2", features = ["full", "extra-traits"] } syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] } tar = "0.4" diff --git a/crates/typst-pdf/src/color_font.rs b/crates/typst-pdf/src/color_font.rs index f4621ca0c..201915b19 100644 --- a/crates/typst-pdf/src/color_font.rs +++ b/crates/typst-pdf/src/color_font.rs @@ -115,7 +115,7 @@ pub fn write_color_fonts( pdf_font.finish(); // Encode a CMAP to make it possible to search or copy glyphs. 
- let glyph_set = resources.glyph_sets.get(&font_slice.font).unwrap(); + let glyph_set = resources.color_glyph_sets.get(&font_slice.font).unwrap(); let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO); for (index, glyph) in subset.iter().enumerate() { let Some(text) = glyph_set.get(&glyph.gid) else { diff --git a/crates/typst-pdf/src/content.rs b/crates/typst-pdf/src/content.rs index c5327c188..8ae2c424d 100644 --- a/crates/typst-pdf/src/content.rs +++ b/crates/typst-pdf/src/content.rs @@ -476,6 +476,12 @@ fn write_normal_text(ctx: &mut Builder, pos: Point, text: TextItemView) { let mut adjustment = Em::zero(); let mut encoded = vec![]; + let glyph_remapper = ctx + .resources + .glyph_remappers + .entry(text.item.font.clone()) + .or_default(); + // Write the glyphs with kerning adjustments. for glyph in text.glyphs() { adjustment += glyph.x_offset; @@ -490,7 +496,26 @@ fn write_normal_text(ctx: &mut Builder, pos: Point, text: TextItemView) { adjustment = Em::zero(); } - let cid = crate::font::glyph_cid(&text.item.font, glyph.id); + // In PDF, we use CIDs to index the glyphs in a font, not GIDs. What a + // CID actually refers to depends on the type of font we are embedding: + // + // - For TrueType fonts, the CIDs are defined by an external mapping. + // - For SID-keyed CFF fonts, the CID is the same as the GID in the font. + // - For CID-keyed CFF fonts, the CID refers to the CID in the font. + // + // (See the PDF specification for more details on this.) + // + // However, in our case: + // - We use the identity-mapping for TrueType fonts. + // - SID-keyed fonts will get converted into CID-keyed fonts by the + // subsetter. + // - CID-keyed fonts will be rewritten in a way so that the mapping + // between CID and GID is always the identity mapping, regardless of + // the mapping before. + // + // Because of this, we can always use the remapped GID as the CID, + // regardless of which type of font we are actually embedding. 
+ let cid = glyph_remapper.remap(glyph.id); encoded.push((cid >> 8) as u8); encoded.push((cid & 0xff) as u8); @@ -523,7 +548,11 @@ fn write_color_glyphs(ctx: &mut Builder, pos: Point, text: TextItemView) { // displays regular glyphs and not color glyphs. ctx.state.font = None; - let glyph_set = ctx.resources.glyph_sets.entry(text.item.font.clone()).or_default(); + let glyph_set = ctx + .resources + .color_glyph_sets + .entry(text.item.font.clone()) + .or_default(); for glyph in text.glyphs() { // Retrieve the Type3 font reference and the glyph index in the font. diff --git a/crates/typst-pdf/src/font.rs b/crates/typst-pdf/src/font.rs index 22c3d22fe..6c6e76823 100644 --- a/crates/typst-pdf/src/font.rs +++ b/crates/typst-pdf/src/font.rs @@ -8,6 +8,7 @@ use pdf_writer::{ writers::FontDescriptor, Chunk, Filter, Finish, Name, Rect, Ref, Str, }; +use subsetter::GlyphRemapper; use ttf_parser::{name_id, GlyphId, Tag}; use typst::text::Font; use typst::utils::SliceExt; @@ -43,6 +44,7 @@ pub fn write_fonts(context: &WithGlobalRefs) -> (PdfChunk, HashMap) { out.insert(font.clone(), type0_ref); let glyph_set = resources.glyph_sets.get(font).unwrap(); + let glyph_remapper = resources.glyph_remappers.get(font).unwrap(); let ttf = font.ttf(); // Do we have a TrueType or CFF font? @@ -87,16 +89,15 @@ pub fn write_fonts(context: &WithGlobalRefs) -> (PdfChunk, HashMap) { } // Extract the widths of all glyphs. - let mut widths = vec![]; - for gid in std::iter::once(0).chain(glyph_set.keys().copied()) { - let width = ttf.glyph_hor_advance(GlyphId(gid)).unwrap_or(0); - let units = font.to_em(width).to_font_units(); - let cid = glyph_cid(font, gid); - if usize::from(cid) >= widths.len() { - widths.resize(usize::from(cid) + 1, 0.0); - widths[usize::from(cid)] = units; - } - } + // `remapped_gids` returns an iterator over the old GIDs in their new sorted + // order, so we can append the widths as is. 
+ let widths = glyph_remapper + .remapped_gids() + .map(|gid| { + let width = ttf.glyph_hor_advance(GlyphId(gid)).unwrap_or(0); + font.to_em(width).to_font_units() + }) + .collect::>(); // Write all non-zero glyph widths. let mut first = 0; @@ -115,19 +116,15 @@ pub fn write_fonts(context: &WithGlobalRefs) -> (PdfChunk, HashMap) { // Write the /ToUnicode character map, which maps glyph ids back to // unicode codepoints to enable copying out of the PDF. - let cmap = create_cmap(font, glyph_set); + let cmap = create_cmap(glyph_set, glyph_remapper); chunk.cmap(cmap_ref, &cmap.finish()); - // Subset and write the font's bytes. - let glyphs: Vec<_> = glyph_set.keys().copied().collect(); - let data = subset_font(font, &glyphs); - - let mut stream = chunk.stream(data_ref, &data); + let subset = subset_font(font, glyph_remapper); + let mut stream = chunk.stream(data_ref, &subset); stream.filter(Filter::FlateDecode); if is_cff { stream.pair(Name(b"Subtype"), Name(b"CIDFontType0C")); } - stream.finish(); let mut font_descriptor = @@ -194,15 +191,18 @@ pub fn write_font_descriptor<'a>( /// Subset a font to the given glyphs. /// -/// - For a font with TrueType outlines, this returns the whole OpenType font. -/// - For a font with CFF outlines, this returns just the CFF font program. +/// - For a font with TrueType outlines, this produces the whole OpenType font. +/// - For a font with CFF outlines, this produces just the CFF font program. +/// +/// In both cases, this returns the already compressed data. #[comemo::memoize] #[typst_macros::time(name = "subset font")] -fn subset_font(font: &Font, glyphs: &[u16]) -> Arc> { +fn subset_font(font: &Font, glyph_remapper: &GlyphRemapper) -> Arc> { let data = font.data(); - let profile = subsetter::Profile::pdf(glyphs); - let subsetted = subsetter::subset(data, font.index(), profile); - let mut data = subsetted.as_deref().unwrap_or(data); + // TODO: Fail export instead of unwrapping once export diagnostics exist. 
+ let subsetted = subsetter::subset(data, font.index(), glyph_remapper).unwrap(); + let mut data = subsetted.as_ref(); // Extract the standalone CFF font program if applicable. let raw = ttf_parser::RawFace::parse(data, 0).unwrap(); @@ -259,46 +259,19 @@ pub fn improve_glyph_sets(glyph_sets: &mut HashMap) -> UnicodeCmap { +fn create_cmap( + glyph_set: &BTreeMap, + glyph_remapper: &GlyphRemapper, +) -> UnicodeCmap { // Produce a reverse mapping from glyphs' CIDs to unicode strings. let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO); for (&g, text) in glyph_set.iter() { + // See comment in `write_normal_text` for why we can choose the CID this way. + let cid = glyph_remapper.get(g).unwrap(); if !text.is_empty() { - cmap.pair_with_multiple(glyph_cid(font, g), text.chars()); + cmap.pair_with_multiple(cid, text.chars()); } } cmap } - -/// Get the CID for a glyph id. -/// -/// When writing text into a PDF, we have to specify CIDs (character ids) not -/// GIDs (glyph IDs). -/// -/// Most of the time, the mapping between these two is an identity mapping. In -/// particular, for TrueType fonts, the mapping is an identity mapping because -/// of this line above: -/// ```ignore -/// cid.cid_to_gid_map_predefined(Name(b"Identity")); -/// ``` -/// -/// However, CID-keyed CFF fonts may have a non-identity mapping defined in -/// their charset. For those, we must map the glyph IDs in a `TextItem` to CIDs. -/// The font defines the map through its charset. The charset usually maps -/// glyphs to SIDs (string ids) specifying the glyph's name. Not for CID-keyed -/// fonts though! For these, the SIDs are CIDs in disguise. Relevant quote from -/// the CFF spec: -/// -/// > The charset data, although in the same format as non-CIDFonts, will -/// > represent CIDs rather than SIDs, [...] -/// -/// This function performs the mapping from glyph ID to CID. It also works for -/// non CID-keyed fonts. Then, it will simply return the glyph ID. 
-pub(super) fn glyph_cid(font: &Font, glyph_id: u16) -> u16 { - font.ttf() - .tables() - .cff - .and_then(|cff| cff.glyph_cid(ttf_parser::GlyphId(glyph_id))) - .unwrap_or(glyph_id) -} diff --git a/crates/typst-pdf/src/image.rs b/crates/typst-pdf/src/image.rs index 1d43a43b9..0df67c615 100644 --- a/crates/typst-pdf/src/image.rs +++ b/crates/typst-pdf/src/image.rs @@ -183,7 +183,8 @@ fn encode_alpha(raster: &RasterImage) -> (Vec, Filter) { /// Encode an SVG into a chunk of PDF objects. #[typst_macros::time(name = "encode svg")] fn encode_svg(svg: &SvgImage) -> (Chunk, Ref) { - svg2pdf::to_chunk(svg.tree(), svg2pdf::ConversionOptions::default()) + // TODO: Don't unwrap once we have export diagnostics. + svg2pdf::to_chunk(svg.tree(), svg2pdf::ConversionOptions::default()).unwrap() } /// A pre-encoded image. diff --git a/crates/typst-pdf/src/page.rs b/crates/typst-pdf/src/page.rs index c6881eb61..2983f504f 100644 --- a/crates/typst-pdf/src/page.rs +++ b/crates/typst-pdf/src/page.rs @@ -53,6 +53,7 @@ pub fn traverse_pages( } improve_glyph_sets(&mut resources.glyph_sets); + improve_glyph_sets(&mut resources.color_glyph_sets); (PdfChunk::new(), (pages, resources)) } diff --git a/crates/typst-pdf/src/resources.rs b/crates/typst-pdf/src/resources.rs index a0a7c71d6..a2cf56878 100644 --- a/crates/typst-pdf/src/resources.rs +++ b/crates/typst-pdf/src/resources.rs @@ -11,6 +11,7 @@ use std::hash::Hash; use ecow::{eco_format, EcoString}; use pdf_writer::{Dict, Finish, Name, Ref}; +use subsetter::GlyphRemapper; use typst::text::Lang; use typst::{text::Font, utils::Deferred, visualize::Image}; @@ -82,6 +83,10 @@ pub struct Resources { /// PDF's /ToUnicode map for glyphs that don't have an entry in the font's /// cmap. This is important for copy-paste and searching. pub glyph_sets: HashMap>, + /// Same as `glyph_sets`, but for color fonts. + pub color_glyph_sets: HashMap>, + /// Stores the glyph remapper for each font for the subsetter. 
+ pub glyph_remappers: HashMap, } impl Renumber for Resources { @@ -112,6 +117,8 @@ impl Default for Resources<()> { color_fonts: None, languages: BTreeMap::new(), glyph_sets: HashMap::new(), + color_glyph_sets: HashMap::new(), + glyph_remappers: HashMap::new(), } } } @@ -138,6 +145,8 @@ impl Resources<()> { .map(|(c, r)| Box::new(c.with_refs(r))), languages: self.languages, glyph_sets: self.glyph_sets, + color_glyph_sets: self.color_glyph_sets, + glyph_remappers: self.glyph_remappers, } } }