From 821536b253bed0753eb031d4cc7fbd407dce9f39 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 25 Aug 2021 01:45:17 +0200 Subject: [PATCH] Subset glyf and loca tables --- src/export/pdf.rs | 34 ++++-- src/export/subset.rs | 260 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 244 insertions(+), 50 deletions(-) diff --git a/src/export/pdf.rs b/src/export/pdf.rs index 433dc8443..9b9d0e8e6 100644 --- a/src/export/pdf.rs +++ b/src/export/pdf.rs @@ -1,7 +1,7 @@ //! Exporting into PDF documents. use std::cmp::Eq; -use std::collections::{BTreeSet, HashMap}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::hash::Hash; use std::rc::Rc; @@ -38,12 +38,14 @@ struct PdfExporter<'a> { frames: &'a [Rc], fonts: &'a FontStore, images: &'a ImageStore, + glyphs: HashMap>, font_map: Remapper, image_map: Remapper, } impl<'a> PdfExporter<'a> { fn new(ctx: &'a Context, frames: &'a [Rc]) -> Self { + let mut glyphs = HashMap::>::new(); let mut font_map = Remapper::new(); let mut image_map = Remapper::new(); let mut alpha_masks = 0; @@ -51,7 +53,11 @@ impl<'a> PdfExporter<'a> { for frame in frames { for (_, element) in frame.elements() { match *element { - Element::Text(ref text) => font_map.insert(text.face_id), + Element::Text(ref text) => { + font_map.insert(text.face_id); + let set = glyphs.entry(text.face_id).or_default(); + set.extend(text.glyphs.iter().map(|g| g.id)); + } Element::Geometry(_, _) => {} Element::Image(id, _) => { let img = ctx.images.get(id); @@ -74,6 +80,7 @@ impl<'a> PdfExporter<'a> { frames, fonts: &ctx.fonts, images: &ctx.images, + glyphs, font_map, image_map, } @@ -278,6 +285,7 @@ impl<'a> PdfExporter<'a> { fn write_fonts(&mut self) { for (refs, face_id) in self.refs.fonts().zip(self.font_map.layout_indices()) { + let glyphs = &self.glyphs[&face_id]; let face = self.fonts.get(face_id); let ttf = face.ttf(); @@ -370,15 +378,19 @@ impl<'a> PdfExporter<'a> { // unicode codepoints to enable copying out of the PDF. 
self.writer.cmap(refs.cmap, &{ // Deduplicate glyph-to-unicode mappings with a set. - let mut mapping = BTreeSet::new(); + let mut mapping = BTreeMap::new(); for subtable in ttf.character_mapping_subtables() { - subtable.codepoints(|n| { - if let Some(c) = std::char::from_u32(n) { - if let Some(g) = ttf.glyph_index(c) { - mapping.insert((g.0, c)); + if subtable.is_unicode() { + subtable.codepoints(|n| { + if let Some(c) = std::char::from_u32(n) { + if let Some(GlyphId(g)) = ttf.glyph_index(c) { + if glyphs.contains(&g) { + mapping.insert(g, c); + } + } } - } - }) + }); + } } let mut cmap = UnicodeCmap::new(cmap_name, system_info); @@ -388,9 +400,9 @@ impl<'a> PdfExporter<'a> { cmap.finish() }); - // Susbet and write the face's bytes. + // Subset and write the face's bytes. let original = face.buffer(); - let subsetted = subset(original, face.index()); + let subsetted = subset(original, face.index(), glyphs.iter().copied()); let data = subsetted.as_deref().unwrap_or(original); self.writer.stream(refs.data, data); } diff --git a/src/export/subset.rs b/src/export/subset.rs index bf62502bb..87e0c6802 100644 --- a/src/export/subset.rs +++ b/src/export/subset.rs @@ -1,21 +1,31 @@ //! Font subsetting. use std::borrow::Cow; +use std::collections::HashSet; +use std::convert::TryInto; -use ttf_parser::parser::{FromData, LazyArray16, Offset, Offset32, Stream}; +use ttf_parser::parser::{ + FromData, LazyArray16, LazyArray32, Offset16, Offset32, Stream, F2DOT14, +}; use ttf_parser::{Face, Tag}; /// Subset a font face. /// +/// This will remove the outlines of all glyphs that are not part of the given +/// iterator. Furthermore, all character mapping and layout tables are dropped as +/// shaping has already happened. +/// /// Returns `None` if the font data is invalid. 
-pub fn subset(data: &[u8], index: u32) -> Option> { - let mut s = Subsetter::new(data, index)?; - s.subset()?; - Some(s.encode()) +pub fn subset(data: &[u8], index: u32, glyphs: I) -> Option> +where + I: IntoIterator, +{ + Subsetter::new(data, index, glyphs.into_iter().collect())?.subset() } struct Subsetter<'a> { face: Face<'a>, + glyphs: Vec, magic: Magic, records: LazyArray16<'a, TableRecord>, tables: Vec<(Tag, Cow<'a, [u8]>)>, @@ -23,7 +33,7 @@ struct Subsetter<'a> { impl<'a> Subsetter<'a> { /// Parse the font header and create a new subsetter. - fn new(data: &'a [u8], index: u32) -> Option { + fn new(data: &'a [u8], index: u32, glyphs: Vec) -> Option { let face = Face::from_slice(data, index).ok()?; let mut s = Stream::new(&data); @@ -53,40 +63,21 @@ impl<'a> Subsetter<'a> { // Read the table records. let records = s.read_array16::(count)?; - Some(Self { face, magic, records, tables: vec![] }) - } - - /// Subset, drop and copy tables. - fn subset(&mut self) -> Option<()> { - for record in self.records { - let tag = record.tag; - let data = self.face.table_data(tag)?; - - match &tag.to_bytes() { - // Glyphs are already mapped. - b"cmap" => {} - - // Layout is already finished. - b"GPOS" | b"GSUB" | b"BASE" | b"JSTF" | b"MATH" | b"ankr" | b"kern" - | b"kerx" | b"mort" | b"morx" | b"trak" | b"bsln" | b"just" - | b"feat" | b"prop" => {} - - // TODO: Subset. - // b"loca" => {} - // b"glyf" => {} - // b"sbix" => {} - // b"SVG " => {} - // b"post" => {} - - // All other tables are simply copied. - _ => self.tables.push((tag, Cow::Borrowed(data))), - } - } - Some(()) + Some(Self { + face, + glyphs, + magic, + records, + tables: vec![], + }) } /// Encode the subsetted font file. - fn encode(mut self) -> Vec { + fn subset(mut self) -> Option> { + // Subset the individual tables and save them in `self.tables`. + self.subset_tables()?; + + // Start writing a brand new font. 
let mut w = Vec::new(); w.write(self.magic); @@ -111,7 +102,7 @@ impl<'a> Subsetter<'a> { // Write table records. let mut offset = 12 + self.tables.len() * TableRecord::SIZE; for (tag, data) in &mut self.tables { - if *tag == Tag::from_bytes(b"head") { + if *tag == tg(b"head") { // Zero out checksum field in head table. data.to_mut()[8 .. 12].copy_from_slice(&[0; 4]); checksum_adjustment_offset = Some(offset + 8); @@ -143,8 +134,52 @@ impl<'a> Subsetter<'a> { w[i .. i + 4].copy_from_slice(&val.to_be_bytes()); } - w + Some(w) } + + /// Subset, drop and copy tables. + fn subset_tables(&mut self) -> Option<()> { + for record in self.records { + let tag = record.tag; + let data = self.face.table_data(tag)?; + + match &tag.to_bytes() { + // Glyphs are already mapped. + b"cmap" => {} + + // Layout is already finished. + b"GPOS" | b"GSUB" | b"BASE" | b"JSTF" | b"MATH" | b"ankr" | b"kern" + | b"kerx" | b"mort" | b"morx" | b"trak" | b"bsln" | b"just" + | b"feat" | b"prop" => {} + + // Loca is created when subsetting glyf. + b"loca" => {} + b"glyf" => { + let head = self.face.table_data(tg(b"head"))?; + let short = Stream::read_at::(head, 50)? == 0; + if short { + self.subset_glyf_loca::(); + } else { + self.subset_glyf_loca::(); + } + } + + // TODO: Subset. + // b"sbix" => {} + // b"SVG " => {} + // b"post" => {} + + // All other tables are simply copied. + _ => self.tables.push((tag, Cow::Borrowed(data))), + } + } + Some(()) + } +} + +/// Helper function to create a tag from bytes. +fn tg(bytes: &[u8; 4]) -> Tag { + Tag::from_bytes(bytes) } /// Calculate a checksum over the sliced data as sum of u32's. 
The data length @@ -187,12 +222,24 @@ impl ToData for u16 { } } +impl ToData for Offset16 { + fn write(&self, data: &mut Vec) { + self.0.write(data); + } +} + impl ToData for u32 { fn write(&self, data: &mut Vec) { data.extend(&self.to_be_bytes()); } } +impl ToData for Offset32 { + fn write(&self, data: &mut Vec) { + self.0.write(data); + } +} + impl ToData for Tag { fn write(&self, data: &mut Vec) { self.as_u32().write(data); @@ -262,3 +309,138 @@ impl ToData for TableRecord { self.length.write(data); } } + +impl Subsetter<'_> { + /// Subset the glyf and loca tables. + fn subset_glyf_loca(&mut self) -> Option<()> { + let loca = self.face.table_data(tg(b"loca"))?; + let glyf = self.face.table_data(tg(b"glyf"))?; + + let offsets = LazyArray32::::new(loca); + let slice = |id: u16| { + let from = offsets.get(u32::from(id))?.to_usize(); + let to = offsets.get(u32::from(id) + 1)?.to_usize(); + glyf.get(from .. to) + }; + + // To compute the set of all glyphs we want to keep, we use a work stack + // containing glyphs whose components we still need to consider. + let mut glyphs = HashSet::new(); + let mut work: Vec = std::mem::take(&mut self.glyphs); + + // Always include the notdef glyph. + work.push(0); + + // Find composite glyph descriptions. + while let Some(id) = work.pop() { + if glyphs.insert(id) { + let mut s = Stream::new(slice(id)?); + if let Some(num_contours) = s.read::() { + // Negative means this is a composite glyph. + if num_contours < 0 { + // Skip min/max metrics. + s.read::(); + s.read::(); + s.read::(); + s.read::(); + + // Read component glyphs. + work.extend(component_glyphs(s)); + } + } + } + } + + let mut sub_loca = vec![]; + let mut sub_glyf = vec![]; + + for id in 0 .. self.face.number_of_glyphs() { + sub_loca.write(T::from_usize(sub_glyf.len())?); + + // If the glyph shouldn't be contained in the subset, it will still + // get a loca entry, but the glyf data is simply empty. 
+ if glyphs.contains(&id) { + sub_glyf.extend(slice(id)?); + } + } + + sub_loca.write(T::from_usize(sub_glyf.len())?); + + self.tables.push((tg(b"loca"), Cow::Owned(sub_loca))); + self.tables.push((tg(b"glyf"), Cow::Owned(sub_glyf))); + + Some(()) + } +} + +/// Offsets for loca table. +trait LocaOffset: Sized + FromData + ToData { + fn to_usize(self) -> usize; + fn from_usize(offset: usize) -> Option; +} + +impl LocaOffset for Offset16 { + fn to_usize(self) -> usize { + 2 * usize::from(self.0) + } + + fn from_usize(offset: usize) -> Option { + if offset % 2 == 0 { + (offset / 2).try_into().ok().map(Self) + } else { + None + } + } +} + +impl LocaOffset for Offset32 { + fn to_usize(self) -> usize { + self.0 as usize + } + + fn from_usize(offset: usize) -> Option { + offset.try_into().ok().map(Self) + } +} + +/// Returns an iterator over the component glyphs referenced by the given +/// `glyf` table composite glyph description. +fn component_glyphs(mut s: Stream) -> impl Iterator + '_ { + const ARG_1_AND_2_ARE_WORDS: u16 = 0x0001; + const WE_HAVE_A_SCALE: u16 = 0x0008; + const MORE_COMPONENTS: u16 = 0x0020; + const WE_HAVE_AN_X_AND_Y_SCALE: u16 = 0x0040; + const WE_HAVE_A_TWO_BY_TWO: u16 = 0x0080; + + let mut done = false; + std::iter::from_fn(move || { + if done { + return None; + } + + let flags = s.read::()?; + let component = s.read::()?; + + if flags & ARG_1_AND_2_ARE_WORDS != 0 { + s.skip::(); + s.skip::(); + } else { + s.skip::(); + } + + if flags & WE_HAVE_A_SCALE != 0 { + s.skip::(); + } else if flags & WE_HAVE_AN_X_AND_Y_SCALE != 0 { + s.skip::(); + s.skip::(); + } else if flags & WE_HAVE_A_TWO_BY_TWO != 0 { + s.skip::(); + s.skip::(); + s.skip::(); + s.skip::(); + } + + done = flags & MORE_COMPONENTS == 0; + Some(component) + }) +}