diff --git a/src/export/pdf.rs b/src/export/pdf.rs
index 433dc8443..9b9d0e8e6 100644
--- a/src/export/pdf.rs
+++ b/src/export/pdf.rs
@@ -1,7 +1,7 @@
//! Exporting into PDF documents.
use std::cmp::Eq;
-use std::collections::{BTreeSet, HashMap};
+use std::collections::{BTreeMap, HashMap, HashSet};
use std::hash::Hash;
use std::rc::Rc;
@@ -38,12 +38,14 @@ struct PdfExporter<'a> {
frames: &'a [Rc],
fonts: &'a FontStore,
images: &'a ImageStore,
+ glyphs: HashMap>,
font_map: Remapper,
image_map: Remapper,
}
impl<'a> PdfExporter<'a> {
fn new(ctx: &'a Context, frames: &'a [Rc]) -> Self {
+ let mut glyphs = HashMap::>::new();
let mut font_map = Remapper::new();
let mut image_map = Remapper::new();
let mut alpha_masks = 0;
@@ -51,7 +53,11 @@ impl<'a> PdfExporter<'a> {
for frame in frames {
for (_, element) in frame.elements() {
match *element {
- Element::Text(ref text) => font_map.insert(text.face_id),
+ Element::Text(ref text) => {
+ font_map.insert(text.face_id);
+ let set = glyphs.entry(text.face_id).or_default();
+ set.extend(text.glyphs.iter().map(|g| g.id));
+ }
Element::Geometry(_, _) => {}
Element::Image(id, _) => {
let img = ctx.images.get(id);
@@ -74,6 +80,7 @@ impl<'a> PdfExporter<'a> {
frames,
fonts: &ctx.fonts,
images: &ctx.images,
+ glyphs,
font_map,
image_map,
}
@@ -278,6 +285,7 @@ impl<'a> PdfExporter<'a> {
fn write_fonts(&mut self) {
for (refs, face_id) in self.refs.fonts().zip(self.font_map.layout_indices()) {
+ let glyphs = &self.glyphs[&face_id];
let face = self.fonts.get(face_id);
let ttf = face.ttf();
@@ -370,15 +378,19 @@ impl<'a> PdfExporter<'a> {
// unicode codepoints to enable copying out of the PDF.
self.writer.cmap(refs.cmap, &{
// Deduplicate glyph-to-unicode mappings with a set.
- let mut mapping = BTreeSet::new();
+ let mut mapping = BTreeMap::new();
for subtable in ttf.character_mapping_subtables() {
- subtable.codepoints(|n| {
- if let Some(c) = std::char::from_u32(n) {
- if let Some(g) = ttf.glyph_index(c) {
- mapping.insert((g.0, c));
+ if subtable.is_unicode() {
+ subtable.codepoints(|n| {
+ if let Some(c) = std::char::from_u32(n) {
+ if let Some(GlyphId(g)) = ttf.glyph_index(c) {
+ if glyphs.contains(&g) {
+ mapping.insert(g, c);
+ }
+ }
}
- }
- })
+ });
+ }
}
let mut cmap = UnicodeCmap::new(cmap_name, system_info);
@@ -388,9 +400,9 @@ impl<'a> PdfExporter<'a> {
cmap.finish()
});
- // Susbet and write the face's bytes.
+ // Subset and write the face's bytes.
let original = face.buffer();
- let subsetted = subset(original, face.index());
+ let subsetted = subset(original, face.index(), glyphs.iter().copied());
let data = subsetted.as_deref().unwrap_or(original);
self.writer.stream(refs.data, data);
}
diff --git a/src/export/subset.rs b/src/export/subset.rs
index bf62502bb..87e0c6802 100644
--- a/src/export/subset.rs
+++ b/src/export/subset.rs
@@ -1,21 +1,31 @@
//! Font subsetting.
use std::borrow::Cow;
+use std::collections::HashSet;
+use std::convert::TryInto;
-use ttf_parser::parser::{FromData, LazyArray16, Offset, Offset32, Stream};
+use ttf_parser::parser::{
+ FromData, LazyArray16, LazyArray32, Offset16, Offset32, Stream, F2DOT14,
+};
use ttf_parser::{Face, Tag};
/// Subset a font face.
///
+/// This will remove the outlines of all glyphs that are not part of the given
+/// iterator. Furthermore, all character mapping and layout tables are dropped as
+/// shaping has already happened.
+///
/// Returns `None` if the font data is invalid.
-pub fn subset(data: &[u8], index: u32) -> Option> {
- let mut s = Subsetter::new(data, index)?;
- s.subset()?;
- Some(s.encode())
+pub fn subset(data: &[u8], index: u32, glyphs: I) -> Option>
+where
+ I: IntoIterator- ,
+{
+ Subsetter::new(data, index, glyphs.into_iter().collect())?.subset()
}
struct Subsetter<'a> {
face: Face<'a>,
+ glyphs: Vec,
magic: Magic,
records: LazyArray16<'a, TableRecord>,
tables: Vec<(Tag, Cow<'a, [u8]>)>,
@@ -23,7 +33,7 @@ struct Subsetter<'a> {
impl<'a> Subsetter<'a> {
/// Parse the font header and create a new subsetter.
- fn new(data: &'a [u8], index: u32) -> Option {
+ fn new(data: &'a [u8], index: u32, glyphs: Vec) -> Option {
let face = Face::from_slice(data, index).ok()?;
let mut s = Stream::new(&data);
@@ -53,40 +63,21 @@ impl<'a> Subsetter<'a> {
// Read the table records.
let records = s.read_array16::(count)?;
- Some(Self { face, magic, records, tables: vec![] })
- }
-
- /// Subset, drop and copy tables.
- fn subset(&mut self) -> Option<()> {
- for record in self.records {
- let tag = record.tag;
- let data = self.face.table_data(tag)?;
-
- match &tag.to_bytes() {
- // Glyphs are already mapped.
- b"cmap" => {}
-
- // Layout is already finished.
- b"GPOS" | b"GSUB" | b"BASE" | b"JSTF" | b"MATH" | b"ankr" | b"kern"
- | b"kerx" | b"mort" | b"morx" | b"trak" | b"bsln" | b"just"
- | b"feat" | b"prop" => {}
-
- // TODO: Subset.
- // b"loca" => {}
- // b"glyf" => {}
- // b"sbix" => {}
- // b"SVG " => {}
- // b"post" => {}
-
- // All other tables are simply copied.
- _ => self.tables.push((tag, Cow::Borrowed(data))),
- }
- }
- Some(())
+ Some(Self {
+ face,
+ glyphs,
+ magic,
+ records,
+ tables: vec![],
+ })
}
/// Encode the subsetted font file.
- fn encode(mut self) -> Vec {
+ fn subset(mut self) -> Option> {
+ // Subset the individual tables and save them in `self.tables`.
+ self.subset_tables()?;
+
+ // Start writing a brand new font.
let mut w = Vec::new();
w.write(self.magic);
@@ -111,7 +102,7 @@ impl<'a> Subsetter<'a> {
// Write table records.
let mut offset = 12 + self.tables.len() * TableRecord::SIZE;
for (tag, data) in &mut self.tables {
- if *tag == Tag::from_bytes(b"head") {
+ if *tag == tg(b"head") {
// Zero out checksum field in head table.
data.to_mut()[8 .. 12].copy_from_slice(&[0; 4]);
checksum_adjustment_offset = Some(offset + 8);
@@ -143,8 +134,52 @@ impl<'a> Subsetter<'a> {
w[i .. i + 4].copy_from_slice(&val.to_be_bytes());
}
- w
+ Some(w)
}
+
+    /// Subset, drop and copy tables.
+    ///
+    /// Walks the table records of the source font and fills `self.tables`:
+    /// - `cmap` and the layout tables are dropped (nothing is pushed),
+    /// - `glyf` is subsetted and a matching `loca` is regenerated alongside it,
+    /// - every other table is borrowed unchanged from the source font.
+    ///
+    /// Returns `None` if a listed table cannot be read or if subsetting the
+    /// glyph outlines fails.
+    fn subset_tables(&mut self) -> Option<()> {
+        for record in self.records {
+            let tag = record.tag;
+            let data = self.face.table_data(tag)?;
+
+            match &tag.to_bytes() {
+                // Glyphs are already mapped.
+                b"cmap" => {}
+
+                // Layout is already finished.
+                b"GPOS" | b"GSUB" | b"BASE" | b"JSTF" | b"MATH" | b"ankr" | b"kern"
+                | b"kerx" | b"mort" | b"morx" | b"trak" | b"bsln" | b"just"
+                | b"feat" | b"prop" => {}
+
+                // Loca is created when subsetting glyf.
+                b"loca" => {}
+                b"glyf" => {
+                    // The 16-bit value at byte 50 of `head` (`indexToLocFormat`
+                    // in the OpenType spec) selects the loca offset format;
+                    // zero means short offsets.
+                    let head = self.face.table_data(tg(b"head"))?;
+                    let short = Stream::read_at::<i16>(head, 50)? == 0;
+
+                    // Propagate failure: if it were ignored, neither `glyf`
+                    // nor `loca` would be pushed and the resulting font would
+                    // be missing both tables.
+                    if short {
+                        self.subset_glyf_loca::<Offset16>()?;
+                    } else {
+                        self.subset_glyf_loca::<Offset32>()?;
+                    }
+                }
+
+                // TODO: Subset.
+                // b"sbix" => {}
+                // b"SVG " => {}
+                // b"post" => {}
+
+                // All other tables are simply copied.
+                _ => self.tables.push((tag, Cow::Borrowed(data))),
+            }
+        }
+        Some(())
+    }
+}
+
+/// Helper function to create a tag from bytes.
+///
+/// Thin shorthand for `Tag::from_bytes`, so that table tags can be written
+/// compactly as `tg(b"head")` in lookups and comparisons.
+fn tg(bytes: &[u8; 4]) -> Tag {
+ Tag::from_bytes(bytes)
}
/// Calculate a checksum over the sliced data as sum of u32's. The data length
@@ -187,12 +222,24 @@ impl ToData for u16 {
}
}
+/// Serializes the offset by delegating to the `ToData` impl of its inner value.
+impl ToData for Offset16 {
+    fn write(&self, data: &mut Vec<u8>) {
+        self.0.write(data);
+    }
+}
+
impl ToData for u32 {
fn write(&self, data: &mut Vec) {
data.extend(&self.to_be_bytes());
}
}
+/// Serializes the offset by delegating to the `ToData` impl of its inner value.
+impl ToData for Offset32 {
+    fn write(&self, data: &mut Vec<u8>) {
+        self.0.write(data);
+    }
+}
+
impl ToData for Tag {
fn write(&self, data: &mut Vec) {
self.as_u32().write(data);
@@ -262,3 +309,138 @@ impl ToData for TableRecord {
self.length.write(data);
}
}
+
+impl Subsetter<'_> {
+    /// Subset the glyf and loca tables.
+    ///
+    /// `T` is the offset type stored in the font's `loca` table and is also
+    /// used for the regenerated one. The glyphs to keep are taken out of
+    /// `self.glyphs` (leaving it empty) and extended by glyph 0 and by every
+    /// component referenced from a kept composite glyph. Glyph ids keep their
+    /// position: the new `loca` has one entry per glyph of the face plus a
+    /// final end offset, and dropped glyphs get an empty `glyf` range.
+    ///
+    /// On success, the new `loca` and `glyf` tables are appended to
+    /// `self.tables`. Returns `None` if either source table is missing, if a
+    /// glyph that must be kept has no valid `loca` range inside `glyf`, or if
+    /// a new offset cannot be encoded as `T`.
+    fn subset_glyf_loca<T: LocaOffset>(&mut self) -> Option<()> {
+        let loca = self.face.table_data(tg(b"loca"))?;
+        let glyf = self.face.table_data(tg(b"glyf"))?;
+
+        // Looks up the byte range of a glyph in the original `glyf` table
+        // through two consecutive `loca` entries.
+        let offsets = LazyArray32::<T>::new(loca);
+        let slice = |id: u16| {
+            let from = offsets.get(u32::from(id))?.to_usize();
+            let to = offsets.get(u32::from(id) + 1)?.to_usize();
+            glyf.get(from .. to)
+        };
+
+        // To compute the set of all glyphs we want to keep, we use a work stack
+        // containing glyphs whose components we still need to consider.
+        let mut glyphs = HashSet::new();
+        let mut work: Vec<u16> = std::mem::take(&mut self.glyphs);
+
+        // Always include the notdef glyph.
+        work.push(0);
+
+        // Find composite glyph descriptions.
+        while let Some(id) = work.pop() {
+            // Only inspect glyphs that were not seen before.
+            if glyphs.insert(id) {
+                let mut s = Stream::new(slice(id)?);
+
+                // The read fails for glyphs without outline data; those have
+                // no components to follow.
+                if let Some(num_contours) = s.read::<i16>() {
+                    // Negative means this is a composite glyph.
+                    if num_contours < 0 {
+                        // Skip min/max metrics.
+                        s.read::<i16>();
+                        s.read::<i16>();
+                        s.read::<i16>();
+                        s.read::<i16>();
+
+                        // Read component glyphs.
+                        work.extend(component_glyphs(s));
+                    }
+                }
+            }
+        }
+
+        let mut sub_loca = vec![];
+        let mut sub_glyf = vec![];
+
+        for id in 0 .. self.face.number_of_glyphs() {
+            // Each glyph starts where the data written so far ends.
+            sub_loca.write(T::from_usize(sub_glyf.len())?);
+
+            // If the glyph shouldn't be contained in the subset, it will still
+            // get a loca entry, but the glyf data is simply empty.
+            if glyphs.contains(&id) {
+                sub_glyf.extend(slice(id)?);
+            }
+        }
+
+        // Trailing entry marking the end of the last glyph.
+        sub_loca.write(T::from_usize(sub_glyf.len())?);
+
+        self.tables.push((tg(b"loca"), Cow::Owned(sub_loca)));
+        self.tables.push((tg(b"glyf"), Cow::Owned(sub_glyf)));
+
+        Some(())
+    }
+}
+
+/// Offsets for loca table.
+///
+/// Abstracts over the offset representations a `loca` table can use, so that
+/// the same subsetting code can both read and write either one.
+trait LocaOffset: Sized + FromData + ToData {
+    /// Converts the stored value into a byte offset.
+    fn to_usize(self) -> usize;
+
+    /// Encodes a byte offset, or returns `None` if this representation cannot
+    /// express it.
+    fn from_usize(offset: usize) -> Option<Self>;
+}
+
+/// 16-bit offsets store half of the actual byte offset.
+impl LocaOffset for Offset16 {
+    fn to_usize(self) -> usize {
+        2 * usize::from(self.0)
+    }
+
+    /// Fails for odd offsets (not representable after halving) and for
+    /// halved offsets that do not fit into 16 bits.
+    fn from_usize(offset: usize) -> Option<Self> {
+        if offset % 2 == 0 {
+            (offset / 2).try_into().ok().map(Self)
+        } else {
+            None
+        }
+    }
+}
+
+/// 32-bit offsets store the byte offset as is.
+impl LocaOffset for Offset32 {
+    fn to_usize(self) -> usize {
+        self.0 as usize
+    }
+
+    /// Fails if the offset does not fit into 32 bits.
+    fn from_usize(offset: usize) -> Option<Self> {
+        offset.try_into().ok().map(Self)
+    }
+}
+
+/// Returns an iterator over the component glyphs referenced by the given
+/// `glyf` table composite glyph description.
+///
+/// `s` must be positioned at the first component record, i.e. directly after
+/// the glyph header. Iteration ends after the record whose flags lack
+/// `MORE_COMPONENTS`, or as soon as the flags or the glyph index of a record
+/// cannot be read.
+fn component_glyphs(mut s: Stream) -> impl Iterator<Item = u16> + '_ {
+    const ARG_1_AND_2_ARE_WORDS: u16 = 0x0001;
+    const WE_HAVE_A_SCALE: u16 = 0x0008;
+    const MORE_COMPONENTS: u16 = 0x0020;
+    const WE_HAVE_AN_X_AND_Y_SCALE: u16 = 0x0040;
+    const WE_HAVE_A_TWO_BY_TWO: u16 = 0x0080;
+
+    let mut done = false;
+    std::iter::from_fn(move || {
+        if done {
+            return None;
+        }
+
+        let flags = s.read::<u16>()?;
+        let component = s.read::<u16>()?;
+
+        // Skip the two placement arguments: 16 bits each if the flag is set,
+        // otherwise 8 bits each (skipped together as a single `u16`).
+        if flags & ARG_1_AND_2_ARE_WORDS != 0 {
+            s.skip::<i16>();
+            s.skip::<i16>();
+        } else {
+            s.skip::<u16>();
+        }
+
+        // Skip the optional transformation, which consists of one, two or
+        // four `F2DOT14` values depending on the flags.
+        if flags & WE_HAVE_A_SCALE != 0 {
+            s.skip::<F2DOT14>();
+        } else if flags & WE_HAVE_AN_X_AND_Y_SCALE != 0 {
+            s.skip::<F2DOT14>();
+            s.skip::<F2DOT14>();
+        } else if flags & WE_HAVE_A_TWO_BY_TWO != 0 {
+            s.skip::<F2DOT14>();
+            s.skip::<F2DOT14>();
+            s.skip::<F2DOT14>();
+            s.skip::<F2DOT14>();
+        }
+
+        // Remember whether this was the last record, but still yield it.
+        done = flags & MORE_COMPONENTS == 0;
+        Some(component)
+    })
+}