//! Subsetting of opentype fonts. use std::collections::HashMap; use std::io::{Cursor, Seek, SeekFrom}; use byteorder::{BE, ReadBytesExt, WriteBytesExt}; use opentype::{OpenTypeReader, Outlines, Table, TableRecord, Tag}; use opentype::tables::{Header, CharMap, Locations, HorizontalMetrics, Glyphs}; use crate::size::Size; use super::{Font, FontError, FontResult}; /// Subsets a font. #[derive(Debug)] pub struct Subsetter<'a> { // The original font font: &'a Font, reader: OpenTypeReader>, outlines: Outlines, tables: Vec, glyphs: Vec, // The subsetted font chars: Vec, records: Vec, body: Vec, } impl<'a> Subsetter<'a> { /// Subset a font. See [`Font::subetted`] for more details. pub fn subset(font: &Font, chars: C, tables: I) -> Result where C: IntoIterator, I: IntoIterator, S: AsRef { let mut reader = OpenTypeReader::from_slice(&font.program); let outlines = reader.outlines()?; let table_records = reader.tables()?.to_vec(); let chars: Vec<_> = chars.into_iter().collect(); let subsetter = Subsetter { font, reader, outlines, tables: table_records, glyphs: Vec::with_capacity(1 + chars.len()), chars, records: vec![], body: vec![], }; subsetter.run(tables) } /// Do the subsetting. fn run(mut self, tables: I) -> FontResult where I: IntoIterator, S: AsRef { if self.outlines == Outlines::CFF { return Err(FontError::UnsupportedFont("CFF outlines".to_string())); } // Find out which glyphs to include based on which characters we want and // which glyphs are additionally used by composite glyphs. self.find_glyphs()?; // Copy/subset all the tables the caller wants. for table in tables.into_iter() { let tag = table.as_ref().parse() .map_err(|_| FontError::UnsupportedTable(table.as_ref().to_string()))?; if self.contains_table(tag) { self.subset_table(tag)?; } } // Preprend the new header to the body. We have to do this last, because // we only have the necessary information now. self.write_header()?; Ok(Font { name: self.font.name.clone(), mapping: self.compute_mapping(), widths: self.compute_widths()?, program: self.body, default_glyph: self.font.default_glyph, metrics: self.font.metrics, }) } /// Store all glyphs the subset shall contain into `self.glyphs`. fn find_glyphs(&mut self) -> FontResult<()> { if self.outlines == Outlines::TrueType { let char_map = self.read_table::()?; let glyf = self.read_table::()?; // The default glyph should always be at index 0. self.glyphs.push(self.font.default_glyph); for &c in &self.chars { let glyph = char_map.get(c).ok_or_else(|| FontError::MissingCharacter(c))?; self.glyphs.push(glyph); } // Collect the glyphs not used mapping from characters but used in // composite glyphs, too. let mut i = 0; while i < self.glyphs.len() as u16 { let glyph_id = self.glyphs[i as usize]; let glyph = glyf.get(glyph_id).take_invalid("missing glyf entry")?; for &composite in &glyph.composites { if self.glyphs.iter().rev().all(|&x| x != composite) { self.glyphs.push(composite); } } i += 1; } } else { unimplemented!() } Ok(()) } /// Prepend the new header to the constructed body. fn write_header(&mut self) -> FontResult<()> { // Create an output buffer const BASE_HEADER_LEN: usize = 12; const TABLE_RECORD_LEN: usize = 16; let header_len = BASE_HEADER_LEN + self.records.len() * TABLE_RECORD_LEN; let mut header = Vec::with_capacity(header_len); let num_tables = self.records.len() as u16; let mut max_power = 1u16; while max_power * 2 <= num_tables { max_power *= 2; } max_power = std::cmp::min(max_power, num_tables); let search_range = max_power * 16; let entry_selector = (max_power as f32).log2() as u16; let range_shift = num_tables * 16 - search_range; // Write the base OpenType header header.write_u32::(match self.outlines { Outlines::TrueType => 0x00010000, Outlines::CFF => 0x4f54544f, })?; header.write_u16::(num_tables)?; header.write_u16::(search_range)?; header.write_u16::(entry_selector)?; header.write_u16::(range_shift)?; // Write the table records for record in &self.records { header.extend(record.tag.value()); header.write_u32::(record.check_sum)?; header.write_u32::(header_len as u32 + record.offset)?; header.write_u32::(record.length)?; } // Prepend the fresh header to the body. header.append(&mut self.body); self.body = header; Ok(()) } /// Compute the new subsetted widths vector. fn compute_widths(&self) -> FontResult> { let mut widths = Vec::with_capacity(self.glyphs.len()); for &glyph in &self.glyphs { let &width = self.font.widths.get(glyph as usize) .take_invalid("missing glyph width")?; widths.push(width); } Ok(widths) } /// Compute the new character to glyph id mapping. fn compute_mapping(&self) -> HashMap { // The mapping is basically just the index into the char vector, but we add one // to each index here because we added the default glyph to the front. self.chars.iter().enumerate() .map(|(i, &c)| (c, 1 + i as u16)) .collect::>() } /// Subset and write the table with the given tag to the output. fn subset_table(&mut self, tag: Tag) -> FontResult<()> { match tag.value() { // These tables can just be copied. b"head" | b"name" | b"OS/2" | b"cvt " | b"fpgm" | b"prep" | b"gasp" => self.copy_table(tag), // These tables have more complex subsetting routines. b"hhea" => self.subset_hhea(), b"hmtx" => self.subset_hmtx(), b"maxp" => self.subset_maxp(), b"post" => self.subset_post(), b"cmap" => self.subset_cmap(), b"glyf" => self.subset_glyf(), b"loca" => self.subset_loca(), _ => Err(FontError::UnsupportedTable(tag.to_string())) } } /// Copy the table body without modification. fn copy_table(&mut self, tag: Tag) -> FontResult<()> { self.write_table_body(tag, |this| { let table = this.read_table_data(tag)?; Ok(this.body.extend(table)) }) } /// Subset the `hhea` table by changing the number of horizontal metrics in it. fn subset_hhea(&mut self) -> FontResult<()> { let tag = "hhea".parse().unwrap(); let hhea = self.read_table_data(tag)?; let glyph_count = self.glyphs.len() as u16; self.write_table_body(tag, |this| { this.body.extend(&hhea[..hhea.len() - 2]); this.body.write_u16::(glyph_count)?; Ok(()) }) } /// Subset the `hmtx` table by changing the included metrics. fn subset_hmtx(&mut self) -> FontResult<()> { let tag = "hmtx".parse().unwrap(); let hmtx = self.read_table::()?; self.write_table_body(tag, |this| { for &glyph in &this.glyphs { let metrics = hmtx.get(glyph).take_invalid("missing glyph metrics")?; this.body.write_u16::(metrics.advance_width)?; this.body.write_i16::(metrics.left_side_bearing)?; } Ok(()) }) } /// Subset the `maxp` table by changing the glyph count in it. fn subset_maxp(&mut self) -> FontResult<()> { let tag = "maxp".parse().unwrap(); let maxp = self.read_table_data(tag)?; let glyph_count = self.glyphs.len() as u16; self.write_table_body(tag, |this| { this.body.extend(&maxp[..4]); this.body.write_u16::(glyph_count)?; Ok(this.body.extend(&maxp[6..])) }) } /// Subset the `post` table by removing all name information. fn subset_post(&mut self) -> FontResult<()> { let tag = "post".parse().unwrap(); let post = self.read_table_data(tag)?; self.write_table_body(tag, |this| { this.body.write_u32::(0x00030000)?; Ok(this.body.extend(&post[4..32])) }) } /// Subset the `cmap` table by only including the selected characters. /// Always uses format 12 for simplicity. fn subset_cmap(&mut self) -> FontResult<()> { let tag = "cmap".parse().unwrap(); self.write_table_body(tag, |this| { let mut groups = Vec::new(); // Find out which chars are in consecutive groups. let mut end = 0; let len = this.chars.len(); while end < len { // Compute the end of the consecutive group. let start = end; while end + 1 < len && this.chars[end+1] as u32 == this.chars[end] as u32 + 1 { end += 1; } // Add one to the start because we inserted the default glyph in front. let glyph_id = 1 + start; groups.push((this.chars[start], this.chars[end], glyph_id)); end += 1; } // Write the table header. this.body.write_u16::(0)?; this.body.write_u16::(1)?; this.body.write_u16::(3)?; this.body.write_u16::(10)?; this.body.write_u32::(12)?; // Write the subtable header. this.body.write_u16::(12)?; this.body.write_u16::(0)?; this.body.write_u32::((16 + 12 * groups.len()) as u32)?; this.body.write_u32::(0)?; this.body.write_u32::(groups.len() as u32)?; // Write the subtable body. for group in &groups { this.body.write_u32::(group.0 as u32)?; this.body.write_u32::(group.1 as u32)?; this.body.write_u32::(group.2 as u32)?; } Ok(()) }) } /// Subset the `glyf` table by changing the indices of composite glyphs. fn subset_glyf(&mut self) -> FontResult<()> { let tag = "glyf".parse().unwrap(); let loca = self.read_table::()?; let glyf = self.read_table_data(tag)?; self.write_table_body(tag, |this| { for &glyph in &this.glyphs { // Find out the location of the glyph in the glyf table. let start = loca.offset(glyph).take_invalid("missing loca entry")?; let end = loca.offset(glyph + 1).take_invalid("missing loca entry")?; // If this glyph has no contours, skip it. if end == start { continue; } let mut glyph_data = glyf.get(start as usize .. end as usize) .take_invalid("missing glyph data")?.to_vec(); let mut cursor = Cursor::new(&mut glyph_data); // This is a composite glyph let num_contours = cursor.read_i16::()?; if num_contours < 0 { cursor.seek(SeekFrom::Current(8))?; loop { let flags = cursor.read_u16::()?; let old_glyph_index = cursor.read_u16::()?; // Compute the new glyph index by searching for it's index // in the glyph vector. let new_glyph_index = this.glyphs.iter() .position(|&g| g == old_glyph_index) .take_invalid("invalid composite glyph")? as u16; // Overwrite the old index with the new one. cursor.seek(SeekFrom::Current(-2))?; cursor.write_u16::(new_glyph_index)?; // This was the last component if flags & 0x0020 == 0 { break; } // Skip additional arguments. let skip = if flags & 1 != 0 { 4 } else { 2 } + if flags & 8 != 0 { 2 } else if flags & 64 != 0 { 4 } else if flags & 128 != 0 { 8 } else { 0 }; cursor.seek(SeekFrom::Current(skip))?; } } this.body.extend(glyph_data); } Ok(()) }) } /// Subset the `loca` table by changing to the new offsets. fn subset_loca(&mut self) -> FontResult<()> { let format = self.read_table::
()?.index_to_loc_format; let tag = "loca".parse().unwrap(); let loca = self.read_table::()?; self.write_table_body(tag, |this| { let mut offset = 0; for &glyph in &this.glyphs { if format == 0 { this.body.write_u16::((offset / 2) as u16)?; } else { this.body.write_u32::(offset)?; } let len = loca.length(glyph).take_invalid("missing loca entry")?; offset += len; } // Write the final offset (so that it is known how long the last glyph is). if format == 0 { this.body.write_u16::((offset / 2) as u16)?; } else { this.body.write_u32::(offset)?; } Ok(()) }) } /// Let a writer write the table body and then store the relevant metadata. fn write_table_body(&mut self, tag: Tag, writer: F) -> FontResult<()> where F: FnOnce(&mut Self) -> FontResult<()> { // Run the writer and capture the length. let start = self.body.len(); writer(self)?; let end = self.body.len(); // Pad with zeros. while (self.body.len() - start) % 4 != 0 { self.body.push(0); } Ok(self.records.push(TableRecord { tag, check_sum: calculate_check_sum(&self.body[start..]), offset: start as u32, length: (end - start) as u32, })) } /// Whether this font contains a given table. fn contains_table(&self, tag: Tag) -> bool { self.tables.binary_search_by_key(&tag, |r| r.tag).is_ok() } /// Read a table with the opentype reader. fn read_table(&mut self) -> FontResult { self.reader.read_table::().map_err(Into::into) } /// Read the raw table data of a table. fn read_table_data(&self, tag: Tag) -> FontResult<&'a [u8]> { let record = match self.tables.binary_search_by_key(&tag, |r| r.tag) { Ok(index) => &self.tables[index], Err(_) => return Err(FontError::MissingTable(tag.to_string())), }; self.font.program .get(record.offset as usize .. (record.offset + record.length) as usize) .take_invalid("missing table data") } } /// Calculate a checksum over the sliced data as sum of u32's. The data /// length has to be a multiple of four. fn calculate_check_sum(data: &[u8]) -> u32 { let mut sum = 0u32; data.chunks_exact(4).for_each(|c| { sum = sum.wrapping_add( ((c[0] as u32) << 24) + ((c[1] as u32) << 16) + ((c[2] as u32) << 8) + (c[3] as u32) ); }); sum } /// Helper trait to create subsetting errors more easily. trait TakeInvalid: Sized { /// Pull the type out of self, returning an invalid font /// error if self was not valid. fn take_invalid>(self, message: S) -> FontResult; } impl TakeInvalid for Option { fn take_invalid>(self, message: S) -> FontResult { self.ok_or(FontError::InvalidFont(message.into())) } } #[cfg(test)] mod tests { use std::fs; use crate::font::Font; use opentype::{OpenTypeReader, TableRecord}; use opentype::tables::{CharMap, Locations}; const ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz"; /// Stores some tables for inspections. struct Tables<'a> { cmap: CharMap, loca: Locations, glyf_data: &'a [u8], } impl<'a> Tables<'a> { /// Load the tables from the font. fn new(font: &'a Font) -> Tables<'a> { let mut reader = OpenTypeReader::from_slice(&font.program); let cmap = reader.read_table::().unwrap(); let loca = reader.read_table::().unwrap(); let &TableRecord { offset, length, .. } = reader.get_table_record("glyf").unwrap(); let glyf_data = &font.program[offset as usize .. (offset + length) as usize]; Tables { cmap, loca, glyf_data } } /// Return the glyph data for the given character. fn glyph_data(&self, character: char) -> Option<&'a [u8]> { let glyph = self.cmap.get(character)?; let start = self.loca.offset(glyph)?; let end = self.loca.offset(glyph + 1)?; Some(&self.glyf_data[start as usize .. end as usize]) } } /// Return the original and subsetted version of a font with the characters /// included that are given as the chars of the string. fn subset(font: &str, chars: &str) -> (Font, Font) { let program = fs::read(format!("../fonts/{}", font)).unwrap(); let font = Font::new(program).unwrap(); let subsetted = font.subsetted( chars.chars(), &["name", "OS/2", "post", "head", "hhea", "hmtx", "maxp", "cmap", "cvt ", "fpgm", "prep", "gasp", "loca", "glyf"][..] ).unwrap(); (font, subsetted) } /// A test that creates a subsetted fonts in the `target` directory /// for manual inspection. #[test] fn manual_files() { let subsetted = subset("SourceSansPro-Regular.ttf", ALPHABET).1; fs::write("../target/SourceSansPro-Subsetted.ttf", &subsetted.program).unwrap(); let subsetted = subset("NotoSans-Regular.ttf", ALPHABET).1; fs::write("../target/NotoSans-Subsetted.ttf", &subsetted.program).unwrap(); } /// Tests whether the glyph data for specific glyphs match in the original /// and subsetted version. #[test] fn glyph_data() { let (font, subsetted) = subset("SourceSansPro-Regular.ttf", ALPHABET); let font_tables = Tables::new(&font); let subset_tables = Tables::new(&subsetted); // Go through all characters but skip the composite glyphs. for c in ALPHABET.chars().filter(|&x| x != 'i' && x != 'j') { assert_eq!(font_tables.glyph_data(c), subset_tables.glyph_data(c)); } } }