From d217d4f02aabdddd2174a68d71c1e35685a302c1 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sun, 3 Mar 2019 18:36:56 +0100 Subject: [PATCH] =?UTF-8?q?Fix=20subsetting=20for=20composite=20glyphs=20?= =?UTF-8?q?=F0=9F=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/font.rs | 152 +++++++++++++++++++++++++++++++++++++++++++--------- src/pdf.rs | 16 ++---- 2 files changed, 131 insertions(+), 37 deletions(-) diff --git a/src/font.rs b/src/font.rs index ddbcf1bf7..3e62f0849 100644 --- a/src/font.rs +++ b/src/font.rs @@ -1,7 +1,7 @@ //! Font utility and subsetting. use std::fmt; -use std::io::{self, Cursor}; +use std::io::{self, Cursor, Seek, SeekFrom}; use std::collections::HashMap; use byteorder::{BE, ReadBytesExt, WriteBytesExt}; use opentype::{OpenTypeReader, Outlines, TableRecord, Tag}; @@ -39,7 +39,6 @@ impl Font { { let mut chars: Vec = chars.into_iter().collect(); chars.sort(); - let mut cursor = Cursor::new(&self.program); let mut reader = OpenTypeReader::new(&mut cursor); let outlines = reader.outlines()?; @@ -54,6 +53,7 @@ impl Font { cmap: None, hmtx: None, loca: None, + glyphs: Vec::with_capacity(chars.len()), chars, records: Vec::new(), body: Vec::new(), @@ -70,6 +70,7 @@ struct Subsetter<'p> { cmap: Option, hmtx: Option, loca: Option>, + glyphs: Vec, // Subsetted font chars: Vec, @@ -84,6 +85,10 @@ impl<'p> Subsetter<'p> { I1: IntoIterator, S1: AsRef, I2: IntoIterator, S2: AsRef { + // Find out which glyphs to include based on which characters we want + // and which glyphs are used by composition. + self.build_glyphs()?; + // Iterate through the needed tables first for table in needed_tables.into_iter() { let table = table.as_ref(); @@ -116,6 +121,64 @@ impl<'p> Subsetter<'p> { Ok((self.body, mapping)) } + fn build_glyphs(&mut self) -> SubsetResult<()> { + self.read_cmap()?; + let cmap = self.cmap.as_ref().unwrap(); + + for &c in &self.chars { + self.glyphs.push(take_char(cmap.get(c), c)?) + } + + // Composite glyphs may need additional glyphs we have not yet in our list. + // So now we have a look at the glyf table to check that and add glyphs + // we need additionally. + if self.contains("glyf".parse().unwrap()) { + self.read_loca()?; + let loca = self.loca.as_ref().unwrap(); + let table = self.get_table_data("glyf".parse().unwrap())?; + + let mut i = 0; + while i < self.glyphs.len() { + let glyph = self.glyphs[i]; + + let start = *take_invalid(loca.get(glyph as usize))? as usize; + let end = *take_invalid(loca.get(glyph as usize + 1))? as usize; + + let glyph = table.get(start..end).ok_or(SubsettingError::InvalidFont)?; + + if end > start { + let mut cursor = Cursor::new(&glyph); + let num_contours = cursor.read_i16::()?; + + // This is a composite glyph + if num_contours < 0 { + cursor.seek(SeekFrom::Current(8))?; + loop { + let flags = cursor.read_u16::()?; + let glyph_index = cursor.read_u16::()?; + + if self.glyphs.iter().rev().find(|&&x| x == glyph_index).is_none() { + self.glyphs.push(glyph_index); + } + + // This was the last component + if flags & 0x0020 == 0 { + break; + } + + let args_len = if flags & 0x0001 == 1 { 4 } else { 2 }; + cursor.seek(SeekFrom::Current(args_len))?; + } + } + } + + i += 1; + } + } + + Ok(()) + } + fn write_header(&mut self) -> SubsetResult<()> { // Create an output buffer let header_len = 12 + self.records.len() * 16; @@ -165,7 +228,7 @@ impl<'p> Subsetter<'p> { }, b"hhea" => { let table = self.get_table_data(tag)?; - let glyph_count = self.chars.len() as u16; + let glyph_count = self.glyphs.len() as u16; self.write_table_body(tag, |this| { this.body.extend(&table[..table.len() - 2]); Ok(this.body.write_u16::(glyph_count)?) @@ -173,7 +236,7 @@ impl<'p> Subsetter<'p> { }, b"maxp" => { let table = self.get_table_data(tag)?; - let glyph_count = self.chars.len() as u16; + let glyph_count = self.glyphs.len() as u16; self.write_table_body(tag, |this| { this.body.extend(&table[..4]); this.body.write_u16::(glyph_count)?; @@ -182,14 +245,11 @@ impl<'p> Subsetter<'p> { }, b"hmtx" => { self.write_table_body(tag, |this| { - this.read_cmap()?; this.read_hmtx()?; - let cmap = this.cmap.as_ref().unwrap(); let metrics = this.hmtx.as_ref().unwrap(); - for &c in &this.chars { - let glyph_id = take(cmap.get(c), c)?; - let metrics = take(metrics.get(glyph_id), c)?; + for &glyph in &this.glyphs { + let metrics = take_invalid(metrics.get(glyph))?; this.body.write_i16::(metrics.advance_width)?; this.body.write_i16::(metrics.left_side_bearing)?; @@ -199,40 +259,70 @@ impl<'p> Subsetter<'p> { }, b"loca" => { self.write_table_body(tag, |this| { - this.read_cmap()?; this.read_loca()?; - let cmap = this.cmap.as_ref().unwrap(); let loca = this.loca.as_ref().unwrap(); let mut offset = 0; - for &c in &this.chars { + for &glyph in &this.glyphs { this.body.write_u32::(offset)?; - let glyph = take(cmap.get(c), c)? as usize; - let len = take(loca.get(glyph + 1), c)? - take(loca.get(glyph), c)?; + let len = take_invalid(loca.get(glyph as usize + 1))? + - take_invalid(loca.get(glyph as usize))?; offset += len; } this.body.write_u32::(offset)?; Ok(()) }) }, + b"glyf" => { self.write_table_body(tag, |this| { - let table = this.get_table_data(tag)?; - this.read_cmap()?; this.read_loca()?; - let cmap = this.cmap.as_ref().unwrap(); let loca = this.loca.as_ref().unwrap(); + let table = this.get_table_data(tag)?; - for &c in &this.chars { - let glyph = take(cmap.get(c), c)? as usize; - let start = *take(loca.get(glyph), c)? as usize; - let end = *take(loca.get(glyph + 1), c)? as usize; - let shapes = table.get(start..end).ok_or(SubsettingError::InvalidFont)?; - this.body.extend(shapes); + for &glyph in &this.glyphs { + let start = *take_invalid(loca.get(glyph as usize))? as usize; + let end = *take_invalid(loca.get(glyph as usize + 1))? as usize; + + let mut data = table.get(start..end) + .ok_or(SubsettingError::InvalidFont)?.to_vec(); + + if end > start { + let mut cursor = Cursor::new(&mut data); + let num_contours = cursor.read_i16::()?; + + // This is a composite glyph + if num_contours < 0 { + cursor.seek(SeekFrom::Current(8))?; + loop { + let flags = cursor.read_u16::()?; + + let glyph_index = cursor.read_u16::()?; + let new_glyph_index = this.glyphs.iter() + .position(|&g| g == glyph_index) + .ok_or(SubsettingError::InvalidFont)? as u16; + + cursor.seek(SeekFrom::Current(-2))?; + cursor.write_u16::(new_glyph_index)?; + + // This was the last component + if flags & 0x0020 == 0 { + break; + } + + + let args_len = if flags & 0x0001 == 1 { 4 } else { 2 }; + cursor.seek(SeekFrom::Current(args_len))?; + } + } + } + + this.body.extend(data); } Ok(()) }) }, + b"cmap" => { // Always uses format 12 for simplicity self.write_table_body(tag, |this| { @@ -365,21 +455,33 @@ fn calculate_check_sum(data: &[u8]) -> u32 { } /// Returns an error about a missing character or the wrapped data. -fn take(opt: Option, c: char) -> SubsetResult { - opt.ok_or(SubsettingError::MissingCharacter(c)) +fn take_char(opt: Option, character: char) -> SubsetResult { + opt.ok_or(SubsettingError::MissingCharacter(character)) } +/// Returns an error about a missing glyph or the wrapped data. +fn take_invalid(opt: Option) -> SubsetResult { + opt.ok_or(SubsettingError::InvalidFont) +} + + type SubsetResult = Result; /// A failure when subsetting a font. #[derive(Debug)] pub enum SubsettingError { + /// A requested table was not present in the source font. MissingTable(String), + /// The table is unknown to the engine (unimplemented or invalid). UnsupportedTable(String), + /// A requested character was not present in the source. MissingCharacter(char), + /// The font is invalid. InvalidFont, + /// There was an error while parsing the font file. FontError(opentype::Error), + /// A general I/O error. IoError(io::Error), } diff --git a/src/pdf.rs b/src/pdf.rs index 68de5d3e2..0d7ad298f 100644 --- a/src/pdf.rs +++ b/src/pdf.rs @@ -346,16 +346,8 @@ mod pdf_tests { "); } - // #[test] - // fn pdf_fix_1() { - // use unicode_normalization::UnicodeNormalization; - - // let text = "Hello World! from Typeset‼"; - // let chars = text.nfd().collect::>(); - - // // Create a subsetted pdf font. - // let data = std::fs::read("../fonts/NotoSans-Regular.ttf").unwrap(); - // let font = PdfFont::new("NotoSans-Regular", data, chars).unwrap(); - // std::fs::write("../target/NotoTest.ttf", font.data).unwrap(); - // } + #[test] + fn pdf_composite_glyph() { + test("composite-glyph", "Composite character‼"); + } }