mirror of
https://github.com/typst/typst
synced 2025-05-13 20:46:23 +08:00
Subset glyf and loca tables
This commit is contained in:
parent
6f84cf3c22
commit
821536b253
@ -1,7 +1,7 @@
|
||||
//! Exporting into PDF documents.
|
||||
|
||||
use std::cmp::Eq;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::hash::Hash;
|
||||
use std::rc::Rc;
|
||||
|
||||
@ -38,12 +38,14 @@ struct PdfExporter<'a> {
|
||||
frames: &'a [Rc<Frame>],
|
||||
fonts: &'a FontStore,
|
||||
images: &'a ImageStore,
|
||||
glyphs: HashMap<FaceId, HashSet<u16>>,
|
||||
font_map: Remapper<FaceId>,
|
||||
image_map: Remapper<ImageId>,
|
||||
}
|
||||
|
||||
impl<'a> PdfExporter<'a> {
|
||||
fn new(ctx: &'a Context, frames: &'a [Rc<Frame>]) -> Self {
|
||||
let mut glyphs = HashMap::<FaceId, HashSet<u16>>::new();
|
||||
let mut font_map = Remapper::new();
|
||||
let mut image_map = Remapper::new();
|
||||
let mut alpha_masks = 0;
|
||||
@ -51,7 +53,11 @@ impl<'a> PdfExporter<'a> {
|
||||
for frame in frames {
|
||||
for (_, element) in frame.elements() {
|
||||
match *element {
|
||||
Element::Text(ref text) => font_map.insert(text.face_id),
|
||||
Element::Text(ref text) => {
|
||||
font_map.insert(text.face_id);
|
||||
let set = glyphs.entry(text.face_id).or_default();
|
||||
set.extend(text.glyphs.iter().map(|g| g.id));
|
||||
}
|
||||
Element::Geometry(_, _) => {}
|
||||
Element::Image(id, _) => {
|
||||
let img = ctx.images.get(id);
|
||||
@ -74,6 +80,7 @@ impl<'a> PdfExporter<'a> {
|
||||
frames,
|
||||
fonts: &ctx.fonts,
|
||||
images: &ctx.images,
|
||||
glyphs,
|
||||
font_map,
|
||||
image_map,
|
||||
}
|
||||
@ -278,6 +285,7 @@ impl<'a> PdfExporter<'a> {
|
||||
|
||||
fn write_fonts(&mut self) {
|
||||
for (refs, face_id) in self.refs.fonts().zip(self.font_map.layout_indices()) {
|
||||
let glyphs = &self.glyphs[&face_id];
|
||||
let face = self.fonts.get(face_id);
|
||||
let ttf = face.ttf();
|
||||
|
||||
@ -370,15 +378,19 @@ impl<'a> PdfExporter<'a> {
|
||||
// unicode codepoints to enable copying out of the PDF.
|
||||
self.writer.cmap(refs.cmap, &{
|
||||
// Deduplicate glyph-to-unicode mappings with a set.
|
||||
let mut mapping = BTreeSet::new();
|
||||
let mut mapping = BTreeMap::new();
|
||||
for subtable in ttf.character_mapping_subtables() {
|
||||
subtable.codepoints(|n| {
|
||||
if let Some(c) = std::char::from_u32(n) {
|
||||
if let Some(g) = ttf.glyph_index(c) {
|
||||
mapping.insert((g.0, c));
|
||||
if subtable.is_unicode() {
|
||||
subtable.codepoints(|n| {
|
||||
if let Some(c) = std::char::from_u32(n) {
|
||||
if let Some(GlyphId(g)) = ttf.glyph_index(c) {
|
||||
if glyphs.contains(&g) {
|
||||
mapping.insert(g, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut cmap = UnicodeCmap::new(cmap_name, system_info);
|
||||
@ -388,9 +400,9 @@ impl<'a> PdfExporter<'a> {
|
||||
cmap.finish()
|
||||
});
|
||||
|
||||
// Susbet and write the face's bytes.
|
||||
// Subset and write the face's bytes.
|
||||
let original = face.buffer();
|
||||
let subsetted = subset(original, face.index());
|
||||
let subsetted = subset(original, face.index(), glyphs.iter().copied());
|
||||
let data = subsetted.as_deref().unwrap_or(original);
|
||||
self.writer.stream(refs.data, data);
|
||||
}
|
||||
|
@ -1,21 +1,31 @@
|
||||
//! Font subsetting.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashSet;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use ttf_parser::parser::{FromData, LazyArray16, Offset, Offset32, Stream};
|
||||
use ttf_parser::parser::{
|
||||
FromData, LazyArray16, LazyArray32, Offset16, Offset32, Stream, F2DOT14,
|
||||
};
|
||||
use ttf_parser::{Face, Tag};
|
||||
|
||||
/// Subset a font face.
|
||||
///
|
||||
/// This will remove the outlines of all glyphs that are not part of the given
|
||||
/// iterator. Furthermore, all character mapping and layout tables are dropped as
|
||||
/// shaping has already happened.
|
||||
///
|
||||
/// Returns `None` if the font data is invalid.
|
||||
pub fn subset(data: &[u8], index: u32) -> Option<Vec<u8>> {
|
||||
let mut s = Subsetter::new(data, index)?;
|
||||
s.subset()?;
|
||||
Some(s.encode())
|
||||
pub fn subset<I>(data: &[u8], index: u32, glyphs: I) -> Option<Vec<u8>>
|
||||
where
|
||||
I: IntoIterator<Item = u16>,
|
||||
{
|
||||
Subsetter::new(data, index, glyphs.into_iter().collect())?.subset()
|
||||
}
|
||||
|
||||
struct Subsetter<'a> {
|
||||
face: Face<'a>,
|
||||
glyphs: Vec<u16>,
|
||||
magic: Magic,
|
||||
records: LazyArray16<'a, TableRecord>,
|
||||
tables: Vec<(Tag, Cow<'a, [u8]>)>,
|
||||
@ -23,7 +33,7 @@ struct Subsetter<'a> {
|
||||
|
||||
impl<'a> Subsetter<'a> {
|
||||
/// Parse the font header and create a new subsetter.
|
||||
fn new(data: &'a [u8], index: u32) -> Option<Self> {
|
||||
fn new(data: &'a [u8], index: u32, glyphs: Vec<u16>) -> Option<Self> {
|
||||
let face = Face::from_slice(data, index).ok()?;
|
||||
let mut s = Stream::new(&data);
|
||||
|
||||
@ -53,40 +63,21 @@ impl<'a> Subsetter<'a> {
|
||||
// Read the table records.
|
||||
let records = s.read_array16::<TableRecord>(count)?;
|
||||
|
||||
Some(Self { face, magic, records, tables: vec![] })
|
||||
}
|
||||
|
||||
/// Subset, drop and copy tables.
|
||||
fn subset(&mut self) -> Option<()> {
|
||||
for record in self.records {
|
||||
let tag = record.tag;
|
||||
let data = self.face.table_data(tag)?;
|
||||
|
||||
match &tag.to_bytes() {
|
||||
// Glyphs are already mapped.
|
||||
b"cmap" => {}
|
||||
|
||||
// Layout is already finished.
|
||||
b"GPOS" | b"GSUB" | b"BASE" | b"JSTF" | b"MATH" | b"ankr" | b"kern"
|
||||
| b"kerx" | b"mort" | b"morx" | b"trak" | b"bsln" | b"just"
|
||||
| b"feat" | b"prop" => {}
|
||||
|
||||
// TODO: Subset.
|
||||
// b"loca" => {}
|
||||
// b"glyf" => {}
|
||||
// b"sbix" => {}
|
||||
// b"SVG " => {}
|
||||
// b"post" => {}
|
||||
|
||||
// All other tables are simply copied.
|
||||
_ => self.tables.push((tag, Cow::Borrowed(data))),
|
||||
}
|
||||
}
|
||||
Some(())
|
||||
Some(Self {
|
||||
face,
|
||||
glyphs,
|
||||
magic,
|
||||
records,
|
||||
tables: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
/// Encode the subsetted font file.
|
||||
fn encode(mut self) -> Vec<u8> {
|
||||
fn subset(mut self) -> Option<Vec<u8>> {
|
||||
// Subset the individual tables and save them in `self.tables`.
|
||||
self.subset_tables()?;
|
||||
|
||||
// Start writing a brand new font.
|
||||
let mut w = Vec::new();
|
||||
w.write(self.magic);
|
||||
|
||||
@ -111,7 +102,7 @@ impl<'a> Subsetter<'a> {
|
||||
// Write table records.
|
||||
let mut offset = 12 + self.tables.len() * TableRecord::SIZE;
|
||||
for (tag, data) in &mut self.tables {
|
||||
if *tag == Tag::from_bytes(b"head") {
|
||||
if *tag == tg(b"head") {
|
||||
// Zero out checksum field in head table.
|
||||
data.to_mut()[8 .. 12].copy_from_slice(&[0; 4]);
|
||||
checksum_adjustment_offset = Some(offset + 8);
|
||||
@ -143,8 +134,52 @@ impl<'a> Subsetter<'a> {
|
||||
w[i .. i + 4].copy_from_slice(&val.to_be_bytes());
|
||||
}
|
||||
|
||||
w
|
||||
Some(w)
|
||||
}
|
||||
|
||||
/// Subset, drop and copy tables.
|
||||
fn subset_tables(&mut self) -> Option<()> {
|
||||
for record in self.records {
|
||||
let tag = record.tag;
|
||||
let data = self.face.table_data(tag)?;
|
||||
|
||||
match &tag.to_bytes() {
|
||||
// Glyphs are already mapped.
|
||||
b"cmap" => {}
|
||||
|
||||
// Layout is already finished.
|
||||
b"GPOS" | b"GSUB" | b"BASE" | b"JSTF" | b"MATH" | b"ankr" | b"kern"
|
||||
| b"kerx" | b"mort" | b"morx" | b"trak" | b"bsln" | b"just"
|
||||
| b"feat" | b"prop" => {}
|
||||
|
||||
// Loca is created when subsetting glyf.
|
||||
b"loca" => {}
|
||||
b"glyf" => {
|
||||
let head = self.face.table_data(tg(b"head"))?;
|
||||
let short = Stream::read_at::<i16>(head, 50)? == 0;
|
||||
if short {
|
||||
self.subset_glyf_loca::<Offset16>();
|
||||
} else {
|
||||
self.subset_glyf_loca::<Offset32>();
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Subset.
|
||||
// b"sbix" => {}
|
||||
// b"SVG " => {}
|
||||
// b"post" => {}
|
||||
|
||||
// All other tables are simply copied.
|
||||
_ => self.tables.push((tag, Cow::Borrowed(data))),
|
||||
}
|
||||
}
|
||||
Some(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to create a tag from bytes.
|
||||
fn tg(bytes: &[u8; 4]) -> Tag {
|
||||
Tag::from_bytes(bytes)
|
||||
}
|
||||
|
||||
/// Calculate a checksum over the sliced data as sum of u32's. The data length
|
||||
@ -187,12 +222,24 @@ impl ToData for u16 {
|
||||
}
|
||||
}
|
||||
|
||||
impl ToData for Offset16 {
|
||||
fn write(&self, data: &mut Vec<u8>) {
|
||||
self.0.write(data);
|
||||
}
|
||||
}
|
||||
|
||||
impl ToData for u32 {
|
||||
fn write(&self, data: &mut Vec<u8>) {
|
||||
data.extend(&self.to_be_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
impl ToData for Offset32 {
|
||||
fn write(&self, data: &mut Vec<u8>) {
|
||||
self.0.write(data);
|
||||
}
|
||||
}
|
||||
|
||||
impl ToData for Tag {
|
||||
fn write(&self, data: &mut Vec<u8>) {
|
||||
self.as_u32().write(data);
|
||||
@ -262,3 +309,138 @@ impl ToData for TableRecord {
|
||||
self.length.write(data);
|
||||
}
|
||||
}
|
||||
|
||||
impl Subsetter<'_> {
|
||||
/// Subset the glyf and loca tables.
|
||||
fn subset_glyf_loca<T: LocaOffset>(&mut self) -> Option<()> {
|
||||
let loca = self.face.table_data(tg(b"loca"))?;
|
||||
let glyf = self.face.table_data(tg(b"glyf"))?;
|
||||
|
||||
let offsets = LazyArray32::<T>::new(loca);
|
||||
let slice = |id: u16| {
|
||||
let from = offsets.get(u32::from(id))?.to_usize();
|
||||
let to = offsets.get(u32::from(id) + 1)?.to_usize();
|
||||
glyf.get(from .. to)
|
||||
};
|
||||
|
||||
// To compute the set of all glyphs we want to keep, we use a work stack
|
||||
// containing glyphs whose components we still need to consider.
|
||||
let mut glyphs = HashSet::new();
|
||||
let mut work: Vec<u16> = std::mem::take(&mut self.glyphs);
|
||||
|
||||
// Always include the notdef glyph.
|
||||
work.push(0);
|
||||
|
||||
// Find composite glyph descriptions.
|
||||
while let Some(id) = work.pop() {
|
||||
if glyphs.insert(id) {
|
||||
let mut s = Stream::new(slice(id)?);
|
||||
if let Some(num_contours) = s.read::<i16>() {
|
||||
// Negative means this is a composite glyph.
|
||||
if num_contours < 0 {
|
||||
// Skip min/max metrics.
|
||||
s.read::<i16>();
|
||||
s.read::<i16>();
|
||||
s.read::<i16>();
|
||||
s.read::<i16>();
|
||||
|
||||
// Read component glyphs.
|
||||
work.extend(component_glyphs(s));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut sub_loca = vec![];
|
||||
let mut sub_glyf = vec![];
|
||||
|
||||
for id in 0 .. self.face.number_of_glyphs() {
|
||||
sub_loca.write(T::from_usize(sub_glyf.len())?);
|
||||
|
||||
// If the glyph shouldn't be contained in the subset, it will still
|
||||
// get a loca entry, but the glyf data is simply empty.
|
||||
if glyphs.contains(&id) {
|
||||
sub_glyf.extend(slice(id)?);
|
||||
}
|
||||
}
|
||||
|
||||
sub_loca.write(T::from_usize(sub_glyf.len())?);
|
||||
|
||||
self.tables.push((tg(b"loca"), Cow::Owned(sub_loca)));
|
||||
self.tables.push((tg(b"glyf"), Cow::Owned(sub_glyf)));
|
||||
|
||||
Some(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Offsets for loca table.
|
||||
trait LocaOffset: Sized + FromData + ToData {
|
||||
fn to_usize(self) -> usize;
|
||||
fn from_usize(offset: usize) -> Option<Self>;
|
||||
}
|
||||
|
||||
impl LocaOffset for Offset16 {
|
||||
fn to_usize(self) -> usize {
|
||||
2 * usize::from(self.0)
|
||||
}
|
||||
|
||||
fn from_usize(offset: usize) -> Option<Self> {
|
||||
if offset % 2 == 0 {
|
||||
(offset / 2).try_into().ok().map(Self)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LocaOffset for Offset32 {
|
||||
fn to_usize(self) -> usize {
|
||||
self.0 as usize
|
||||
}
|
||||
|
||||
fn from_usize(offset: usize) -> Option<Self> {
|
||||
offset.try_into().ok().map(Self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over the component glyphs referenced by the given
|
||||
/// `glyf` table composite glyph description.
|
||||
fn component_glyphs(mut s: Stream) -> impl Iterator<Item = u16> + '_ {
|
||||
const ARG_1_AND_2_ARE_WORDS: u16 = 0x0001;
|
||||
const WE_HAVE_A_SCALE: u16 = 0x0008;
|
||||
const MORE_COMPONENTS: u16 = 0x0020;
|
||||
const WE_HAVE_AN_X_AND_Y_SCALE: u16 = 0x0040;
|
||||
const WE_HAVE_A_TWO_BY_TWO: u16 = 0x0080;
|
||||
|
||||
let mut done = false;
|
||||
std::iter::from_fn(move || {
|
||||
if done {
|
||||
return None;
|
||||
}
|
||||
|
||||
let flags = s.read::<u16>()?;
|
||||
let component = s.read::<u16>()?;
|
||||
|
||||
if flags & ARG_1_AND_2_ARE_WORDS != 0 {
|
||||
s.skip::<i16>();
|
||||
s.skip::<i16>();
|
||||
} else {
|
||||
s.skip::<u16>();
|
||||
}
|
||||
|
||||
if flags & WE_HAVE_A_SCALE != 0 {
|
||||
s.skip::<F2DOT14>();
|
||||
} else if flags & WE_HAVE_AN_X_AND_Y_SCALE != 0 {
|
||||
s.skip::<F2DOT14>();
|
||||
s.skip::<F2DOT14>();
|
||||
} else if flags & WE_HAVE_A_TWO_BY_TWO != 0 {
|
||||
s.skip::<F2DOT14>();
|
||||
s.skip::<F2DOT14>();
|
||||
s.skip::<F2DOT14>();
|
||||
s.skip::<F2DOT14>();
|
||||
}
|
||||
|
||||
done = flags & MORE_COMPONENTS == 0;
|
||||
Some(component)
|
||||
})
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user