mirror of
https://github.com/typst/typst
synced 2025-05-14 17:15:28 +08:00
Improve subsetting (#4373)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
This commit is contained in:
parent
34550220ae
commit
feedfe80cb
8
Cargo.lock
generated
8
Cargo.lock
generated
@ -2255,15 +2255,13 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "subsetter"
|
name = "subsetter"
|
||||||
version = "0.1.1"
|
version = "0.11.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/typst/subsetter?rev=4e0058b#4e0058b4b9a0948a5f79894111948d95e59ba350"
|
||||||
checksum = "09eab8a83bff89ba2200bd4c59be45c7c787f988431b936099a5a266c957f2f9"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "svg2pdf"
|
name = "svg2pdf"
|
||||||
version = "0.11.0"
|
version = "0.11.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/typst/svg2pdf?rev=39f8ad3#39f8ad3b35e14cfcabf3d5d916899f7ac78790f7"
|
||||||
checksum = "e31565956eb1dc398c0d9776ee1d1bac4e34759af63dcbe0520df32313a5b53b"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"fontdb",
|
"fontdb",
|
||||||
"image 0.25.1",
|
"image 0.25.1",
|
||||||
|
@ -98,8 +98,8 @@ shell-escape = "0.1.5"
|
|||||||
siphasher = "1"
|
siphasher = "1"
|
||||||
smallvec = { version = "1.11.1", features = ["union", "const_generics", "const_new"] }
|
smallvec = { version = "1.11.1", features = ["union", "const_generics", "const_new"] }
|
||||||
stacker = "0.1.15"
|
stacker = "0.1.15"
|
||||||
subsetter = "0.1.1"
|
subsetter = { git = "https://github.com/typst/subsetter", rev = "4e0058b" }
|
||||||
svg2pdf = "0.11.0"
|
svg2pdf = { git = "https://github.com/typst/svg2pdf", rev = "39f8ad3" }
|
||||||
syn = { version = "2", features = ["full", "extra-traits"] }
|
syn = { version = "2", features = ["full", "extra-traits"] }
|
||||||
syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] }
|
syntect = { version = "5", default-features = false, features = ["parsing", "regex-fancy", "plist-load", "yaml-load"] }
|
||||||
tar = "0.4"
|
tar = "0.4"
|
||||||
|
@ -115,7 +115,7 @@ pub fn write_color_fonts(
|
|||||||
pdf_font.finish();
|
pdf_font.finish();
|
||||||
|
|
||||||
// Encode a CMAP to make it possible to search or copy glyphs.
|
// Encode a CMAP to make it possible to search or copy glyphs.
|
||||||
let glyph_set = resources.glyph_sets.get(&font_slice.font).unwrap();
|
let glyph_set = resources.color_glyph_sets.get(&font_slice.font).unwrap();
|
||||||
let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO);
|
let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO);
|
||||||
for (index, glyph) in subset.iter().enumerate() {
|
for (index, glyph) in subset.iter().enumerate() {
|
||||||
let Some(text) = glyph_set.get(&glyph.gid) else {
|
let Some(text) = glyph_set.get(&glyph.gid) else {
|
||||||
|
@ -476,6 +476,12 @@ fn write_normal_text(ctx: &mut Builder, pos: Point, text: TextItemView) {
|
|||||||
let mut adjustment = Em::zero();
|
let mut adjustment = Em::zero();
|
||||||
let mut encoded = vec![];
|
let mut encoded = vec![];
|
||||||
|
|
||||||
|
let glyph_remapper = ctx
|
||||||
|
.resources
|
||||||
|
.glyph_remappers
|
||||||
|
.entry(text.item.font.clone())
|
||||||
|
.or_default();
|
||||||
|
|
||||||
// Write the glyphs with kerning adjustments.
|
// Write the glyphs with kerning adjustments.
|
||||||
for glyph in text.glyphs() {
|
for glyph in text.glyphs() {
|
||||||
adjustment += glyph.x_offset;
|
adjustment += glyph.x_offset;
|
||||||
@ -490,7 +496,26 @@ fn write_normal_text(ctx: &mut Builder, pos: Point, text: TextItemView) {
|
|||||||
adjustment = Em::zero();
|
adjustment = Em::zero();
|
||||||
}
|
}
|
||||||
|
|
||||||
let cid = crate::font::glyph_cid(&text.item.font, glyph.id);
|
// In PDF, we use CIDs to index the glyphs in a font, not GIDs. What a
|
||||||
|
// CID actually refers to depends on the type of font we are embedding:
|
||||||
|
//
|
||||||
|
// - For TrueType fonts, the CIDs are defined by an external mapping.
|
||||||
|
// - For SID-keyed CFF fonts, the CID is the same as the GID in the font.
|
||||||
|
// - For CID-keyed CFF fonts, the CID refers to the CID in the font.
|
||||||
|
//
|
||||||
|
// (See the PDF specification for more details on this.)
|
||||||
|
//
|
||||||
|
// However, in our case:
|
||||||
|
// - We use the identity-mapping for TrueType fonts.
|
||||||
|
// - SID-keyed fonts will get converted into CID-keyed fonts by the
|
||||||
|
// subsetter.
|
||||||
|
// - CID-keyed fonts will be rewritten in a way so that the mapping
|
||||||
|
// between CID and GID is always the identity mapping, regardless of
|
||||||
|
// the mapping before.
|
||||||
|
//
|
||||||
|
// Because of this, we can always use the remapped GID as the CID,
|
||||||
|
// regardless of which type of font we are actually embedding.
|
||||||
|
let cid = glyph_remapper.remap(glyph.id);
|
||||||
encoded.push((cid >> 8) as u8);
|
encoded.push((cid >> 8) as u8);
|
||||||
encoded.push((cid & 0xff) as u8);
|
encoded.push((cid & 0xff) as u8);
|
||||||
|
|
||||||
@ -523,7 +548,11 @@ fn write_color_glyphs(ctx: &mut Builder, pos: Point, text: TextItemView) {
|
|||||||
// displays regular glyphs and not color glyphs.
|
// displays regular glyphs and not color glyphs.
|
||||||
ctx.state.font = None;
|
ctx.state.font = None;
|
||||||
|
|
||||||
let glyph_set = ctx.resources.glyph_sets.entry(text.item.font.clone()).or_default();
|
let glyph_set = ctx
|
||||||
|
.resources
|
||||||
|
.color_glyph_sets
|
||||||
|
.entry(text.item.font.clone())
|
||||||
|
.or_default();
|
||||||
|
|
||||||
for glyph in text.glyphs() {
|
for glyph in text.glyphs() {
|
||||||
// Retrieve the Type3 font reference and the glyph index in the font.
|
// Retrieve the Type3 font reference and the glyph index in the font.
|
||||||
|
@ -8,6 +8,7 @@ use pdf_writer::{
|
|||||||
writers::FontDescriptor,
|
writers::FontDescriptor,
|
||||||
Chunk, Filter, Finish, Name, Rect, Ref, Str,
|
Chunk, Filter, Finish, Name, Rect, Ref, Str,
|
||||||
};
|
};
|
||||||
|
use subsetter::GlyphRemapper;
|
||||||
use ttf_parser::{name_id, GlyphId, Tag};
|
use ttf_parser::{name_id, GlyphId, Tag};
|
||||||
use typst::text::Font;
|
use typst::text::Font;
|
||||||
use typst::utils::SliceExt;
|
use typst::utils::SliceExt;
|
||||||
@ -43,6 +44,7 @@ pub fn write_fonts(context: &WithGlobalRefs) -> (PdfChunk, HashMap<Font, Ref>) {
|
|||||||
out.insert(font.clone(), type0_ref);
|
out.insert(font.clone(), type0_ref);
|
||||||
|
|
||||||
let glyph_set = resources.glyph_sets.get(font).unwrap();
|
let glyph_set = resources.glyph_sets.get(font).unwrap();
|
||||||
|
let glyph_remapper = resources.glyph_remappers.get(font).unwrap();
|
||||||
let ttf = font.ttf();
|
let ttf = font.ttf();
|
||||||
|
|
||||||
// Do we have a TrueType or CFF font?
|
// Do we have a TrueType or CFF font?
|
||||||
@ -87,16 +89,15 @@ pub fn write_fonts(context: &WithGlobalRefs) -> (PdfChunk, HashMap<Font, Ref>) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Extract the widths of all glyphs.
|
// Extract the widths of all glyphs.
|
||||||
let mut widths = vec![];
|
// `remapped_gids` returns an iterator over the old GIDs in their new sorted
|
||||||
for gid in std::iter::once(0).chain(glyph_set.keys().copied()) {
|
// order, so we can append the widths as is.
|
||||||
let width = ttf.glyph_hor_advance(GlyphId(gid)).unwrap_or(0);
|
let widths = glyph_remapper
|
||||||
let units = font.to_em(width).to_font_units();
|
.remapped_gids()
|
||||||
let cid = glyph_cid(font, gid);
|
.map(|gid| {
|
||||||
if usize::from(cid) >= widths.len() {
|
let width = ttf.glyph_hor_advance(GlyphId(gid)).unwrap_or(0);
|
||||||
widths.resize(usize::from(cid) + 1, 0.0);
|
font.to_em(width).to_font_units()
|
||||||
widths[usize::from(cid)] = units;
|
})
|
||||||
}
|
.collect::<Vec<_>>();
|
||||||
}
|
|
||||||
|
|
||||||
// Write all non-zero glyph widths.
|
// Write all non-zero glyph widths.
|
||||||
let mut first = 0;
|
let mut first = 0;
|
||||||
@ -115,19 +116,15 @@ pub fn write_fonts(context: &WithGlobalRefs) -> (PdfChunk, HashMap<Font, Ref>) {
|
|||||||
|
|
||||||
// Write the /ToUnicode character map, which maps glyph ids back to
|
// Write the /ToUnicode character map, which maps glyph ids back to
|
||||||
// unicode codepoints to enable copying out of the PDF.
|
// unicode codepoints to enable copying out of the PDF.
|
||||||
let cmap = create_cmap(font, glyph_set);
|
let cmap = create_cmap(glyph_set, glyph_remapper);
|
||||||
chunk.cmap(cmap_ref, &cmap.finish());
|
chunk.cmap(cmap_ref, &cmap.finish());
|
||||||
|
|
||||||
// Subset and write the font's bytes.
|
let subset = subset_font(font, glyph_remapper);
|
||||||
let glyphs: Vec<_> = glyph_set.keys().copied().collect();
|
let mut stream = chunk.stream(data_ref, &subset);
|
||||||
let data = subset_font(font, &glyphs);
|
|
||||||
|
|
||||||
let mut stream = chunk.stream(data_ref, &data);
|
|
||||||
stream.filter(Filter::FlateDecode);
|
stream.filter(Filter::FlateDecode);
|
||||||
if is_cff {
|
if is_cff {
|
||||||
stream.pair(Name(b"Subtype"), Name(b"CIDFontType0C"));
|
stream.pair(Name(b"Subtype"), Name(b"CIDFontType0C"));
|
||||||
}
|
}
|
||||||
|
|
||||||
stream.finish();
|
stream.finish();
|
||||||
|
|
||||||
let mut font_descriptor =
|
let mut font_descriptor =
|
||||||
@ -194,15 +191,18 @@ pub fn write_font_descriptor<'a>(
|
|||||||
|
|
||||||
/// Subset a font to the given glyphs.
|
/// Subset a font to the given glyphs.
|
||||||
///
|
///
|
||||||
/// - For a font with TrueType outlines, this returns the whole OpenType font.
|
/// - For a font with TrueType outlines, this produces the whole OpenType font.
|
||||||
/// - For a font with CFF outlines, this returns just the CFF font program.
|
/// - For a font with CFF outlines, this produces just the CFF font program.
|
||||||
|
///
|
||||||
|
/// In both cases, this returns the already compressed data.
|
||||||
#[comemo::memoize]
|
#[comemo::memoize]
|
||||||
#[typst_macros::time(name = "subset font")]
|
#[typst_macros::time(name = "subset font")]
|
||||||
fn subset_font(font: &Font, glyphs: &[u16]) -> Arc<Vec<u8>> {
|
fn subset_font(font: &Font, glyph_remapper: &GlyphRemapper) -> Arc<Vec<u8>> {
|
||||||
let data = font.data();
|
let data = font.data();
|
||||||
let profile = subsetter::Profile::pdf(glyphs);
|
// TODO: Fail export instead of unwrapping once export diagnostics exist.
|
||||||
let subsetted = subsetter::subset(data, font.index(), profile);
|
let subsetted = subsetter::subset(data, font.index(), glyph_remapper).unwrap();
|
||||||
let mut data = subsetted.as_deref().unwrap_or(data);
|
|
||||||
|
let mut data = subsetted.as_ref();
|
||||||
|
|
||||||
// Extract the standalone CFF font program if applicable.
|
// Extract the standalone CFF font program if applicable.
|
||||||
let raw = ttf_parser::RawFace::parse(data, 0).unwrap();
|
let raw = ttf_parser::RawFace::parse(data, 0).unwrap();
|
||||||
@ -259,46 +259,19 @@ pub fn improve_glyph_sets(glyph_sets: &mut HashMap<Font, BTreeMap<u16, EcoString
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Create a /ToUnicode CMap.
|
/// Create a /ToUnicode CMap.
|
||||||
fn create_cmap(font: &Font, glyph_set: &BTreeMap<u16, EcoString>) -> UnicodeCmap {
|
fn create_cmap(
|
||||||
|
glyph_set: &BTreeMap<u16, EcoString>,
|
||||||
|
glyph_remapper: &GlyphRemapper,
|
||||||
|
) -> UnicodeCmap {
|
||||||
// Produce a reverse mapping from glyphs' CIDs to unicode strings.
|
// Produce a reverse mapping from glyphs' CIDs to unicode strings.
|
||||||
let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO);
|
let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO);
|
||||||
for (&g, text) in glyph_set.iter() {
|
for (&g, text) in glyph_set.iter() {
|
||||||
|
// See the comment in `write_normal_text` for why we can choose the CID this way.
|
||||||
|
let cid = glyph_remapper.get(g).unwrap();
|
||||||
if !text.is_empty() {
|
if !text.is_empty() {
|
||||||
cmap.pair_with_multiple(glyph_cid(font, g), text.chars());
|
cmap.pair_with_multiple(cid, text.chars());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cmap
|
cmap
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the CID for a glyph id.
|
|
||||||
///
|
|
||||||
/// When writing text into a PDF, we have to specify CIDs (character ids) not
|
|
||||||
/// GIDs (glyph IDs).
|
|
||||||
///
|
|
||||||
/// Most of the time, the mapping between these two is an identity mapping. In
|
|
||||||
/// particular, for TrueType fonts, the mapping is an identity mapping because
|
|
||||||
/// of this line above:
|
|
||||||
/// ```ignore
|
|
||||||
/// cid.cid_to_gid_map_predefined(Name(b"Identity"));
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// However, CID-keyed CFF fonts may have a non-identity mapping defined in
|
|
||||||
/// their charset. For those, we must map the glyph IDs in a `TextItem` to CIDs.
|
|
||||||
/// The font defines the map through its charset. The charset usually maps
|
|
||||||
/// glyphs to SIDs (string ids) specifying the glyph's name. Not for CID-keyed
|
|
||||||
/// fonts though! For these, the SIDs are CIDs in disguise. Relevant quote from
|
|
||||||
/// the CFF spec:
|
|
||||||
///
|
|
||||||
/// > The charset data, although in the same format as non-CIDFonts, will
|
|
||||||
/// > represent CIDs rather than SIDs, [...]
|
|
||||||
///
|
|
||||||
/// This function performs the mapping from glyph ID to CID. It also works for
|
|
||||||
/// non CID-keyed fonts. Then, it will simply return the glyph ID.
|
|
||||||
pub(super) fn glyph_cid(font: &Font, glyph_id: u16) -> u16 {
|
|
||||||
font.ttf()
|
|
||||||
.tables()
|
|
||||||
.cff
|
|
||||||
.and_then(|cff| cff.glyph_cid(ttf_parser::GlyphId(glyph_id)))
|
|
||||||
.unwrap_or(glyph_id)
|
|
||||||
}
|
|
||||||
|
@ -183,7 +183,8 @@ fn encode_alpha(raster: &RasterImage) -> (Vec<u8>, Filter) {
|
|||||||
/// Encode an SVG into a chunk of PDF objects.
|
/// Encode an SVG into a chunk of PDF objects.
|
||||||
#[typst_macros::time(name = "encode svg")]
|
#[typst_macros::time(name = "encode svg")]
|
||||||
fn encode_svg(svg: &SvgImage) -> (Chunk, Ref) {
|
fn encode_svg(svg: &SvgImage) -> (Chunk, Ref) {
|
||||||
svg2pdf::to_chunk(svg.tree(), svg2pdf::ConversionOptions::default())
|
// TODO: Don't unwrap once we have export diagnostics.
|
||||||
|
svg2pdf::to_chunk(svg.tree(), svg2pdf::ConversionOptions::default()).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A pre-encoded image.
|
/// A pre-encoded image.
|
||||||
|
@ -53,6 +53,7 @@ pub fn traverse_pages(
|
|||||||
}
|
}
|
||||||
|
|
||||||
improve_glyph_sets(&mut resources.glyph_sets);
|
improve_glyph_sets(&mut resources.glyph_sets);
|
||||||
|
improve_glyph_sets(&mut resources.color_glyph_sets);
|
||||||
|
|
||||||
(PdfChunk::new(), (pages, resources))
|
(PdfChunk::new(), (pages, resources))
|
||||||
}
|
}
|
||||||
|
@ -11,6 +11,7 @@ use std::hash::Hash;
|
|||||||
|
|
||||||
use ecow::{eco_format, EcoString};
|
use ecow::{eco_format, EcoString};
|
||||||
use pdf_writer::{Dict, Finish, Name, Ref};
|
use pdf_writer::{Dict, Finish, Name, Ref};
|
||||||
|
use subsetter::GlyphRemapper;
|
||||||
use typst::text::Lang;
|
use typst::text::Lang;
|
||||||
use typst::{text::Font, utils::Deferred, visualize::Image};
|
use typst::{text::Font, utils::Deferred, visualize::Image};
|
||||||
|
|
||||||
@ -82,6 +83,10 @@ pub struct Resources<R = Ref> {
|
|||||||
/// PDF's /ToUnicode map for glyphs that don't have an entry in the font's
|
/// PDF's /ToUnicode map for glyphs that don't have an entry in the font's
|
||||||
/// cmap. This is important for copy-paste and searching.
|
/// cmap. This is important for copy-paste and searching.
|
||||||
pub glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
|
pub glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
|
||||||
|
/// Same as `glyph_sets`, but for color fonts.
|
||||||
|
pub color_glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
|
||||||
|
/// Stores the glyph remapper for each font for the subsetter.
|
||||||
|
pub glyph_remappers: HashMap<Font, GlyphRemapper>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R: Renumber> Renumber for Resources<R> {
|
impl<R: Renumber> Renumber for Resources<R> {
|
||||||
@ -112,6 +117,8 @@ impl Default for Resources<()> {
|
|||||||
color_fonts: None,
|
color_fonts: None,
|
||||||
languages: BTreeMap::new(),
|
languages: BTreeMap::new(),
|
||||||
glyph_sets: HashMap::new(),
|
glyph_sets: HashMap::new(),
|
||||||
|
color_glyph_sets: HashMap::new(),
|
||||||
|
glyph_remappers: HashMap::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -138,6 +145,8 @@ impl Resources<()> {
|
|||||||
.map(|(c, r)| Box::new(c.with_refs(r))),
|
.map(|(c, r)| Box::new(c.with_refs(r))),
|
||||||
languages: self.languages,
|
languages: self.languages,
|
||||||
glyph_sets: self.glyph_sets,
|
glyph_sets: self.glyph_sets,
|
||||||
|
color_glyph_sets: self.color_glyph_sets,
|
||||||
|
glyph_remappers: self.glyph_remappers,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user