fix: ToUnicode in PDF should describe CID instead of GID (#3435)

This commit is contained in:
Y.D.X 2024-02-17 20:42:00 +08:00 committed by GitHub
parent 09b364e9a3
commit 394864fd4a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -146,7 +146,7 @@ pub(crate) fn write_fonts(ctx: &mut PdfContext) {
// Write the /ToUnicode character map, which maps glyph ids back to // Write the /ToUnicode character map, which maps glyph ids back to
// unicode codepoints to enable copying out of the PDF. // unicode codepoints to enable copying out of the PDF.
let cmap = create_cmap(ttf, glyph_set); let cmap = create_cmap(font, glyph_set);
ctx.pdf.cmap(cmap_ref, &cmap.finish()); ctx.pdf.cmap(cmap_ref, &cmap.finish());
// Subset and write the font's bytes. // Subset and write the font's bytes.
@ -198,10 +198,9 @@ fn subset_tag(glyphs: &BTreeMap<u16, EcoString>) -> EcoString {
} }
/// Create a /ToUnicode CMap. /// Create a /ToUnicode CMap.
fn create_cmap( fn create_cmap(font: &Font, glyph_set: &mut BTreeMap<u16, EcoString>) -> UnicodeCmap {
ttf: &ttf_parser::Face, let ttf = font.ttf();
glyph_set: &mut BTreeMap<u16, EcoString>,
) -> UnicodeCmap {
// For glyphs that have codepoints mapping to them in the font's cmap table, // For glyphs that have codepoints mapping to them in the font's cmap table,
// we prefer them over pre-existing text mappings from the document. Only // we prefer them over pre-existing text mappings from the document. Only
// things that don't have a corresponding codepoint (or only a private-use // things that don't have a corresponding codepoint (or only a private-use
@ -225,11 +224,11 @@ fn create_cmap(
}); });
} }
// Produce a reverse mapping from glyphs to unicode strings. // Produce a reverse mapping from glyphs' CIDs to unicode strings.
let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO); let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO);
for (&g, text) in glyph_set.iter() { for (&g, text) in glyph_set.iter() {
if !text.is_empty() { if !text.is_empty() {
cmap.pair_with_multiple(g, text.chars()); cmap.pair_with_multiple(glyph_cid(font, g), text.chars());
} }
} }