From f2882bf85476cd55eaad0594d983fbcf8a1ef778 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 25 Aug 2021 13:24:30 +0200 Subject: [PATCH] Support decoding of mac roman names This allows discovery of Apple fonts without unicode name entries. --- src/export/pdf.rs | 9 ++------- src/font.rs | 31 ++++++++++++++++++++++--------- src/util/mac.rs | 25 +++++++++++++++++++++++++ src/util/mod.rs | 2 ++ 4 files changed, 51 insertions(+), 16 deletions(-) create mode 100644 src/util/mac.rs diff --git a/src/export/pdf.rs b/src/export/pdf.rs index 9b9d0e8e6..65c922ab5 100644 --- a/src/export/pdf.rs +++ b/src/export/pdf.rs @@ -15,7 +15,7 @@ use ttf_parser::{name_id, GlyphId, Tag}; use super::subset; use crate::color::Color; -use crate::font::{FaceId, FontStore}; +use crate::font::{find_name, FaceId, FontStore}; use crate::geom::{self, Em, Length, Size}; use crate::image::{Image, ImageId, ImageStore}; use crate::layout::{Element, Frame, Geometry, Paint}; @@ -289,12 +289,7 @@ impl<'a> PdfExporter<'a> { let face = self.fonts.get(face_id); let ttf = face.ttf(); - let name = ttf - .names() - .find(|entry| { - entry.name_id() == name_id::POST_SCRIPT_NAME && entry.is_unicode() - }) - .and_then(|entry| entry.to_string()) + let name = find_name(ttf.names(), name_id::POST_SCRIPT_NAME) .unwrap_or_else(|| "unknown".to_string()); let base_font = format!("ABCDEF+{}", name); diff --git a/src/font.rs b/src/font.rs index 633a1a0e9..00bff02ea 100644 --- a/src/font.rs +++ b/src/font.rs @@ -6,10 +6,11 @@ use std::path::{Path, PathBuf}; use std::rc::Rc; use serde::{Deserialize, Serialize}; -use ttf_parser::{name_id, GlyphId}; +use ttf_parser::{name_id, GlyphId, PlatformId}; use crate::geom::Em; use crate::loading::{FileHash, Loader}; +use crate::util::decode_mac_roman; /// A unique identifier for a loaded font face. 
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] @@ -352,16 +353,11 @@ impl FaceInfo { data: &'a [u8], ) -> impl Iterator<Item = FaceInfo> + 'a { let count = ttf_parser::fonts_in_collection(data).unwrap_or(1); - (0 .. count).filter_map(move |index| { - fn find_name(face: &ttf_parser::Face, name_id: u16) -> Option<String> { - face.names().find_map(|entry| { - (entry.name_id() == name_id).then(|| entry.to_string()).flatten() - }) - } + (0 .. count).filter_map(move |index| { let face = ttf_parser::Face::from_slice(data, index).ok()?; - let family = find_name(&face, name_id::TYPOGRAPHIC_FAMILY) - .or_else(|| find_name(&face, name_id::FAMILY))?; + let family = find_name(face.names(), name_id::TYPOGRAPHIC_FAMILY) + .or_else(|| find_name(face.names(), name_id::FAMILY))?; let variant = FontVariant { style: match (face.is_italic(), face.is_oblique()) { @@ -383,6 +379,23 @@ impl FaceInfo { } } +/// Find a decodable entry in a name table iterator. +pub fn find_name(mut names: ttf_parser::Names<'_>, name_id: u16) -> Option<String> { + names.find_map(|entry| { + if entry.name_id() == name_id { + if let Some(string) = entry.to_string() { + return Some(string); + } + + if entry.platform_id() == PlatformId::Macintosh && entry.encoding_id() == 0 { + return Some(decode_mac_roman(entry.name())); + } + } + + None + }) +} + /// Properties that distinguish a face from other faces in the same family. #[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] #[derive(Serialize, Deserialize)] diff --git a/src/util/mac.rs b/src/util/mac.rs new file mode 100644 index 000000000..95e8fcd68 --- /dev/null +++ b/src/util/mac.rs @@ -0,0 +1,25 @@ +/// Decode mac roman encoded bytes into a string. +pub fn decode_mac_roman(coded: &[u8]) -> String { + coded.iter().copied().map(char_from_mac_roman).collect() +} + +/// Convert a mac roman coded character to a unicode char. 
+fn char_from_mac_roman(code: u8) -> char { + #[rustfmt::skip] + const TABLE: [char; 128] = [ + 'Ä', 'Å', 'Ç', 'É', 'Ñ', 'Ö', 'Ü', 'á', 'à', 'â', 'ä', 'ã', 'å', 'ç', 'é', 'è', + 'ê', 'ë', 'í', 'ì', 'î', 'ï', 'ñ', 'ó', 'ò', 'ô', 'ö', 'õ', 'ú', 'ù', 'û', 'ü', + '†', '°', '¢', '£', '§', '•', '¶', 'ß', '®', '©', '™', '´', '¨', '≠', 'Æ', 'Ø', + '∞', '±', '≤', '≥', '¥', 'µ', '∂', '∑', '∏', 'π', '∫', 'ª', 'º', 'Ω', 'æ', 'ø', + '¿', '¡', '¬', '√', 'ƒ', '≈', '∆', '«', '»', '…', '\u{a0}', 'À', 'Ã', 'Õ', 'Œ', 'œ', + '–', '—', '“', '”', '‘', '’', '÷', '◊', 'ÿ', 'Ÿ', '⁄', '€', '‹', '›', '\u{fb01}', '\u{fb02}', + '‡', '·', '‚', '„', '‰', 'Â', 'Ê', 'Á', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'Ó', 'Ô', + '\u{f8ff}', 'Ò', 'Ú', 'Û', 'Ù', 'ı', 'ˆ', '˜', '¯', '˘', '˙', '˚', '¸', '˝', '˛', 'ˇ', + ]; + + if code < 128 { + code as char + } else { + TABLE[(code - 128) as usize] + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 309c1241d..3608ca767 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,8 +1,10 @@ //! Utilities. mod eco; +mod mac; pub use eco::EcoString; +pub use mac::decode_mac_roman; use std::cell::RefMut; use std::cmp::Ordering;