mirror of
https://github.com/typst/typst
synced 2025-05-13 20:46:23 +08:00
Implement subsetting engine 🚀
This commit is contained in:
parent
5e135db872
commit
06101492dc
@ -9,3 +9,4 @@ pdf = { path = "../pdf" }
|
|||||||
opentype = { path = "../opentype" }
|
opentype = { path = "../opentype" }
|
||||||
unicode-segmentation = "1.2"
|
unicode-segmentation = "1.2"
|
||||||
unicode-xid = "0.1.0"
|
unicode-xid = "0.1.0"
|
||||||
|
byteorder = "1"
|
||||||
|
411
src/font.rs
Normal file
411
src/font.rs
Normal file
@ -0,0 +1,411 @@
|
|||||||
|
//! Font utility and subsetting.
|
||||||
|
|
||||||
|
use std::fmt;
|
||||||
|
use std::io::{self, Cursor};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use byteorder::{BE, ReadBytesExt, WriteBytesExt};
|
||||||
|
use opentype::{OpenTypeReader, Outlines, TableRecord, Tag};
|
||||||
|
use opentype::tables::{Header, CharMap, MaximumProfile, HorizontalMetrics};
|
||||||
|
|
||||||
|
/// A font wrapper which allows subsetting a font.
///
/// It owns the raw bytes of the font program; parsing only happens when a
/// subset is requested.
#[derive(Debug, Clone, PartialEq)]
pub struct Font {
    /// The raw bytes of the complete font file.
    program: Vec<u8>,
}
|
||||||
|
|
||||||
|
impl Font {
|
||||||
|
/// Create a new font from a font program.
|
||||||
|
pub fn new(program: Vec<u8>) -> Font {
|
||||||
|
Font { program }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate a subsetted version of this font including only the chars listed in
|
||||||
|
/// `chars`.
|
||||||
|
///
|
||||||
|
/// The resulting pair contains the new font data and the new glyph mapping.
|
||||||
|
///
|
||||||
|
/// All needed tables will be included (returning an error if a table was not present
|
||||||
|
/// in the source font) and optional tables will be included if there were present
|
||||||
|
/// in the source font.
|
||||||
|
pub fn subsetted<C, I1, S1, I2, S2>(
|
||||||
|
&self,
|
||||||
|
chars: C,
|
||||||
|
needed_tables: I1,
|
||||||
|
optional_tables: I2
|
||||||
|
) -> Result<(Vec<u8>, HashMap<char, u16>), SubsettingError>
|
||||||
|
where
|
||||||
|
C: IntoIterator<Item=char>,
|
||||||
|
I1: IntoIterator<Item=S1>, S1: AsRef<str>,
|
||||||
|
I2: IntoIterator<Item=S2>, S2: AsRef<str>
|
||||||
|
{
|
||||||
|
let mut chars: Vec<char> = chars.into_iter().collect();
|
||||||
|
chars.sort();
|
||||||
|
|
||||||
|
let mut cursor = Cursor::new(&self.program);
|
||||||
|
let mut reader = OpenTypeReader::new(&mut cursor);
|
||||||
|
let outlines = reader.outlines()?;
|
||||||
|
let mut tables = reader.tables()?.to_vec();
|
||||||
|
tables.sort_by_key(|r| r.tag);
|
||||||
|
|
||||||
|
Subsetter {
|
||||||
|
program: &self.program,
|
||||||
|
reader,
|
||||||
|
outlines,
|
||||||
|
tables,
|
||||||
|
cmap: None,
|
||||||
|
hmtx: None,
|
||||||
|
loca: None,
|
||||||
|
chars,
|
||||||
|
records: Vec::new(),
|
||||||
|
body: Vec::new(),
|
||||||
|
}.subset(needed_tables, optional_tables)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Subsetter<'p> {
|
||||||
|
// Original font
|
||||||
|
program: &'p [u8],
|
||||||
|
reader: OpenTypeReader<'p, Cursor<&'p Vec<u8>>>,
|
||||||
|
outlines: Outlines,
|
||||||
|
tables: Vec<TableRecord>,
|
||||||
|
cmap: Option<CharMap>,
|
||||||
|
hmtx: Option<HorizontalMetrics>,
|
||||||
|
loca: Option<Vec<u32>>,
|
||||||
|
|
||||||
|
// Subsetted font
|
||||||
|
chars: Vec<char>,
|
||||||
|
records: Vec<TableRecord>,
|
||||||
|
body: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'p> Subsetter<'p> {
|
||||||
|
fn subset<I1, S1, I2, S2>(mut self, needed_tables: I1, optional_tables: I2)
|
||||||
|
-> SubsetResult<(Vec<u8>, HashMap<char, u16>)>
|
||||||
|
where
|
||||||
|
I1: IntoIterator<Item=S1>, S1: AsRef<str>,
|
||||||
|
I2: IntoIterator<Item=S2>, S2: AsRef<str>
|
||||||
|
{
|
||||||
|
// Iterate through the needed tables first
|
||||||
|
for table in needed_tables.into_iter() {
|
||||||
|
let table = table.as_ref();
|
||||||
|
let tag: Tag = table.parse()
|
||||||
|
.map_err(|_| SubsettingError::UnsupportedTable(table.to_string()))?;
|
||||||
|
|
||||||
|
if self.contains(tag) {
|
||||||
|
self.write_table(tag)?;
|
||||||
|
} else {
|
||||||
|
return Err(SubsettingError::MissingTable(tag.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now iterate through the optional tables
|
||||||
|
for table in optional_tables.into_iter() {
|
||||||
|
let table = table.as_ref();
|
||||||
|
let tag: Tag = table.parse()
|
||||||
|
.map_err(|_| SubsettingError::UnsupportedTable(table.to_string()))?;
|
||||||
|
|
||||||
|
if self.contains(tag) {
|
||||||
|
self.write_table(tag)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.write_header()?;
|
||||||
|
|
||||||
|
let mapping = self.chars.into_iter().enumerate().map(|(i, c)| (c, i as u16))
|
||||||
|
.collect::<HashMap<char, u16>>();
|
||||||
|
|
||||||
|
Ok((self.body, mapping))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_header(&mut self) -> SubsetResult<()> {
|
||||||
|
// Create an output buffer
|
||||||
|
let header_len = 12 + self.records.len() * 16;
|
||||||
|
let mut header = Vec::with_capacity(header_len);
|
||||||
|
|
||||||
|
let num_tables = self.records.len() as u16;
|
||||||
|
|
||||||
|
// The highester power lower than the table count.
|
||||||
|
let mut max_power = 1u16;
|
||||||
|
while max_power * 2 <= num_tables {
|
||||||
|
max_power *= 2;
|
||||||
|
}
|
||||||
|
max_power = std::cmp::min(max_power, num_tables);
|
||||||
|
|
||||||
|
let search_range = max_power * 16;
|
||||||
|
let entry_selector = (max_power as f32).log2() as u16;
|
||||||
|
let range_shift = num_tables * 16 - search_range;
|
||||||
|
|
||||||
|
// Write the base header
|
||||||
|
header.write_u32::<BE>(match self.outlines {
|
||||||
|
Outlines::TrueType => 0x00010000,
|
||||||
|
Outlines::CFF => 0x4f54544f,
|
||||||
|
})?;
|
||||||
|
header.write_u16::<BE>(num_tables)?;
|
||||||
|
header.write_u16::<BE>(search_range)?;
|
||||||
|
header.write_u16::<BE>(entry_selector)?;
|
||||||
|
header.write_u16::<BE>(range_shift)?;
|
||||||
|
|
||||||
|
// Write the table records
|
||||||
|
for record in &self.records {
|
||||||
|
header.extend(record.tag.value());
|
||||||
|
header.write_u32::<BE>(record.check_sum)?;
|
||||||
|
header.write_u32::<BE>(header_len as u32 + record.offset)?;
|
||||||
|
header.write_u32::<BE>(record.length)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
header.append(&mut self.body);
|
||||||
|
self.body = header;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_table(&mut self, tag: Tag) -> SubsetResult<()> {
|
||||||
|
match tag.value() {
|
||||||
|
b"head" | b"cvt " | b"prep" | b"fpgm" | b"name" | b"post" | b"OS/2" => {
|
||||||
|
self.copy_table(tag)
|
||||||
|
},
|
||||||
|
b"hhea" => {
|
||||||
|
let table = self.get_table_data(tag)?;
|
||||||
|
let glyph_count = self.chars.len() as u16;
|
||||||
|
self.write_table_body(tag, |this| {
|
||||||
|
this.body.extend(&table[..table.len() - 2]);
|
||||||
|
Ok(this.body.write_u16::<BE>(glyph_count)?)
|
||||||
|
})
|
||||||
|
},
|
||||||
|
b"maxp" => {
|
||||||
|
let table = self.get_table_data(tag)?;
|
||||||
|
let glyph_count = self.chars.len() as u16;
|
||||||
|
self.write_table_body(tag, |this| {
|
||||||
|
this.body.extend(&table[..4]);
|
||||||
|
this.body.write_u16::<BE>(glyph_count)?;
|
||||||
|
Ok(this.body.extend(&table[6..]))
|
||||||
|
})
|
||||||
|
},
|
||||||
|
b"hmtx" => {
|
||||||
|
self.write_table_body(tag, |this| {
|
||||||
|
this.read_cmap()?;
|
||||||
|
this.read_hmtx()?;
|
||||||
|
let cmap = this.cmap.as_ref().unwrap();
|
||||||
|
let metrics = this.hmtx.as_ref().unwrap();
|
||||||
|
|
||||||
|
for &c in &this.chars {
|
||||||
|
let glyph_id = take(cmap.get(c), c)?;
|
||||||
|
let metrics = take(metrics.get(glyph_id), c)?;
|
||||||
|
|
||||||
|
this.body.write_i16::<BE>(metrics.advance_width)?;
|
||||||
|
this.body.write_i16::<BE>(metrics.left_side_bearing)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
},
|
||||||
|
b"loca" => {
|
||||||
|
self.write_table_body(tag, |this| {
|
||||||
|
this.read_cmap()?;
|
||||||
|
this.read_loca()?;
|
||||||
|
let cmap = this.cmap.as_ref().unwrap();
|
||||||
|
let loca = this.loca.as_ref().unwrap();
|
||||||
|
|
||||||
|
let mut offset = 0;
|
||||||
|
for &c in &this.chars {
|
||||||
|
this.body.write_u32::<BE>(offset)?;
|
||||||
|
let glyph = take(cmap.get(c), c)? as usize;
|
||||||
|
let len = take(loca.get(glyph + 1), c)? - take(loca.get(glyph), c)?;
|
||||||
|
offset += len;
|
||||||
|
}
|
||||||
|
this.body.write_u32::<BE>(offset)?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
},
|
||||||
|
b"glyf" => {
|
||||||
|
self.write_table_body(tag, |this| {
|
||||||
|
let table = this.get_table_data(tag)?;
|
||||||
|
this.read_cmap()?;
|
||||||
|
this.read_loca()?;
|
||||||
|
let cmap = this.cmap.as_ref().unwrap();
|
||||||
|
let loca = this.loca.as_ref().unwrap();
|
||||||
|
|
||||||
|
for &c in &this.chars {
|
||||||
|
let glyph = take(cmap.get(c), c)? as usize;
|
||||||
|
let start = *take(loca.get(glyph), c)? as usize;
|
||||||
|
let end = *take(loca.get(glyph + 1), c)? as usize;
|
||||||
|
let shapes = table.get(start..end).ok_or(SubsettingError::InvalidFont)?;
|
||||||
|
this.body.extend(shapes);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
},
|
||||||
|
b"cmap" => {
|
||||||
|
// Always uses format 12 for simplicity
|
||||||
|
self.write_table_body(tag, |this| {
|
||||||
|
// Find out which chars are in consecutive groups
|
||||||
|
let mut groups = Vec::new();
|
||||||
|
let len = this.chars.len();
|
||||||
|
let mut i = 0;
|
||||||
|
while i < len {
|
||||||
|
let start = i;
|
||||||
|
while i + 1 < len && this.chars[i+1] as u32 == this.chars[i] as u32 + 1 {
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
groups.push((this.chars[start], this.chars[i], start));
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Table header
|
||||||
|
this.body.write_u16::<BE>(0)?;
|
||||||
|
this.body.write_u16::<BE>(1)?;
|
||||||
|
this.body.write_u16::<BE>(3)?;
|
||||||
|
this.body.write_u16::<BE>(1)?;
|
||||||
|
this.body.write_u32::<BE>(12)?;
|
||||||
|
|
||||||
|
// Subtable header
|
||||||
|
this.body.write_u16::<BE>(12)?;
|
||||||
|
this.body.write_u16::<BE>(0)?;
|
||||||
|
this.body.write_u32::<BE>((16 + 12 * groups.len()) as u32)?;
|
||||||
|
this.body.write_u32::<BE>(0)?;
|
||||||
|
this.body.write_u32::<BE>(groups.len() as u32)?;
|
||||||
|
|
||||||
|
// Subtable body
|
||||||
|
for group in &groups {
|
||||||
|
this.body.write_u32::<BE>(group.0 as u32)?;
|
||||||
|
this.body.write_u32::<BE>(group.1 as u32)?;
|
||||||
|
this.body.write_u32::<BE>(group.2 as u32)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
},
|
||||||
|
|
||||||
|
_ => Err(SubsettingError::UnsupportedTable(tag.to_string())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Append the source font's table with this tag to the body without changes.
fn copy_table(&mut self, tag: Tag) -> SubsetResult<()> {
    self.write_table_body(tag, |subsetter| {
        let bytes = subsetter.get_table_data(tag)?;
        subsetter.body.extend_from_slice(bytes);
        Ok(())
    })
}
|
||||||
|
|
||||||
|
fn write_table_body<F>(&mut self, tag: Tag, writer: F) -> SubsetResult<()>
|
||||||
|
where F: FnOnce(&mut Self) -> SubsetResult<()> {
|
||||||
|
let start = self.body.len();
|
||||||
|
writer(self)?;
|
||||||
|
let end = self.body.len();
|
||||||
|
while (self.body.len() - start) % 4 != 0 {
|
||||||
|
self.body.push(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self.records.push(TableRecord {
|
||||||
|
tag,
|
||||||
|
check_sum: calculate_check_sum(&self.body[start..]),
|
||||||
|
offset: start as u32,
|
||||||
|
length: (end - start) as u32,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_table_data(&self, tag: Tag) -> SubsetResult<&'p [u8]> {
|
||||||
|
let record = match self.tables.binary_search_by_key(&tag, |r| r.tag) {
|
||||||
|
Ok(index) => &self.tables[index],
|
||||||
|
Err(_) => return Err(SubsettingError::MissingTable(tag.to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
self.program.get(record.offset as usize .. (record.offset + record.length) as usize)
|
||||||
|
.ok_or(SubsettingError::InvalidFont)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether the source font has a table with the given tag.
fn contains(&self, tag: Tag) -> bool {
    match self.tables.binary_search_by_key(&tag, |record| record.tag) {
        Ok(_) => true,
        Err(_) => false,
    }
}
|
||||||
|
|
||||||
|
fn read_cmap(&mut self) -> SubsetResult<()> {
|
||||||
|
Ok(if self.cmap.is_none() {
|
||||||
|
self.cmap = Some(self.reader.read_table::<CharMap>()?);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_hmtx(&mut self) -> SubsetResult<()> {
|
||||||
|
Ok(if self.hmtx.is_none() {
|
||||||
|
self.hmtx = Some(self.reader.read_table::<HorizontalMetrics>()?);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_loca(&mut self) -> SubsetResult<()> {
|
||||||
|
Ok(if self.loca.is_none() {
|
||||||
|
let mut table = self.get_table_data("loca".parse().unwrap())?;
|
||||||
|
let format = self.reader.read_table::<Header>()?.index_to_loc_format;
|
||||||
|
let count = self.reader.read_table::<MaximumProfile>()?.num_glyphs + 1;
|
||||||
|
|
||||||
|
let loca = if format == 0 {
|
||||||
|
(0..count).map(|_| table.read_u16::<BE>()
|
||||||
|
.map(|x| (x as u32) * 2))
|
||||||
|
.collect::<io::Result<Vec<u32>>>()
|
||||||
|
} else {
|
||||||
|
(0..count).map(|_| table.read_u32::<BE>())
|
||||||
|
.collect::<io::Result<Vec<u32>>>()
|
||||||
|
}?;
|
||||||
|
|
||||||
|
self.loca = Some(loca);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Calculate a checksum over the sliced data as the wrapping sum of its
/// big-endian `u32` words.
///
/// If the data length is not a multiple of four, the trailing bytes are
/// treated as if they were padded with zeros instead of being ignored.
fn calculate_check_sum(data: &[u8]) -> u32 {
    let words = data.chunks_exact(4);

    // Zero-pad whatever does not fill a whole word (all zeros if nothing is left).
    let rest = words.remainder();
    let mut tail = [0u8; 4];
    tail[..rest.len()].copy_from_slice(rest);

    words
        .map(|w| u32::from_be_bytes([w[0], w[1], w[2], w[3]]))
        .fold(u32::from_be_bytes(tail), u32::wrapping_add)
}
|
||||||
|
|
||||||
|
/// Returns an error about a missing character or the wrapped data.
|
||||||
|
fn take<T>(opt: Option<T>, c: char) -> SubsetResult<T> {
|
||||||
|
opt.ok_or(SubsettingError::MissingCharacter(c))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Shorthand for a result whose error type is `SubsettingError`.
type SubsetResult<T> = Result<T, SubsettingError>;
|
||||||
|
|
||||||
|
/// A failure when subsetting a font.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum SubsettingError {
|
||||||
|
MissingTable(String),
|
||||||
|
UnsupportedTable(String),
|
||||||
|
MissingCharacter(char),
|
||||||
|
InvalidFont,
|
||||||
|
FontError(opentype::Error),
|
||||||
|
IoError(io::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<io::Error> for SubsettingError {
|
||||||
|
fn from(err: io::Error) -> SubsettingError {
|
||||||
|
SubsettingError::IoError(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<opentype::Error> for SubsettingError {
|
||||||
|
fn from(err: opentype::Error) -> SubsettingError {
|
||||||
|
SubsettingError::FontError(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for SubsettingError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
use SubsettingError::*;
|
||||||
|
write!(f, "subsetting error: ")?;
|
||||||
|
match self {
|
||||||
|
MissingTable(table) => write!(f, "missing table: {}", table),
|
||||||
|
UnsupportedTable(table) => write!(f, "unsupported table: {}", table),
|
||||||
|
MissingCharacter(c) => write!(f, "missing character: {}", c),
|
||||||
|
InvalidFont => write!(f, "invalid font"),
|
||||||
|
FontError(err) => write!(f, "font error: {}", err),
|
||||||
|
IoError(err) => write!(f, "io error: {}", err),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -23,6 +23,7 @@
|
|||||||
|
|
||||||
mod pdf;
|
mod pdf;
|
||||||
mod utility;
|
mod utility;
|
||||||
|
pub mod font;
|
||||||
pub mod parsing;
|
pub mod parsing;
|
||||||
pub mod doc;
|
pub mod doc;
|
||||||
|
|
||||||
|
190
src/pdf.rs
190
src/pdf.rs
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::io::{self, Write, Cursor};
|
use std::io::{self, Write, Cursor};
|
||||||
use crate::doc::Document;
|
use std::collections::{HashMap, HashSet};
|
||||||
use pdf::{PdfWriter, Id, Rect, Version, Trailer};
|
use pdf::{PdfWriter, Id, Rect, Version, Trailer};
|
||||||
use pdf::doc::{Catalog, PageTree, Page, Resource, Content};
|
use pdf::doc::{Catalog, PageTree, Page, Resource, Content};
|
||||||
use pdf::text::Text;
|
use pdf::text::Text;
|
||||||
@ -11,6 +11,8 @@ use pdf::font::{
|
|||||||
WidthRecord, FontDescriptor, FontFlags, EmbeddedFont, GlyphUnit
|
WidthRecord, FontDescriptor, FontFlags, EmbeddedFont, GlyphUnit
|
||||||
};
|
};
|
||||||
use opentype::{OpenTypeReader, tables::{self, NameEntry, MacStyleFlags}};
|
use opentype::{OpenTypeReader, tables::{self, NameEntry, MacStyleFlags}};
|
||||||
|
use crate::doc::Document;
|
||||||
|
use crate::font::Font;
|
||||||
|
|
||||||
|
|
||||||
/// A type that is a sink for documents that can be written in the _PDF_ format.
|
/// A type that is a sink for documents that can be written in the _PDF_ format.
|
||||||
@ -47,6 +49,12 @@ impl From<opentype::Error> for PdfWritingError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<crate::font::SubsettingError> for PdfWritingError {
|
||||||
|
fn from(err: crate::font::SubsettingError) -> PdfWritingError {
|
||||||
|
PdfWritingError { message: format!("{}", err) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl fmt::Display for PdfWritingError {
|
impl fmt::Display for PdfWritingError {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
write!(f, "pdf writing error: {}", self.message)
|
write!(f, "pdf writing error: {}", self.message)
|
||||||
@ -60,7 +68,7 @@ struct PdfCreator<'a, W: Write> {
|
|||||||
writer: PdfWriter<'a, W>,
|
writer: PdfWriter<'a, W>,
|
||||||
doc: &'a Document,
|
doc: &'a Document,
|
||||||
offsets: Offsets,
|
offsets: Offsets,
|
||||||
font_data: FontData,
|
font: PdfFont,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Offsets for the various groups of ids.
|
/// Offsets for the various groups of ids.
|
||||||
@ -72,17 +80,6 @@ struct Offsets {
|
|||||||
fonts: (Id, Id),
|
fonts: (Id, Id),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The data we need from the font.
|
|
||||||
struct FontData {
|
|
||||||
data: Vec<u8>,
|
|
||||||
name: tables::Name,
|
|
||||||
head: tables::Header,
|
|
||||||
post: tables::Post,
|
|
||||||
os2: tables::OS2,
|
|
||||||
hmtx: tables::HorizontalMetrics,
|
|
||||||
cmap: tables::CharMap,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, W: Write> PdfCreator<'a, W> {
|
impl<'a, W: Write> PdfCreator<'a, W> {
|
||||||
/// Create a new _PDF_ Creator.
|
/// Create a new _PDF_ Creator.
|
||||||
pub fn new(target: &'a mut W, doc: &'a Document) -> PdfResult<PdfCreator<'a, W>> {
|
pub fn new(target: &'a mut W, doc: &'a Document) -> PdfResult<PdfCreator<'a, W>> {
|
||||||
@ -94,9 +91,17 @@ impl<'a, W: Write> PdfCreator<'a, W> {
|
|||||||
let contents = (pages.1 + 1, pages.1 + content_count);
|
let contents = (pages.1 + 1, pages.1 + content_count);
|
||||||
let fonts = (contents.1 + 1, contents.1 + 4);
|
let fonts = (contents.1 + 1, contents.1 + 4);
|
||||||
|
|
||||||
// Read the font from a file.
|
// Find out which chars are used in this document.
|
||||||
|
let mut chars = HashSet::new();
|
||||||
|
for page in &doc.pages {
|
||||||
|
for content in &page.contents {
|
||||||
|
chars.extend(content.0.chars());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a subsetted pdf font.
|
||||||
let data = std::fs::read(format!("../fonts/{}.ttf", doc.font))?;
|
let data = std::fs::read(format!("../fonts/{}.ttf", doc.font))?;
|
||||||
let font_data = FontData::load(data)?;
|
let font = PdfFont::new(&doc.font, data, chars)?;
|
||||||
|
|
||||||
Ok(PdfCreator {
|
Ok(PdfCreator {
|
||||||
writer: PdfWriter::new(target),
|
writer: PdfWriter::new(target),
|
||||||
@ -108,7 +113,7 @@ impl<'a, W: Write> PdfCreator<'a, W> {
|
|||||||
contents,
|
contents,
|
||||||
fonts,
|
fonts,
|
||||||
},
|
},
|
||||||
font_data,
|
font,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -184,17 +189,9 @@ impl<'a, W: Write> PdfCreator<'a, W> {
|
|||||||
/// Write the fonts.
|
/// Write the fonts.
|
||||||
fn write_fonts(&mut self) -> PdfResult<()> {
|
fn write_fonts(&mut self) -> PdfResult<()> {
|
||||||
let id = self.offsets.fonts.0;
|
let id = self.offsets.fonts.0;
|
||||||
let font_data = &self.font_data;
|
|
||||||
|
|
||||||
// Create conversion function from font units to PDF units.
|
|
||||||
let ratio = 1000.0 / (font_data.head.units_per_em as f32);
|
|
||||||
let convert = |x| (ratio * x as f32).round() as GlyphUnit;
|
|
||||||
|
|
||||||
let font_name = font_data.name.get_decoded(NameEntry::PostScriptName);
|
|
||||||
let base_font = font_name.as_ref().unwrap_or(&self.doc.font);
|
|
||||||
|
|
||||||
self.writer.write_obj(id, &Type0Font::new(
|
self.writer.write_obj(id, &Type0Font::new(
|
||||||
base_font.clone(),
|
self.font.name.clone(),
|
||||||
CMapEncoding::Predefined("Identity-H".to_owned()),
|
CMapEncoding::Predefined("Identity-H".to_owned()),
|
||||||
id + 1
|
id + 1
|
||||||
)).unwrap();
|
)).unwrap();
|
||||||
@ -202,80 +199,124 @@ impl<'a, W: Write> PdfCreator<'a, W> {
|
|||||||
self.writer.write_obj(id + 1,
|
self.writer.write_obj(id + 1,
|
||||||
CIDFont::new(
|
CIDFont::new(
|
||||||
CIDFontType::Type2,
|
CIDFontType::Type2,
|
||||||
base_font.clone(),
|
self.font.name.clone(),
|
||||||
CIDSystemInfo::new("(Adobe)", "(Identity)", 0),
|
CIDSystemInfo::new("(Adobe)", "(Identity)", 0),
|
||||||
id + 2,
|
id + 2,
|
||||||
).widths(vec![
|
).widths(vec![WidthRecord::start(0, self.font.widths.clone())])
|
||||||
WidthRecord::start(0, font_data.hmtx.metrics.iter().map(|m| convert(m.advance_width))
|
|
||||||
)])
|
|
||||||
).unwrap();
|
).unwrap();
|
||||||
|
|
||||||
let mut flags = FontFlags::empty();
|
|
||||||
flags.set(FontFlags::FIXED_PITCH, font_data.post.is_fixed_pitch);
|
|
||||||
flags.set(FontFlags::SERIF, base_font.contains("Serif"));
|
|
||||||
flags.insert(FontFlags::SYMBOLIC);
|
|
||||||
flags.set(FontFlags::ITALIC, font_data.head.mac_style.contains(MacStyleFlags::ITALIC));
|
|
||||||
flags.insert(FontFlags::SMALL_CAP);
|
|
||||||
|
|
||||||
self.writer.write_obj(id + 2,
|
self.writer.write_obj(id + 2,
|
||||||
FontDescriptor::new(
|
FontDescriptor::new(
|
||||||
base_font.clone(),
|
self.font.name.clone(),
|
||||||
flags,
|
self.font.flags,
|
||||||
font_data.post.italic_angle.to_f32(),
|
self.font.italic_angle,
|
||||||
)
|
)
|
||||||
.font_bbox(Rect::new(
|
.font_bbox(self.font.bounding_box)
|
||||||
convert(font_data.head.x_min),
|
.ascent(self.font.ascender)
|
||||||
convert(font_data.head.y_min),
|
.descent(self.font.descender)
|
||||||
convert(font_data.head.x_max),
|
.cap_height(self.font.cap_height)
|
||||||
convert(font_data.head.y_max)
|
.stem_v(self.font.stem_v)
|
||||||
))
|
|
||||||
.ascent(convert(font_data.os2.s_typo_ascender))
|
|
||||||
.descent(convert(font_data.os2.s_typo_descender))
|
|
||||||
.cap_height(convert(font_data.os2.s_cap_height
|
|
||||||
.unwrap_or(font_data.os2.s_typo_ascender)))
|
|
||||||
.stem_v((10.0 + 220.0 * (font_data.os2.us_weight_class as f32
|
|
||||||
- 50.0) / 900.0) as GlyphUnit)
|
|
||||||
.font_file_3(id + 3)
|
.font_file_3(id + 3)
|
||||||
).unwrap();
|
).unwrap();
|
||||||
|
|
||||||
self.writer.write_obj(id + 3, &EmbeddedFont::OpenType(&font_data.data)).unwrap();
|
|
||||||
|
self.writer.write_obj(id + 3, &EmbeddedFont::OpenType(&self.font.data)).unwrap();
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Encode the given text for our font.
|
/// Encode the given text for our font.
|
||||||
fn encode(&self, text: &str) -> Vec<u8> {
|
fn encode(&self, text: &str) -> Vec<u8> {
|
||||||
let default = self.font_data.os2.us_default_char.unwrap_or(0);
|
|
||||||
let mut bytes = Vec::with_capacity(2 * text.len());
|
let mut bytes = Vec::with_capacity(2 * text.len());
|
||||||
text.chars().map(|c| {
|
for glyph in text.chars().map(|c| self.font.map(c)) {
|
||||||
self.font_data.cmap.get(c).unwrap_or(default)
|
|
||||||
})
|
|
||||||
.for_each(|glyph| {
|
|
||||||
bytes.push((glyph >> 8) as u8);
|
bytes.push((glyph >> 8) as u8);
|
||||||
bytes.push((glyph & 0xff) as u8);
|
bytes.push((glyph & 0xff) as u8);
|
||||||
});
|
}
|
||||||
bytes
|
bytes
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FontData {
|
|
||||||
/// Load various needed tables from the font data.
|
/// The data we need from the font.
|
||||||
pub fn load(data: Vec<u8>) -> PdfResult<FontData> {
|
struct PdfFont {
|
||||||
let mut readable = Cursor::new(data);
|
data: Vec<u8>,
|
||||||
|
mapping: HashMap<char, u16>,
|
||||||
|
default_glyph: u16,
|
||||||
|
name: String,
|
||||||
|
widths: Vec<GlyphUnit>,
|
||||||
|
flags: FontFlags,
|
||||||
|
italic_angle: f32,
|
||||||
|
bounding_box: Rect<GlyphUnit>,
|
||||||
|
ascender: GlyphUnit,
|
||||||
|
descender: GlyphUnit,
|
||||||
|
cap_height: GlyphUnit,
|
||||||
|
stem_v: GlyphUnit,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PdfFont {
|
||||||
|
/// Create a subsetted version of the font and calculate some information
|
||||||
|
/// needed for creating the _PDF_.
|
||||||
|
pub fn new(font_name: &str, data: Vec<u8>, chars: HashSet<char>) -> PdfResult<PdfFont> {
|
||||||
|
let mut readable = Cursor::new(&data);
|
||||||
let mut reader = OpenTypeReader::new(&mut readable);
|
let mut reader = OpenTypeReader::new(&mut readable);
|
||||||
|
|
||||||
let name = reader.read_table::<tables::Name>()?;
|
|
||||||
let head = reader.read_table::<tables::Header>()?;
|
let head = reader.read_table::<tables::Header>()?;
|
||||||
|
let name = reader.read_table::<tables::Name>()?;
|
||||||
let post = reader.read_table::<tables::Post>()?;
|
let post = reader.read_table::<tables::Post>()?;
|
||||||
let os2 = reader.read_table::<tables::OS2>()?;
|
let os2 = reader.read_table::<tables::OS2>()?;
|
||||||
let hmtx = reader.read_table::<tables::HorizontalMetrics>()?;
|
|
||||||
let cmap = reader.read_table::<tables::CharMap>()?;
|
|
||||||
|
|
||||||
Ok(FontData {
|
let font = Font::new(data);
|
||||||
data: readable.into_inner(),
|
let (subsetted, mapping) = font.subsetted(
|
||||||
name, head, post, os2, hmtx, cmap,
|
chars,
|
||||||
|
&["head", "hhea", "maxp", "hmtx", "loca", "glyf"],
|
||||||
|
&["cvt ", "prep", "fpgm", "OS/2", "cmap", "name", "post"],
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let unit_ratio = 1000.0 / (head.units_per_em as f32);
|
||||||
|
let convert = |x| (unit_ratio * x as f32).round() as GlyphUnit;
|
||||||
|
|
||||||
|
let base_font = name.get_decoded(NameEntry::PostScriptName);
|
||||||
|
let font_name = base_font.unwrap_or_else(|| font_name.to_owned());
|
||||||
|
|
||||||
|
|
||||||
|
let mut flags = FontFlags::empty();
|
||||||
|
flags.set(FontFlags::FIXED_PITCH, post.is_fixed_pitch);
|
||||||
|
flags.set(FontFlags::SERIF, font_name.contains("Serif"));
|
||||||
|
flags.insert(FontFlags::SYMBOLIC);
|
||||||
|
flags.set(FontFlags::ITALIC, head.mac_style.contains(MacStyleFlags::ITALIC));
|
||||||
|
flags.insert(FontFlags::SMALL_CAP);
|
||||||
|
|
||||||
|
let mut readable = Cursor::new(&subsetted);
|
||||||
|
let mut reader = OpenTypeReader::new(&mut readable);
|
||||||
|
let hmtx = reader.read_table::<tables::HorizontalMetrics>()?;
|
||||||
|
let widths = hmtx.metrics.iter().map(|m| convert(m.advance_width)).collect();
|
||||||
|
|
||||||
|
|
||||||
|
Ok(PdfFont {
|
||||||
|
data: subsetted,
|
||||||
|
mapping,
|
||||||
|
default_glyph: os2.us_default_char.unwrap_or(0),
|
||||||
|
name: font_name,
|
||||||
|
widths,
|
||||||
|
flags,
|
||||||
|
italic_angle: post.italic_angle.to_f32(),
|
||||||
|
bounding_box: Rect::new(
|
||||||
|
convert(head.x_min),
|
||||||
|
convert(head.y_min),
|
||||||
|
convert(head.x_max),
|
||||||
|
convert(head.y_max)
|
||||||
|
),
|
||||||
|
ascender: convert(os2.s_typo_ascender),
|
||||||
|
descender: convert(os2.s_typo_descender),
|
||||||
|
cap_height: convert(os2.s_cap_height.unwrap_or(os2.s_typo_ascender)),
|
||||||
|
stem_v: (10.0 + 220.0 * (os2.us_weight_class as f32 - 50.0) / 900.0) as GlyphUnit,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Map a character to its glyph index.
|
||||||
|
fn map(&self, c: char) -> u16 {
|
||||||
|
self.mapping.get(&c).map(|&g| g).unwrap_or(self.default_glyph)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -304,4 +345,17 @@ mod pdf_tests {
|
|||||||
Stet clita kasd gubergren, no sea takimata sanctus est.
|
Stet clita kasd gubergren, no sea takimata sanctus est.
|
||||||
");
|
");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// #[test]
|
||||||
|
// fn pdf_fix_1() {
|
||||||
|
// use unicode_normalization::UnicodeNormalization;
|
||||||
|
|
||||||
|
// let text = "Hello World! from Typeset‼";
|
||||||
|
// let chars = text.nfd().collect::<HashSet<char>>();
|
||||||
|
|
||||||
|
// // Create a subsetted pdf font.
|
||||||
|
// let data = std::fs::read("../fonts/NotoSans-Regular.ttf").unwrap();
|
||||||
|
// let font = PdfFont::new("NotoSans-Regular", data, chars).unwrap();
|
||||||
|
// std::fs::write("../target/NotoTest.ttf", font.data).unwrap();
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user