Refactor and refine PDF exporter ♻

This commit is contained in:
Laurenz 2019-10-14 17:32:37 +02:00
parent 7c0899b537
commit 5e41672a91
5 changed files with 191 additions and 148 deletions

View File

@ -16,7 +16,6 @@ fn main() {
} }
} }
/// The actual main function.
fn run() -> Result<(), Box<dyn Error>> { fn run() -> Result<(), Box<dyn Error>> {
let args: Vec<String> = env::args().collect(); let args: Vec<String> = env::args().collect();
if args.len() < 2 || args.len() > 3 { if args.len() < 2 || args.len() > 3 {
@ -24,42 +23,30 @@ fn run() -> Result<(), Box<dyn Error>> {
} }
let source_path = Path::new(&args[1]); let source_path = Path::new(&args[1]);
// Compute the output filename from the input filename by replacing the
// extension.
let dest_path = if args.len() <= 2 { let dest_path = if args.len() <= 2 {
let stem = source_path source_path.with_extension("pdf")
.file_stem()
.ok_or_else(|| "missing destation file name")?;
let base = source_path
.parent()
.ok_or_else(|| "missing destation folder")?;
base.join(format!("{}.pdf", stem.to_string_lossy()))
} else { } else {
PathBuf::from(&args[2]) PathBuf::from(&args[2])
}; };
if dest_path == source_path { if dest_path == source_path {
return Err("source and destination path are the same".into()); return err("source and destination path are the same");
} }
let mut source_file = File::open(source_path)
.map_err(|_| "failed to open source file")?;
let mut src = String::new(); let mut src = String::new();
let mut source_file = File::open(source_path).map_err(|_| "failed to open source file")?;
source_file source_file
.read_to_string(&mut src) .read_to_string(&mut src)
.map_err(|_| "failed to read from source file")?; .map_err(|_| "failed to read from source file")?;
// Create a typesetter with a font provider that provides the default fonts.
let mut typesetter = Typesetter::new(); let mut typesetter = Typesetter::new();
let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap(); let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap();
typesetter.add_font_provider(provider); typesetter.add_font_provider(provider);
// Typeset the source code.
let document = typesetter.typeset(&src)?; let document = typesetter.typeset(&src)?;
// Export the document into a PDF file.
let exporter = PdfExporter::new(); let exporter = PdfExporter::new();
let dest_file = File::create(&dest_path)?; let dest_file = File::create(&dest_path)?;
exporter.export(&document, typesetter.loader(), BufWriter::new(dest_file))?; exporter.export(&document, typesetter.loader(), BufWriter::new(dest_file))?;
@ -67,7 +54,12 @@ fn run() -> Result<(), Box<dyn Error>> {
Ok(()) Ok(())
} }
/// Print a usage message and quit. /// Construct an error `Result` from a message.
fn err<S: Into<String>, T>(message: S) -> Result<T, Box<dyn Error>> {
    // First turn the message into an owned string, then box it as a
    // dynamic error so it fits the `Result` type used throughout `run`.
    let text: String = message.into();
    let boxed: Box<dyn Error> = Box::from(text);
    Err(boxed)
}
/// Print a usage message and exit the process.
fn help_and_quit() { fn help_and_quit() {
let name = env::args().next().unwrap_or("typst".to_string()); let name = env::args().next().unwrap_or("typst".to_string());
println!("usage: {} source [destination]", name); println!("usage: {} source [destination]", name);

View File

@ -1,4 +1,4 @@
//! Exporting into _PDF_ documents. //! Exporting of layouts into _PDF_ documents.
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::io::{self, Write}; use std::io::{self, Write};
@ -11,13 +11,12 @@ use tide::{PdfWriter, Rect, Ref, Trailer, Version};
use toddle::font::OwnedFont; use toddle::font::OwnedFont;
use toddle::query::SharedFontLoader; use toddle::query::SharedFontLoader;
use toddle::tables::{ use toddle::tables::{CharMap, Header, HorizontalMetrics, MacStyleFlags};
CharMap, Header, HorizontalMetrics, MacStyleFlags, Name, NameEntry, Post, OS2, use toddle::tables::{Name, NameEntry, Post, OS2};
};
use toddle::Error as FontError; use toddle::Error as FontError;
use crate::layout::{Layout, LayoutAction, MultiLayout}; use crate::layout::{Layout, LayoutAction, MultiLayout};
use crate::size::{Size, Size2D}; use crate::size::Size;
/// Exports layouts into _PDFs_. /// Exports layouts into _PDFs_.
#[derive(Debug)] #[derive(Debug)]
@ -30,8 +29,9 @@ impl PdfExporter {
PdfExporter {} PdfExporter {}
} }
/// Export a finished layouts into a writer. Returns how many bytes were /// Export a finished multi-layout. The layout needs to have been created with the same
/// written. /// font loader passed in here since the indices must match. The PDF data is written into
/// the target writable and the number of bytes written is returned.
#[inline] #[inline]
pub fn export<W: Write>( pub fn export<W: Write>(
&self, &self,
@ -40,22 +40,31 @@ impl PdfExporter {
target: W, target: W,
) -> PdfResult<usize> ) -> PdfResult<usize>
{ {
let mut engine = PdfEngine::new(layout, loader, target)?; ExportProcess::new(layout, loader, target)?.write()
engine.write()
} }
} }
/// Writes layouts in the _PDF_ format. struct ExportProcess<'d, W: Write> {
struct PdfEngine<'d, W: Write> {
writer: PdfWriter<W>, writer: PdfWriter<W>,
layout: &'d MultiLayout, layouts: &'d MultiLayout,
/// Since we cross-reference pages and fonts with their IDs already in the document
/// catalog, we need to know exactly which ID is used for what from the beginning.
/// Thus, we compute a range for each category of object and store these here.
offsets: Offsets, offsets: Offsets,
/// Each font has got an index from the font loader. However, these may not be
/// ascending from zero. Since we want to use the indices 0 .. num_fonts we
/// go through all font usages and assign a new index for each used font.
/// This remapping is stored here because we need it when converting the
/// layout actions in `ExportProcess::write_page`.
font_remap: HashMap<usize, usize>, font_remap: HashMap<usize, usize>,
/// These are the fonts sorted by their *new* ids, that is, the values of `font_remap`.
fonts: Vec<OwnedFont>, fonts: Vec<OwnedFont>,
} }
/// Offsets for the various groups of ids. /// Indicates which range of PDF IDs are used for which contents.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct Offsets { struct Offsets {
catalog: Ref, catalog: Ref,
page_tree: Ref, page_tree: Ref,
@ -64,109 +73,129 @@ struct Offsets {
fonts: (Ref, Ref), fonts: (Ref, Ref),
} }
impl<'d, W: Write> PdfEngine<'d, W> { impl<'d, W: Write> ExportProcess<'d, W> {
/// Create a new _PDF_ engine.
fn new( fn new(
layout: &'d MultiLayout, layouts: &'d MultiLayout,
loader: &SharedFontLoader, font_loader: &SharedFontLoader,
target: W, target: W,
) -> PdfResult<PdfEngine<'d, W>> ) -> PdfResult<ExportProcess<'d, W>>
{ {
// Create a subsetted PDF font for each font in the layout. let (fonts, font_remap) = Self::subset_fonts(layouts, font_loader)?;
let mut font_remap = HashMap::new(); let offsets = Self::calculate_offset(layouts.count(), fonts.len());
let fonts = {
let mut font = 0usize;
let mut chars = HashMap::new();
// Find out which characters are used for each font. Ok(ExportProcess {
for boxed in &layout.layouts {
for action in &boxed.actions {
match action {
LayoutAction::WriteText(string) => chars
.entry(font)
.or_insert_with(HashSet::new)
.extend(string.chars()),
LayoutAction::SetFont(id, _) => {
font = *id;
let new_id = font_remap.len();
font_remap.entry(font).or_insert(new_id);
}
_ => {}
}
}
}
// Collect the fonts into a vector in the order of the values in the remapping.
let mut loader = loader.borrow_mut();
let mut order = font_remap
.iter()
.map(|(&old, &new)| (old, new))
.collect::<Vec<_>>();
order.sort_by_key(|&(_, new)| new);
let mut fonts = vec![];
for (index, _) in order {
let font = loader.get_with_index(index);
let subsetted = font.subsetted(
chars[&index].iter().cloned(),
&[
"name", "OS/2", "post", "head", "hhea", "hmtx", "maxp", "cmap", "cvt ",
"fpgm", "prep", "loca", "glyf",
][..],
)?;
fonts.push(OwnedFont::from_bytes(subsetted)?);
}
fonts
};
// Calculate a unique id for all objects that will be written.
let catalog = 1;
let page_tree = catalog + 1;
let pages = (page_tree + 1, page_tree + layout.layouts.len() as Ref);
let contents = (pages.1 + 1, pages.1 + layout.layouts.len() as Ref);
let font_offsets = (contents.1 + 1, contents.1 + 5 * fonts.len() as Ref);
let offsets = Offsets {
catalog,
page_tree,
pages,
contents,
fonts: font_offsets,
};
Ok(PdfEngine {
writer: PdfWriter::new(target), writer: PdfWriter::new(target),
layout, layouts,
offsets, offsets,
font_remap, font_remap,
fonts, fonts,
}) })
} }
/// Write the complete layout. /// Subsets all fonts and assigns each one a new index. The returned hash map
/// maps the old indices (used by the layouts) to the new one used in the PDF.
/// The new ones index into the returned vector.
fn subset_fonts(
    layouts: &'d MultiLayout,
    font_loader: &SharedFontLoader
) -> PdfResult<(Vec<OwnedFont>, HashMap<usize, usize>)>
{
    // The font tables that are carried over into every subsetted font.
    const SUBSET_TABLES: [&str; 13] = [
        "name", "OS/2", "post", "head", "hhea", "hmtx", "maxp",
        "cmap", "cvt ", "fpgm", "prep", "loca", "glyf",
    ];

    // Characters written with each font, keyed by the *old* (loader) index.
    let mut used_chars: HashMap<usize, HashSet<char>> = HashMap::new();
    // Old (loader) index -> new (PDF) index.
    let mut remap: HashMap<usize, usize> = HashMap::new();
    // Old indices in order of first use; the position in this vector is
    // exactly the new index stored in `remap`.
    let mut first_use_order: Vec<usize> = Vec::new();
    // Text written before any `SetFont` action is attributed to index 0.
    let mut current = 0;

    // Walk all layout actions once to learn which fonts are used at all
    // and which characters are written with each of them.
    for layout in layouts {
        for action in &layout.actions {
            match action {
                LayoutAction::SetFont(index, _) => {
                    current = *index;
                    // Only the first sighting of a font allocates a new index.
                    if let std::collections::hash_map::Entry::Vacant(slot) = remap.entry(current) {
                        slot.insert(first_use_order.len());
                        first_use_order.push(current);
                    }
                },

                LayoutAction::WriteText(text) => {
                    used_chars
                        .entry(current)
                        .or_insert_with(HashSet::new)
                        .extend(text.chars());
                },

                _ => {}
            }
        }
    }

    let mut loader = font_loader.borrow_mut();
    let mut fonts = Vec::with_capacity(first_use_order.len());

    // Subset the fonts in ascending order of their new index so that
    // `fonts[new_index]` is the font `remap` points to.
    for old_index in first_use_order {
        let font = loader.get_with_index(old_index);
        // NOTE(review): this indexing panics if a font was selected via
        // `SetFont` but no text was ever written with it.
        let chars = used_chars[&old_index].iter().cloned();
        let subsetted = font.subsetted(chars, &SUBSET_TABLES)?;
        fonts.push(OwnedFont::from_bytes(subsetted)?);
    }

    Ok((fonts, remap))
}
/// Assigns the PDF object IDs for every group of objects up front.
///
/// Objects cross-reference each other by ID, so all IDs have to be fixed
/// before anything is written. The IDs are handed out consecutively: the
/// catalog, the page tree, one page object per layout, one content object
/// per layout and finally five objects per font. Each range is stored as an
/// inclusive `(first, last)` pair.
fn calculate_offset(layout_count: usize, font_count: usize) -> Offsets {
    let page_count = layout_count as Ref;

    let catalog: Ref = 1;
    let page_tree: Ref = catalog + 1;

    // Page objects directly follow the page tree.
    let pages = (page_tree + 1, page_tree + page_count);
    // There is exactly one content object per page.
    let contents = (pages.1 + 1, pages.1 + page_count);
    // Every font occupies five consecutive object IDs.
    let fonts = (contents.1 + 1, contents.1 + 5 * font_count as Ref);

    Offsets { catalog, page_tree, pages, contents, fonts }
}
/// Write everything (entry point).
fn write(&mut self) -> PdfResult<usize> { fn write(&mut self) -> PdfResult<usize> {
self.writer.write_header(Version::new(1, 7))?; self.writer.write_header(Version::new(1, 7))?;
self.write_page_tree()?; self.write_preface()?;
self.write_pages()?; self.write_pages()?;
self.write_fonts()?; self.write_fonts()?;
self.writer.write_xref_table()?; self.writer.write_xref_table()?;
self.writer self.writer.write_trailer(Trailer::new(self.offsets.catalog))?;
.write_trailer(Trailer::new(self.offsets.catalog))?;
Ok(self.writer.written()) Ok(self.writer.written())
} }
/// Write the document catalog and page tree. /// Write the document catalog and page tree.
fn write_page_tree(&mut self) -> PdfResult<()> { fn write_preface(&mut self) -> PdfResult<()> {
// The document catalog // The document catalog.
self.writer self.writer.write_obj(self.offsets.catalog, &Catalog::new(self.offsets.page_tree))?;
.write_obj(self.offsets.catalog, &Catalog::new(self.offsets.page_tree))?;
// The font resources // The font resources.
let offset = self.offsets.fonts.0; let start = self.offsets.fonts.0;
let fonts = const NUM_OBJECTS_PER_FONT: usize = 5;
(0..self.fonts.len()).map(|i| Resource::Font((i + 1) as u32, offset + 5 * i as u32)); let fonts = (0 .. self.fonts.len()).map(|i| {
Resource::Font((i + 1) as u32, start + (NUM_OBJECTS_PER_FONT * i) as u32)
});
// The root page tree // The root page tree.
self.writer.write_obj( self.writer.write_obj(
self.offsets.page_tree, self.offsets.page_tree,
PageTree::new() PageTree::new()
@ -174,14 +203,15 @@ impl<'d, W: Write> PdfEngine<'d, W> {
.resources(fonts), .resources(fonts),
)?; )?;
// The page objects // The page objects (non-root nodes in the page tree).
for (id, page) in ids(self.offsets.pages).zip(&self.layout.layouts) { for (id, page) in ids(self.offsets.pages).zip(self.layouts) {
let rect = Rect::new( let rect = Rect::new(
0.0, 0.0,
0.0, 0.0,
page.dimensions.x.to_pt(), page.dimensions.x.to_pt(),
page.dimensions.y.to_pt(), page.dimensions.y.to_pt(),
); );
self.writer.write_obj( self.writer.write_obj(
id, id,
Page::new(self.offsets.page_tree) Page::new(self.offsets.page_tree)
@ -195,7 +225,7 @@ impl<'d, W: Write> PdfEngine<'d, W> {
/// Write the contents of all pages. /// Write the contents of all pages.
fn write_pages(&mut self) -> PdfResult<()> { fn write_pages(&mut self) -> PdfResult<()> {
for (id, page) in ids(self.offsets.contents).zip(&self.layout.layouts) { for (id, page) in ids(self.offsets.contents).zip(self.layouts) {
self.write_page(id, &page)?; self.write_page(id, &page)?;
} }
Ok(()) Ok(())
@ -205,36 +235,29 @@ impl<'d, W: Write> PdfEngine<'d, W> {
fn write_page(&mut self, id: u32, page: &Layout) -> PdfResult<()> { fn write_page(&mut self, id: u32, page: &Layout) -> PdfResult<()> {
let mut text = Text::new(); let mut text = Text::new();
let mut active_font = (std::usize::MAX, 0.0); let mut active_font = (std::usize::MAX, 0.0);
let mut next_pos = None;
// The last set position and font,
// these only get flushed lazily when content is written.
let mut next_pos = Some(Size2D::zero());
let mut next_font = None;
for action in &page.actions { for action in &page.actions {
match action { match action {
LayoutAction::MoveAbsolute(pos) => next_pos = Some(*pos), LayoutAction::MoveAbsolute(pos) => {
LayoutAction::SetFont(id, size) => next_font = Some((self.font_remap[id], *size)), next_pos = Some(*pos);
LayoutAction::WriteText(string) => { },
// Flush the font if it is different from the current.
if let Some((id, size)) = next_font { LayoutAction::SetFont(id, size) => {
if (id, size) != active_font { active_font = (self.font_remap[id], *size);
text.tf(id as u32 + 1, size); text.tf(active_font.0 as u32 + 1, *size);
active_font = (id, size);
next_font = None;
}
} }
// Flush the position. LayoutAction::WriteText(string) => {
if let Some(pos) = next_pos.take() { if let Some(pos) = next_pos.take() {
let x = pos.x.to_pt(); let x = pos.x.to_pt();
let y = (page.dimensions.y - pos.y - Size::pt(active_font.1)).to_pt(); let y = (page.dimensions.y - pos.y - Size::pt(active_font.1)).to_pt();
text.tm(1.0, 0.0, 0.0, 1.0, x, y); text.tm(1.0, 0.0, 0.0, 1.0, x, y);
} }
// Write the text.
text.tj(self.fonts[active_font.0].encode_text(&string)?); text.tj(self.fonts[active_font.0].encode_text(&string)?);
} },
LayoutAction::DebugBox(_, _) => {} LayoutAction::DebugBox(_, _) => {}
} }
} }
@ -253,7 +276,9 @@ impl<'d, W: Write> PdfEngine<'d, W> {
.read_table::<Name>()? .read_table::<Name>()?
.get_decoded(NameEntry::PostScriptName) .get_decoded(NameEntry::PostScriptName)
.unwrap_or_else(|| "unknown".to_string()); .unwrap_or_else(|| "unknown".to_string());
let base_font = format!("ABCDEF+{}", name); let base_font = format!("ABCDEF+{}", name);
let system_info = CIDSystemInfo::new("Adobe", "Identity", 0);
// Write the base font object referencing the CID font. // Write the base font object referencing the CID font.
self.writer.write_obj( self.writer.write_obj(
@ -266,6 +291,7 @@ impl<'d, W: Write> PdfEngine<'d, W> {
.to_unicode(id + 3), .to_unicode(id + 3),
)?; )?;
// ---------------------------------------------
// Extract information from the head table. // Extract information from the head table.
let head = font.read_table::<Header>()?; let head = font.read_table::<Header>()?;
@ -293,7 +319,6 @@ impl<'d, W: Write> PdfEngine<'d, W> {
.collect(); .collect();
// Write the CID font referencing the font descriptor. // Write the CID font referencing the font descriptor.
let system_info = CIDSystemInfo::new("Adobe", "Identity", 0);
self.writer.write_obj( self.writer.write_obj(
id + 1, id + 1,
CIDFont::new( CIDFont::new(
@ -305,12 +330,12 @@ impl<'d, W: Write> PdfEngine<'d, W> {
.widths(vec![WidthRecord::start(0, widths)]), .widths(vec![WidthRecord::start(0, widths)]),
)?; )?;
// ---------------------------------------------
// Extract information from the post table. // Extract information from the post table.
let post = font.read_table::<Post>()?; let post = font.read_table::<Post>()?;
let fixed_pitch = post.is_fixed_pitch; let fixed_pitch = post.is_fixed_pitch;
let italic_angle = post.italic_angle.to_f32(); let italic_angle = post.italic_angle.to_f32();
// Build the flag set.
let mut flags = FontFlags::empty(); let mut flags = FontFlags::empty();
flags.set(FontFlags::SERIF, name.contains("Serif")); flags.set(FontFlags::SERIF, name.contains("Serif"));
flags.set(FontFlags::FIXED_PITCH, fixed_pitch); flags.set(FontFlags::FIXED_PITCH, fixed_pitch);
@ -318,6 +343,7 @@ impl<'d, W: Write> PdfEngine<'d, W> {
flags.insert(FontFlags::SYMBOLIC); flags.insert(FontFlags::SYMBOLIC);
flags.insert(FontFlags::SMALL_CAP); flags.insert(FontFlags::SMALL_CAP);
// ---------------------------------------------
// Extract information from the OS/2 table. // Extract information from the OS/2 table.
let os2 = font.read_table::<OS2>()?; let os2 = font.read_table::<OS2>()?;
@ -341,12 +367,11 @@ impl<'d, W: Write> PdfEngine<'d, W> {
.mapping .mapping
.iter() .iter()
.map(|(&c, &cid)| (cid, c)); .map(|(&c, &cid)| (cid, c));
self.writer
.write_obj(id + 3, &CMap::new("Custom", system_info, mapping))?; self.writer.write_obj(id + 3, &CMap::new("Custom", system_info, mapping))?;
// Finally write the subsetted font program. // Finally write the subsetted font program.
self.writer self.writer.write_obj(id + 4, &FontStream::new(font.data().get_ref()))?;
.write_obj(id + 4, &FontStream::new(font.data().get_ref()))?;
id += 5; id += 5;
} }
@ -360,11 +385,11 @@ fn ids((start, end): (Ref, Ref)) -> impl Iterator<Item = Ref> {
start ..= end start ..= end
} }
/// The error type for _PDF_ creation. /// The error type for _PDF_ exporting.
pub enum PdfExportError { pub enum PdfExportError {
/// An error occurred while subsetting the font for the _PDF_. /// An error occurred while subsetting the font for the _PDF_.
Font(FontError), Font(FontError),
/// An I/O Error on the underlying writable occured. /// An I/O Error on the underlying writable.
Io(io::Error), Io(io::Error),
} }

View File

@ -84,7 +84,7 @@ impl LayoutActionList {
MoveAbsolute(pos) => self.next_pos = Some(self.origin + pos), MoveAbsolute(pos) => self.next_pos = Some(self.origin + pos),
DebugBox(pos, size) => self.actions.push(DebugBox(self.origin + pos, size)), DebugBox(pos, size) => self.actions.push(DebugBox(self.origin + pos, size)),
SetFont(index, size) if (index, size) != self.active_font => { SetFont(index, size) => {
self.next_font = Some((index, size)); self.next_font = Some((index, size));
} }
@ -92,8 +92,12 @@ impl LayoutActionList {
if let Some(target) = self.next_pos.take() { if let Some(target) = self.next_pos.take() {
self.actions.push(MoveAbsolute(target)); self.actions.push(MoveAbsolute(target));
} }
if let Some((index, size)) = self.next_font.take() { if let Some((index, size)) = self.next_font.take() {
if (index, size) != self.active_font {
self.actions.push(SetFont(index, size)); self.actions.push(SetFont(index, size));
self.active_font = (index, size);
}
} }
self.actions.push(action); self.actions.push(action);

View File

@ -76,6 +76,11 @@ impl MultiLayout {
self.layouts.push(layout); self.layouts.push(layout);
} }
/// The number of sublayouts contained in this multi-layout.
pub fn count(&self) -> usize {
    let layouts = &self.layouts;
    layouts.len()
}
/// Whether this layout contains any sublayouts. /// Whether this layout contains any sublayouts.
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
self.layouts.is_empty() self.layouts.is_empty()
@ -91,6 +96,16 @@ impl IntoIterator for MultiLayout {
} }
} }
/// Borrowing iteration: `for layout in &multi_layout` yields `&Layout`
/// items and leaves the multi-layout itself untouched.
impl<'a> IntoIterator for &'a MultiLayout {
    type Item = &'a Layout;
    type IntoIter = std::slice::Iter<'a, Layout>;

    /// Iterate over the sublayouts by reference.
    fn into_iter(self) -> std::slice::Iter<'a, Layout> {
        let layouts = &self.layouts;
        layouts.iter()
    }
}
/// The context for layouting. /// The context for layouting.
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
pub struct LayoutContext<'a, 'p> { pub struct LayoutContext<'a, 'p> {

View File

@ -1,6 +1,7 @@
use std::fs::{self, File}; use std::fs::{self, File};
use std::io::{BufWriter, Read, Write}; use std::io::{BufWriter, Read, Write};
use std::process::Command; use std::process::Command;
use std::time::Instant;
use typst::export::pdf::PdfExporter; use typst::export::pdf::PdfExporter;
use typst::layout::LayoutAction; use typst::layout::LayoutAction;
@ -50,16 +51,22 @@ fn main() {
/// Create a _PDF_ with a name from the source code. /// Create a _PDF_ with a name from the source code.
fn test(name: &str, src: &str) { fn test(name: &str, src: &str) {
println!("Testing: {}", name); print!("Testing: {}", name);
let mut typesetter = Typesetter::new(); let mut typesetter = Typesetter::new();
let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap(); let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap();
typesetter.add_font_provider(provider.clone()); typesetter.add_font_provider(provider.clone());
let start = Instant::now();
// Layout into box layout. // Layout into box layout.
let tree = typesetter.parse(src).unwrap(); let tree = typesetter.parse(src).unwrap();
let layout = typesetter.layout(&tree).unwrap(); let layout = typesetter.layout(&tree).unwrap();
let end = Instant::now();
let duration = end - start;
println!(" [{:?}]", duration);
// Write the serialized layout file. // Write the serialized layout file.
let path = format!("{}/serialized/{}.box", CACHE_DIR, name); let path = format!("{}/serialized/{}.box", CACHE_DIR, name);
let mut file = File::create(path).unwrap(); let mut file = File::create(path).unwrap();