Move exporting into separate module 🧱

This commit is contained in:
Laurenz 2019-03-30 16:01:45 +01:00
parent 094648e86b
commit 10994ebac3
4 changed files with 177 additions and 211 deletions

BIN
hello-typeset.pdf Normal file

Binary file not shown.

3
src/export/mod.rs Normal file
View File

@ -0,0 +1,3 @@
//! Exporting into external formats.
pub mod pdf;

View File

@ -1,26 +1,48 @@
//! Writing of documents in the _PDF_ format.
//! Exporting into _PDF_ documents.
use std::collections::HashSet;
use std::error;
use std::fmt;
use std::fmt::{self, Display, Debug, Formatter};
use std::io::{self, Write};
use pdf::{PdfWriter, Ref, Rect, Version, Trailer, Content};
use pdf::doc::{Catalog, PageTree, Page, Resource, Text};
use pdf::font::{Type0Font, CMapEncoding, CIDFont, CIDFontType, CIDSystemInfo};
use pdf::font::{GlyphUnit, WidthRecord, FontDescriptor, FontFlags, FontStream, EmbeddedFontType};
use pdf::font::{Type0Font, CIDFont, CIDFontType, CIDSystemInfo, FontDescriptor, FontFlags};
use pdf::font::{GlyphUnit, CMapEncoding, WidthRecord, FontStream, EmbeddedFontType};
use crate::doc::{Document, Size, Text as DocText, TextCommand};
use crate::font::{Font, FontError};
/// Exports documents into _PDFs_.
///
/// The exporter has no fields, so one instance can be reused for any number
/// of documents. It can be created through [`PdfExporter::new`] or `Default`.
#[derive(Debug, Default)]
pub struct PdfExporter {}

impl PdfExporter {
    /// Create a new exporter.
    #[inline]
    pub fn new() -> PdfExporter {
        PdfExporter {}
    }

    /// Export a typeset document into a writer.
    ///
    /// Builds the internal engine for `document` and lets it write into `target`.
    /// Returns how many bytes were written.
    ///
    /// # Errors
    /// Propagates any error reported while constructing the engine or while
    /// writing the document.
    #[inline]
    pub fn export<W: Write>(&self, document: &Document, target: W) -> PdfResult<usize> {
        PdfEngine::new(document, target)?.write()
    }
}
/// Writes documents in the _PDF_ format.
pub struct PdfCreator<'a, W: Write> {
#[derive(Debug)]
struct PdfEngine<'d, W: Write> {
writer: PdfWriter<W>,
doc: &'a Document,
doc: &'d Document,
offsets: Offsets,
fonts: Vec<PdfFont>,
}
/// Offsets for the various groups of ids.
#[derive(Debug, Copy, Clone)]
struct Offsets {
catalog: Ref,
page_tree: Ref,
@ -29,47 +51,42 @@ struct Offsets {
fonts: (Ref, Ref),
}
impl<'a, W: Write> PdfCreator<'a, W> {
impl<'d, W: Write> PdfEngine<'d, W> {
/// Create a new _PDF_ Creator.
pub fn new(doc: &'a Document, target: W) -> PdfResult<PdfCreator<'a, W>> {
// Calculate a unique id for all object to come
fn new(doc: &'d Document, target: W) -> PdfResult<PdfEngine<'d, W>> {
// Calculate a unique id for all objects that will be written.
let catalog = 1;
let page_tree = catalog + 1;
let pages = (page_tree + 1, page_tree + doc.pages.len() as Ref);
let content_count = doc.pages.iter().flat_map(|p| p.text.iter()).count() as Ref;
let contents = (pages.1 + 1, pages.1 + content_count);
let fonts = (contents.1 + 1, contents.1 + 4 * doc.fonts.len() as Ref);
let offsets = Offsets { catalog, page_tree, pages, contents, fonts };
let offsets = Offsets {
catalog,
page_tree,
pages,
contents,
fonts,
};
// Create a subsetted PDF font for each font in the document.
let fonts = {
let mut font = 0usize;
let mut chars = vec![HashSet::new(); doc.fonts.len()];
// Find out which chars are used in this document.
let mut char_sets = vec![HashSet::new(); doc.fonts.len()];
let mut current_font: usize = 0;
for page in &doc.pages {
for text in &page.text {
// Iterate through every text object on every page and find out
// which characters they use.
for text in doc.pages.iter().flat_map(|page| page.text.iter()) {
for command in &text.commands {
match command {
TextCommand::Text(string)
=> char_sets[current_font].extend(string.chars()),
TextCommand::SetFont(id, _) => current_font = *id,
TextCommand::Text(string) => chars[font].extend(string.chars()),
TextCommand::SetFont(id, _) => font = *id,
_ => {},
}
}
}
}
// Create a subsetted pdf font.
let fonts = doc.fonts.iter().enumerate().map(|(i, font)| {
PdfFont::new(font, &char_sets[i])
}).collect::<PdfResult<Vec<_>>>()?;
doc.fonts.iter()
.enumerate()
.map(|(i, font)| PdfFont::new(font, &chars[i]))
.collect::<PdfResult<Vec<_>>>()?
};
Ok(PdfCreator {
Ok(PdfEngine {
writer: PdfWriter::new(target),
doc,
offsets,
@ -78,58 +95,40 @@ impl<'a, W: Write> PdfCreator<'a, W> {
}
/// Write the complete document.
pub fn write(&mut self) -> PdfResult<usize> {
// Header
fn write(&mut self) -> PdfResult<usize> {
// Write all the things!
self.writer.write_header(&Version::new(1, 7))?;
// Document catalog, page tree and pages
self.write_pages()?;
// Contents
self.write_contents()?;
// Fonts
self.write_fonts()?;
// Cross-reference table
self.writer.write_xref_table()?;
// Trailer
self.writer.write_trailer(&Trailer::new(self.offsets.catalog))?;
Ok(self.writer.written())
}
/// Write the document catalog, page tree and pages.
/// Write the document catalog and page tree.
fn write_pages(&mut self) -> PdfResult<()> {
// The document catalog
self.writer.write_obj(self.offsets.catalog,
&Catalog::new(self.offsets.page_tree))?;
// The document catalog.
self.writer.write_obj(self.offsets.catalog, &Catalog::new(self.offsets.page_tree))?;
// Root page tree
// The root page tree.
self.writer.write_obj(self.offsets.page_tree, PageTree::new()
.kids(self.offsets.pages.0 ..= self.offsets.pages.1)
.kids(ids(self.offsets.pages))
.resource(Resource::Font(1, self.offsets.fonts.0))
)?;
// The page objects
let mut id = self.offsets.pages.0;
for page in &self.doc.pages {
// The page objects.
for (id, page) in ids(self.offsets.pages).zip(&self.doc.pages) {
self.writer.write_obj(id, Page::new(self.offsets.page_tree)
.media_box(Rect::new(
0.0, 0.0,
page.width.to_points(), page.height.to_points())
)
.contents(self.offsets.contents.0 ..= self.offsets.contents.1)
.media_box(Rect::new(0.0, 0.0, page.width.to_points(), page.height.to_points()))
.contents(ids(self.offsets.contents))
)?;
id += 1;
}
Ok(())
}
/// Write the page contents.
/// Write the contents of all pages.
fn write_contents(&mut self) -> PdfResult<()> {
let mut id = self.offsets.contents.0;
for page in &self.doc.pages {
@ -141,40 +140,40 @@ impl<'a, W: Write> PdfCreator<'a, W> {
Ok(())
}
fn write_text(&mut self, id: u32, text: &DocText) -> PdfResult<()> {
let mut object = Text::new();
let mut current_font = 0;
/// Write one text object.
fn write_text(&mut self, id: u32, doc_text: &DocText) -> PdfResult<()> {
let mut font = 0;
let mut text = Text::new();
for command in &text.commands {
for command in &doc_text.commands {
match command {
TextCommand::Text(string) => {
let encoded = self.fonts[current_font].encode(&string);
object.tj(encoded);
},
TextCommand::Text(string) => { text.tj(self.fonts[font].encode(&string)); },
TextCommand::Move(x, y) => { text.td(x.to_points(), y.to_points()); },
TextCommand::SetFont(id, size) => {
current_font = *id;
object.tf(*id as u32 + 1, *size);
font = *id;
text.tf(*id as u32 + 1, *size);
},
TextCommand::Move(x, y) => { object.td(x.to_points(), y.to_points()); },
}
}
self.writer.write_obj(id, &object.to_stream())?;
self.writer.write_obj(id, &text.to_stream())?;
Ok(())
}
/// Write the fonts.
/// Write all the fonts.
fn write_fonts(&mut self) -> PdfResult<()> {
let mut id = self.offsets.fonts.0;
for font in &self.fonts {
// Write the base font object referencing the CID font.
self.writer.write_obj(id, &Type0Font::new(
font.name.clone(),
CMapEncoding::Predefined("Identity-H".to_owned()),
id + 1
))?;
// Write the CID font referencing the font descriptor.
self.writer.write_obj(id + 1,
CIDFont::new(
CIDFontType::Type2,
@ -184,6 +183,7 @@ impl<'a, W: Write> PdfCreator<'a, W> {
).widths(vec![WidthRecord::start(0, font.widths.clone())])
)?;
// Write the font descriptor (contains the global information about the font).
self.writer.write_obj(id + 2,
FontDescriptor::new(
font.name.clone(),
@ -198,6 +198,7 @@ impl<'a, W: Write> PdfCreator<'a, W> {
.font_file_3(id + 3)
)?;
// Finally write the subsetted font program.
self.writer.write_obj(id + 3, &FontStream::new(
&font.program,
EmbeddedFontType::OpenType,
@ -210,7 +211,13 @@ impl<'a, W: Write> PdfCreator<'a, W> {
}
}
/// Turn a `(first, last)` reference pair into an iterator over every id from
/// `first` up to and including `last`.
fn ids(pair: (Ref, Ref)) -> impl Iterator<Item = Ref> {
    let (first, last) = pair;
    first..=last
}
/// The data we need from the font.
#[derive(Debug, Clone)]
struct PdfFont {
font: Font,
widths: Vec<GlyphUnit>,
@ -226,7 +233,12 @@ struct PdfFont {
impl PdfFont {
/// Create a subsetted version of the font and calculate some information
/// needed for creating the _PDF_.
pub fn new(font: &Font, chars: &HashSet<char>) -> PdfResult<PdfFont> {
fn new(font: &Font, chars: &HashSet<char>) -> PdfResult<PdfFont> {
/// Convert a size into a _PDF_ glyph unit: the size in points is multiplied
/// by 1000 and rounded before being cast.
fn size_to_glyph_unit(size: Size) -> GlyphUnit {
    let scaled = 1000.0 * size.to_points();
    scaled.round() as GlyphUnit
}
// Subset the font using the selected characters
let subsetted = font.subsetted(
chars.iter().cloned(),
@ -264,11 +276,6 @@ impl PdfFont {
}
}
/// Convert a size into a _PDF_ glyph unit.
fn size_to_glyph_unit(size: Size) -> GlyphUnit {
(1000.0 * size.to_points()).round() as GlyphUnit
}
impl std::ops::Deref for PdfFont {
type Target = Font;
@ -278,53 +285,48 @@ impl std::ops::Deref for PdfFont {
}
/// Result type for _PDF_ creation.
type PdfResult<T> = std::result::Result<T, PdfError>;
type PdfResult<T> = std::result::Result<T, PdfExportError>;
/// The error type for _PDF_ creation.
pub enum PdfError {
pub enum PdfExportError {
/// An error occurred while subsetting the font for the _PDF_.
Font(FontError),
/// An I/O error occurred on the underlying writable.
Io(io::Error),
}
impl error::Error for PdfError {
#[inline]
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
impl std::error::Error for PdfExportError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
PdfError::Font(err) => Some(err),
PdfError::Io(err) => Some(err),
PdfExportError::Font(err) => Some(err),
PdfExportError::Io(err) => Some(err),
}
}
}
impl fmt::Display for PdfError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl Display for PdfExportError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
PdfError::Font(err) => write!(f, "font error: {}", err),
PdfError::Io(err) => write!(f, "io error: {}", err),
PdfExportError::Font(err) => write!(f, "font error: {}", err),
PdfExportError::Io(err) => write!(f, "io error: {}", err),
}
}
}
impl fmt::Debug for PdfError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self, f)
impl Debug for PdfExportError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
Display::fmt(self, f)
}
}
impl From<io::Error> for PdfError {
#[inline]
fn from(err: io::Error) -> PdfError {
PdfError::Io(err)
impl From<io::Error> for PdfExportError {
fn from(err: io::Error) -> PdfExportError {
PdfExportError::Io(err)
}
}
impl From<FontError> for PdfError {
#[inline]
fn from(err: FontError) -> PdfError {
PdfError::Font(err)
impl From<FontError> for PdfExportError {
fn from(err: FontError) -> PdfExportError {
PdfExportError::Font(err)
}
}

View File

@ -2,76 +2,61 @@
//!
//! # Compilation
//! - **Parsing:** The parsing step first transforms a plain string into an
//! [iterator of tokens](Tokens). Then the parser operates on that to
//! construct an abstract syntax tree. The structures describing the tree
//! can be found in the [`syntax`](syntax) module.
//! - **Typesetting:** The next step is to transform the syntax tree into an
//! abstract document representation. Types for these can be found in the
//! [`doc`](doc) module. This representation contains already the finished
//! layout, but is still portable.
//! - **Exporting:** The abstract document can then be exported into supported
//! formats. Currently the only supported format is _PDF_. In this step
//! the text is finally encoded into glyph indices and font data is
//! subsetted.
//!
//! # Fonts
//! To do the typesetting, the compiler needs font data. To be highly portable
//! the compiler assumes nothing about the environment. To still work with fonts,
//! the consumer of this library has to add _font providers_ to their compiler
//! instance. These can be queried for font data given a flexible font configuration
//! specifying font families and styles. A font provider is a type implementing the
//! [`FontProvider`](crate::font::FontProvider) trait. For convenience there exists
//! the [`FileFontProvider`](crate::font::FileFontProvider) to serve fonts from a
//! local folder.
//! [iterator of tokens](crate::parsing::Tokens). Then the parser operates on that to construct
//! a syntax tree. The structures describing the tree can be found in the [`syntax`] module.
//! - **Typesetting:** The next step is to transform the syntax tree into a portable representation
//! of the typesetted document. Types for these can be found in the [`doc`] module. This
//! representation already contains the finished layout.
//! - **Exporting:** The finished document can then be exported into supported formats. Submodules
//! for the supported formats are located in the [`export`] module. Currently the only supported
//! format is _PDF_.
//!
//! # Example
//! ```
//! use std::fs::File;
//! use typeset::{Compiler, font::FileFontProvider, file_font};
//! use typeset::Compiler;
//! use typeset::{font::FileFontProvider, file_font};
//! use typeset::export::pdf::PdfExporter;
//!
//! // Simple example source code.
//! let source = "Hello World from Typeset!";
//! let src = "Hello World from Typeset!";
//!
//! // Create a compiler with a font provider that provides one font.
//! // Create a compiler with a font provider that provides three fonts
//! // (the default sans-serif fonts and a fallback for the emoji).
//! let mut compiler = Compiler::new();
//! compiler.add_font_provider(FileFontProvider::new("../fonts", vec![
//! // Font family name, generic families, file, bold, italic
//! file_font!("NotoSans", [SansSerif], "NotoSans-Regular.ttf", false, false),
//! ]));
//!
//! // Open an output file, compile and write to the file.
//! # /*
//! // Compile the source code with the compiler.
//! let document = compiler.typeset(src).unwrap();
//!
//! // Export the document into a PDF file.
//! let mut file = File::create("hello-typeset.pdf").unwrap();
//! # */
//! # let mut file = File::create("../target/typeset-hello.pdf").unwrap();
//! compiler.write_pdf(source, &mut file).unwrap();
//! let exporter = PdfExporter::new();
//! exporter.export(&document, &mut file).unwrap();
//! ```
pub mod syntax;
pub mod doc;
pub mod font;
mod parsing;
mod engine;
mod pdf;
mod utility;
pub use crate::parsing::{Tokens, ParseError};
pub use crate::engine::TypesetError;
pub use crate::pdf::PdfError;
use std::error;
use std::fmt;
use std::io::Write;
use std::fmt::{self, Display, Debug, Formatter};
use crate::syntax::SyntaxTree;
use crate::parsing::Parser;
use crate::parsing::{Tokens, Parser, ParseError};
use crate::doc::{Document, Style};
use crate::font::FontProvider;
use crate::engine::Engine;
use crate::pdf::PdfCreator;
use crate::engine::{Engine, TypesetError};
pub mod doc;
pub mod engine;
pub mod export;
pub mod font;
pub mod parsing;
pub mod syntax;
mod utility;
/// Compiles source code into typesetted documents allowing to
/// retrieve results at various stages.
/// Transforms source code into typesetted documents.
///
/// Holds the compilation context, which can be configured through various methods.
pub struct Compiler<'p> {
context: Context<'p>,
}
@ -83,8 +68,9 @@ struct Context<'p> {
font_providers: Vec<Box<dyn FontProvider + 'p>>,
}
/// Functions to set up the compilation context.
impl<'p> Compiler<'p> {
/// Create a new compiler from a document.
/// Create a new compiler.
#[inline]
pub fn new() -> Compiler<'p> {
Compiler {
@ -95,44 +81,33 @@ impl<'p> Compiler<'p> {
}
}
/// Set the default style for typesetting.
/// Set the default style for the document.
#[inline]
pub fn style(&mut self, style: Style) -> &mut Self {
pub fn set_style(&mut self, style: Style) {
self.context.style = style;
self
}
/// Add a font provider.
/// Add a font provider to the context of this compiler.
#[inline]
pub fn add_font_provider<P: 'p>(&mut self, provider: P) -> &mut Self
where P: FontProvider {
pub fn add_font_provider<P: 'p>(&mut self, provider: P) where P: FontProvider {
self.context.font_providers.push(Box::new(provider));
self
}
}
/// Return an iterator over the tokens of the document.
/// Compilation functions.
impl<'p> Compiler<'p> {
/// Parse source code into a syntax tree.
#[inline]
pub fn tokenize<'s>(&self, source: &'s str) -> Tokens<'s> {
Tokens::new(source)
pub fn parse<'s>(&self, src: &'s str) -> Result<SyntaxTree<'s>, ParseError> {
Parser::new(Tokens::new(src)).parse()
}
/// Return the abstract syntax tree representation of the document.
/// Compile a portable typesetted document from source code.
#[inline]
pub fn parse<'s>(&self, source: &'s str) -> Result<SyntaxTree<'s>, ParseError> {
Parser::new(self.tokenize(source)).parse()
}
/// Return the abstract typesetted representation of the document.
#[inline]
pub fn typeset(&self, source: &str) -> Result<Document, Error> {
let tree = self.parse(source)?;
Engine::new(&tree, &self.context).typeset().map_err(Into::into)
}
/// Write the document as a _PDF_, returning how many bytes were written.
pub fn write_pdf<W: Write>(&self, source: &str, target: &mut W) -> Result<usize, Error> {
let document = self.typeset(source)?;
PdfCreator::new(&document, target)?.write().map_err(Into::into)
pub fn typeset(&self, src: &str) -> Result<Document, Error> {
let tree = self.parse(src)?;
let engine = Engine::new(&tree, &self.context);
engine.typeset().map_err(Into::into)
}
}
@ -143,72 +118,57 @@ pub enum Error {
Parse(ParseError),
/// An error that occurred while typesetting into an abstract document.
Typeset(TypesetError),
/// An error that occured while writing the document as a _PDF_.
Pdf(PdfError),
}
impl error::Error for Error {
#[inline]
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::Parse(err) => Some(err),
Error::Typeset(err) => Some(err),
Error::Pdf(err) => Some(err),
}
}
}
impl fmt::Display for Error {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl Display for Error {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
Error::Parse(err) => write!(f, "parse error: {}", err),
Error::Typeset(err) => write!(f, "typeset error: {}", err),
Error::Pdf(err) => write!(f, "pdf error: {}", err),
}
}
}
impl fmt::Debug for Error {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self, f)
impl Debug for Error {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
Display::fmt(self, f)
}
}
impl From<ParseError> for Error {
#[inline]
fn from(err: ParseError) -> Error {
Error::Parse(err)
}
}
impl From<TypesetError> for Error {
#[inline]
fn from(err: TypesetError) -> Error {
Error::Typeset(err)
}
}
impl From<PdfError> for Error {
#[inline]
fn from(err: PdfError) -> Error {
Error::Pdf(err)
}
}
#[cfg(test)]
mod test {
use std::fs::File;
use crate::Compiler;
use crate::export::pdf::PdfExporter;
use crate::font::FileFontProvider;
/// Create a pdf with a name from the source code.
fn test(name: &str, src: &str) {
// Create compiler
let mut compiler = Compiler::new();
let provider = FileFontProvider::new("../fonts", vec![
compiler.add_font_provider(FileFontProvider::new("../fonts", vec![
// Font family name, generic families, file, bold, italic
file_font!("NotoSans", [SansSerif], "NotoSans-Regular.ttf", false, false),
file_font!("NotoSans", [SansSerif], "NotoSans-Bold.ttf", true, false),
@ -217,15 +177,16 @@ mod test {
file_font!("NotoSansMath", [SansSerif], "NotoSansMath-Regular.ttf", false, false),
file_font!("NotoEmoji", [SansSerif, Serif, Monospace],
"NotoEmoji-Regular.ttf", false, false),
]);
compiler.add_font_provider(provider);
]));
// Open output file;
// Compile into document
let document = compiler.typeset(src).unwrap();
// Write to file
let path = format!("../target/typeset-pdf-{}.pdf", name);
let mut file = File::create(path).unwrap();
// Compile and output
compiler.write_pdf(src, &mut file).unwrap();
let file = File::create(path).unwrap();
let exporter = PdfExporter::new();
exporter.export(&document, file).unwrap();
}
#[test]