Restructure PDF structure writing

This commit is contained in:
Martin Haug 2022-06-08 16:23:32 +02:00
parent 995a7882d2
commit 55dce19f49

View File

@ -22,6 +22,7 @@ use crate::geom::{
Stroke, Transform, Stroke, Transform,
}; };
use crate::image::{Image, ImageId, ImageStore, RasterImage}; use crate::image::{Image, ImageId, ImageStore, RasterImage};
use crate::library::prelude::EcoString;
use crate::library::text::Lang; use crate::library::text::Lang;
use crate::Context; use crate::Context;
@ -74,7 +75,8 @@ impl<'a> PdfExporter<'a> {
self.build_pages(frames); self.build_pages(frames);
self.write_fonts(); self.write_fonts();
self.write_images(); self.write_images();
self.write_structure() self.write_structure();
self.writer.finish()
} }
fn build_pages(&mut self, frames: &[Arc<Frame>]) { fn build_pages(&mut self, frames: &[Arc<Frame>]) {
@ -299,7 +301,7 @@ impl<'a> PdfExporter<'a> {
} }
} }
fn write_structure(mut self) -> Vec<u8> { fn write_structure(&mut self) {
// The root page tree. // The root page tree.
let page_tree_ref = self.alloc.bump(); let page_tree_ref = self.alloc.bump();
@ -315,10 +317,37 @@ impl<'a> PdfExporter<'a> {
let mut languages = HashMap::new(); let mut languages = HashMap::new();
let mut heading_tree: Vec<HeadingNode> = vec![]; let mut heading_tree: Vec<HeadingNode> = vec![];
for (page, page_id) in self.pages.into_iter().zip(page_refs.iter()) { for (page, page_id) in
std::mem::take(&mut self.pages).into_iter().zip(page_refs.iter())
{
self.write_page(
page,
*page_id,
&page_refs,
page_tree_ref,
&mut languages,
&mut heading_tree,
&mut page_heights,
);
}
self.write_page_tree(&page_refs, page_tree_ref);
self.write_catalog(page_tree_ref, &languages, &heading_tree);
}
fn write_page(
&mut self,
page: Page,
page_id: Ref,
page_refs: &[Ref],
page_tree_ref: Ref,
languages: &mut HashMap<Lang, usize>,
heading_tree: &mut Vec<HeadingNode>,
page_heights: &mut Vec<f32>,
) {
let content_id = self.alloc.bump(); let content_id = self.alloc.bump();
let mut page_writer = self.writer.page(*page_id); let mut page_writer = self.writer.page(page_id);
page_writer.parent(page_tree_ref); page_writer.parent(page_tree_ref);
let w = page.size.x.to_f32(); let w = page.size.x.to_f32();
@ -359,16 +388,12 @@ impl<'a> PdfExporter<'a> {
} }
for heading in page.headings.into_iter() { for heading in page.headings.into_iter() {
if let Some(last) = heading_tree.pop() { if let Some(last) = heading_tree.last_mut() {
let new = last.clone().insert(heading.clone(), *page_id, 1); if !last.insert(heading.clone(), page_id, 1) {
if let Some(new) = new { heading_tree.push(HeadingNode::leaf(heading, page_id))
heading_tree.push(new);
} else {
heading_tree.push(last);
heading_tree.push(HeadingNode::Leaf(heading, *page_id))
} }
} else { } else {
heading_tree.push(HeadingNode::Leaf(heading, *page_id)) heading_tree.push(HeadingNode::leaf(heading, page_id))
} }
} }
@ -377,8 +402,9 @@ impl<'a> PdfExporter<'a> {
.filter(Filter::FlateDecode); .filter(Filter::FlateDecode);
} }
fn write_page_tree(&mut self, page_refs: &[Ref], page_tree_ref: Ref) {
let mut pages = self.writer.pages(page_tree_ref); let mut pages = self.writer.pages(page_tree_ref);
pages.count(page_refs.len() as i32).kids(page_refs); pages.count(page_refs.len() as i32).kids(page_refs.iter().copied());
let mut resources = pages.resources(); let mut resources = pages.resources();
let mut spaces = resources.color_spaces(); let mut spaces = resources.color_spaces();
@ -403,37 +429,32 @@ impl<'a> PdfExporter<'a> {
images.finish(); images.finish();
resources.finish(); resources.finish();
pages.finish(); pages.finish();
// Build the heading tree.
let outline_root_id = self.alloc.bump();
let start_ref = self.alloc.bump();
let mut current_ref = start_ref;
let mut prev_ref = None;
for (i, node) in heading_tree.iter().enumerate() {
let next = write_outline_item(
&mut self.writer,
node,
current_ref,
prev_ref,
i == heading_tree.len() - 1,
outline_root_id,
);
prev_ref = Some(current_ref);
current_ref = next;
} }
fn write_catalog(
&mut self,
page_tree_ref: Ref,
languages: &HashMap<Lang, usize>,
heading_tree: &Vec<HeadingNode>,
) {
// Build the outline tree.
let outline_root_id = self.alloc.bump();
self.alloc = Ref::new( let outline_start_ref = self.alloc;
start_ref.get()
+ heading_tree.iter().map(HeadingNode::len).sum::<usize>() as i32, for (i, node) in heading_tree.iter().enumerate() {
self.write_outline_item(
node,
i == 0,
i + 1 == heading_tree.len(),
outline_root_id,
); );
}
if let Some(prev_ref) = prev_ref { if !heading_tree.is_empty() {
let mut outline_root = self.writer.outline(outline_root_id); let mut outline_root = self.writer.outline(outline_root_id);
outline_root.first(start_ref); outline_root.first(outline_start_ref);
outline_root.last(prev_ref); outline_root.last(Ref::new(self.alloc.get() - 1));
outline_root.count(heading_tree.len() as i32); outline_root.count(heading_tree.len() as i32);
} }
@ -442,7 +463,7 @@ impl<'a> PdfExporter<'a> {
.max_by(|(_, v1), (_, v2)| v1.cmp(v2)) .max_by(|(_, v1), (_, v2)| v1.cmp(v2))
.map(|(k, _)| k); .map(|(k, _)| k);
let dir = if lang.map(Lang::dir) == Some(Dir::RTL) { let dir = if lang.copied().map(Lang::dir) == Some(Dir::RTL) {
Direction::R2L Direction::R2L
} else { } else {
Direction::L2R Direction::L2R
@ -454,7 +475,6 @@ impl<'a> PdfExporter<'a> {
catalog.pages(page_tree_ref); catalog.pages(page_tree_ref);
catalog.viewer_preferences().direction(dir); catalog.viewer_preferences().direction(dir);
if !heading_tree.is_empty() { if !heading_tree.is_empty() {
catalog.outlines(outline_root_id); catalog.outlines(outline_root_id);
} }
@ -464,7 +484,51 @@ impl<'a> PdfExporter<'a> {
} }
catalog.finish(); catalog.finish();
self.writer.finish() }
fn write_outline_item(
&mut self,
node: &HeadingNode,
is_first: bool,
is_last: bool,
parent_ref: Ref,
) {
let id = self.alloc.bump();
let next = Ref::new(id.get() + node.len() as i32);
let mut outline = self.writer.outline_item(id);
outline.parent(parent_ref);
if !is_last {
outline.next(next);
}
if !is_first {
outline.prev(Ref::new(id.get() - 1));
}
if !node.children.is_empty() {
let current_child = Ref::new(id.get() + 1);
outline.first(current_child);
outline.last(Ref::new(next.get() - 1));
outline.count(-1 * node.children.len() as i32);
}
outline.title(TextStr(&node.heading.content));
outline.dest_direct().page(node.page).xyz(
node.heading.position.x.to_f32(),
(node.heading.position.y + Length::pt(3.0)).to_f32(),
None,
);
outline.finish();
if !node.children.is_empty() {
for (i, child) in node.children.iter().enumerate() {
self.write_outline_item(child, i == 0, i + 1 == node.children.len(), id);
}
}
} }
} }
@ -507,58 +571,43 @@ struct State {
/// A heading that can later be linked in the outline panel. /// A heading that can later be linked in the outline panel.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
struct Heading { struct Heading {
/// Text of the heading; it is written out as the outline item's title.
content: String, content: EcoString,
/// Nesting level, taken from the frame's `Role::Heading(level)`.
level: usize, level: usize,
/// Position derived from the translation of the transform that was active
/// when the heading's frame was visited; used as the outline destination.
position: Point, position: Point,
} }
/// A node of the heading tree from which the PDF outline is written: a
/// heading, the reference of the page it appears on, and its sub-headings.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
enum HeadingNode { struct HeadingNode {
Leaf(Heading, Ref), heading: Heading,
Branch(Heading, Ref, Vec<HeadingNode>), page: Ref,
children: Vec<HeadingNode>,
} }
impl HeadingNode { impl HeadingNode {
// Left column: accessor for the heading stored in either enum variant.
// Right column: constructor for a node without children.
fn heading(&self) -> &Heading { fn leaf(heading: Heading, page: Ref) -> Self {
match self { HeadingNode { heading, page, children: Vec::new() }
HeadingNode::Leaf(h, _) => h,
HeadingNode::Branch(h, _, _) => h,
}
}
// Left column only: accessor for the page reference stored in the node.
fn reference(&self) -> Ref {
match self {
HeadingNode::Leaf(_, r) => *r,
HeadingNode::Branch(_, r, _) => *r,
}
} }
// Number of nodes in this subtree: the node itself plus all descendants.
fn len(&self) -> usize { fn len(&self) -> usize {
match self { 1 + self.children.iter().map(|c| c.len()).sum::<usize>()
HeadingNode::Leaf(_, _) => 1,
HeadingNode::Branch(_, _, children) => {
1 + children.iter().map(|c| c.len()).sum::<usize>()
}
}
} }
// Try to place `other` somewhere below this node, where `level` is the
// level of this node. Reports failure (`None` / `false`) when `other` is
// not deeper than `level`, so the caller has to add it elsewhere.
fn insert(self, other: Heading, page: Ref, level: usize) -> Option<Self> { fn insert(&mut self, other: Heading, page: Ref, level: usize) -> bool {
if level >= other.level { if level >= other.level {
return None; return false;
} }
// NOTE(review): in the right-hand (new) version the guard above has already
// established `level < other.level`, so `level + 1 > other.level` can never
// be true and the recursive call below is unreachable; every deeper heading
// is pushed as a direct child. The left-hand version recursed into the last
// child whenever `level + 1 != other.level`, which suggests the intended
// comparison is `level + 1 < other.level` - confirm and fix.
let mut node = match self { if !self.children.is_empty() && level + 1 > other.level {
HeadingNode::Leaf(h, r) => (h, r, vec![]), return self.children.last_mut().unwrap().insert(other, page, level + 1);
HeadingNode::Branch(h, r, v) if level + 1 == other.level => (h, r, v),
HeadingNode::Branch(h, r, mut v) => {
let new = v.pop().unwrap().insert(other, page, level + 1).unwrap();
v.push(new);
return Some(HeadingNode::Branch(h, r, v));
} }
};
// Otherwise `other` becomes a new last child of this node.
node.2.push(HeadingNode::Leaf(other, page)); self.children.push(HeadingNode {
Some(HeadingNode::Branch(node.0, node.1, node.2)) heading: other,
page,
children: Vec::new(),
});
true
} }
} }
@ -603,11 +652,8 @@ impl<'a> PageExporter<'a> {
fn write_frame(&mut self, frame: &Frame) { fn write_frame(&mut self, frame: &Frame) {
if let Some(Role::Heading(level)) = frame.role() { if let Some(Role::Heading(level)) = frame.role() {
self.headings.push(Heading { self.headings.push(Heading {
position: Point::new( position: Point::new(self.state.transform.tx, self.state.transform.ty),
self.state.transform.tx, content: frame.inner_text(),
self.state.transform.ty + Length::pt(3.0),
),
content: frame.inner_text().to_string(),
level, level,
}) })
} }
@ -942,67 +988,6 @@ fn encode_image(img: &RasterImage) -> ImageResult<(Vec<u8>, Filter, bool)> {
}) })
} }
/// Write the outline item for `node` as object `current_ref`, followed by the
/// items of all its descendants in the ids directly after it.
///
/// `prev_ref` is the previous sibling (if any), `is_last` tells whether a next
/// sibling exists, and `parent_ref` is the owning outline object.
///
/// Returns the first id after this node's subtree, i.e. the id the next
/// sibling must use.
fn write_outline_item(
    writer: &mut PdfWriter,
    node: &HeadingNode,
    current_ref: Ref,
    prev_ref: Option<Ref>,
    is_last: bool,
    parent_ref: Ref,
) -> Ref {
    let mut outline = writer.outline_item(current_ref);

    // `node.len()` counts the node plus all descendants, so this is the id
    // right after the whole subtree.
    let next = Ref::new(current_ref.get() + node.len() as i32);
    outline.parent(parent_ref);

    if !is_last {
        outline.next(next);
    }

    if let Some(prev_ref) = prev_ref {
        outline.prev(prev_ref);
    }

    if let HeadingNode::Branch(_, _, children) = node {
        if let Some(last_child) = children.last() {
            outline.first(Ref::new(current_ref.get() + 1));

            // The last immediate child starts `last_child.len()` ids before
            // `next`; `next - 1` would be its last descendant instead.
            outline.last(Ref::new(next.get() - last_child.len() as i32));
        }

        // A negative count marks the item as closed.
        outline.count(-(children.len() as i32));
    }

    let heading = node.heading();
    outline.title(TextStr(&heading.content));
    outline.dest_direct().page(node.reference()).xyz(
        heading.position.x.to_f32(),
        heading.position.y.to_f32(),
        None,
    );

    outline.finish();

    if let HeadingNode::Branch(_, _, children) = node {
        let mut current_child = Ref::new(current_ref.get() + 1);
        let mut prev_ref = None;

        for (i, child) in children.iter().enumerate() {
            // Each call returns the id after the child's subtree. Advancing by
            // that (rather than by one) keeps siblings from reusing the ids of
            // an earlier child's descendants.
            let after_child = write_outline_item(
                writer,
                child,
                current_child,
                prev_ref,
                i + 1 == children.len(),
                current_ref,
            );
            prev_ref = Some(current_child);
            current_child = after_child;
        }
    }

    next
}
/// Encode an image's alpha channel if present. /// Encode an image's alpha channel if present.
fn encode_alpha(img: &RasterImage) -> (Vec<u8>, Filter) { fn encode_alpha(img: &RasterImage) -> (Vec<u8>, Filter) {
let pixels: Vec<_> = img.buf.pixels().map(|(_, _, Rgba([_, _, _, a]))| a).collect(); let pixels: Vec<_> = img.buf.pixels().map(|(_, _, Rgba([_, _, _, a]))| a).collect();