Restructure PDF structure writing

This commit is contained in:
Martin Haug 2022-06-08 16:23:32 +02:00
parent 995a7882d2
commit 55dce19f49

View File

@ -22,6 +22,7 @@ use crate::geom::{
Stroke, Transform,
};
use crate::image::{Image, ImageId, ImageStore, RasterImage};
use crate::library::prelude::EcoString;
use crate::library::text::Lang;
use crate::Context;
@ -74,7 +75,8 @@ impl<'a> PdfExporter<'a> {
self.build_pages(frames);
self.write_fonts();
self.write_images();
self.write_structure()
self.write_structure();
self.writer.finish()
}
fn build_pages(&mut self, frames: &[Arc<Frame>]) {
@ -299,7 +301,7 @@ impl<'a> PdfExporter<'a> {
}
}
fn write_structure(mut self) -> Vec<u8> {
fn write_structure(&mut self) {
// The root page tree.
let page_tree_ref = self.alloc.bump();
@ -315,70 +317,94 @@ impl<'a> PdfExporter<'a> {
let mut languages = HashMap::new();
let mut heading_tree: Vec<HeadingNode> = vec![];
for (page, page_id) in self.pages.into_iter().zip(page_refs.iter()) {
let content_id = self.alloc.bump();
let mut page_writer = self.writer.page(*page_id);
page_writer.parent(page_tree_ref);
let w = page.size.x.to_f32();
let h = page.size.y.to_f32();
page_writer.media_box(Rect::new(0.0, 0.0, w, h));
page_writer.contents(content_id);
let mut annotations = page_writer.annotations();
for (dest, rect) in page.links {
let mut link = annotations.push();
link.subtype(AnnotationType::Link).rect(rect);
match dest {
Destination::Url(uri) => {
link.action()
.action_type(ActionType::Uri)
.uri(Str(uri.as_str().as_bytes()));
}
Destination::Internal(loc) => {
let index = loc.page - 1;
let height = page_heights[index];
link.action()
.action_type(ActionType::GoTo)
.destination_direct()
.page(page_refs[index])
.xyz(loc.pos.x.to_f32(), height - loc.pos.y.to_f32(), None);
}
}
}
annotations.finish();
page_writer.finish();
for (lang, count) in page.languages {
languages
.entry(lang)
.and_modify(|x| *x += count)
.or_insert_with(|| count);
}
for heading in page.headings.into_iter() {
if let Some(last) = heading_tree.pop() {
let new = last.clone().insert(heading.clone(), *page_id, 1);
if let Some(new) = new {
heading_tree.push(new);
} else {
heading_tree.push(last);
heading_tree.push(HeadingNode::Leaf(heading, *page_id))
}
} else {
heading_tree.push(HeadingNode::Leaf(heading, *page_id))
}
}
self.writer
.stream(content_id, &deflate(&page.content.finish()))
.filter(Filter::FlateDecode);
for (page, page_id) in
std::mem::take(&mut self.pages).into_iter().zip(page_refs.iter())
{
self.write_page(
page,
*page_id,
&page_refs,
page_tree_ref,
&mut languages,
&mut heading_tree,
&mut page_heights,
);
}
self.write_page_tree(&page_refs, page_tree_ref);
self.write_catalog(page_tree_ref, &languages, &heading_tree);
}
fn write_page(
&mut self,
page: Page,
page_id: Ref,
page_refs: &[Ref],
page_tree_ref: Ref,
languages: &mut HashMap<Lang, usize>,
heading_tree: &mut Vec<HeadingNode>,
page_heights: &mut Vec<f32>,
) {
let content_id = self.alloc.bump();
let mut page_writer = self.writer.page(page_id);
page_writer.parent(page_tree_ref);
let w = page.size.x.to_f32();
let h = page.size.y.to_f32();
page_writer.media_box(Rect::new(0.0, 0.0, w, h));
page_writer.contents(content_id);
let mut annotations = page_writer.annotations();
for (dest, rect) in page.links {
let mut link = annotations.push();
link.subtype(AnnotationType::Link).rect(rect);
match dest {
Destination::Url(uri) => {
link.action()
.action_type(ActionType::Uri)
.uri(Str(uri.as_str().as_bytes()));
}
Destination::Internal(loc) => {
let index = loc.page - 1;
let height = page_heights[index];
link.action()
.action_type(ActionType::GoTo)
.destination_direct()
.page(page_refs[index])
.xyz(loc.pos.x.to_f32(), height - loc.pos.y.to_f32(), None);
}
}
}
annotations.finish();
page_writer.finish();
for (lang, count) in page.languages {
languages
.entry(lang)
.and_modify(|x| *x += count)
.or_insert_with(|| count);
}
for heading in page.headings.into_iter() {
if let Some(last) = heading_tree.last_mut() {
if !last.insert(heading.clone(), page_id, 1) {
heading_tree.push(HeadingNode::leaf(heading, page_id))
}
} else {
heading_tree.push(HeadingNode::leaf(heading, page_id))
}
}
self.writer
.stream(content_id, &deflate(&page.content.finish()))
.filter(Filter::FlateDecode);
}
fn write_page_tree(&mut self, page_refs: &[Ref], page_tree_ref: Ref) {
let mut pages = self.writer.pages(page_tree_ref);
pages.count(page_refs.len() as i32).kids(page_refs);
pages.count(page_refs.len() as i32).kids(page_refs.iter().copied());
let mut resources = pages.resources();
let mut spaces = resources.color_spaces();
@ -403,37 +429,32 @@ impl<'a> PdfExporter<'a> {
images.finish();
resources.finish();
pages.finish();
}
// Build the heading tree.
fn write_catalog(
&mut self,
page_tree_ref: Ref,
languages: &HashMap<Lang, usize>,
heading_tree: &Vec<HeadingNode>,
) {
// Build the outline tree.
let outline_root_id = self.alloc.bump();
let start_ref = self.alloc.bump();
let mut current_ref = start_ref;
let mut prev_ref = None;
let outline_start_ref = self.alloc;
for (i, node) in heading_tree.iter().enumerate() {
let next = write_outline_item(
&mut self.writer,
self.write_outline_item(
node,
current_ref,
prev_ref,
i == heading_tree.len() - 1,
i == 0,
i + 1 == heading_tree.len(),
outline_root_id,
);
prev_ref = Some(current_ref);
current_ref = next;
}
self.alloc = Ref::new(
start_ref.get()
+ heading_tree.iter().map(HeadingNode::len).sum::<usize>() as i32,
);
if let Some(prev_ref) = prev_ref {
if !heading_tree.is_empty() {
let mut outline_root = self.writer.outline(outline_root_id);
outline_root.first(start_ref);
outline_root.last(prev_ref);
outline_root.first(outline_start_ref);
outline_root.last(Ref::new(self.alloc.get() - 1));
outline_root.count(heading_tree.len() as i32);
}
@ -442,7 +463,7 @@ impl<'a> PdfExporter<'a> {
.max_by(|(_, v1), (_, v2)| v1.cmp(v2))
.map(|(k, _)| k);
let dir = if lang.map(Lang::dir) == Some(Dir::RTL) {
let dir = if lang.copied().map(Lang::dir) == Some(Dir::RTL) {
Direction::R2L
} else {
Direction::L2R
@ -454,7 +475,6 @@ impl<'a> PdfExporter<'a> {
catalog.pages(page_tree_ref);
catalog.viewer_preferences().direction(dir);
if !heading_tree.is_empty() {
catalog.outlines(outline_root_id);
}
@ -464,7 +484,51 @@ impl<'a> PdfExporter<'a> {
}
catalog.finish();
self.writer.finish()
}
fn write_outline_item(
&mut self,
node: &HeadingNode,
is_first: bool,
is_last: bool,
parent_ref: Ref,
) {
let id = self.alloc.bump();
let next = Ref::new(id.get() + node.len() as i32);
let mut outline = self.writer.outline_item(id);
outline.parent(parent_ref);
if !is_last {
outline.next(next);
}
if !is_first {
outline.prev(Ref::new(id.get() - 1));
}
if !node.children.is_empty() {
let current_child = Ref::new(id.get() + 1);
outline.first(current_child);
outline.last(Ref::new(next.get() - 1));
outline.count(-1 * node.children.len() as i32);
}
outline.title(TextStr(&node.heading.content));
outline.dest_direct().page(node.page).xyz(
node.heading.position.x.to_f32(),
(node.heading.position.y + Length::pt(3.0)).to_f32(),
None,
);
outline.finish();
if !node.children.is_empty() {
for (i, child) in node.children.iter().enumerate() {
self.write_outline_item(child, i == 0, i + 1 == node.children.len(), id);
}
}
}
}
@ -507,58 +571,43 @@ struct State {
/// A heading that can later be linked in the outline panel.
#[derive(Debug, Clone)]
struct Heading {
content: String,
content: EcoString,
level: usize,
position: Point,
}
#[derive(Debug, Clone)]
enum HeadingNode {
Leaf(Heading, Ref),
Branch(Heading, Ref, Vec<HeadingNode>),
struct HeadingNode {
heading: Heading,
page: Ref,
children: Vec<HeadingNode>,
}
impl HeadingNode {
fn heading(&self) -> &Heading {
match self {
HeadingNode::Leaf(h, _) => h,
HeadingNode::Branch(h, _, _) => h,
}
}
fn reference(&self) -> Ref {
match self {
HeadingNode::Leaf(_, r) => *r,
HeadingNode::Branch(_, r, _) => *r,
}
fn leaf(heading: Heading, page: Ref) -> Self {
HeadingNode { heading, page, children: Vec::new() }
}
fn len(&self) -> usize {
match self {
HeadingNode::Leaf(_, _) => 1,
HeadingNode::Branch(_, _, children) => {
1 + children.iter().map(|c| c.len()).sum::<usize>()
}
}
1 + self.children.iter().map(|c| c.len()).sum::<usize>()
}
fn insert(self, other: Heading, page: Ref, level: usize) -> Option<Self> {
fn insert(&mut self, other: Heading, page: Ref, level: usize) -> bool {
if level >= other.level {
return None;
return false;
}
let mut node = match self {
HeadingNode::Leaf(h, r) => (h, r, vec![]),
HeadingNode::Branch(h, r, v) if level + 1 == other.level => (h, r, v),
HeadingNode::Branch(h, r, mut v) => {
let new = v.pop().unwrap().insert(other, page, level + 1).unwrap();
v.push(new);
return Some(HeadingNode::Branch(h, r, v));
}
};
if !self.children.is_empty() && level + 1 > other.level {
return self.children.last_mut().unwrap().insert(other, page, level + 1);
}
node.2.push(HeadingNode::Leaf(other, page));
Some(HeadingNode::Branch(node.0, node.1, node.2))
self.children.push(HeadingNode {
heading: other,
page,
children: Vec::new(),
});
true
}
}
@ -603,11 +652,8 @@ impl<'a> PageExporter<'a> {
fn write_frame(&mut self, frame: &Frame) {
if let Some(Role::Heading(level)) = frame.role() {
self.headings.push(Heading {
position: Point::new(
self.state.transform.tx,
self.state.transform.ty + Length::pt(3.0),
),
content: frame.inner_text().to_string(),
position: Point::new(self.state.transform.tx, self.state.transform.ty),
content: frame.inner_text(),
level,
})
}
@ -942,67 +988,6 @@ fn encode_image(img: &RasterImage) -> ImageResult<(Vec<u8>, Filter, bool)> {
})
}
fn write_outline_item(
writer: &mut PdfWriter,
node: &HeadingNode,
current_ref: Ref,
prev_ref: Option<Ref>,
is_last: bool,
parent_ref: Ref,
) -> Ref {
let mut outline = writer.outline_item(current_ref);
let next = Ref::new(current_ref.get() + node.len() as i32);
outline.parent(parent_ref);
if !is_last {
outline.next(next);
}
if let Some(prev_ref) = prev_ref {
outline.prev(prev_ref);
}
if let HeadingNode::Branch(_, _, children) = node {
let current_child = Ref::new(current_ref.get() + 1);
if children.len() > 0 {
outline.first(current_child);
outline.last(Ref::new(next.get() - 1));
}
outline.count(-1 * children.len() as i32);
}
let heading = node.heading();
outline.title(TextStr(&heading.content));
outline.dest_direct().page(node.reference()).xyz(
heading.position.x.to_f32(),
heading.position.y.to_f32(),
None,
);
outline.finish();
if let HeadingNode::Branch(_, _, children) = node {
let mut current_child = Ref::new(current_ref.get() + 1);
let mut prev_ref = None;
for (i, child) in children.iter().enumerate() {
write_outline_item(
writer,
child,
current_child,
prev_ref,
i == children.len() - 1,
current_ref,
);
prev_ref = Some(current_child);
current_child = Ref::new(current_child.get() + 1);
}
}
next
}
/// Encode an image's alpha channel if present.
fn encode_alpha(img: &RasterImage) -> (Vec<u8>, Filter) {
let pixels: Vec<_> = img.buf.pixels().map(|(_, _, Rgba([_, _, _, a]))| a).collect();