From 5912b1e6f12496b264359beb19d8da2952709f36 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 28 May 2025 15:08:47 +0200 Subject: [PATCH] feat: [WIP] include links in tag tree skip-checks:true --- .../src/introspection/introspector.rs | 2 - crates/typst-pdf/src/convert.rs | 22 +++-- crates/typst-pdf/src/link.rs | 25 ++++-- crates/typst-pdf/src/tags.rs | 90 +++++++++++++++---- 4 files changed, 107 insertions(+), 32 deletions(-) diff --git a/crates/typst-library/src/introspection/introspector.rs b/crates/typst-library/src/introspection/introspector.rs index a8e87124a..9751dfcb8 100644 --- a/crates/typst-library/src/introspection/introspector.rs +++ b/crates/typst-library/src/introspection/introspector.rs @@ -388,8 +388,6 @@ impl IntrospectorBuilder { ); } - dbg!(elems.len()); - self.finalize(elems) } diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 952767c6e..00d3aad83 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -31,7 +31,7 @@ use crate::metadata::build_metadata; use crate::outline::build_outline; use crate::page::PageLabelExt; use crate::shape::handle_shape; -use crate::tags::{handle_close_tag, handle_open_tag, Tags}; +use crate::tags::{handle_close_tag, handle_open_tag, Placeholder, TagNode, Tags}; use crate::text::handle_text; use crate::util::{convert_path, display_font, AbsExt, TransformExt}; use crate::PdfOptions; @@ -42,6 +42,7 @@ pub fn convert( options: &PdfOptions, ) -> SourceResult> { // HACK + // let config = Configuration::new(); let config = Configuration::new_with_validator(Validator::UA1); let settings = SerializeSettings { compress_content_streams: true, @@ -73,7 +74,7 @@ pub fn convert( document.set_outline(build_outline(&gc)); document.set_metadata(build_metadata(&gc)); - document.set_tag_tree(gc.tags.take_tree()); + document.set_tag_tree(gc.tags.build_tree()); finish(document, gc, options.standards.config) } @@ -123,7 +124,7 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul }; // TODO: somehow avoid empty marked-content sequences let id = surface.start_tagged(tag); - nodes.push(Node::Leaf(id)); + nodes.push(TagNode::Leaf(id)); } handle_frame( @@ -141,8 +142,9 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul surface.finish(); - for annotation in fc.annotations { - page.add_annotation(annotation); + for (placeholder, annotation) in fc.annotations { + let annotation_id = page.add_tagged_annotation(annotation); + gc.tags.init_placeholder(placeholder, Node::Leaf(annotation_id)); } } } @@ -197,7 +199,7 @@ impl State { /// Context needed for converting a single frame. pub(crate) struct FrameContext { states: Vec, - annotations: Vec, + annotations: Vec<(Placeholder, Annotation)>, } impl FrameContext { @@ -224,8 +226,12 @@ impl FrameContext { self.states.last_mut().unwrap() } - pub(crate) fn push_annotation(&mut self, annotation: Annotation) { - self.annotations.push(annotation); + pub(crate) fn push_annotation( + &mut self, + placeholder: Placeholder, + annotation: Annotation, + ) { + self.annotations.push((placeholder, annotation)); } } diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index 64cb8f0a2..a792778dd 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -1,11 +1,12 @@ use krilla::action::{Action, LinkAction}; -use krilla::annotation::{LinkAnnotation, Target}; +use krilla::annotation::{Annotation, LinkAnnotation, Target}; use krilla::destination::XyzDestination; use krilla::geom::Rect; use typst_library::layout::{Abs, Point, Size}; use typst_library::model::Destination; use crate::convert::{FrameContext, GlobalContext}; +use crate::tags::TagNode; use crate::util::{AbsExt, PointExt}; pub(crate) fn handle_link( @@ -44,15 +45,23 @@ pub(crate) fn handle_link( // TODO: Support quad points. + let placeholder = gc.tags.reserve_placeholder(); + gc.tags.push(TagNode::Placeholder(placeholder)); + + // TODO: add some way to add alt text to annotations. + // probably through [typst_layout::modifiers::FrameModifiers] let pos = match dest { Destination::Url(u) => { fc.push_annotation( - LinkAnnotation::new( - rect, - None, - Target::Action(Action::Link(LinkAction::new(u.to_string()))), - ) - .into(), + placeholder, + Annotation::new_link( + LinkAnnotation::new( + rect, + None, + Target::Action(Action::Link(LinkAction::new(u.to_string()))), + ), + Some(u.to_string()), + ), ); return; } @@ -62,6 +71,7 @@ pub(crate) fn handle_link( // If a named destination has been registered, it's already guaranteed to // not point to an excluded page. fc.push_annotation( + placeholder, LinkAnnotation::new( rect, None, @@ -81,6 +91,7 @@ pub(crate) fn handle_link( let page_index = pos.page.get() - 1; if let Some(index) = gc.page_index_converter.pdf_page_index(page_index) { fc.push_annotation( + placeholder, LinkAnnotation::new( rect, None, diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 70792dfe8..2c43c8495 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -1,5 +1,7 @@ +use std::cell::OnceCell; + use krilla::surface::Surface; -use krilla::tagging::{ContentTag, Node, Tag, TagGroup, TagTree}; +use krilla::tagging::{ContentTag, Identifier, Node, Tag, TagGroup, TagTree}; use typst_library::foundations::{Content, StyleChain}; use typst_library::introspection::Location; use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry}; @@ -8,24 +10,87 @@ use crate::convert::GlobalContext; pub(crate) struct Tags { /// The intermediary stack of nested tag groups. - pub(crate) stack: Vec<(Location, Tag, Vec)>, + pub(crate) stack: Vec<(Location, Tag, Vec)>, + pub(crate) placeholders: Vec>, pub(crate) in_artifact: bool, /// The output. - pub(crate) tree: TagTree, + pub(crate) tree: Vec, } +pub(crate) enum TagNode { + Group(Tag, Vec), + Leaf(Identifier), + /// Allows inserting a placeholder into the tag tree. + /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. + Placeholder(Placeholder), +} + +#[derive(Clone, Copy)] +pub(crate) struct Placeholder(usize); + impl Tags { pub(crate) fn new() -> Self { Self { stack: Vec::new(), + placeholders: Vec::new(), in_artifact: false, - tree: TagTree::new(), + + tree: Vec::new(), } } - pub(crate) fn take_tree(&mut self) -> TagTree { - std::mem::take(&mut self.tree) + pub(crate) fn reserve_placeholder(&mut self) -> Placeholder { + let idx = self.placeholders.len(); + self.placeholders.push(OnceCell::new()); + Placeholder(idx) + } + + pub(crate) fn init_placeholder(&mut self, placeholder: Placeholder, node: Node) { + self.placeholders[placeholder.0] + .set(node) + .map_err(|_| ()) + .expect("placeholder to be uninitialized"); + } + + pub(crate) fn take_placeholder(&mut self, placeholder: Placeholder) -> Node { + self.placeholders[placeholder.0] + .take() + .expect("initialized placeholder node") + } + + pub(crate) fn push(&mut self, node: TagNode) { + if let Some((_, _, nodes)) = self.stack.last_mut() { + nodes.push(node); + } else { + self.tree.push(node); + } + } + + pub(crate) fn build_tree(&mut self) -> TagTree { + let mut tree = TagTree::new(); + let nodes = std::mem::take(&mut self.tree); + // PERF: collect into vec and construct TagTree directly from tag nodes. + for node in nodes.into_iter().map(|node| self.resolve_node(node)) { + tree.push(node); + } + tree + } + + /// Resolves [`Placeholder`] nodes. + fn resolve_node(&mut self, node: TagNode) -> Node { + match node { + TagNode::Group(tag, nodes) => { + let mut group = TagGroup::new(tag); + // PERF: collect into vec and construct TagTree directly from tag nodes. + for node in nodes.into_iter().map(|node| self.resolve_node(node)) { + group.push(node); + } + Node::Group(group) + } + TagNode::Leaf(identifier) => Node::Leaf(identifier), + TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder), + } } pub(crate) fn context_supports(&self, tag: &Tag) -> bool { @@ -118,7 +183,7 @@ pub(crate) fn handle_open_tag( } let content_id = surface.start_tagged(krilla::tagging::ContentTag::Other); - gc.tags.stack.push((loc, tag, vec![Node::Leaf(content_id)])); + gc.tags.stack.push((loc, tag, vec![TagNode::Leaf(content_id)])); } pub(crate) fn handle_close_tag( @@ -129,21 +194,16 @@ pub(crate) fn handle_close_tag( let Some((_, tag, nodes)) = gc.tags.stack.pop_if(|(l, ..)| l == loc) else { return; }; - // TODO: contstruct group directly from nodes - let mut tag_group = TagGroup::new(tag); - for node in nodes { - tag_group.push(node); - } surface.end_tagged(); if let Some((_, _, parent_nodes)) = gc.tags.stack.last_mut() { - parent_nodes.push(Node::Group(tag_group)); + parent_nodes.push(TagNode::Group(tag, nodes)); // TODO: somehow avoid empty marked-content sequences let id = surface.start_tagged(ContentTag::Other); - parent_nodes.push(Node::Leaf(id)); + parent_nodes.push(TagNode::Leaf(id)); } else { - gc.tags.tree.push(Node::Group(tag_group)); + gc.tags.tree.push(TagNode::Group(tag, nodes)); } }