fix: ensure link annotation object references are direct children of link tags

This commit is contained in:
Tobias Schmitz 2025-07-15 17:23:11 +02:00
parent 728d37efa0
commit cd5d91a82d
No known key found for this signature in database
2 changed files with 87 additions and 55 deletions

View File

@ -49,7 +49,7 @@ pub(crate) fn handle_link(
} }
}; };
let Some((link_id, link)) = gc.tags.find_parent_link() else { let Some((link_id, link, link_nodes)) = gc.tags.stack.find_parent_link() else {
unreachable!("expected a link parent") unreachable!("expected a link parent")
}; };
let alt = link.alt.as_ref().map(EcoString::to_string); let alt = link.alt.as_ref().map(EcoString::to_string);
@ -68,8 +68,8 @@ pub(crate) fn handle_link(
annotation.quad_points.extend_from_slice(&quadpoints); annotation.quad_points.extend_from_slice(&quadpoints);
} }
_ => { _ => {
let placeholder = gc.tags.reserve_placeholder(); let placeholder = gc.tags.placeholders.reserve();
gc.tags.push(TagNode::Placeholder(placeholder)); link_nodes.push(TagNode::Placeholder(placeholder));
fc.push_link_annotation(LinkAnnotation { fc.push_link_annotation(LinkAnnotation {
id: link_id, id: link_id,
placeholder, placeholder,

View File

@ -1,5 +1,6 @@
use std::cell::OnceCell; use std::cell::OnceCell;
use std::num::NonZeroU32; use std::num::NonZeroU32;
use std::ops::{Deref, DerefMut};
use ecow::EcoString; use ecow::EcoString;
use krilla::configure::Validator; use krilla::configure::Validator;
@ -97,7 +98,7 @@ pub(crate) fn handle_start(
} else if let Some(image) = elem.to_packed::<ImageElem>() { } else if let Some(image) = elem.to_packed::<ImageElem>() {
let alt = image.alt.get_as_ref().map(|s| s.to_string()); let alt = image.alt.get_as_ref().map(|s| s.to_string());
let figure_tag = (gc.tags.parent()) let figure_tag = (gc.tags.stack.parent())
.and_then(StackEntryKind::as_standard_mut) .and_then(StackEntryKind::as_standard_mut)
.filter(|tag| tag.kind == TagKind::Figure); .filter(|tag| tag.kind == TagKind::Figure);
if let Some(figure_tag) = figure_tag { if let Some(figure_tag) = figure_tag {
@ -116,7 +117,7 @@ pub(crate) fn handle_start(
push_stack(gc, loc, StackEntryKind::Table(ctx))?; push_stack(gc, loc, StackEntryKind::Table(ctx))?;
return Ok(()); return Ok(());
} else if let Some(cell) = elem.to_packed::<TableCell>() { } else if let Some(cell) = elem.to_packed::<TableCell>() {
let table_ctx = gc.tags.parent_table(); let table_ctx = gc.tags.stack.parent_table();
// Only repeated table headers and footer cells are layed out multiple // Only repeated table headers and footer cells are layed out multiple
// times. Mark duplicate headers as artifacts, since they have no // times. Mark duplicate headers as artifacts, since they have no
@ -168,11 +169,8 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc
StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes), StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes),
StackEntryKind::Outline(ctx) => ctx.build_outline(entry.nodes), StackEntryKind::Outline(ctx) => ctx.build_outline(entry.nodes),
StackEntryKind::OutlineEntry(outline_entry) => { StackEntryKind::OutlineEntry(outline_entry) => {
let parent = gc.tags.stack.last_mut().and_then(|parent| { let Some((outline_ctx, outline_nodes)) = gc.tags.stack.parent_outline()
let ctx = parent.kind.as_outline_mut()?; else {
Some((&mut parent.nodes, ctx))
});
let Some((parent_nodes, outline_ctx)) = parent else {
// PDF/UA compliance of the structure hierarchy is checked // PDF/UA compliance of the structure hierarchy is checked
// elsewhere. While this doesn't make a lot of sense, just // elsewhere. While this doesn't make a lot of sense, just
// avoid crashing here. // avoid crashing here.
@ -181,12 +179,12 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc
return; return;
}; };
outline_ctx.insert(parent_nodes, outline_entry, entry.nodes); outline_ctx.insert(outline_nodes, outline_entry, entry.nodes);
return; return;
} }
StackEntryKind::Table(ctx) => ctx.build_table(entry.nodes), StackEntryKind::Table(ctx) => ctx.build_table(entry.nodes),
StackEntryKind::TableCell(cell) => { StackEntryKind::TableCell(cell) => {
let Some(table_ctx) = gc.tags.parent_table() else { let Some(table_ctx) = gc.tags.stack.parent_table() else {
// PDF/UA compliance of the structure hierarchy is checked // PDF/UA compliance of the structure hierarchy is checked
// elsewhere. While this doesn't make a lot of sense, just // elsewhere. While this doesn't make a lot of sense, just
// avoid crashing here. // avoid crashing here.
@ -200,12 +198,12 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc
} }
StackEntryKind::List(list) => list.build_list(entry.nodes), StackEntryKind::List(list) => list.build_list(entry.nodes),
StackEntryKind::ListItemLabel => { StackEntryKind::ListItemLabel => {
let list_ctx = gc.tags.parent_list().expect("parent list"); let list_ctx = gc.tags.stack.parent_list().expect("parent list");
list_ctx.push_label(entry.nodes); list_ctx.push_label(entry.nodes);
return; return;
} }
StackEntryKind::ListItemBody => { StackEntryKind::ListItemBody => {
let list_ctx = gc.tags.parent_list().expect("parent list"); let list_ctx = gc.tags.stack.parent_list().expect("parent list");
list_ctx.push_body(entry.nodes); list_ctx.push_body(entry.nodes);
return; return;
} }
@ -287,15 +285,15 @@ pub(crate) fn add_annotations(
alt, alt,
); );
let annot_id = page.add_tagged_annotation(annot); let annot_id = page.add_tagged_annotation(annot);
gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id)); gc.tags.placeholders.init(placeholder, Node::Leaf(annot_id));
} }
} }
pub(crate) struct Tags { pub(crate) struct Tags {
/// The intermediary stack of nested tag groups. /// The intermediary stack of nested tag groups.
pub(crate) stack: Vec<StackEntry>, pub(crate) stack: TagStack,
/// A list of placeholders corresponding to a [`TagNode::Placeholder`]. /// A list of placeholders corresponding to a [`TagNode::Placeholder`].
pub(crate) placeholders: Vec<OnceCell<Node>>, pub(crate) placeholders: Placeholders,
pub(crate) in_artifact: Option<(Location, ArtifactKind)>, pub(crate) in_artifact: Option<(Location, ArtifactKind)>,
/// Used to group multiple link annotations using quad points. /// Used to group multiple link annotations using quad points.
pub(crate) link_id: LinkId, pub(crate) link_id: LinkId,
@ -310,8 +308,8 @@ pub(crate) struct Tags {
impl Tags { impl Tags {
pub(crate) fn new() -> Self { pub(crate) fn new() -> Self {
Self { Self {
stack: Vec::new(), stack: TagStack(Vec::new()),
placeholders: Vec::new(), placeholders: Placeholders(Vec::new()),
in_artifact: None, in_artifact: None,
tree: Vec::new(), tree: Vec::new(),
@ -320,41 +318,6 @@ impl Tags {
} }
} }
pub(crate) fn reserve_placeholder(&mut self) -> Placeholder {
let idx = self.placeholders.len();
self.placeholders.push(OnceCell::new());
Placeholder(idx)
}
pub(crate) fn init_placeholder(&mut self, placeholder: Placeholder, node: Node) {
self.placeholders[placeholder.0]
.set(node)
.map_err(|_| ())
.expect("placeholder to be uninitialized");
}
pub(crate) fn take_placeholder(&mut self, placeholder: Placeholder) -> Node {
self.placeholders[placeholder.0]
.take()
.expect("initialized placeholder node")
}
pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> {
self.stack.last_mut().map(|e| &mut e.kind)
}
pub(crate) fn parent_table(&mut self) -> Option<&mut TableCtx> {
self.parent()?.as_table_mut()
}
pub(crate) fn parent_list(&mut self) -> Option<&mut ListCtx> {
self.parent()?.as_list_mut()
}
pub(crate) fn find_parent_link(&self) -> Option<(LinkId, &Packed<LinkMarker>)> {
self.stack.iter().rev().find_map(|entry| entry.kind.as_link())
}
pub(crate) fn push(&mut self, node: TagNode) { pub(crate) fn push(&mut self, node: TagNode) {
if let Some(entry) = self.stack.last_mut() { if let Some(entry) = self.stack.last_mut() {
entry.nodes.push(node); entry.nodes.push(node);
@ -382,7 +345,7 @@ impl Tags {
Node::Group(TagGroup::with_children(tag, children)) Node::Group(TagGroup::with_children(tag, children))
} }
TagNode::Leaf(identifier) => Node::Leaf(identifier), TagNode::Leaf(identifier) => Node::Leaf(identifier),
TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder), TagNode::Placeholder(placeholder) => self.placeholders.take(placeholder),
} }
} }
@ -402,6 +365,75 @@ impl Tags {
} }
} }
pub(crate) struct TagStack(Vec<StackEntry>);
impl Deref for TagStack {
type Target = Vec<StackEntry>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for TagStack {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl TagStack {
pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> {
self.0.last_mut().map(|e| &mut e.kind)
}
pub(crate) fn parent_table(&mut self) -> Option<&mut TableCtx> {
self.parent()?.as_table_mut()
}
pub(crate) fn parent_list(&mut self) -> Option<&mut ListCtx> {
self.parent()?.as_list_mut()
}
pub(crate) fn parent_outline(
&mut self,
) -> Option<(&mut OutlineCtx, &mut Vec<TagNode>)> {
self.0.last_mut().and_then(|e| {
let ctx = e.kind.as_outline_mut()?;
Some((ctx, &mut e.nodes))
})
}
pub(crate) fn find_parent_link(
&mut self,
) -> Option<(LinkId, &LinkMarker, &mut Vec<TagNode>)> {
self.0.iter_mut().rev().find_map(|e| {
let (link_id, link) = e.kind.as_link()?;
Some((link_id, link.as_ref(), &mut e.nodes))
})
}
}
pub(crate) struct Placeholders(Vec<OnceCell<Node>>);
impl Placeholders {
pub(crate) fn reserve(&mut self) -> Placeholder {
let idx = self.0.len();
self.0.push(OnceCell::new());
Placeholder(idx)
}
pub(crate) fn init(&mut self, placeholder: Placeholder, node: Node) {
self.0[placeholder.0]
.set(node)
.map_err(|_| ())
.expect("placeholder to be uninitialized");
}
pub(crate) fn take(&mut self, placeholder: Placeholder) -> Node {
self.0[placeholder.0].take().expect("initialized placeholder node")
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub(crate) struct TableId(u32); pub(crate) struct TableId(u32);