From e6341c0fe495b03603a2139b3826ff6f21470f0b Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 25 Jun 2025 14:51:25 +0200 Subject: [PATCH] fix: avoid empty marked-content sequences --- crates/typst-pdf/src/convert.rs | 20 ++--- crates/typst-pdf/src/image.rs | 3 + crates/typst-pdf/src/shape.rs | 5 +- crates/typst-pdf/src/tags.rs | 144 +++++++++++++------------------- crates/typst-pdf/src/text.rs | 5 +- 5 files changed, 77 insertions(+), 100 deletions(-) diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 6f62cff58..372fd2f3a 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -13,7 +13,7 @@ use krilla::{Document, SerializeSettings}; use krilla_svg::render_svg_glyph; use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult}; use typst_library::foundations::{NativeElement, Repr}; -use typst_library::introspection::{self, Location}; +use typst_library::introspection::{Location, Tag}; use typst_library::layout::{ Abs, Frame, FrameItem, GroupItem, PagedDocument, Size, Transform, }; @@ -110,8 +110,6 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul let mut surface = page.surface(); let mut fc = FrameContext::new(typst_page.frame.size()); - tags::restart_open(gc, &mut surface); - handle_frame( &mut fc, &typst_page.frame, @@ -120,8 +118,6 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul gc, )?; - tags::end_open(gc, &mut surface); - surface.finish(); tags::add_annotations(gc, &mut page, fc.link_annotations); @@ -286,12 +282,8 @@ pub(crate) fn handle_frame( handle_image(gc, fc, image, *size, surface, *span)? } FrameItem::Link(link, size) => handle_link(fc, gc, link, *size), - FrameItem::Tag(introspection::Tag::Start(elem)) => { - tags::handle_start(gc, surface, elem) - } - FrameItem::Tag(introspection::Tag::End(loc, _)) => { - tags::handle_end(gc, surface, *loc); - } + FrameItem::Tag(Tag::Start(elem)) => tags::handle_start(gc, elem), + FrameItem::Tag(Tag::End(loc, _)) => tags::handle_end(gc, *loc), } fc.pop(); @@ -306,7 +298,7 @@ pub(crate) fn handle_group( fc: &mut FrameContext, group: &GroupItem, surface: &mut Surface, - context: &mut GlobalContext, + gc: &mut GlobalContext, ) -> SourceResult<()> { fc.push(); fc.state_mut().pre_concat(group.transform); @@ -322,10 +314,12 @@ pub(crate) fn handle_group( .and_then(|p| p.transform(fc.state().transform.to_krilla())); if let Some(clip_path) = &clip_path { + let mut handle = tags::start_marked(gc, surface); + let surface = handle.surface(); surface.push_clip_path(clip_path, &krilla::paint::FillRule::NonZero); } - handle_frame(fc, &group.frame, None, surface, context)?; + handle_frame(fc, &group.frame, None, surface, gc)?; if clip_path.is_some() { surface.pop(); diff --git a/crates/typst-pdf/src/image.rs b/crates/typst-pdf/src/image.rs index 0809ae046..41d0aa3d8 100644 --- a/crates/typst-pdf/src/image.rs +++ b/crates/typst-pdf/src/image.rs @@ -14,6 +14,7 @@ use typst_library::visualize::{ use typst_syntax::Span; use crate::convert::{FrameContext, GlobalContext}; +use crate::tags; use crate::util::{SizeExt, TransformExt}; #[typst_macros::time(name = "handle image")] @@ -32,6 +33,8 @@ pub(crate) fn handle_image( gc.image_spans.insert(span); + let mut handle = tags::start_marked(gc, surface); + let surface = handle.surface(); match image.kind() { ImageKind::Raster(raster) => { let (exif_transform, new_size) = exif_transform(raster, size); diff --git a/crates/typst-pdf/src/shape.rs b/crates/typst-pdf/src/shape.rs index 5b9232dbe..3b52939da 100644 --- a/crates/typst-pdf/src/shape.rs +++ b/crates/typst-pdf/src/shape.rs @@ -5,8 +5,8 @@ use typst_library::visualize::{Geometry, Shape}; use typst_syntax::Span; use crate::convert::{FrameContext, GlobalContext}; -use crate::paint; use crate::util::{convert_path, AbsExt, TransformExt}; +use crate::{paint, tags}; #[typst_macros::time(name = "handle shape")] pub(crate) fn handle_shape( @@ -16,6 +16,9 @@ pub(crate) fn handle_shape( gc: &mut GlobalContext, span: Span, ) -> SourceResult<()> { + let mut handle = tags::start_marked(gc, surface); + let surface = handle.surface(); + surface.set_location(span.into_raw().get()); surface.push_transform(&fc.state().transform().to_krilla()); diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 0446eebce..26776d090 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -45,6 +45,16 @@ pub(crate) enum StackEntryKind { TableCell(Packed), } +impl StackEntryKind { + pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> { + if let Self::Standard(v) = self { + Some(v) + } else { + None + } + } +} + pub(crate) struct TableCtx { table: Packed, rows: Vec, Tag, Vec)>>>, @@ -144,10 +154,6 @@ impl Tags { .expect("initialized placeholder node") } - pub(crate) fn is_root(&self) -> bool { - self.stack.is_empty() - } - /// Returns the current parent's list of children and the structure type ([Tag]). /// In case of the document root the structure type will be `None`. pub(crate) fn parent(&mut self) -> (Option<&mut StackEntryKind>, &mut Vec) { @@ -196,25 +202,40 @@ impl Tags { } } -/// Marked-content may not cross page boundaries: restart tag that was still open -/// at the end of the last page. -pub(crate) fn restart_open(gc: &mut GlobalContext, surface: &mut Surface) { - // TODO: somehow avoid empty marked-content sequences - if let Some((loc, kind)) = gc.tags.in_artifact { - start_artifact(gc, surface, loc, kind); - } else if let Some(entry) = gc.tags.stack.last_mut() { - let id = surface.start_tagged(ContentTag::Other); - entry.nodes.push(TagNode::Leaf(id)); +/// Automatically calls [`Surface::end_tagged`] when dropped. +pub(crate) struct TagHandle<'a, 'b> { + surface: &'b mut Surface<'a>, +} + +impl Drop for TagHandle<'_, '_> { + fn drop(&mut self) { + self.surface.end_tagged(); } } -/// Marked-content may not cross page boundaries: end any open tag. -pub(crate) fn end_open(gc: &mut GlobalContext, surface: &mut Surface) { - if !gc.tags.stack.is_empty() || gc.tags.in_artifact.is_some() { - surface.end_tagged(); +impl<'a> TagHandle<'a, '_> { + pub(crate) fn surface<'c>(&'c mut self) -> &'c mut Surface<'a> { + &mut self.surface } } +/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] +/// when dropped. +pub(crate) fn start_marked<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, +) -> TagHandle<'a, 'b> { + let content = if let Some((_, kind)) = gc.tags.in_artifact { + let ty = artifact_type(kind); + ContentTag::Artifact(ty) + } else { + ContentTag::Other + }; + let id = surface.start_tagged(content); + gc.tags.push(TagNode::Leaf(id)); + TagHandle { surface } +} + /// Add all annotations that were found in the page frame. pub(crate) fn add_annotations( gc: &mut GlobalContext, @@ -232,11 +253,7 @@ pub(crate) fn add_annotations( } } -pub(crate) fn handle_start( - gc: &mut GlobalContext, - surface: &mut Surface, - elem: &Content, -) { +pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { if gc.tags.in_artifact.is_some() { // Don't nest artifacts return; @@ -245,9 +262,8 @@ pub(crate) fn handle_start( let loc = elem.location().unwrap(); if let Some(artifact) = elem.to_packed::() { - end_open(gc, surface); let kind = artifact.kind(StyleChain::default()); - start_artifact(gc, surface, loc, kind); + start_artifact(gc, loc, kind); return; } @@ -279,79 +295,55 @@ pub(crate) fn handle_start( } else if let Some(image) = elem.to_packed::() { let alt = image.alt(StyleChain::default()).map(|s| s.to_string()); - end_open(gc, surface); - let id = surface.start_tagged(ContentTag::Other); - let mut node = TagNode::Leaf(id); - - if let Some(StackEntryKind::Standard(parent)) = gc.tags.parent().0 { - if parent.kind == TagKind::Figure && parent.alt_text.is_none() { - // HACK: set alt text of outer figure tag, if the contained image - // has alt text specified - parent.alt_text = alt; - } else { - node = TagNode::Group(TagKind::Figure.with_alt_text(alt), vec![node]); - } + let figure_tag = (gc.tags.parent().0) + .and_then(|parent| parent.as_standard_mut()) + .filter(|tag| tag.kind == TagKind::Figure && tag.alt_text.is_none()); + if let Some(figure_tag) = figure_tag { + // HACK: set alt text of outer figure tag, if the contained image + // has alt text specified + figure_tag.alt_text = alt; + return; } else { - node = TagNode::Group(TagKind::Figure.with_alt_text(alt), vec![node]); + TagKind::Figure.with_alt_text(alt) } - gc.tags.push(node); - - return; } else if let Some(_) = elem.to_packed::() { TagKind::Caption.into() } else if let Some(link) = elem.to_packed::() { let link_id = gc.tags.next_link_id(); - push_stack(gc, surface, loc, StackEntryKind::Link(link_id, link.clone())); + push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone())); return; } else if let Some(table) = elem.to_packed::() { let ctx = TableCtx { table: table.clone(), rows: Vec::new() }; - push_stack(gc, surface, loc, StackEntryKind::Table(ctx)); + push_stack(gc, loc, StackEntryKind::Table(ctx)); return; } else if let Some(cell) = elem.to_packed::() { - push_stack(gc, surface, loc, StackEntryKind::TableCell(cell.clone())); + push_stack(gc, loc, StackEntryKind::TableCell(cell.clone())); return; } else if let Some(_) = elem.to_packed::() { - end_open(gc, surface); - start_artifact(gc, surface, loc, ArtifactKind::Other); + start_artifact(gc, loc, ArtifactKind::Other); return; } else if let Some(_) = elem.to_packed::() { - end_open(gc, surface); - start_artifact(gc, surface, loc, ArtifactKind::Other); + start_artifact(gc, loc, ArtifactKind::Other); return; } else { return; }; - push_stack(gc, surface, loc, StackEntryKind::Standard(tag)); + push_stack(gc, loc, StackEntryKind::Standard(tag)); } -fn push_stack( - gc: &mut GlobalContext, - surface: &mut Surface, - loc: Location, - kind: StackEntryKind, -) { +fn push_stack(gc: &mut GlobalContext, loc: Location, kind: StackEntryKind) { if !gc.tags.context_supports(&kind) { // TODO: error or warning? } - // close previous marked-content and open a nested tag. - end_open(gc, surface); - let id = surface.start_tagged(krilla::tagging::ContentTag::Other); - gc.tags - .stack - .push(StackEntry { loc, kind, nodes: vec![TagNode::Leaf(id)] }); + gc.tags.stack.push(StackEntry { loc, kind, nodes: Vec::new() }); } -pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Location) { +pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { if let Some((l, _)) = gc.tags.in_artifact { if l == loc { gc.tags.in_artifact = None; - surface.end_tagged(); - if let Some(entry) = gc.tags.stack.last_mut() { - let id = surface.start_tagged(ContentTag::Other); - entry.nodes.push(TagNode::Leaf(id)); - } } return; } @@ -360,8 +352,6 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc return; }; - surface.end_tagged(); - let node = match entry.kind { StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes), StackEntryKind::Link(_, link) => { @@ -387,30 +377,14 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc table_ctx.insert(cell, entry.nodes); - // TODO: somehow avoid empty marked-content sequences - let id = surface.start_tagged(ContentTag::Other); - gc.tags.push(TagNode::Leaf(id)); return; } }; gc.tags.push(node); - if !gc.tags.is_root() { - // TODO: somehow avoid empty marked-content sequences - let id = surface.start_tagged(ContentTag::Other); - gc.tags.push(TagNode::Leaf(id)); - } } -fn start_artifact( - gc: &mut GlobalContext, - surface: &mut Surface, - loc: Location, - kind: ArtifactKind, -) { - let ty = artifact_type(kind); - let id = surface.start_tagged(ContentTag::Artifact(ty)); - gc.tags.push(TagNode::Leaf(id)); +fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) { gc.tags.in_artifact = Some((loc, kind)); } diff --git a/crates/typst-pdf/src/text.rs b/crates/typst-pdf/src/text.rs index 9876927d0..9a12de969 100644 --- a/crates/typst-pdf/src/text.rs +++ b/crates/typst-pdf/src/text.rs @@ -11,8 +11,8 @@ use typst_library::visualize::FillRule; use typst_syntax::Span; use crate::convert::{FrameContext, GlobalContext}; -use crate::paint; use crate::util::{display_font, AbsExt, TransformExt}; +use crate::{paint, tags}; #[typst_macros::time(name = "handle text")] pub(crate) fn handle_text( @@ -23,6 +23,9 @@ pub(crate) fn handle_text( ) -> SourceResult<()> { *gc.languages.entry(t.lang).or_insert(0) += t.glyphs.len(); + let mut handle = tags::start_marked(gc, surface); + let surface = handle.surface(); + let font = convert_font(gc, t.font.clone())?; let fill = paint::convert_fill( gc,