From eb9a3359d5ef84dc9819e081baf850c77e8eb666 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Sun, 13 Jul 2025 17:27:02 +0200 Subject: [PATCH] feat: generate tags for lists and enums --- crates/typst-layout/src/lists.rs | 15 +++- crates/typst-layout/src/rules.rs | 20 +++-- crates/typst-library/src/model/outline.rs | 7 -- crates/typst-library/src/pdf/accessibility.rs | 40 +++++++++ crates/typst-pdf/src/tags/list.rs | 85 +++++++++++++++++++ crates/typst-pdf/src/tags/mod.rs | 72 +++++++++++++--- 6 files changed, 209 insertions(+), 30 deletions(-) create mode 100644 crates/typst-pdf/src/tags/list.rs diff --git a/crates/typst-layout/src/lists.rs b/crates/typst-layout/src/lists.rs index adb793fb9..5fb9337ff 100644 --- a/crates/typst-layout/src/lists.rs +++ b/crates/typst-layout/src/lists.rs @@ -7,6 +7,7 @@ use typst_library::introspection::Locator; use typst_library::layout::grid::resolve::{Cell, CellGrid}; use typst_library::layout::{Axes, Fragment, HAlignment, Regions, Sizing, VAlignment}; use typst_library::model::{EnumElem, ListElem, Numbering, ParElem, ParbreakElem}; +use typst_library::pdf::PdfMarkerTag; use typst_library::text::TextElem; use crate::grid::GridLayouter; @@ -48,12 +49,16 @@ pub fn layout_list( if !tight { body += ParbreakElem::shared(); } + let body = body.set(ListElem::depth, Depth(1)); cells.push(Cell::new(Content::empty(), locator.next(&()))); - cells.push(Cell::new(marker.clone(), locator.next(&marker.span()))); + cells.push(Cell::new( + PdfMarkerTag::ListItemLabel(marker.clone()), + locator.next(&marker.span()), + )); cells.push(Cell::new(Content::empty(), locator.next(&()))); cells.push(Cell::new( - body.set(ListElem::depth, Depth(1)), + PdfMarkerTag::ListItemBody(body), locator.next(&item.body.span()), )); } @@ -142,11 +147,13 @@ pub fn layout_enum( body += ParbreakElem::shared(); } + let body = body.set(EnumElem::parents, smallvec![number]); + cells.push(Cell::new(Content::empty(), locator.next(&()))); - cells.push(Cell::new(resolved, 
locator.next(&()))); + cells.push(Cell::new(PdfMarkerTag::ListItemLabel(resolved), locator.next(&()))); cells.push(Cell::new(Content::empty(), locator.next(&()))); cells.push(Cell::new( - body.set(EnumElem::parents, smallvec![number]), + PdfMarkerTag::ListItemBody(body), locator.next(&item.body.span()), )); number = diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index 97c8c11ea..8d3e6da4f 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -20,11 +20,12 @@ use typst_library::math::EquationElem; use typst_library::model::{ Attribution, BibliographyElem, CiteElem, CiteGroup, CslSource, Destination, EmphElem, EnumElem, FigureCaption, FigureElem, FootnoteElem, FootnoteEntry, HeadingElem, - LinkElem, LinkTarget, ListElem, Outlinable, OutlineBody, OutlineElem, OutlineEntry, - ParElem, ParbreakElem, QuoteElem, RefElem, StrongElem, TableCell, TableElem, - TermsElem, Works, + LinkElem, LinkTarget, ListElem, Outlinable, OutlineElem, OutlineEntry, ParElem, + ParbreakElem, QuoteElem, RefElem, StrongElem, TableCell, TableElem, TermsElem, Works, +}; +use typst_library::pdf::{ + ArtifactElem, EmbedElem, PdfMarkerTag, PdfMarkerTagKind, PdfTagElem, }; -use typst_library::pdf::{ArtifactElem, EmbedElem, PdfTagElem}; use typst_library::text::{ DecoLine, Decoration, HighlightElem, ItalicToggle, LinebreakElem, LocalName, OverlineElem, RawElem, RawLine, ScriptKind, ShiftSettings, Smallcaps, SmallcapsElem, @@ -56,7 +57,6 @@ pub fn register(rules: &mut NativeRuleMap) { rules.register(Paged, FOOTNOTE_RULE); rules.register(Paged, FOOTNOTE_ENTRY_RULE); rules.register(Paged, OUTLINE_RULE); - rules.register(Paged, OUTLINE_BODY_RULE); rules.register(Paged, OUTLINE_ENTRY_RULE); rules.register(Paged, REF_RULE); rules.register(Paged, CITE_GROUP_RULE); @@ -108,6 +108,7 @@ pub fn register(rules: &mut NativeRuleMap) { rules.register(Paged, EMBED_RULE); rules.register(Paged, PDF_TAG_RULE); rules.register(Paged, PDF_ARTIFACT_RULE); + 
rules.register(Paged, PDF_MARKER_TAG_RULE); } const STRONG_RULE: ShowFn = |elem, _, styles| { @@ -466,13 +467,14 @@ const OUTLINE_RULE: ShowFn = |elem, engine, styles| { } // Wrap the entries into a marker for pdf tagging. - seq.push(OutlineBody::new(Content::sequence(entries)).pack()); + seq.push( + PdfMarkerTag::new(PdfMarkerTagKind::OutlineBody, Content::sequence(entries)) + .pack(), + ); Ok(Content::sequence(seq)) }; -const OUTLINE_BODY_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); - const OUTLINE_ENTRY_RULE: ShowFn = |elem, engine, styles| { let span = elem.span(); let context = Context::new(None, Some(styles)); @@ -931,3 +933,5 @@ const EMBED_RULE: ShowFn = |_, _, _| Ok(Content::empty()); const PDF_TAG_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); const PDF_ARTIFACT_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); + +const PDF_MARKER_TAG_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index 2dbd33cc2..9421c9a4e 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -266,13 +266,6 @@ impl LocalName for Packed { const KEY: &'static str = "outline"; } -/// Only used to delimit the outline in tagged PDF. -#[elem(Locatable)] -pub struct OutlineBody { - #[required] - pub body: Content, -} - /// Defines how an outline is indented. #[derive(Debug, Clone, PartialEq, Hash)] pub enum OutlineIndent { diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index 16a57e537..e4213542b 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -262,3 +262,43 @@ impl TableHeaderScope { } } } + +// Used to delimit content for tagged PDF. +#[elem(Locatable)] +pub struct PdfMarkerTag { + #[required] + pub kind: PdfMarkerTagKind, + #[required] + pub body: Content, +} + +macro_rules! 
pdf_marker_tag { + ($(#[doc = $doc:expr] $variant:ident,)+) => { + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Cast)] + pub enum PdfMarkerTagKind { + $( + #[doc = $doc] + $variant + ),+ + } + + impl PdfMarkerTag { + $( + #[doc = $doc] + #[allow(non_snake_case)] + pub fn $variant(body: Content) -> Content { + Self::new(PdfMarkerTagKind::$variant, body).pack() + } + )+ + } + } +} + +pdf_marker_tag! { + /// `TOC` + OutlineBody, + /// `Lbl` (marker) of the list item + ListItemLabel, + /// `LBody` of the list item + ListItemBody, +} diff --git a/crates/typst-pdf/src/tags/list.rs b/crates/typst-pdf/src/tags/list.rs new file mode 100644 index 000000000..99f1254cc --- /dev/null +++ b/crates/typst-pdf/src/tags/list.rs @@ -0,0 +1,85 @@ +use krilla::tagging::{ListNumbering, TagKind}; + +use crate::tags::TagNode; + +pub(crate) struct ListCtx { + numbering: ListNumbering, + items: Vec, +} + +struct ListItem { + label: Vec, + body: Option>, + sub_list: Option, +} + +impl ListCtx { + pub(crate) fn new(numbering: ListNumbering) -> Self { + Self { numbering, items: Vec::new() } + } + + pub(crate) fn push_label(&mut self, nodes: Vec) { + self.items.push(ListItem { label: nodes, body: None, sub_list: None }); + } + + pub(crate) fn push_body(&mut self, mut nodes: Vec) { + let item = self.items.last_mut().expect("ListItemLabel"); + + // Nested lists are expected to have the following structure: + // + // Typst code + // ``` + // - a + // - b + // - c + // - d + // - e + // ``` + // + // Structure tree + // ``` + // <L> + // <LI>
+ // <Lbl> `-` + // <LBody> `a` + // <LI>
+ // <Lbl> `-` + // <LBody> `b` + // <L> + // <LI>
+ // <Lbl> `-` + // <LBody> `c` + // <LI>
+ // <Lbl> `-` + // <LBody> `d` + // <LI>
+ // <Lbl> `-` + // <LBody> `e` + // ``` + // + // So move the nested list out of the list item. + if let [_, TagNode::Group(tag, _)] = nodes.as_slice() { + if matches!(tag.kind, TagKind::L(_)) { + item.sub_list = nodes.pop(); + } + } + + item.body = Some(nodes); + } + + pub(crate) fn build_list(self, mut nodes: Vec) -> TagNode { + for item in self.items.into_iter() { + nodes.push(TagNode::Group( + TagKind::LI.into(), + vec![ + TagNode::Group(TagKind::Lbl.into(), item.label), + TagNode::Group(TagKind::LBody.into(), item.body.unwrap_or_default()), + ], + )); + if let Some(sub_list) = item.sub_list { + nodes.push(sub_list); + } + } + TagNode::Group(TagKind::L(self.numbering).into(), nodes) + } +} diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index b12d9b601..d7fe24f78 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -6,8 +6,8 @@ use krilla::configure::Validator; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, ContentTag, Identifier, Node, SpanTag, TableDataCell, Tag, TagBuilder, - TagGroup, TagKind, TagTree, + ArtifactType, ContentTag, Identifier, ListNumbering, Node, SpanTag, TableDataCell, + Tag, TagBuilder, TagGroup, TagKind, TagTree, }; use typst_library::diag::SourceResult; use typst_library::foundations::{ @@ -17,17 +17,21 @@ use typst_library::foundations::{ use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ - Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody, + Destination, EnumElem, FigureCaption, FigureElem, HeadingElem, ListElem, Outlinable, OutlineEntry, TableCell, TableElem, }; -use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; +use typst_library::pdf::{ + ArtifactElem, ArtifactKind, PdfMarkerTag, PdfMarkerTagKind, PdfTagElem, PdfTagKind, +}; use typst_library::visualize::ImageElem; use crate::convert::GlobalContext; use
crate::link::LinkAnnotation; +use crate::tags::list::ListCtx; use crate::tags::outline::OutlineCtx; use crate::tags::table::TableCtx; +mod list; mod outline; mod table; @@ -58,16 +62,32 @@ pub(crate) fn handle_start( PdfTagKind::Part => TagKind::Part.into(), _ => todo!(), } - } else if let Some(heading) = elem.to_packed::() { - let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX); - let name = heading.body.plain_text().to_string(); - TagKind::Hn(level, Some(name)).into() - } else if let Some(_) = elem.to_packed::() { - push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()))?; - return Ok(()); + } else if let Some(tag) = elem.to_packed::() { + match tag.kind { + PdfMarkerTagKind::OutlineBody => { + push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()))?; + return Ok(()); + } + PdfMarkerTagKind::ListItemLabel => { + push_stack(gc, loc, StackEntryKind::ListItemLabel)?; + return Ok(()); + } + PdfMarkerTagKind::ListItemBody => { + push_stack(gc, loc, StackEntryKind::ListItemBody)?; + return Ok(()); + } + } } else if let Some(entry) = elem.to_packed::() { push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone()))?; return Ok(()); + } else if let Some(_list) = elem.to_packed::() { + let numbering = ListNumbering::Circle; // TODO: infer numbering from `list.marker` + push_stack(gc, loc, StackEntryKind::List(ListCtx::new(numbering)))?; + return Ok(()); + } else if let Some(_enumeration) = elem.to_packed::() { + let numbering = ListNumbering::Decimal; // TODO: infer numbering from `enum.numbering` + push_stack(gc, loc, StackEntryKind::List(ListCtx::new(numbering)))?; + return Ok(()); } else if let Some(_) = elem.to_packed::() { let alt = None; // TODO TagKind::Figure.with_alt_text(alt) @@ -112,6 +132,10 @@ pub(crate) fn handle_start( push_stack(gc, loc, StackEntryKind::TableCell(cell.clone()))?; } return Ok(()); + } else if let Some(heading) = elem.to_packed::() { + let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX); + let 
name = heading.body.plain_text().to_string(); + TagKind::Hn(level, Some(name)).into() } else if let Some(link) = elem.to_packed::() { let link_id = gc.tags.next_link_id(); push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone()))?; @@ -171,6 +195,17 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc table_ctx.insert(&cell, entry.nodes); return; } + StackEntryKind::List(list) => list.build_list(entry.nodes), + StackEntryKind::ListItemLabel => { + let list_ctx = gc.tags.parent_list().expect("parent list"); + list_ctx.push_label(entry.nodes); + return; + } + StackEntryKind::ListItemBody => { + let list_ctx = gc.tags.parent_list().expect("parent list"); + list_ctx.push_body(entry.nodes); + return; + } StackEntryKind::Link(_, link) => { let alt = link.alt.as_ref().map(EcoString::to_string); let tag = TagKind::Link.with_alt_text(alt); @@ -309,6 +344,10 @@ impl Tags { self.parent()?.as_table_mut() } + pub(crate) fn parent_list(&mut self) -> Option<&mut ListCtx> { + self.parent()?.as_list_mut() + } + pub(crate) fn find_parent_link(&self) -> Option<(LinkId, &Packed)> { self.stack.iter().rev().find_map(|entry| entry.kind.as_link()) } @@ -378,6 +417,9 @@ pub(crate) enum StackEntryKind { OutlineEntry(Packed), Table(TableCtx), TableCell(Packed), + List(ListCtx), + ListItemLabel, + ListItemBody, Link(LinkId, Packed), } @@ -406,6 +448,14 @@ impl StackEntryKind { } } + pub(crate) fn as_list_mut(&mut self) -> Option<&mut ListCtx> { + if let Self::List(v) = self { + Some(v) + } else { + None + } + } + pub(crate) fn as_link(&self) -> Option<(LinkId, &Packed)> { if let Self::Link(id, link) = self { Some((*id, link))