feat: generate tags for lists and enums

This commit is contained in:
Tobias Schmitz 2025-07-13 17:27:02 +02:00
parent a495724813
commit eb9a3359d5
No known key found for this signature in database
6 changed files with 209 additions and 30 deletions

View File

@ -7,6 +7,7 @@ use typst_library::introspection::Locator;
use typst_library::layout::grid::resolve::{Cell, CellGrid};
use typst_library::layout::{Axes, Fragment, HAlignment, Regions, Sizing, VAlignment};
use typst_library::model::{EnumElem, ListElem, Numbering, ParElem, ParbreakElem};
use typst_library::pdf::PdfMarkerTag;
use typst_library::text::TextElem;
use crate::grid::GridLayouter;
@ -48,12 +49,16 @@ pub fn layout_list(
if !tight {
body += ParbreakElem::shared();
}
let body = body.set(ListElem::depth, Depth(1));
cells.push(Cell::new(Content::empty(), locator.next(&())));
cells.push(Cell::new(marker.clone(), locator.next(&marker.span())));
cells.push(Cell::new(
PdfMarkerTag::ListItemLabel(marker.clone()),
locator.next(&marker.span()),
));
cells.push(Cell::new(Content::empty(), locator.next(&())));
cells.push(Cell::new(
body.set(ListElem::depth, Depth(1)),
PdfMarkerTag::ListItemBody(body),
locator.next(&item.body.span()),
));
}
@ -142,11 +147,13 @@ pub fn layout_enum(
body += ParbreakElem::shared();
}
let body = body.set(EnumElem::parents, smallvec![number]);
cells.push(Cell::new(Content::empty(), locator.next(&())));
cells.push(Cell::new(resolved, locator.next(&())));
cells.push(Cell::new(PdfMarkerTag::ListItemLabel(resolved), locator.next(&())));
cells.push(Cell::new(Content::empty(), locator.next(&())));
cells.push(Cell::new(
body.set(EnumElem::parents, smallvec![number]),
PdfMarkerTag::ListItemBody(body),
locator.next(&item.body.span()),
));
number =

View File

@ -20,11 +20,12 @@ use typst_library::math::EquationElem;
use typst_library::model::{
Attribution, BibliographyElem, CiteElem, CiteGroup, CslSource, Destination, EmphElem,
EnumElem, FigureCaption, FigureElem, FootnoteElem, FootnoteEntry, HeadingElem,
LinkElem, LinkTarget, ListElem, Outlinable, OutlineBody, OutlineElem, OutlineEntry,
ParElem, ParbreakElem, QuoteElem, RefElem, StrongElem, TableCell, TableElem,
TermsElem, Works,
LinkElem, LinkTarget, ListElem, Outlinable, OutlineElem, OutlineEntry, ParElem,
ParbreakElem, QuoteElem, RefElem, StrongElem, TableCell, TableElem, TermsElem, Works,
};
use typst_library::pdf::{
ArtifactElem, EmbedElem, PdfMarkerTag, PdfMarkerTagKind, PdfTagElem,
};
use typst_library::pdf::{ArtifactElem, EmbedElem, PdfTagElem};
use typst_library::text::{
DecoLine, Decoration, HighlightElem, ItalicToggle, LinebreakElem, LocalName,
OverlineElem, RawElem, RawLine, ScriptKind, ShiftSettings, Smallcaps, SmallcapsElem,
@ -56,7 +57,6 @@ pub fn register(rules: &mut NativeRuleMap) {
rules.register(Paged, FOOTNOTE_RULE);
rules.register(Paged, FOOTNOTE_ENTRY_RULE);
rules.register(Paged, OUTLINE_RULE);
rules.register(Paged, OUTLINE_BODY_RULE);
rules.register(Paged, OUTLINE_ENTRY_RULE);
rules.register(Paged, REF_RULE);
rules.register(Paged, CITE_GROUP_RULE);
@ -108,6 +108,7 @@ pub fn register(rules: &mut NativeRuleMap) {
rules.register(Paged, EMBED_RULE);
rules.register(Paged, PDF_TAG_RULE);
rules.register(Paged, PDF_ARTIFACT_RULE);
rules.register(Paged, PDF_MARKER_TAG_RULE);
}
const STRONG_RULE: ShowFn<StrongElem> = |elem, _, styles| {
@ -466,13 +467,14 @@ const OUTLINE_RULE: ShowFn<OutlineElem> = |elem, engine, styles| {
}
// Wrap the entries into a marker for pdf tagging.
seq.push(OutlineBody::new(Content::sequence(entries)).pack());
seq.push(
PdfMarkerTag::new(PdfMarkerTagKind::OutlineBody, Content::sequence(entries))
.pack(),
);
Ok(Content::sequence(seq))
};
const OUTLINE_BODY_RULE: ShowFn<OutlineBody> = |elem, _, _| Ok(elem.body.clone());
const OUTLINE_ENTRY_RULE: ShowFn<OutlineEntry> = |elem, engine, styles| {
let span = elem.span();
let context = Context::new(None, Some(styles));
@ -931,3 +933,5 @@ const EMBED_RULE: ShowFn<EmbedElem> = |_, _, _| Ok(Content::empty());
/// Show rule for `PdfTagElem`: the element itself renders nothing extra, the
/// rule simply yields a clone of its body.
const PDF_TAG_RULE: ShowFn<PdfTagElem> = |elem, _, _| Ok(elem.body.clone());
/// Show rule for `ArtifactElem`: yields a clone of the element's body.
const PDF_ARTIFACT_RULE: ShowFn<ArtifactElem> = |elem, _, _| Ok(elem.body.clone());
/// Show rule for `PdfMarkerTag`: yields a clone of the wrapped body, ignoring
/// the marker's `kind`.
const PDF_MARKER_TAG_RULE: ShowFn<PdfMarkerTag> = |elem, _, _| Ok(elem.body.clone());

View File

@ -266,13 +266,6 @@ impl LocalName for Packed<OutlineElem> {
const KEY: &'static str = "outline";
}
/// Only used to delimit the outline in tagged PDF.
#[elem(Locatable)]
pub struct OutlineBody {
#[required]
pub body: Content,
}
/// Defines how an outline is indented.
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum OutlineIndent {

View File

@ -262,3 +262,43 @@ impl TableHeaderScope {
}
}
}
// Used to delimit content for tagged PDF.
//
// The element wraps a `body` together with a `kind` that states which PDF
// structure role the wrapped content plays (see `PdfMarkerTagKind`).
#[elem(Locatable)]
pub struct PdfMarkerTag {
// Which kind of marker this is.
#[required]
pub kind: PdfMarkerTagKind,
// The content enclosed by the marker.
#[required]
pub body: Content,
}
// Generates `PdfMarkerTagKind` and matching constructor functions on
// `PdfMarkerTag` from a single list of documented variant names.
//
// Input: one or more entries of the form `#[doc = ...] Variant,` (a `///`
// doc comment is accepted since it desugars to a `#[doc = "..."]` attribute).
macro_rules! pdf_marker_tag {
($(#[doc = $doc:expr] $variant:ident,)+) => {
// One enum variant per entry, carrying the entry's documentation.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Cast)]
pub enum PdfMarkerTagKind {
$(
#[doc = $doc]
$variant
),+
}
// One associated function per entry, named exactly like the variant
// (hence the `non_snake_case` allowance). Each wraps `body` in a
// `PdfMarkerTag` of that kind and packs it into `Content`.
impl PdfMarkerTag {
$(
#[doc = $doc]
#[allow(non_snake_case)]
pub fn $variant(body: Content) -> Content {
Self::new(PdfMarkerTagKind::$variant, body).pack()
}
)+
}
}
}
// Declares the available marker kinds; each entry yields a
// `PdfMarkerTagKind` variant and a same-named `PdfMarkerTag` constructor.
pdf_marker_tag! {
/// `TOC`
OutlineBody,
/// `Lbl` (marker) of the list item
ListItemLabel,
/// `LBody` (body) of the list item
ListItemBody,
}

View File

@ -0,0 +1,85 @@
use krilla::tagging::{ListNumbering, TagKind};
use crate::tags::TagNode;
/// Collects the items of a single list while its tags are being processed,
/// so that the final `L` tag group can be assembled by `build_list`.
pub(crate) struct ListCtx {
/// Numbering passed to `TagKind::L` when the list group is built.
numbering: ListNumbering,
/// Items in the order their labels were pushed.
items: Vec<ListItem>,
}
/// The collected parts of one list item.
struct ListItem {
/// Nodes that end up inside the item's `Lbl` group.
label: Vec<TagNode>,
/// Nodes that end up inside the item's `LBody` group; `None` until
/// `push_body` has been called for this item.
body: Option<Vec<TagNode>>,
/// A nested list that was moved out of the body, emitted right after the
/// item's `LI` group.
sub_list: Option<TagNode>,
}
impl ListCtx {
    /// Creates a context without any items for a list with the given
    /// `numbering`.
    pub(crate) fn new(numbering: ListNumbering) -> Self {
        Self { items: Vec::new(), numbering }
    }

    /// Starts a new list item whose label consists of `nodes`.
    ///
    /// The item has no body and no nested list until [`Self::push_body`] is
    /// called.
    pub(crate) fn push_label(&mut self, nodes: Vec<TagNode>) {
        let item = ListItem { label: nodes, body: None, sub_list: None };
        self.items.push(item);
    }

    /// Stores `nodes` as the body of the most recently started list item.
    ///
    /// # Panics
    /// Panics if no label has been pushed yet.
    pub(crate) fn push_body(&mut self, mut nodes: Vec<TagNode>) {
        let current = self.items.last_mut().expect("ListItemLabel");

        // Nested lists are expected to have the following structure:
        //
        // Typst code
        // ```
        // - a
        // - b
        //     - c
        //     - d
        // - e
        // ```
        //
        // Structure tree
        // ```
        // <L>
        //     <LI>
        //         <Lbl> `-`
        //         <LBody> `a`
        //     <LI>
        //         <Lbl> `-`
        //         <LBody> `b`
        //     <L>
        //         <LI>
        //             <Lbl> `-`
        //             <LBody> `c`
        //         <LI>
        //             <Lbl> `-`
        //             <LBody> `d`
        //     <LI>
        //         <Lbl> `-`
        //         <LBody> `e`
        // ```
        //
        // So the nested list is moved out of the list item. This only applies
        // when the body consists of exactly two nodes and the second one is
        // an `L` group.
        let has_trailing_sub_list = match nodes.as_slice() {
            [_, TagNode::Group(tag, _)] => matches!(tag.kind, TagKind::L(_)),
            _ => false,
        };
        if has_trailing_sub_list {
            current.sub_list = nodes.pop();
        }

        current.body = Some(nodes);
    }

    /// Consumes the context and produces the `L` tag group.
    ///
    /// For every collected item an `LI` group holding an `Lbl` and an `LBody`
    /// group is appended to `nodes`, directly followed by the item's nested
    /// list if there is one. An item without a body gets an empty `LBody`.
    pub(crate) fn build_list(self, mut nodes: Vec<TagNode>) -> TagNode {
        let Self { numbering, items } = self;

        for ListItem { label, body, sub_list } in items {
            let lbl = TagNode::Group(TagKind::Lbl.into(), label);
            let lbody = TagNode::Group(TagKind::LBody.into(), body.unwrap_or_default());
            nodes.push(TagNode::Group(TagKind::LI.into(), vec![lbl, lbody]));
            // Appends the nested list (if any) as a sibling of the `LI`.
            nodes.extend(sub_list);
        }

        TagNode::Group(TagKind::L(numbering).into(), nodes)
    }
}

View File

@ -6,8 +6,8 @@ use krilla::configure::Validator;
use krilla::page::Page;
use krilla::surface::Surface;
use krilla::tagging::{
ArtifactType, ContentTag, Identifier, Node, SpanTag, TableDataCell, Tag, TagBuilder,
TagGroup, TagKind, TagTree,
ArtifactType, ContentTag, Identifier, ListNumbering, Node, SpanTag, TableDataCell,
Tag, TagBuilder, TagGroup, TagKind, TagTree,
};
use typst_library::diag::SourceResult;
use typst_library::foundations::{
@ -17,17 +17,21 @@ use typst_library::foundations::{
use typst_library::introspection::Location;
use typst_library::layout::RepeatElem;
use typst_library::model::{
Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody,
Destination, EnumElem, FigureCaption, FigureElem, HeadingElem, ListElem, Outlinable,
OutlineEntry, TableCell, TableElem,
};
use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind};
use typst_library::pdf::{
ArtifactElem, ArtifactKind, PdfMarkerTag, PdfMarkerTagKind, PdfTagElem, PdfTagKind,
};
use typst_library::visualize::ImageElem;
use crate::convert::GlobalContext;
use crate::link::LinkAnnotation;
use crate::tags::list::ListCtx;
use crate::tags::outline::OutlineCtx;
use crate::tags::table::TableCtx;
mod list;
mod outline;
mod table;
@ -58,16 +62,32 @@ pub(crate) fn handle_start(
PdfTagKind::Part => TagKind::Part.into(),
_ => todo!(),
}
} else if let Some(heading) = elem.to_packed::<HeadingElem>() {
let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX);
let name = heading.body.plain_text().to_string();
TagKind::Hn(level, Some(name)).into()
} else if let Some(_) = elem.to_packed::<OutlineBody>() {
push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()))?;
return Ok(());
} else if let Some(tag) = elem.to_packed::<PdfMarkerTag>() {
match tag.kind {
PdfMarkerTagKind::OutlineBody => {
push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()))?;
return Ok(());
}
PdfMarkerTagKind::ListItemLabel => {
push_stack(gc, loc, StackEntryKind::ListItemLabel)?;
return Ok(());
}
PdfMarkerTagKind::ListItemBody => {
push_stack(gc, loc, StackEntryKind::ListItemBody)?;
return Ok(());
}
}
} else if let Some(entry) = elem.to_packed::<OutlineEntry>() {
push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone()))?;
return Ok(());
} else if let Some(_list) = elem.to_packed::<ListElem>() {
let numbering = ListNumbering::Circle; // TODO: infer numbering from `list.marker`
push_stack(gc, loc, StackEntryKind::List(ListCtx::new(numbering)))?;
return Ok(());
} else if let Some(_enumeration) = elem.to_packed::<EnumElem>() {
let numbering = ListNumbering::Decimal; // TODO: infer numbering from `enum.numbering`
push_stack(gc, loc, StackEntryKind::List(ListCtx::new(numbering)))?;
return Ok(());
} else if let Some(_) = elem.to_packed::<FigureElem>() {
let alt = None; // TODO
TagKind::Figure.with_alt_text(alt)
@ -112,6 +132,10 @@ pub(crate) fn handle_start(
push_stack(gc, loc, StackEntryKind::TableCell(cell.clone()))?;
}
return Ok(());
} else if let Some(heading) = elem.to_packed::<HeadingElem>() {
let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX);
let name = heading.body.plain_text().to_string();
TagKind::Hn(level, Some(name)).into()
} else if let Some(link) = elem.to_packed::<LinkMarker>() {
let link_id = gc.tags.next_link_id();
push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone()))?;
@ -171,6 +195,17 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc
table_ctx.insert(&cell, entry.nodes);
return;
}
StackEntryKind::List(list) => list.build_list(entry.nodes),
StackEntryKind::ListItemLabel => {
let list_ctx = gc.tags.parent_list().expect("parent list");
list_ctx.push_label(entry.nodes);
return;
}
StackEntryKind::ListItemBody => {
let list_ctx = gc.tags.parent_list().expect("parent list");
list_ctx.push_body(entry.nodes);
return;
}
StackEntryKind::Link(_, link) => {
let alt = link.alt.as_ref().map(EcoString::to_string);
let tag = TagKind::Link.with_alt_text(alt);
@ -309,6 +344,10 @@ impl Tags {
self.parent()?.as_table_mut()
}
/// Returns the list context of the parent stack entry, or `None` if there is
/// no parent or the parent is not a list.
pub(crate) fn parent_list(&mut self) -> Option<&mut ListCtx> {
    let parent = self.parent()?;
    parent.as_list_mut()
}
/// Walks the stack from the most recent entry backwards and returns the
/// first entry that is a link, together with its id.
pub(crate) fn find_parent_link(&self) -> Option<(LinkId, &Packed<LinkMarker>)> {
    for entry in self.stack.iter().rev() {
        if let Some(link) = entry.kind.as_link() {
            return Some(link);
        }
    }
    None
}
@ -378,6 +417,9 @@ pub(crate) enum StackEntryKind {
OutlineEntry(Packed<OutlineEntry>),
Table(TableCtx),
TableCell(Packed<TableCell>),
List(ListCtx),
ListItemLabel,
ListItemBody,
Link(LinkId, Packed<LinkMarker>),
}
@ -406,6 +448,14 @@ impl StackEntryKind {
}
}
/// Mutably borrows the contained [`ListCtx`] if this entry is a `List`;
/// yields `None` for every other kind.
pub(crate) fn as_list_mut(&mut self) -> Option<&mut ListCtx> {
    match self {
        Self::List(ctx) => Some(ctx),
        _ => None,
    }
}
pub(crate) fn as_link(&self) -> Option<(LinkId, &Packed<LinkMarker>)> {
if let Self::Link(id, link) = self {
Some((*id, link))