refactor: split up pdf tagging code into multiple modules

This commit is contained in:
Tobias Schmitz 2025-07-03 11:22:22 +02:00
parent 50cd81ee1f
commit 377dc87325
No known key found for this signature in database
3 changed files with 630 additions and 607 deletions

View File

@ -1,621 +1,30 @@
use std::cell::OnceCell;
use std::num::{NonZeroU32, NonZeroUsize};
use std::num::NonZeroU32;
use ecow::EcoString;
use krilla::page::Page;
use krilla::surface::Surface;
use krilla::tagging::{
ArtifactType, ContentTag, Identifier, Node, SpanTag, TableCellHeaders, TableCellSpan,
TableDataCell, TableHeaderCell, Tag, TagBuilder, TagGroup, TagId, TagKind, TagTree,
ArtifactType, ContentTag, Identifier, Node, SpanTag, Tag, TagBuilder, TagGroup,
TagKind, TagTree,
};
use typst_library::foundations::{Content, LinkMarker, Packed, Smart, StyleChain};
use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain};
use typst_library::introspection::Location;
use typst_library::layout::RepeatElem;
use typst_library::model::{
Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody,
OutlineEntry, TableCell, TableCellKind, TableElem, TableHeaderScope,
OutlineEntry, TableCell, TableElem,
};
use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind};
use typst_library::visualize::ImageElem;
use crate::convert::GlobalContext;
use crate::link::LinkAnnotation;
use crate::tags::outline::OutlineCtx;
use crate::tags::table::TableCtx;
/// Tagging state collected while exporting a document.
///
/// Finished nodes end up in `tree`; nodes that belong to a still-open group
/// are buffered in the top-most entry of `stack`.
pub(crate) struct Tags {
/// The intermediary stack of nested tag groups.
pub(crate) stack: Vec<StackEntry>,
/// A list of placeholders corresponding to a [`TagNode::Placeholder`].
pub(crate) placeholders: Vec<OnceCell<Node>>,
/// Location and kind of the artifact that is currently open, if any.
/// While this is `Some`, started content is tagged as an artifact.
pub(crate) in_artifact: Option<(Location, ArtifactKind)>,
/// Used to group multiple link annotations using quad points.
pub(crate) link_id: LinkId,
/// Used to generate IDs referenced in table `Headers` attributes.
/// The IDs must be document wide unique.
pub(crate) table_id: TableId,
/// The output.
pub(crate) tree: Vec<TagNode>,
}
/// Numeric identifier of a table, handed out by `Tags::next_table_id`.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct TableId(u32);
/// Numeric identifier of a link, handed out by `Tags::next_link_id`.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct LinkId(u32);
/// One open tag group on the [`Tags`] stack.
pub(crate) struct StackEntry {
/// Location associated with this entry.
pub(crate) loc: Location,
/// What kind of group this entry is, with its associated data.
pub(crate) kind: StackEntryKind,
/// Child nodes pushed while this entry was the top of the stack.
pub(crate) nodes: Vec<TagNode>,
}
/// The kind of group a [`StackEntry`] represents, with its associated data.
pub(crate) enum StackEntryKind {
/// A group that carries a ready-made [`Tag`].
Standard(Tag),
/// An outline, carrying the context used to nest its entries.
Outline(OutlineCtx),
/// A single outline entry.
OutlineEntry(Packed<OutlineEntry>),
/// A table, carrying the context that collects its cells.
Table(TableCtx),
/// A single table cell.
TableCell(Packed<TableCell>),
/// A link, with the ID used to group its annotations.
Link(LinkId, Packed<LinkMarker>),
}
impl StackEntryKind {
    /// Gives mutable access to the wrapped [`Tag`] when this is the
    /// `Standard` variant, and `None` for every other variant.
    pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> {
        match self {
            Self::Standard(tag) => Some(tag),
            _ => None,
        }
    }
}
/// Tracks the nesting of outline entries while an outline is being tagged.
pub(crate) struct OutlineCtx {
/// Currently open sub-sections, outermost first.
stack: Vec<OutlineSection>,
}
/// An open outline sub-section that is turned into a `TOC` group by `into_tag`.
pub(crate) struct OutlineSection {
/// Nodes collected for this section so far.
entries: Vec<TagNode>,
}
impl OutlineSection {
    /// Creates a section without any entries.
    const fn new() -> Self {
        Self { entries: Vec::new() }
    }

    /// Appends `entry` to this section.
    fn push(&mut self, entry: TagNode) {
        self.entries.push(entry)
    }

    /// Wraps the collected entries in a `TOC` group node.
    fn into_tag(self) -> TagNode {
        let Self { entries } = self;
        TagNode::Group(TagKind::TOC.into(), entries)
    }
}
impl OutlineCtx {
    /// Creates a context with no open sub-sections.
    fn new() -> Self {
        Self { stack: Vec::new() }
    }

    /// Adds an outline entry consisting of `nodes` at the nesting depth given
    /// by `entry.level`.
    ///
    /// Sub-sections are opened or closed until the number of open sections is
    /// `level - 1`. The entry is then wrapped in a `TOCI` group and appended
    /// to the innermost open section, or to `outline_nodes` if none is open.
    fn insert(
        &mut self,
        outline_nodes: &mut Vec<TagNode>,
        entry: Packed<OutlineEntry>,
        nodes: Vec<TagNode>,
    ) {
        let expected_len = entry.level.get() - 1;
        if self.stack.len() < expected_len {
            // Open the missing intermediate sections.
            self.stack.resize_with(expected_len, OutlineSection::new);
        } else {
            // Close every section that is nested deeper than this entry.
            while self.stack.len() > expected_len {
                self.finish_section(outline_nodes);
            }
        }

        let section_entry = TagNode::Group(TagKind::TOCI.into(), nodes);
        self.push(outline_nodes, section_entry);
    }

    /// Closes the innermost open section and appends the resulting `TOC` group
    /// to its parent.
    ///
    /// Panics if no section is open.
    fn finish_section(&mut self, outline_nodes: &mut Vec<TagNode>) {
        let sub_section = self.stack.pop().unwrap().into_tag();
        self.push(outline_nodes, sub_section);
    }

    /// Appends `entry` to the innermost open section, or to `outline_nodes`
    /// when no section is open.
    fn push(&mut self, outline_nodes: &mut Vec<TagNode>, entry: TagNode) {
        match self.stack.last_mut() {
            Some(section) => section.push(entry),
            None => outline_nodes.push(entry),
        }
    }

    /// Closes all sections that are still open and returns the completed list
    /// of outline nodes.
    fn build_outline(mut self, mut outline_nodes: Vec<TagNode>) -> Vec<TagNode> {
        while !self.stack.is_empty() {
            self.finish_section(&mut outline_nodes);
        }
        outline_nodes
    }
}
/// Collects the cells of one table until the table's tag nodes are built.
pub(crate) struct TableCtx {
/// Identifier of this table, used when deriving header cell IDs.
id: TableId,
/// The table element this context belongs to.
table: Packed<TableElem>,
/// Cell grid, indexed as `rows[y][x]`.
rows: Vec<Vec<GridCell>>,
}
/// One position in the grid of a [`TableCtx`].
#[derive(Clone, Default)]
enum GridCell {
/// A cell, stored at its own `(x, y)` position.
Cell(TableCtxCell),
/// A position covered by a spanning cell; holds the `(x, y)` position at
/// which that cell is stored.
Spanned(usize, usize),
/// No cell has been inserted at this position.
#[default]
Missing,
}
impl GridCell {
    /// Borrows the contained cell; `None` unless this is the `Cell` variant.
    fn as_cell(&self) -> Option<&TableCtxCell> {
        match self {
            Self::Cell(cell) => Some(cell),
            Self::Spanned(..) | Self::Missing => None,
        }
    }

    /// Mutably borrows the contained cell; `None` unless this is the `Cell`
    /// variant.
    fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> {
        match self {
            Self::Cell(cell) => Some(cell),
            Self::Spanned(..) | Self::Missing => None,
        }
    }

    /// Consumes `self` and returns the contained cell; `None` unless this is
    /// the `Cell` variant.
    fn into_cell(self) -> Option<TableCtxCell> {
        match self {
            Self::Cell(cell) => Some(cell),
            Self::Spanned(..) | Self::Missing => None,
        }
    }
}
/// Data recorded for one table cell.
#[derive(Clone)]
struct TableCtxCell {
/// Column of the cell.
x: u32,
/// Row of the cell.
y: u32,
/// Number of rows the cell spans.
rowspan: NonZeroUsize,
/// Number of columns the cell spans.
colspan: NonZeroUsize,
/// Kind of the cell; may be `Smart::Auto` until a default has been filled in.
kind: Smart<TableCellKind>,
/// IDs of the header cells that apply to this cell.
headers: TableCellHeaders,
/// Tag nodes of the cell's content.
nodes: Vec<TagNode>,
}
impl TableCtxCell {
fn unwrap_kind(&self) -> TableCellKind {
self.kind.unwrap_or_else(|| unreachable!())
}
}
impl TableCtx {
fn new(id: TableId, table: Packed<TableElem>) -> Self {
Self { id, table: table.clone(), rows: Vec::new() }
}
fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> {
let cell = self.rows.get(y)?.get(x)?;
self.resolve_cell(cell)
}
fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut TableCtxCell> {
let cell = self.rows.get_mut(y)?.get_mut(x)?;
match cell {
GridCell::Cell(cell) => {
// HACK: Workaround for the second mutable borrow when resolving
// the spanned cell.
Some(unsafe { std::mem::transmute(cell) })
}
&mut GridCell::Spanned(x, y) => self.rows[y][x].as_cell_mut(),
GridCell::Missing => None,
}
}
fn contains(&self, cell: &Packed<TableCell>) -> bool {
let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!());
let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!());
self.get(x, y).is_some()
}
fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> {
match cell {
GridCell::Cell(cell) => Some(cell),
&GridCell::Spanned(x, y) => self.rows[y][x].as_cell(),
GridCell::Missing => None,
}
}
fn insert(&mut self, cell: Packed<TableCell>, nodes: Vec<TagNode>) {
let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!());
let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!());
let rowspan = cell.rowspan(StyleChain::default());
let colspan = cell.colspan(StyleChain::default());
let kind = cell.kind(StyleChain::default());
// Extend the table grid to fit this cell.
let required_height = y + rowspan.get();
let required_width = x + colspan.get();
if self.rows.len() < required_height {
self.rows
.resize(required_height, vec![GridCell::Missing; required_width]);
}
let row = &mut self.rows[y];
if row.len() < required_width {
row.resize_with(required_width, || GridCell::Missing);
}
// Store references to the cell for all spanned cells.
for i in y..y + rowspan.get() {
for j in x..x + colspan.get() {
self.rows[i][j] = GridCell::Spanned(x, y);
}
}
self.rows[y][x] = GridCell::Cell(TableCtxCell {
x: x as u32,
y: y as u32,
rowspan,
colspan,
kind,
headers: TableCellHeaders::NONE,
nodes,
});
}
fn build_table(mut self, mut nodes: Vec<TagNode>) -> Vec<TagNode> {
// Table layouting ensures that there are no overlapping cells, and that
// any gaps left by the user are filled with empty cells.
if self.rows.is_empty() {
return nodes;
}
let height = self.rows.len();
let width = self.rows[0].len();
// Only generate row groups such as `THead`, `TFoot`, and `TBody` if
// there are no rows with mixed cell kinds.
let mut gen_row_groups = true;
let row_kinds = (self.rows.iter())
.map(|row| {
row.iter()
.filter_map(|cell| self.resolve_cell(cell))
.map(|cell| cell.kind)
.fold(Smart::Auto, |a, b| {
if let Smart::Custom(TableCellKind::Header(_, scope)) = b {
gen_row_groups &= scope == TableHeaderScope::Column;
}
if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) {
gen_row_groups &= a == b;
}
a.or(b)
})
.unwrap_or(TableCellKind::Data)
})
.collect::<Vec<_>>();
// Fixup all missing cell kinds.
for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) {
let default_kind =
if gen_row_groups { row_kind } else { TableCellKind::Data };
for cell in row.iter_mut() {
let Some(cell) = cell.as_cell_mut() else { continue };
cell.kind = cell.kind.or(Smart::Custom(default_kind));
}
}
// Explicitly set the headers attribute for cells.
for x in 0..width {
let mut column_header = None;
for y in 0..height {
self.resolve_cell_headers(
(x, y),
&mut column_header,
TableHeaderScope::refers_to_column,
);
}
}
for y in 0..height {
let mut row_header = None;
for x in 0..width {
self.resolve_cell_headers(
(x, y),
&mut row_header,
TableHeaderScope::refers_to_row,
);
}
}
let mut chunk_kind = row_kinds[0];
let mut row_chunk = Vec::new();
for (row, row_kind) in self.rows.into_iter().zip(row_kinds) {
let row_nodes = row
.into_iter()
.filter_map(|cell| {
let cell = cell.into_cell()?;
let span = TableCellSpan {
rows: cell.rowspan.try_into().unwrap(),
cols: cell.colspan.try_into().unwrap(),
};
let tag = match cell.unwrap_kind() {
TableCellKind::Header(_, scope) => {
let id = table_cell_id(self.id, cell.x, cell.y);
let scope = table_header_scope(scope);
TagKind::TH(
TableHeaderCell::new(scope)
.with_span(span)
.with_headers(cell.headers),
)
.with_id(Some(id))
}
TableCellKind::Footer | TableCellKind::Data => TagKind::TD(
TableDataCell::new()
.with_span(span)
.with_headers(cell.headers),
)
.into(),
};
Some(TagNode::Group(tag, cell.nodes))
})
.collect();
let row = TagNode::Group(TagKind::TR.into(), row_nodes);
// Push the `TR` tags directly.
if !gen_row_groups {
nodes.push(row);
continue;
}
// Generate row groups.
if !should_group_rows(chunk_kind, row_kind) {
let tag = match chunk_kind {
TableCellKind::Header(..) => TagKind::THead,
TableCellKind::Footer => TagKind::TFoot,
TableCellKind::Data => TagKind::TBody,
};
nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk)));
chunk_kind = row_kind;
}
row_chunk.push(row);
}
if !row_chunk.is_empty() {
let tag = match chunk_kind {
TableCellKind::Header(..) => TagKind::THead,
TableCellKind::Footer => TagKind::TFoot,
TableCellKind::Data => TagKind::TBody,
};
nodes.push(TagNode::Group(tag.into(), row_chunk));
}
nodes
}
fn resolve_cell_headers<F>(
&mut self,
(x, y): (usize, usize),
current_header: &mut Option<(NonZeroU32, TagId)>,
refers_to_dir: F,
) where
F: Fn(&TableHeaderScope) -> bool,
{
let table_id = self.id;
let Some(cell) = self.get_mut(x, y) else { return };
if let Some((prev_level, cell_id)) = current_header.clone() {
// The `Headers` attribute is also set for parent headers.
let mut is_parent_header = true;
if let TableCellKind::Header(level, scope) = cell.unwrap_kind() {
if refers_to_dir(&scope) {
is_parent_header = prev_level < level;
}
}
if is_parent_header && !cell.headers.ids.contains(&cell_id) {
cell.headers.ids.push(cell_id.clone());
}
}
if let TableCellKind::Header(level, scope) = cell.unwrap_kind() {
if refers_to_dir(&scope) {
let tag_id = table_cell_id(table_id, x as u32, y as u32);
*current_header = Some((level, tag_id));
}
}
}
}
/// Returns whether two row kinds belong to the same row group, i.e. both are
/// headers, both are footers, or both are data rows.
fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool {
    matches!(
        (a, b),
        (TableCellKind::Header(..), TableCellKind::Header(..))
            | (TableCellKind::Footer, TableCellKind::Footer)
            | (TableCellKind::Data, TableCellKind::Data)
    )
}
/// Builds the tag ID of the cell at `(x, y)` in the table `table_id`.
///
/// The ID is the 12-byte concatenation of the table ID, `x`, and `y`. The
/// integers are encoded little-endian so that the emitted bytes do not depend
/// on the endianness of the machine producing the document.
fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId {
    let mut bytes = [0; 12];
    bytes[0..4].copy_from_slice(&table_id.0.to_le_bytes());
    bytes[4..8].copy_from_slice(&x.to_le_bytes());
    bytes[8..12].copy_from_slice(&y.to_le_bytes());
    TagId::from_bytes(&bytes)
}
/// A node of the intermediate tag tree that is later resolved into a
/// krilla [`Node`].
#[derive(Clone)]
pub(crate) enum TagNode {
/// A tag together with its child nodes.
Group(Tag, Vec<TagNode>),
/// A leaf holding a content identifier.
Leaf(Identifier),
/// Allows inserting a placeholder into the tag tree.
/// Currently used for [`krilla::page::Page::add_tagged_annotation`].
Placeholder(Placeholder),
}
/// Index into the `placeholders` list of [`Tags`].
#[derive(Clone, Copy)]
pub(crate) struct Placeholder(usize);
impl Tags {
pub(crate) fn new() -> Self {
Self {
stack: Vec::new(),
placeholders: Vec::new(),
in_artifact: None,
tree: Vec::new(),
link_id: LinkId(0),
table_id: TableId(0),
}
}
pub(crate) fn reserve_placeholder(&mut self) -> Placeholder {
let idx = self.placeholders.len();
self.placeholders.push(OnceCell::new());
Placeholder(idx)
}
pub(crate) fn init_placeholder(&mut self, placeholder: Placeholder, node: Node) {
self.placeholders[placeholder.0]
.set(node)
.map_err(|_| ())
.expect("placeholder to be uninitialized");
}
pub(crate) fn take_placeholder(&mut self, placeholder: Placeholder) -> Node {
self.placeholders[placeholder.0]
.take()
.expect("initialized placeholder node")
}
/// Returns the current parent's list of children and the structure type ([Tag]).
/// In case of the document root the structure type will be `None`.
pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> {
self.stack.last_mut().map(|e| &mut e.kind)
}
pub(crate) fn push(&mut self, node: TagNode) {
if let Some(entry) = self.stack.last_mut() {
entry.nodes.push(node);
} else {
self.tree.push(node);
}
}
pub(crate) fn build_tree(&mut self) -> TagTree {
let children = std::mem::take(&mut self.tree)
.into_iter()
.map(|node| self.resolve_node(node))
.collect::<Vec<_>>();
TagTree::from(children)
}
/// Resolves [`Placeholder`] nodes.
fn resolve_node(&mut self, node: TagNode) -> Node {
match node {
TagNode::Group(tag, nodes) => {
let children = nodes
.into_iter()
.map(|node| self.resolve_node(node))
.collect::<Vec<_>>();
Node::Group(TagGroup::with_children(tag, children))
}
TagNode::Leaf(identifier) => Node::Leaf(identifier),
TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder),
}
}
fn context_supports(&self, _tag: &StackEntryKind) -> bool {
// TODO: generate using: https://pdfa.org/resource/iso-ts-32005-hierarchical-inclusion-rules/
true
}
fn next_link_id(&mut self) -> LinkId {
self.link_id.0 += 1;
self.link_id
}
fn next_table_id(&mut self) -> TableId {
self.table_id.0 += 1;
self.table_id
}
}
/// Guard around a surface on which tagged content has been started.
///
/// Automatically calls [`Surface::end_tagged`] when dropped.
pub(crate) struct TagHandle<'a, 'b> {
/// The surface that is borrowed for the lifetime of the guard.
surface: &'b mut Surface<'a>,
}
/// Ends the tagged content section when the handle goes out of scope.
impl Drop for TagHandle<'_, '_> {
fn drop(&mut self) {
self.surface.end_tagged();
}
}
impl<'a> TagHandle<'a, '_> {
    /// Reborrows the wrapped surface for as long as the handle is borrowed.
    pub(crate) fn surface<'c>(&'c mut self) -> &'c mut Surface<'a> {
        &mut *self.surface
    }
}
/// Starts a tagged content section using [`ContentTag::Other`].
///
/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`]
/// when dropped.
pub(crate) fn start_marked<'a, 'b>(
gc: &mut GlobalContext,
surface: &'b mut Surface<'a>,
) -> TagHandle<'a, 'b> {
start_content(gc, surface, ContentTag::Other)
}
/// Starts a tagged content section using [`ContentTag::Span`] with `span`.
///
/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`]
/// when dropped.
pub(crate) fn start_span<'a, 'b>(
gc: &mut GlobalContext,
surface: &'b mut Surface<'a>,
span: SpanTag,
) -> TagHandle<'a, 'b> {
start_content(gc, surface, ContentTag::Span(span))
}
/// Starts a tagged content section on `surface` and records its identifier as
/// a leaf in the tag tree.
///
/// The requested `content` tag is replaced by an artifact tag when an artifact
/// is currently open, or when the innermost stack entry is a table.
fn start_content<'a, 'b>(
    gc: &mut GlobalContext,
    surface: &'b mut Surface<'a>,
    content: ContentTag,
) -> TagHandle<'a, 'b> {
    let parent_is_table = matches!(
        gc.tags.stack.last().map(|e| &e.kind),
        Some(StackEntryKind::Table(_))
    );
    let content = match gc.tags.in_artifact {
        Some((_, kind)) => ContentTag::Artifact(artifact_type(kind)),
        // Mark any direct child of a table as an artifact. Any real content
        // will be wrapped inside a `TableCell`.
        None if parent_is_table => ContentTag::Artifact(ArtifactType::Other),
        None => content,
    };

    let id = surface.start_tagged(content);
    gc.tags.push(TagNode::Leaf(id));
    TagHandle { surface }
}
/// Adds every link annotation found in the page frame to `page` and fills the
/// placeholder reserved for it with the resulting annotation leaf.
pub(crate) fn add_annotations(
    gc: &mut GlobalContext,
    page: &mut Page,
    annotations: Vec<LinkAnnotation>,
) {
    for LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } in
        annotations
    {
        let link = krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target);
        let annot = krilla::annotation::Annotation::new_link(link, alt);
        let annot_id = page.add_tagged_annotation(annot);
        gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id));
    }
}
mod outline;
mod table;
pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) {
if gc.tags.in_artifact.is_some() {
@ -773,16 +182,226 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) {
gc.tags.push(node);
}
fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) {
gc.tags.in_artifact = Some((loc, kind));
/// Adds every link annotation found in the page frame to `page` and fills the
/// placeholder reserved for it with the resulting annotation leaf.
pub(crate) fn add_annotations(
    gc: &mut GlobalContext,
    page: &mut Page,
    annotations: Vec<LinkAnnotation>,
) {
    for LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } in
        annotations
    {
        let link = krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target);
        let annot = krilla::annotation::Annotation::new_link(link, alt);
        let annot_id = page.add_tagged_annotation(annot);
        gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id));
    }
}
fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope {
match scope {
TableHeaderScope::Both => krilla::tagging::TableHeaderScope::Both,
TableHeaderScope::Column => krilla::tagging::TableHeaderScope::Column,
TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row,
/// Tagging state collected while exporting a document.
///
/// Finished nodes end up in `tree`; nodes that belong to a still-open group
/// are buffered in the top-most entry of `stack`.
pub(crate) struct Tags {
/// The intermediary stack of nested tag groups.
pub(crate) stack: Vec<StackEntry>,
/// A list of placeholders corresponding to a [`TagNode::Placeholder`].
pub(crate) placeholders: Vec<OnceCell<Node>>,
/// Location and kind of the artifact that is currently open, if any.
/// While this is `Some`, started content is tagged as an artifact.
pub(crate) in_artifact: Option<(Location, ArtifactKind)>,
/// Used to group multiple link annotations using quad points.
pub(crate) link_id: LinkId,
/// Used to generate IDs referenced in table `Headers` attributes.
/// The IDs must be document wide unique.
pub(crate) table_id: TableId,
/// The output.
pub(crate) tree: Vec<TagNode>,
}
impl Tags {
pub(crate) fn new() -> Self {
Self {
stack: Vec::new(),
placeholders: Vec::new(),
in_artifact: None,
tree: Vec::new(),
link_id: LinkId(0),
table_id: TableId(0),
}
}
pub(crate) fn reserve_placeholder(&mut self) -> Placeholder {
let idx = self.placeholders.len();
self.placeholders.push(OnceCell::new());
Placeholder(idx)
}
pub(crate) fn init_placeholder(&mut self, placeholder: Placeholder, node: Node) {
self.placeholders[placeholder.0]
.set(node)
.map_err(|_| ())
.expect("placeholder to be uninitialized");
}
pub(crate) fn take_placeholder(&mut self, placeholder: Placeholder) -> Node {
self.placeholders[placeholder.0]
.take()
.expect("initialized placeholder node")
}
/// Returns the current parent's list of children and the structure type ([Tag]).
/// In case of the document root the structure type will be `None`.
pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> {
self.stack.last_mut().map(|e| &mut e.kind)
}
pub(crate) fn push(&mut self, node: TagNode) {
if let Some(entry) = self.stack.last_mut() {
entry.nodes.push(node);
} else {
self.tree.push(node);
}
}
pub(crate) fn build_tree(&mut self) -> TagTree {
let children = std::mem::take(&mut self.tree)
.into_iter()
.map(|node| self.resolve_node(node))
.collect::<Vec<_>>();
TagTree::from(children)
}
/// Resolves [`Placeholder`] nodes.
fn resolve_node(&mut self, node: TagNode) -> Node {
match node {
TagNode::Group(tag, nodes) => {
let children = nodes
.into_iter()
.map(|node| self.resolve_node(node))
.collect::<Vec<_>>();
Node::Group(TagGroup::with_children(tag, children))
}
TagNode::Leaf(identifier) => Node::Leaf(identifier),
TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder),
}
}
fn context_supports(&self, _tag: &StackEntryKind) -> bool {
// TODO: generate using: https://pdfa.org/resource/iso-ts-32005-hierarchical-inclusion-rules/
true
}
fn next_link_id(&mut self) -> LinkId {
self.link_id.0 += 1;
self.link_id
}
fn next_table_id(&mut self) -> TableId {
self.table_id.0 += 1;
self.table_id
}
}
/// Numeric identifier of a table, handed out by `Tags::next_table_id`.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct TableId(u32);
/// Numeric identifier of a link, handed out by `Tags::next_link_id`.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct LinkId(u32);
/// One open tag group on the [`Tags`] stack.
pub(crate) struct StackEntry {
/// Location associated with this entry.
pub(crate) loc: Location,
/// What kind of group this entry is, with its associated data.
pub(crate) kind: StackEntryKind,
/// Child nodes pushed while this entry was the top of the stack.
pub(crate) nodes: Vec<TagNode>,
}
/// The kind of group a [`StackEntry`] represents, with its associated data.
pub(crate) enum StackEntryKind {
/// A group that carries a ready-made [`Tag`].
Standard(Tag),
/// An outline, carrying the context used to nest its entries.
Outline(OutlineCtx),
/// A single outline entry.
OutlineEntry(Packed<OutlineEntry>),
/// A table, carrying the context that collects its cells.
Table(TableCtx),
/// A single table cell.
TableCell(Packed<TableCell>),
/// A link, with the ID used to group its annotations.
Link(LinkId, Packed<LinkMarker>),
}
impl StackEntryKind {
    /// Gives mutable access to the wrapped [`Tag`] when this is the
    /// `Standard` variant, and `None` for every other variant.
    pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> {
        match self {
            Self::Standard(tag) => Some(tag),
            _ => None,
        }
    }
}
/// A node of the intermediate tag tree that is later resolved into a
/// krilla [`Node`].
#[derive(Clone)]
pub(crate) enum TagNode {
/// A tag together with its child nodes.
Group(Tag, Vec<TagNode>),
/// A leaf holding a content identifier.
Leaf(Identifier),
/// Allows inserting a placeholder into the tag tree.
/// Currently used for [`krilla::page::Page::add_tagged_annotation`].
Placeholder(Placeholder),
}
/// Index into the `placeholders` list of [`Tags`].
#[derive(Clone, Copy)]
pub(crate) struct Placeholder(usize);
/// Guard around a surface on which tagged content has been started.
///
/// Automatically calls [`Surface::end_tagged`] when dropped.
pub(crate) struct TagHandle<'a, 'b> {
/// The surface that is borrowed for the lifetime of the guard.
surface: &'b mut Surface<'a>,
}
/// Ends the tagged content section when the handle goes out of scope.
impl Drop for TagHandle<'_, '_> {
fn drop(&mut self) {
self.surface.end_tagged();
}
}
impl<'a> TagHandle<'a, '_> {
    /// Reborrows the wrapped surface for as long as the handle is borrowed.
    pub(crate) fn surface<'c>(&'c mut self) -> &'c mut Surface<'a> {
        &mut *self.surface
    }
}
/// Starts a tagged content section using [`ContentTag::Other`].
///
/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`]
/// when dropped.
pub(crate) fn start_marked<'a, 'b>(
gc: &mut GlobalContext,
surface: &'b mut Surface<'a>,
) -> TagHandle<'a, 'b> {
start_content(gc, surface, ContentTag::Other)
}
/// Starts a tagged content section using [`ContentTag::Span`] with `span`.
///
/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`]
/// when dropped.
pub(crate) fn start_span<'a, 'b>(
gc: &mut GlobalContext,
surface: &'b mut Surface<'a>,
span: SpanTag,
) -> TagHandle<'a, 'b> {
start_content(gc, surface, ContentTag::Span(span))
}
/// Starts a tagged content section on `surface` and records its identifier as
/// a leaf in the tag tree.
///
/// The requested `content` tag is replaced by an artifact tag when an artifact
/// is currently open, or when the innermost stack entry is a table.
fn start_content<'a, 'b>(
    gc: &mut GlobalContext,
    surface: &'b mut Surface<'a>,
    content: ContentTag,
) -> TagHandle<'a, 'b> {
    let parent_is_table = matches!(
        gc.tags.stack.last().map(|e| &e.kind),
        Some(StackEntryKind::Table(_))
    );
    let content = match gc.tags.in_artifact {
        Some((_, kind)) => ContentTag::Artifact(artifact_type(kind)),
        // Mark any direct child of a table as an artifact. Any real content
        // will be wrapped inside a `TableCell`.
        None if parent_is_table => ContentTag::Artifact(ArtifactType::Other),
        None => content,
    };

    let id = surface.start_tagged(content);
    gc.tags.push(TagNode::Leaf(id));
    TagHandle { surface }
}
/// Marks an artifact of the given `kind`, located at `loc`, as currently open
/// by recording it in `gc.tags.in_artifact`.
fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) {
gc.tags.in_artifact = Some((loc, kind));
}
fn artifact_type(kind: ArtifactKind) -> ArtifactType {

View File

@ -0,0 +1,74 @@
use krilla::tagging::TagKind;
use typst_library::foundations::Packed;
use typst_library::model::OutlineEntry;
use crate::tags::TagNode;
/// Tracks the nesting of outline entries while an outline is being tagged.
pub(crate) struct OutlineCtx {
/// Currently open sub-sections, outermost first.
stack: Vec<OutlineSection>,
}
impl OutlineCtx {
    /// Creates a context with no open sub-sections.
    pub(crate) fn new() -> Self {
        Self { stack: Vec::new() }
    }

    /// Adds an outline entry consisting of `nodes` at the nesting depth given
    /// by `entry.level`.
    ///
    /// Afterwards exactly `level - 1` sub-sections are open. The entry is
    /// wrapped in a `TOCI` group and appended to the innermost open section,
    /// or to `outline_nodes` if none is open.
    pub(crate) fn insert(
        &mut self,
        outline_nodes: &mut Vec<TagNode>,
        entry: Packed<OutlineEntry>,
        nodes: Vec<TagNode>,
    ) {
        let depth = entry.level.get() - 1;

        // Close every section that is nested deeper than this entry ...
        while self.stack.len() > depth {
            self.finish_section(outline_nodes);
        }
        // ... or open the intermediate sections that are still missing.
        if self.stack.len() < depth {
            self.stack.resize_with(depth, OutlineSection::new);
        }

        let item = TagNode::Group(TagKind::TOCI.into(), nodes);
        self.push(outline_nodes, item);
    }

    /// Closes the innermost open section and appends the resulting `TOC` group
    /// to its parent.
    ///
    /// Panics if no section is open.
    fn finish_section(&mut self, outline_nodes: &mut Vec<TagNode>) {
        let finished = self.stack.pop().unwrap();
        self.push(outline_nodes, finished.into_tag());
    }

    /// Appends `entry` to the innermost open section, or to `outline_nodes`
    /// when no section is open.
    fn push(&mut self, outline_nodes: &mut Vec<TagNode>, entry: TagNode) {
        if let Some(section) = self.stack.last_mut() {
            section.push(entry);
        } else {
            outline_nodes.push(entry);
        }
    }

    /// Closes all sections that are still open and returns the completed list
    /// of outline nodes.
    pub(crate) fn build_outline(
        mut self,
        mut outline_nodes: Vec<TagNode>,
    ) -> Vec<TagNode> {
        // Each call closes exactly one section, so one pass per open section
        // empties the stack.
        for _ in 0..self.stack.len() {
            self.finish_section(&mut outline_nodes);
        }
        outline_nodes
    }
}
/// An open outline sub-section that is turned into a `TOC` group by `into_tag`.
pub(crate) struct OutlineSection {
/// Nodes collected for this section so far.
entries: Vec<TagNode>,
}
impl OutlineSection {
    /// Creates a section without any entries.
    const fn new() -> Self {
        Self { entries: Vec::new() }
    }

    /// Appends `entry` to this section.
    fn push(&mut self, entry: TagNode) {
        self.entries.push(entry)
    }

    /// Wraps the collected entries in a `TOC` group node.
    fn into_tag(self) -> TagNode {
        let Self { entries } = self;
        TagNode::Group(TagKind::TOC.into(), entries)
    }
}

View File

@ -0,0 +1,330 @@
use std::num::{NonZeroU32, NonZeroUsize};
use krilla::tagging::{
TableCellHeaders, TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId,
TagKind,
};
use typst_library::foundations::{Packed, Smart, StyleChain};
use typst_library::model::{TableCell, TableCellKind, TableElem, TableHeaderScope};
use crate::tags::{TableId, TagNode};
/// Collects the cells of one table until the table's tag nodes are built.
pub(crate) struct TableCtx {
/// Identifier of this table, used when deriving header cell IDs.
pub(crate) id: TableId,
/// The table element this context belongs to.
pub(crate) table: Packed<TableElem>,
/// Cell grid, indexed as `rows[y][x]`.
rows: Vec<Vec<GridCell>>,
}
impl TableCtx {
pub(crate) fn new(id: TableId, table: Packed<TableElem>) -> Self {
Self { id, table: table.clone(), rows: Vec::new() }
}
fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> {
let cell = self.rows.get(y)?.get(x)?;
self.resolve_cell(cell)
}
fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut TableCtxCell> {
let cell = self.rows.get_mut(y)?.get_mut(x)?;
match cell {
GridCell::Cell(cell) => {
// HACK: Workaround for the second mutable borrow when resolving
// the spanned cell.
Some(unsafe { std::mem::transmute(cell) })
}
&mut GridCell::Spanned(x, y) => self.rows[y][x].as_cell_mut(),
GridCell::Missing => None,
}
}
pub(crate) fn contains(&self, cell: &Packed<TableCell>) -> bool {
let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!());
let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!());
self.get(x, y).is_some()
}
fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> {
match cell {
GridCell::Cell(cell) => Some(cell),
&GridCell::Spanned(x, y) => self.rows[y][x].as_cell(),
GridCell::Missing => None,
}
}
pub(crate) fn insert(&mut self, cell: Packed<TableCell>, nodes: Vec<TagNode>) {
let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!());
let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!());
let rowspan = cell.rowspan(StyleChain::default());
let colspan = cell.colspan(StyleChain::default());
let kind = cell.kind(StyleChain::default());
// Extend the table grid to fit this cell.
let required_height = y + rowspan.get();
let required_width = x + colspan.get();
if self.rows.len() < required_height {
self.rows
.resize(required_height, vec![GridCell::Missing; required_width]);
}
let row = &mut self.rows[y];
if row.len() < required_width {
row.resize_with(required_width, || GridCell::Missing);
}
// Store references to the cell for all spanned cells.
for i in y..y + rowspan.get() {
for j in x..x + colspan.get() {
self.rows[i][j] = GridCell::Spanned(x, y);
}
}
self.rows[y][x] = GridCell::Cell(TableCtxCell {
x: x as u32,
y: y as u32,
rowspan,
colspan,
kind,
headers: TableCellHeaders::NONE,
nodes,
});
}
pub(crate) fn build_table(mut self, mut nodes: Vec<TagNode>) -> Vec<TagNode> {
// Table layouting ensures that there are no overlapping cells, and that
// any gaps left by the user are filled with empty cells.
if self.rows.is_empty() {
return nodes;
}
let height = self.rows.len();
let width = self.rows[0].len();
// Only generate row groups such as `THead`, `TFoot`, and `TBody` if
// there are no rows with mixed cell kinds.
let mut gen_row_groups = true;
let row_kinds = (self.rows.iter())
.map(|row| {
row.iter()
.filter_map(|cell| self.resolve_cell(cell))
.map(|cell| cell.kind)
.fold(Smart::Auto, |a, b| {
if let Smart::Custom(TableCellKind::Header(_, scope)) = b {
gen_row_groups &= scope == TableHeaderScope::Column;
}
if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) {
gen_row_groups &= a == b;
}
a.or(b)
})
.unwrap_or(TableCellKind::Data)
})
.collect::<Vec<_>>();
// Fixup all missing cell kinds.
for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) {
let default_kind =
if gen_row_groups { row_kind } else { TableCellKind::Data };
for cell in row.iter_mut() {
let Some(cell) = cell.as_cell_mut() else { continue };
cell.kind = cell.kind.or(Smart::Custom(default_kind));
}
}
// Explicitly set the headers attribute for cells.
for x in 0..width {
let mut column_header = None;
for y in 0..height {
self.resolve_cell_headers(
(x, y),
&mut column_header,
TableHeaderScope::refers_to_column,
);
}
}
for y in 0..height {
let mut row_header = None;
for x in 0..width {
self.resolve_cell_headers(
(x, y),
&mut row_header,
TableHeaderScope::refers_to_row,
);
}
}
let mut chunk_kind = row_kinds[0];
let mut row_chunk = Vec::new();
for (row, row_kind) in self.rows.into_iter().zip(row_kinds) {
let row_nodes = row
.into_iter()
.filter_map(|cell| {
let cell = cell.into_cell()?;
let span = TableCellSpan {
rows: cell.rowspan.try_into().unwrap(),
cols: cell.colspan.try_into().unwrap(),
};
let tag = match cell.unwrap_kind() {
TableCellKind::Header(_, scope) => {
let id = table_cell_id(self.id, cell.x, cell.y);
let scope = table_header_scope(scope);
TagKind::TH(
TableHeaderCell::new(scope)
.with_span(span)
.with_headers(cell.headers),
)
.with_id(Some(id))
}
TableCellKind::Footer | TableCellKind::Data => TagKind::TD(
TableDataCell::new()
.with_span(span)
.with_headers(cell.headers),
)
.into(),
};
Some(TagNode::Group(tag, cell.nodes))
})
.collect();
let row = TagNode::Group(TagKind::TR.into(), row_nodes);
// Push the `TR` tags directly.
if !gen_row_groups {
nodes.push(row);
continue;
}
// Generate row groups.
if !should_group_rows(chunk_kind, row_kind) {
let tag = match chunk_kind {
TableCellKind::Header(..) => TagKind::THead,
TableCellKind::Footer => TagKind::TFoot,
TableCellKind::Data => TagKind::TBody,
};
nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk)));
chunk_kind = row_kind;
}
row_chunk.push(row);
}
if !row_chunk.is_empty() {
let tag = match chunk_kind {
TableCellKind::Header(..) => TagKind::THead,
TableCellKind::Footer => TagKind::TFoot,
TableCellKind::Data => TagKind::TBody,
};
nodes.push(TagNode::Group(tag.into(), row_chunk));
}
nodes
}
fn resolve_cell_headers<F>(
&mut self,
(x, y): (usize, usize),
current_header: &mut Option<(NonZeroU32, TagId)>,
refers_to_dir: F,
) where
F: Fn(&TableHeaderScope) -> bool,
{
let table_id = self.id;
let Some(cell) = self.get_mut(x, y) else { return };
if let Some((prev_level, cell_id)) = current_header.clone() {
// The `Headers` attribute is also set for parent headers.
let mut is_parent_header = true;
if let TableCellKind::Header(level, scope) = cell.unwrap_kind() {
if refers_to_dir(&scope) {
is_parent_header = prev_level < level;
}
}
if is_parent_header && !cell.headers.ids.contains(&cell_id) {
cell.headers.ids.push(cell_id.clone());
}
}
if let TableCellKind::Header(level, scope) = cell.unwrap_kind() {
if refers_to_dir(&scope) {
let tag_id = table_cell_id(table_id, x as u32, y as u32);
*current_header = Some((level, tag_id));
}
}
}
}
/// One position in the grid of a [`TableCtx`].
#[derive(Clone, Default)]
enum GridCell {
/// A cell, stored at its own `(x, y)` position.
Cell(TableCtxCell),
/// A position covered by a spanning cell; holds the `(x, y)` position at
/// which that cell is stored.
Spanned(usize, usize),
/// No cell has been inserted at this position.
#[default]
Missing,
}
impl GridCell {
    /// Borrows the contained cell; `None` unless this is the `Cell` variant.
    fn as_cell(&self) -> Option<&TableCtxCell> {
        match self {
            Self::Cell(cell) => Some(cell),
            Self::Spanned(..) | Self::Missing => None,
        }
    }

    /// Mutably borrows the contained cell; `None` unless this is the `Cell`
    /// variant.
    fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> {
        match self {
            Self::Cell(cell) => Some(cell),
            Self::Spanned(..) | Self::Missing => None,
        }
    }

    /// Consumes `self` and returns the contained cell; `None` unless this is
    /// the `Cell` variant.
    fn into_cell(self) -> Option<TableCtxCell> {
        match self {
            Self::Cell(cell) => Some(cell),
            Self::Spanned(..) | Self::Missing => None,
        }
    }
}
/// Data recorded for one table cell.
#[derive(Clone)]
struct TableCtxCell {
/// Column of the cell.
x: u32,
/// Row of the cell.
y: u32,
/// Number of rows the cell spans.
rowspan: NonZeroUsize,
/// Number of columns the cell spans.
colspan: NonZeroUsize,
/// Kind of the cell; may be `Smart::Auto` until a default has been filled in.
kind: Smart<TableCellKind>,
/// IDs of the header cells that apply to this cell.
headers: TableCellHeaders,
/// Tag nodes of the cell's content.
nodes: Vec<TagNode>,
}
impl TableCtxCell {
fn unwrap_kind(&self) -> TableCellKind {
self.kind.unwrap_or_else(|| unreachable!())
}
}
/// Returns whether two row kinds belong to the same row group, i.e. both are
/// headers, both are footers, or both are data rows.
fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool {
    matches!(
        (a, b),
        (TableCellKind::Header(..), TableCellKind::Header(..))
            | (TableCellKind::Footer, TableCellKind::Footer)
            | (TableCellKind::Data, TableCellKind::Data)
    )
}
/// Builds the tag ID of the cell at `(x, y)` in the table `table_id`.
///
/// The ID is the 12-byte concatenation of the table ID, `x`, and `y`. The
/// integers are encoded little-endian so that the emitted bytes do not depend
/// on the endianness of the machine producing the document.
fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId {
    let mut bytes = [0; 12];
    bytes[0..4].copy_from_slice(&table_id.0.to_le_bytes());
    bytes[4..8].copy_from_slice(&x.to_le_bytes());
    bytes[8..12].copy_from_slice(&y.to_le_bytes());
    TagId::from_bytes(&bytes)
}
/// Converts a typst table header scope into the krilla variant of the same
/// name.
fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope {
    use krilla::tagging::TableHeaderScope as PdfScope;

    match scope {
        TableHeaderScope::Row => PdfScope::Row,
        TableHeaderScope::Column => PdfScope::Column,
        TableHeaderScope::Both => PdfScope::Both,
    }
}