diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags/mod.rs similarity index 51% rename from crates/typst-pdf/src/tags.rs rename to crates/typst-pdf/src/tags/mod.rs index 9f49024f1..99b52d555 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -1,621 +1,30 @@ use std::cell::OnceCell; -use std::num::{NonZeroU32, NonZeroUsize}; +use std::num::NonZeroU32; use ecow::EcoString; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, ContentTag, Identifier, Node, SpanTag, TableCellHeaders, TableCellSpan, - TableDataCell, TableHeaderCell, Tag, TagBuilder, TagGroup, TagId, TagKind, TagTree, + ArtifactType, ContentTag, Identifier, Node, SpanTag, Tag, TagBuilder, TagGroup, + TagKind, TagTree, }; -use typst_library::foundations::{Content, LinkMarker, Packed, Smart, StyleChain}; +use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain}; use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody, - OutlineEntry, TableCell, TableCellKind, TableElem, TableHeaderScope, + OutlineEntry, TableCell, TableElem, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; use typst_library::visualize::ImageElem; use crate::convert::GlobalContext; use crate::link::LinkAnnotation; +use crate::tags::outline::OutlineCtx; +use crate::tags::table::TableCtx; -pub(crate) struct Tags { - /// The intermediary stack of nested tag groups. - pub(crate) stack: Vec, - /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. - pub(crate) placeholders: Vec>, - pub(crate) in_artifact: Option<(Location, ArtifactKind)>, - /// Used to group multiple link annotations using quad points. - pub(crate) link_id: LinkId, - /// Used to generate IDs referenced in table `Headers` attributes. - /// The IDs must be document wide unique. 
- pub(crate) table_id: TableId, - - /// The output. - pub(crate) tree: Vec, -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub(crate) struct TableId(u32); - -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub(crate) struct LinkId(u32); - -pub(crate) struct StackEntry { - pub(crate) loc: Location, - pub(crate) kind: StackEntryKind, - pub(crate) nodes: Vec, -} - -pub(crate) enum StackEntryKind { - Standard(Tag), - Outline(OutlineCtx), - OutlineEntry(Packed), - Table(TableCtx), - TableCell(Packed), - Link(LinkId, Packed), -} - -impl StackEntryKind { - pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> { - if let Self::Standard(v) = self { - Some(v) - } else { - None - } - } -} - -pub(crate) struct OutlineCtx { - stack: Vec, -} - -pub(crate) struct OutlineSection { - entries: Vec, -} - -impl OutlineSection { - const fn new() -> Self { - OutlineSection { entries: Vec::new() } - } - - fn push(&mut self, entry: TagNode) { - self.entries.push(entry); - } - - fn into_tag(self) -> TagNode { - TagNode::Group(TagKind::TOC.into(), self.entries) - } -} - -impl OutlineCtx { - fn new() -> Self { - Self { stack: Vec::new() } - } - - fn insert( - &mut self, - outline_nodes: &mut Vec, - entry: Packed, - nodes: Vec, - ) { - let expected_len = entry.level.get() - 1; - if self.stack.len() < expected_len { - self.stack.resize_with(expected_len, || OutlineSection::new()); - } else { - while self.stack.len() > expected_len { - self.finish_section(outline_nodes); - } - } - - let section_entry = TagNode::Group(TagKind::TOCI.into(), nodes); - self.push(outline_nodes, section_entry); - } - - fn finish_section(&mut self, outline_nodes: &mut Vec) { - let sub_section = self.stack.pop().unwrap().into_tag(); - self.push(outline_nodes, sub_section); - } - - fn push(&mut self, outline_nodes: &mut Vec, entry: TagNode) { - match self.stack.last_mut() { - Some(section) => section.push(entry), - None => outline_nodes.push(entry), - } - } - - fn build_outline(mut self, mut outline_nodes: Vec) 
-> Vec { - while self.stack.len() > 0 { - self.finish_section(&mut outline_nodes); - } - outline_nodes - } -} - -pub(crate) struct TableCtx { - id: TableId, - table: Packed, - rows: Vec>, -} - -#[derive(Clone, Default)] -enum GridCell { - Cell(TableCtxCell), - Spanned(usize, usize), - #[default] - Missing, -} - -impl GridCell { - fn as_cell(&self) -> Option<&TableCtxCell> { - if let Self::Cell(v) = self { - Some(v) - } else { - None - } - } - - fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> { - if let Self::Cell(v) = self { - Some(v) - } else { - None - } - } - - fn into_cell(self) -> Option { - if let Self::Cell(v) = self { - Some(v) - } else { - None - } - } -} - -#[derive(Clone)] -struct TableCtxCell { - x: u32, - y: u32, - rowspan: NonZeroUsize, - colspan: NonZeroUsize, - kind: Smart, - headers: TableCellHeaders, - nodes: Vec, -} - -impl TableCtxCell { - fn unwrap_kind(&self) -> TableCellKind { - self.kind.unwrap_or_else(|| unreachable!()) - } -} - -impl TableCtx { - fn new(id: TableId, table: Packed) -> Self { - Self { id, table: table.clone(), rows: Vec::new() } - } - - fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> { - let cell = self.rows.get(y)?.get(x)?; - self.resolve_cell(cell) - } - - fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut TableCtxCell> { - let cell = self.rows.get_mut(y)?.get_mut(x)?; - match cell { - GridCell::Cell(cell) => { - // HACK: Workaround for the second mutable borrow when resolving - // the spanned cell. 
- Some(unsafe { std::mem::transmute(cell) }) - } - &mut GridCell::Spanned(x, y) => self.rows[y][x].as_cell_mut(), - GridCell::Missing => None, - } - } - - fn contains(&self, cell: &Packed) -> bool { - let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); - let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); - self.get(x, y).is_some() - } - - fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> { - match cell { - GridCell::Cell(cell) => Some(cell), - &GridCell::Spanned(x, y) => self.rows[y][x].as_cell(), - GridCell::Missing => None, - } - } - - fn insert(&mut self, cell: Packed, nodes: Vec) { - let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); - let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); - let rowspan = cell.rowspan(StyleChain::default()); - let colspan = cell.colspan(StyleChain::default()); - let kind = cell.kind(StyleChain::default()); - - // Extend the table grid to fit this cell. - let required_height = y + rowspan.get(); - let required_width = x + colspan.get(); - if self.rows.len() < required_height { - self.rows - .resize(required_height, vec![GridCell::Missing; required_width]); - } - let row = &mut self.rows[y]; - if row.len() < required_width { - row.resize_with(required_width, || GridCell::Missing); - } - - // Store references to the cell for all spanned cells. - for i in y..y + rowspan.get() { - for j in x..x + colspan.get() { - self.rows[i][j] = GridCell::Spanned(x, y); - } - } - - self.rows[y][x] = GridCell::Cell(TableCtxCell { - x: x as u32, - y: y as u32, - rowspan, - colspan, - kind, - headers: TableCellHeaders::NONE, - nodes, - }); - } - - fn build_table(mut self, mut nodes: Vec) -> Vec { - // Table layouting ensures that there are no overlapping cells, and that - // any gaps left by the user are filled with empty cells. 
- if self.rows.is_empty() { - return nodes; - } - let height = self.rows.len(); - let width = self.rows[0].len(); - - // Only generate row groups such as `THead`, `TFoot`, and `TBody` if - // there are no rows with mixed cell kinds. - let mut gen_row_groups = true; - let row_kinds = (self.rows.iter()) - .map(|row| { - row.iter() - .filter_map(|cell| self.resolve_cell(cell)) - .map(|cell| cell.kind) - .fold(Smart::Auto, |a, b| { - if let Smart::Custom(TableCellKind::Header(_, scope)) = b { - gen_row_groups &= scope == TableHeaderScope::Column; - } - if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) { - gen_row_groups &= a == b; - } - a.or(b) - }) - .unwrap_or(TableCellKind::Data) - }) - .collect::>(); - - // Fixup all missing cell kinds. - for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) { - let default_kind = - if gen_row_groups { row_kind } else { TableCellKind::Data }; - for cell in row.iter_mut() { - let Some(cell) = cell.as_cell_mut() else { continue }; - cell.kind = cell.kind.or(Smart::Custom(default_kind)); - } - } - - // Explicitly set the headers attribute for cells. 
- for x in 0..width { - let mut column_header = None; - for y in 0..height { - self.resolve_cell_headers( - (x, y), - &mut column_header, - TableHeaderScope::refers_to_column, - ); - } - } - for y in 0..height { - let mut row_header = None; - for x in 0..width { - self.resolve_cell_headers( - (x, y), - &mut row_header, - TableHeaderScope::refers_to_row, - ); - } - } - - let mut chunk_kind = row_kinds[0]; - let mut row_chunk = Vec::new(); - for (row, row_kind) in self.rows.into_iter().zip(row_kinds) { - let row_nodes = row - .into_iter() - .filter_map(|cell| { - let cell = cell.into_cell()?; - let span = TableCellSpan { - rows: cell.rowspan.try_into().unwrap(), - cols: cell.colspan.try_into().unwrap(), - }; - let tag = match cell.unwrap_kind() { - TableCellKind::Header(_, scope) => { - let id = table_cell_id(self.id, cell.x, cell.y); - let scope = table_header_scope(scope); - TagKind::TH( - TableHeaderCell::new(scope) - .with_span(span) - .with_headers(cell.headers), - ) - .with_id(Some(id)) - } - TableCellKind::Footer | TableCellKind::Data => TagKind::TD( - TableDataCell::new() - .with_span(span) - .with_headers(cell.headers), - ) - .into(), - }; - - Some(TagNode::Group(tag, cell.nodes)) - }) - .collect(); - - let row = TagNode::Group(TagKind::TR.into(), row_nodes); - - // Push the `TR` tags directly. - if !gen_row_groups { - nodes.push(row); - continue; - } - - // Generate row groups. - if !should_group_rows(chunk_kind, row_kind) { - let tag = match chunk_kind { - TableCellKind::Header(..) => TagKind::THead, - TableCellKind::Footer => TagKind::TFoot, - TableCellKind::Data => TagKind::TBody, - }; - nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk))); - - chunk_kind = row_kind; - } - row_chunk.push(row); - } - - if !row_chunk.is_empty() { - let tag = match chunk_kind { - TableCellKind::Header(..) 
=> TagKind::THead, - TableCellKind::Footer => TagKind::TFoot, - TableCellKind::Data => TagKind::TBody, - }; - nodes.push(TagNode::Group(tag.into(), row_chunk)); - } - - nodes - } - - fn resolve_cell_headers( - &mut self, - (x, y): (usize, usize), - current_header: &mut Option<(NonZeroU32, TagId)>, - refers_to_dir: F, - ) where - F: Fn(&TableHeaderScope) -> bool, - { - let table_id = self.id; - let Some(cell) = self.get_mut(x, y) else { return }; - - if let Some((prev_level, cell_id)) = current_header.clone() { - // The `Headers` attribute is also set for parent headers. - let mut is_parent_header = true; - if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { - if refers_to_dir(&scope) { - is_parent_header = prev_level < level; - } - } - - if is_parent_header && !cell.headers.ids.contains(&cell_id) { - cell.headers.ids.push(cell_id.clone()); - } - } - - if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { - if refers_to_dir(&scope) { - let tag_id = table_cell_id(table_id, x as u32, y as u32); - *current_header = Some((level, tag_id)); - } - } - } -} - -fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool { - match (a, b) { - (TableCellKind::Header(..), TableCellKind::Header(..)) => true, - (TableCellKind::Footer, TableCellKind::Footer) => true, - (TableCellKind::Data, TableCellKind::Data) => true, - (_, _) => false, - } -} - -fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { - let mut bytes = [0; 12]; - bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes()); - bytes[4..8].copy_from_slice(&x.to_ne_bytes()); - bytes[8..12].copy_from_slice(&y.to_ne_bytes()); - TagId::from_bytes(&bytes) -} - -#[derive(Clone)] -pub(crate) enum TagNode { - Group(Tag, Vec), - Leaf(Identifier), - /// Allows inserting a placeholder into the tag tree. - /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. 
- Placeholder(Placeholder), -} - -#[derive(Clone, Copy)] -pub(crate) struct Placeholder(usize); - -impl Tags { - pub(crate) fn new() -> Self { - Self { - stack: Vec::new(), - placeholders: Vec::new(), - in_artifact: None, - - tree: Vec::new(), - link_id: LinkId(0), - table_id: TableId(0), - } - } - - pub(crate) fn reserve_placeholder(&mut self) -> Placeholder { - let idx = self.placeholders.len(); - self.placeholders.push(OnceCell::new()); - Placeholder(idx) - } - - pub(crate) fn init_placeholder(&mut self, placeholder: Placeholder, node: Node) { - self.placeholders[placeholder.0] - .set(node) - .map_err(|_| ()) - .expect("placeholder to be uninitialized"); - } - - pub(crate) fn take_placeholder(&mut self, placeholder: Placeholder) -> Node { - self.placeholders[placeholder.0] - .take() - .expect("initialized placeholder node") - } - - /// Returns the current parent's list of children and the structure type ([Tag]). - /// In case of the document root the structure type will be `None`. - pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> { - self.stack.last_mut().map(|e| &mut e.kind) - } - - pub(crate) fn push(&mut self, node: TagNode) { - if let Some(entry) = self.stack.last_mut() { - entry.nodes.push(node); - } else { - self.tree.push(node); - } - } - - pub(crate) fn build_tree(&mut self) -> TagTree { - let children = std::mem::take(&mut self.tree) - .into_iter() - .map(|node| self.resolve_node(node)) - .collect::>(); - TagTree::from(children) - } - - /// Resolves [`Placeholder`] nodes. 
- fn resolve_node(&mut self, node: TagNode) -> Node { - match node { - TagNode::Group(tag, nodes) => { - let children = nodes - .into_iter() - .map(|node| self.resolve_node(node)) - .collect::>(); - Node::Group(TagGroup::with_children(tag, children)) - } - TagNode::Leaf(identifier) => Node::Leaf(identifier), - TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder), - } - } - - fn context_supports(&self, _tag: &StackEntryKind) -> bool { - // TODO: generate using: https://pdfa.org/resource/iso-ts-32005-hierarchical-inclusion-rules/ - true - } - - fn next_link_id(&mut self) -> LinkId { - self.link_id.0 += 1; - self.link_id - } - - fn next_table_id(&mut self) -> TableId { - self.table_id.0 += 1; - self.table_id - } -} - -/// Automatically calls [`Surface::end_tagged`] when dropped. -pub(crate) struct TagHandle<'a, 'b> { - surface: &'b mut Surface<'a>, -} - -impl Drop for TagHandle<'_, '_> { - fn drop(&mut self) { - self.surface.end_tagged(); - } -} - -impl<'a> TagHandle<'a, '_> { - pub(crate) fn surface<'c>(&'c mut self) -> &'c mut Surface<'a> { - &mut self.surface - } -} - -/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] -/// when dropped. -pub(crate) fn start_marked<'a, 'b>( - gc: &mut GlobalContext, - surface: &'b mut Surface<'a>, -) -> TagHandle<'a, 'b> { - start_content(gc, surface, ContentTag::Other) -} - -/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] -/// when dropped. 
-pub(crate) fn start_span<'a, 'b>( - gc: &mut GlobalContext, - surface: &'b mut Surface<'a>, - span: SpanTag, -) -> TagHandle<'a, 'b> { - start_content(gc, surface, ContentTag::Span(span)) -} - -fn start_content<'a, 'b>( - gc: &mut GlobalContext, - surface: &'b mut Surface<'a>, - content: ContentTag, -) -> TagHandle<'a, 'b> { - let content = if let Some((_, kind)) = gc.tags.in_artifact { - let ty = artifact_type(kind); - ContentTag::Artifact(ty) - } else if let Some(StackEntryKind::Table(_)) = gc.tags.stack.last().map(|e| &e.kind) { - // Mark any direct child of a table as an aritfact. Any real content - // will be wrapped inside a `TableCell`. - ContentTag::Artifact(ArtifactType::Other) - } else { - content - }; - let id = surface.start_tagged(content); - gc.tags.push(TagNode::Leaf(id)); - TagHandle { surface } -} - -/// Add all annotations that were found in the page frame. -pub(crate) fn add_annotations( - gc: &mut GlobalContext, - page: &mut Page, - annotations: Vec, -) { - for annotation in annotations.into_iter() { - let LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } = - annotation; - let annot = krilla::annotation::Annotation::new_link( - krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target), - alt, - ); - let annot_id = page.add_tagged_annotation(annot); - gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id)); - } -} +mod outline; +mod table; pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { if gc.tags.in_artifact.is_some() { @@ -773,16 +182,226 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { gc.tags.push(node); } -fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) { - gc.tags.in_artifact = Some((loc, kind)); +/// Add all annotations that were found in the page frame. 
+pub(crate) fn add_annotations( + gc: &mut GlobalContext, + page: &mut Page, + annotations: Vec, +) { + for annotation in annotations.into_iter() { + let LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } = + annotation; + let annot = krilla::annotation::Annotation::new_link( + krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target), + alt, + ); + let annot_id = page.add_tagged_annotation(annot); + gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id)); + } } -fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope { - match scope { - TableHeaderScope::Both => krilla::tagging::TableHeaderScope::Both, - TableHeaderScope::Column => krilla::tagging::TableHeaderScope::Column, - TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row, +pub(crate) struct Tags { + /// The intermediary stack of nested tag groups. + pub(crate) stack: Vec, + /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. + pub(crate) placeholders: Vec>, + pub(crate) in_artifact: Option<(Location, ArtifactKind)>, + /// Used to group multiple link annotations using quad points. + pub(crate) link_id: LinkId, + /// Used to generate IDs referenced in table `Headers` attributes. + /// The IDs must be document wide unique. + pub(crate) table_id: TableId, + + /// The output. 
+ pub(crate) tree: Vec, +} + +impl Tags { + pub(crate) fn new() -> Self { + Self { + stack: Vec::new(), + placeholders: Vec::new(), + in_artifact: None, + + tree: Vec::new(), + link_id: LinkId(0), + table_id: TableId(0), + } } + + pub(crate) fn reserve_placeholder(&mut self) -> Placeholder { + let idx = self.placeholders.len(); + self.placeholders.push(OnceCell::new()); + Placeholder(idx) + } + + pub(crate) fn init_placeholder(&mut self, placeholder: Placeholder, node: Node) { + self.placeholders[placeholder.0] + .set(node) + .map_err(|_| ()) + .expect("placeholder to be uninitialized"); + } + + pub(crate) fn take_placeholder(&mut self, placeholder: Placeholder) -> Node { + self.placeholders[placeholder.0] + .take() + .expect("initialized placeholder node") + } + + /// Returns the kind ([`StackEntryKind`]) of the last entry on the tag stack. + /// At the document root, where the stack is empty, this returns `None`. + pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> { + self.stack.last_mut().map(|e| &mut e.kind) + } + + pub(crate) fn push(&mut self, node: TagNode) { + if let Some(entry) = self.stack.last_mut() { + entry.nodes.push(node); + } else { + self.tree.push(node); + } + } + + pub(crate) fn build_tree(&mut self) -> TagTree { + let children = std::mem::take(&mut self.tree) + .into_iter() + .map(|node| self.resolve_node(node)) + .collect::>(); + TagTree::from(children) + } + + /// Resolves [`Placeholder`] nodes. 
+ fn resolve_node(&mut self, node: TagNode) -> Node { + match node { + TagNode::Group(tag, nodes) => { + let children = nodes + .into_iter() + .map(|node| self.resolve_node(node)) + .collect::>(); + Node::Group(TagGroup::with_children(tag, children)) + } + TagNode::Leaf(identifier) => Node::Leaf(identifier), + TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder), + } + } + + fn context_supports(&self, _tag: &StackEntryKind) -> bool { + // TODO: generate using: https://pdfa.org/resource/iso-ts-32005-hierarchical-inclusion-rules/ + true + } + + fn next_link_id(&mut self) -> LinkId { + self.link_id.0 += 1; + self.link_id + } + + fn next_table_id(&mut self) -> TableId { + self.table_id.0 += 1; + self.table_id + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct TableId(u32); + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct LinkId(u32); + +pub(crate) struct StackEntry { + pub(crate) loc: Location, + pub(crate) kind: StackEntryKind, + pub(crate) nodes: Vec, +} + +pub(crate) enum StackEntryKind { + Standard(Tag), + Outline(OutlineCtx), + OutlineEntry(Packed), + Table(TableCtx), + TableCell(Packed), + Link(LinkId, Packed), +} + +impl StackEntryKind { + pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> { + if let Self::Standard(v) = self { + Some(v) + } else { + None + } + } +} + +#[derive(Clone)] +pub(crate) enum TagNode { + Group(Tag, Vec), + Leaf(Identifier), + /// Allows inserting a placeholder into the tag tree. + /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. + Placeholder(Placeholder), +} + +#[derive(Clone, Copy)] +pub(crate) struct Placeholder(usize); + +/// Automatically calls [`Surface::end_tagged`] when dropped. 
+pub(crate) struct TagHandle<'a, 'b> { + surface: &'b mut Surface<'a>, +} + +impl Drop for TagHandle<'_, '_> { + fn drop(&mut self) { + self.surface.end_tagged(); + } +} + +impl<'a> TagHandle<'a, '_> { + pub(crate) fn surface<'c>(&'c mut self) -> &'c mut Surface<'a> { + self.surface + } +} + +/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] +/// when dropped. +pub(crate) fn start_marked<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, +) -> TagHandle<'a, 'b> { + start_content(gc, surface, ContentTag::Other) +} + +/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] +/// when dropped. +pub(crate) fn start_span<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, + span: SpanTag, +) -> TagHandle<'a, 'b> { + start_content(gc, surface, ContentTag::Span(span)) +} + +fn start_content<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, + content: ContentTag, +) -> TagHandle<'a, 'b> { + let content = if let Some((_, kind)) = gc.tags.in_artifact { + let ty = artifact_type(kind); + ContentTag::Artifact(ty) + } else if let Some(StackEntryKind::Table(_)) = gc.tags.stack.last().map(|e| &e.kind) { + // Mark any direct child of a table as an artifact. Any real content + // will be wrapped inside a `TableCell`. 
+ ContentTag::Artifact(ArtifactType::Other) + } else { + content + }; + let id = surface.start_tagged(content); + gc.tags.push(TagNode::Leaf(id)); + TagHandle { surface } +} + +fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) { + gc.tags.in_artifact = Some((loc, kind)); } fn artifact_type(kind: ArtifactKind) -> ArtifactType { diff --git a/crates/typst-pdf/src/tags/outline.rs b/crates/typst-pdf/src/tags/outline.rs new file mode 100644 index 000000000..9fbeb8dcb --- /dev/null +++ b/crates/typst-pdf/src/tags/outline.rs @@ -0,0 +1,74 @@ +use krilla::tagging::TagKind; +use typst_library::foundations::Packed; +use typst_library::model::OutlineEntry; + +use crate::tags::TagNode; + +pub(crate) struct OutlineCtx { + stack: Vec, +} + +impl OutlineCtx { + pub(crate) fn new() -> Self { + Self { stack: Vec::new() } + } + + pub(crate) fn insert( + &mut self, + outline_nodes: &mut Vec, + entry: Packed, + nodes: Vec, + ) { + let expected_len = entry.level.get() - 1; + if self.stack.len() < expected_len { + self.stack.resize_with(expected_len, OutlineSection::new); + } else { + while self.stack.len() > expected_len { + self.finish_section(outline_nodes); + } + } + + let section_entry = TagNode::Group(TagKind::TOCI.into(), nodes); + self.push(outline_nodes, section_entry); + } + + fn finish_section(&mut self, outline_nodes: &mut Vec) { + let sub_section = self.stack.pop().unwrap().into_tag(); + self.push(outline_nodes, sub_section); + } + + fn push(&mut self, outline_nodes: &mut Vec, entry: TagNode) { + match self.stack.last_mut() { + Some(section) => section.push(entry), + None => outline_nodes.push(entry), + } + } + + pub(crate) fn build_outline( + mut self, + mut outline_nodes: Vec, + ) -> Vec { + while !self.stack.is_empty() { + self.finish_section(&mut outline_nodes); + } + outline_nodes + } +} + +pub(crate) struct OutlineSection { + entries: Vec, +} + +impl OutlineSection { + const fn new() -> Self { + OutlineSection { entries: Vec::new() } + } + + 
fn push(&mut self, entry: TagNode) { + self.entries.push(entry); + } + + fn into_tag(self) -> TagNode { + TagNode::Group(TagKind::TOC.into(), self.entries) + } +} diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs new file mode 100644 index 000000000..240da4c33 --- /dev/null +++ b/crates/typst-pdf/src/tags/table.rs @@ -0,0 +1,330 @@ +use std::num::{NonZeroU32, NonZeroUsize}; + +use krilla::tagging::{ + TableCellHeaders, TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, + TagKind, +}; +use typst_library::foundations::{Packed, Smart, StyleChain}; +use typst_library::model::{TableCell, TableCellKind, TableElem, TableHeaderScope}; + +use crate::tags::{TableId, TagNode}; + +pub(crate) struct TableCtx { + pub(crate) id: TableId, + pub(crate) table: Packed, + rows: Vec>, +} + +impl TableCtx { + pub(crate) fn new(id: TableId, table: Packed) -> Self { + Self { id, table: table.clone(), rows: Vec::new() } + } + + fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> { + let cell = self.rows.get(y)?.get(x)?; + self.resolve_cell(cell) + } + + fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut TableCtxCell> { + let cell = self.rows.get_mut(y)?.get_mut(x)?; + match cell { + GridCell::Cell(cell) => { + // HACK: Workaround for the second mutable borrow when resolving + // the spanned cell. 
+ Some(unsafe { std::mem::transmute(cell) }) + } + &mut GridCell::Spanned(x, y) => self.rows[y][x].as_cell_mut(), + GridCell::Missing => None, + } + } + + pub(crate) fn contains(&self, cell: &Packed) -> bool { + let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); + self.get(x, y).is_some() + } + + fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> { + match cell { + GridCell::Cell(cell) => Some(cell), + &GridCell::Spanned(x, y) => self.rows[y][x].as_cell(), + GridCell::Missing => None, + } + } + + pub(crate) fn insert(&mut self, cell: Packed, nodes: Vec) { + let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let rowspan = cell.rowspan(StyleChain::default()); + let colspan = cell.colspan(StyleChain::default()); + let kind = cell.kind(StyleChain::default()); + + // Extend the table grid to fit this cell. + let required_height = y + rowspan.get(); + let required_width = x + colspan.get(); + if self.rows.len() < required_height { + self.rows + .resize(required_height, vec![GridCell::Missing; required_width]); + } + let row = &mut self.rows[y]; + if row.len() < required_width { + row.resize_with(required_width, || GridCell::Missing); + } + + // Store references to the cell for all spanned cells. + for i in y..y + rowspan.get() { + for j in x..x + colspan.get() { + self.rows[i][j] = GridCell::Spanned(x, y); + } + } + + self.rows[y][x] = GridCell::Cell(TableCtxCell { + x: x as u32, + y: y as u32, + rowspan, + colspan, + kind, + headers: TableCellHeaders::NONE, + nodes, + }); + } + + pub(crate) fn build_table(mut self, mut nodes: Vec) -> Vec { + // Table layouting ensures that there are no overlapping cells, and that + // any gaps left by the user are filled with empty cells. 
+ if self.rows.is_empty() { + return nodes; + } + let height = self.rows.len(); + let width = self.rows[0].len(); + + // Only generate row groups such as `THead`, `TFoot`, and `TBody` if + // there are no rows with mixed cell kinds. + let mut gen_row_groups = true; + let row_kinds = (self.rows.iter()) + .map(|row| { + row.iter() + .filter_map(|cell| self.resolve_cell(cell)) + .map(|cell| cell.kind) + .fold(Smart::Auto, |a, b| { + if let Smart::Custom(TableCellKind::Header(_, scope)) = b { + gen_row_groups &= scope == TableHeaderScope::Column; + } + if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) { + gen_row_groups &= a == b; + } + a.or(b) + }) + .unwrap_or(TableCellKind::Data) + }) + .collect::>(); + + // Fixup all missing cell kinds. + for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) { + let default_kind = + if gen_row_groups { row_kind } else { TableCellKind::Data }; + for cell in row.iter_mut() { + let Some(cell) = cell.as_cell_mut() else { continue }; + cell.kind = cell.kind.or(Smart::Custom(default_kind)); + } + } + + // Explicitly set the headers attribute for cells. 
+ for x in 0..width { + let mut column_header = None; + for y in 0..height { + self.resolve_cell_headers( + (x, y), + &mut column_header, + TableHeaderScope::refers_to_column, + ); + } + } + for y in 0..height { + let mut row_header = None; + for x in 0..width { + self.resolve_cell_headers( + (x, y), + &mut row_header, + TableHeaderScope::refers_to_row, + ); + } + } + + let mut chunk_kind = row_kinds[0]; + let mut row_chunk = Vec::new(); + for (row, row_kind) in self.rows.into_iter().zip(row_kinds) { + let row_nodes = row + .into_iter() + .filter_map(|cell| { + let cell = cell.into_cell()?; + let span = TableCellSpan { + rows: cell.rowspan.try_into().unwrap(), + cols: cell.colspan.try_into().unwrap(), + }; + let tag = match cell.unwrap_kind() { + TableCellKind::Header(_, scope) => { + let id = table_cell_id(self.id, cell.x, cell.y); + let scope = table_header_scope(scope); + TagKind::TH( + TableHeaderCell::new(scope) + .with_span(span) + .with_headers(cell.headers), + ) + .with_id(Some(id)) + } + TableCellKind::Footer | TableCellKind::Data => TagKind::TD( + TableDataCell::new() + .with_span(span) + .with_headers(cell.headers), + ) + .into(), + }; + + Some(TagNode::Group(tag, cell.nodes)) + }) + .collect(); + + let row = TagNode::Group(TagKind::TR.into(), row_nodes); + + // Push the `TR` tags directly. + if !gen_row_groups { + nodes.push(row); + continue; + } + + // Generate row groups. + if !should_group_rows(chunk_kind, row_kind) { + let tag = match chunk_kind { + TableCellKind::Header(..) => TagKind::THead, + TableCellKind::Footer => TagKind::TFoot, + TableCellKind::Data => TagKind::TBody, + }; + nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk))); + + chunk_kind = row_kind; + } + row_chunk.push(row); + } + + if !row_chunk.is_empty() { + let tag = match chunk_kind { + TableCellKind::Header(..) 
=> TagKind::THead, + TableCellKind::Footer => TagKind::TFoot, + TableCellKind::Data => TagKind::TBody, + }; + nodes.push(TagNode::Group(tag.into(), row_chunk)); + } + + nodes + } + + fn resolve_cell_headers( + &mut self, + (x, y): (usize, usize), + current_header: &mut Option<(NonZeroU32, TagId)>, + refers_to_dir: F, + ) where + F: Fn(&TableHeaderScope) -> bool, + { + let table_id = self.id; + let Some(cell) = self.get_mut(x, y) else { return }; + + if let Some((prev_level, cell_id)) = current_header.clone() { + // The `Headers` attribute is also set for parent headers. + let mut is_parent_header = true; + if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { + if refers_to_dir(&scope) { + is_parent_header = prev_level < level; + } + } + + if is_parent_header && !cell.headers.ids.contains(&cell_id) { + cell.headers.ids.push(cell_id.clone()); + } + } + + if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { + if refers_to_dir(&scope) { + let tag_id = table_cell_id(table_id, x as u32, y as u32); + *current_header = Some((level, tag_id)); + } + } + } +} + +#[derive(Clone, Default)] +enum GridCell { + Cell(TableCtxCell), + Spanned(usize, usize), + #[default] + Missing, +} + +impl GridCell { + fn as_cell(&self) -> Option<&TableCtxCell> { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } + + fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } + + fn into_cell(self) -> Option { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } +} + +#[derive(Clone)] +struct TableCtxCell { + x: u32, + y: u32, + rowspan: NonZeroUsize, + colspan: NonZeroUsize, + kind: Smart, + headers: TableCellHeaders, + nodes: Vec, +} + +impl TableCtxCell { + fn unwrap_kind(&self) -> TableCellKind { + self.kind.unwrap_or_else(|| unreachable!()) + } +} + +fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool { + match (a, b) { + (TableCellKind::Header(..), 
TableCellKind::Header(..)) => true, + (TableCellKind::Footer, TableCellKind::Footer) => true, + (TableCellKind::Data, TableCellKind::Data) => true, + (_, _) => false, + } +} + +fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { + let mut bytes = [0; 12]; + bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes()); + bytes[4..8].copy_from_slice(&x.to_ne_bytes()); + bytes[8..12].copy_from_slice(&y.to_ne_bytes()); + TagId::from_bytes(&bytes) +} + +fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope { + match scope { + TableHeaderScope::Both => krilla::tagging::TableHeaderScope::Both, + TableHeaderScope::Column => krilla::tagging::TableHeaderScope::Column, + TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row, + } +}