diff --git a/crates/typst-pdf/src/tags/context.rs b/crates/typst-pdf/src/tags/context.rs new file mode 100644 index 000000000..8011f1cb6 --- /dev/null +++ b/crates/typst-pdf/src/tags/context.rs @@ -0,0 +1,539 @@ +use std::cell::OnceCell; +use std::collections::HashMap; +use std::slice::SliceIndex; + +use krilla::geom as kg; +use krilla::tagging::{BBox, Node, TagKind, TagTree}; +use typst_library::foundations::{LinkMarker, Packed}; +use typst_library::introspection::Location; +use typst_library::layout::{Abs, Point, Rect}; +use typst_library::model::{OutlineEntry, TableCell}; +use typst_library::pdf::ArtifactKind; +use typst_library::text::Lang; +use typst_syntax::Span; + +use crate::convert::FrameContext; +use crate::tags::list::ListCtx; +use crate::tags::outline::OutlineCtx; +use crate::tags::table::TableCtx; +use crate::tags::{Placeholder, TagGroup, TagNode}; +use crate::util::AbsExt; + +pub struct Tags { + /// The language of the first text item that has been encountered. + pub doc_lang: Option, + /// The intermediary stack of nested tag groups. + pub stack: TagStack, + /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. + pub placeholders: Placeholders, + /// Footnotes are inserted directly after the footnote reference in the + /// reading order. Because of some layouting bugs, the entry might appear + /// before the reference in the text, so we only resolve them once tags + /// for the whole document are generated. + pub footnotes: HashMap, + pub in_artifact: Option<(Location, ArtifactKind)>, + /// Used to group multiple link annotations using quad points. + link_id: LinkId, + /// Used to generate IDs referenced in table `Headers` attributes. + /// The IDs must be document wide unique. + table_id: TableId, + + /// The output. 
+ pub tree: Vec, +} + +impl Tags { + pub fn new() -> Self { + Self { + doc_lang: None, + stack: TagStack::new(), + placeholders: Placeholders(Vec::new()), + footnotes: HashMap::new(), + in_artifact: None, + + link_id: LinkId(0), + table_id: TableId(0), + + tree: Vec::new(), + } + } + + pub fn push(&mut self, node: TagNode) { + if let Some(entry) = self.stack.last_mut() { + entry.nodes.push(node); + } else { + self.tree.push(node); + } + } + + pub fn extend(&mut self, nodes: impl IntoIterator) { + if let Some(entry) = self.stack.last_mut() { + entry.nodes.extend(nodes); + } else { + self.tree.extend(nodes); + } + } + + pub fn build_tree(&mut self) -> TagTree { + let children = std::mem::take(&mut self.tree) + .into_iter() + .map(|node| self.resolve_node(node)) + .collect::>(); + TagTree::from(children) + } + + /// Try to set the language of a parent tag, or the entire document. + /// If the language couldn't be set and is different from the existing one, + /// this will return `Some`, and the language should be specified on the + /// marked content directly. + pub fn try_set_lang(&mut self, lang: Lang) -> Option { + // Discard languages within artifacts. + if self.in_artifact.is_some() { + return None; + } + if self.doc_lang.is_none_or(|l| l == lang) { + self.doc_lang = Some(lang); + return None; + } + if let Some(last) = self.stack.last_mut() + && last.lang.is_none_or(|l| l == lang) + { + last.lang = Some(lang); + return None; + } + Some(lang) + } + + /// Resolves [`Placeholder`] nodes. 
+ fn resolve_node(&mut self, node: TagNode) -> Node { + match node { + TagNode::Group(TagGroup { tag, nodes }) => { + let children = nodes + .into_iter() + .map(|node| self.resolve_node(node)) + .collect::>(); + Node::Group(krilla::tagging::TagGroup::with_children(tag, children)) + } + TagNode::Leaf(identifier) => Node::Leaf(identifier), + TagNode::Placeholder(placeholder) => self.placeholders.take(placeholder), + TagNode::FootnoteEntry(loc) => { + let node = (self.footnotes.remove(&loc)) + .and_then(|ctx| ctx.entry) + .expect("footnote"); + self.resolve_node(node) + } + } + } + + pub fn context_supports(&self, _tag: &StackEntryKind) -> bool { + // TODO: generate using: https://pdfa.org/resource/iso-ts-32005-hierarchical-inclusion-rules/ + true + } + + pub fn next_link_id(&mut self) -> LinkId { + self.link_id.0 += 1; + self.link_id + } + + pub fn next_table_id(&mut self) -> TableId { + self.table_id.0 += 1; + self.table_id + } +} + +#[derive(Debug)] +pub struct TagStack { + items: Vec, + /// The index of the topmost stack entry that has a bbox. 
+ bbox_idx: Option, +} + +impl> std::ops::Index for TagStack { + type Output = I::Output; + + #[inline] + fn index(&self, index: I) -> &Self::Output { + std::ops::Index::index(&self.items, index) + } +} + +impl> std::ops::IndexMut for TagStack { + #[inline] + fn index_mut(&mut self, index: I) -> &mut Self::Output { + std::ops::IndexMut::index_mut(&mut self.items, index) + } +} + +impl TagStack { + pub fn new() -> Self { + Self { items: Vec::new(), bbox_idx: None } + } + + pub fn len(&self) -> usize { + self.items.len() + } + + pub fn last(&self) -> Option<&StackEntry> { + self.items.last() + } + + pub fn last_mut(&mut self) -> Option<&mut StackEntry> { + self.items.last_mut() + } + + pub fn iter(&self) -> std::slice::Iter { + self.items.iter() + } + + pub fn push(&mut self, entry: StackEntry) { + if entry.kind.bbox().is_some() { + self.bbox_idx = Some(self.len()); + } + self.items.push(entry); + } + + pub fn extend(&mut self, iter: impl IntoIterator) { + let start = self.len(); + self.items.extend(iter); + let last_bbox_offset = self.items[start..] + .iter() + .rposition(|entry| entry.kind.bbox().is_some()); + if let Some(offset) = last_bbox_offset { + self.bbox_idx = Some(start + offset); + } + } + + /// Remove the last stack entry if the predicate returns true. + /// This takes care of updating the parent bboxes. + pub fn pop_if( + &mut self, + mut predicate: impl FnMut(&mut StackEntry) -> bool, + ) -> Option { + let last = self.items.last_mut()?; + if predicate(last) { self.pop() } else { None } + } + + /// Remove the last stack entry. + /// This takes care of updating the parent bboxes. 
+ pub fn pop(&mut self) -> Option { + let removed = self.items.pop()?; + + let Some(inner_bbox) = removed.kind.bbox() else { return Some(removed) }; + + self.bbox_idx = self.items.iter_mut().enumerate().rev().find_map(|(i, entry)| { + let outer_bbox = entry.kind.bbox_mut()?; + if let Some((page_idx, rect)) = inner_bbox.rect { + outer_bbox.expand_page(page_idx, rect); + } + Some(i) + }); + + Some(removed) + } + + pub fn parent(&mut self) -> Option<&mut StackEntryKind> { + self.items.last_mut().map(|e| &mut e.kind) + } + + pub fn parent_table(&mut self) -> Option<&mut TableCtx> { + self.parent()?.as_table_mut() + } + + pub fn parent_list(&mut self) -> Option<&mut ListCtx> { + self.parent()?.as_list_mut() + } + + pub fn parent_figure(&mut self) -> Option<&mut FigureCtx> { + self.parent()?.as_figure_mut() + } + + pub fn parent_outline(&mut self) -> Option<(&mut OutlineCtx, &mut Vec)> { + self.items.last_mut().and_then(|e| { + let ctx = e.kind.as_outline_mut()?; + Some((ctx, &mut e.nodes)) + }) + } + + pub fn find_parent_link( + &mut self, + ) -> Option<(LinkId, &Packed, &mut Vec)> { + self.items.iter_mut().rev().find_map(|e| { + let (link_id, link) = e.kind.as_link()?; + Some((link_id, link, &mut e.nodes)) + }) + } + + /// Finds the first parent that has a bounding box. 
+ pub fn find_parent_bbox(&mut self) -> Option<&mut BBoxCtx> { + self.items[self.bbox_idx?].kind.bbox_mut() + } +} + +pub struct Placeholders(Vec>); + +impl Placeholders { + pub fn reserve(&mut self) -> Placeholder { + let idx = self.0.len(); + self.0.push(OnceCell::new()); + Placeholder(idx) + } + + pub fn init(&mut self, placeholder: Placeholder, node: Node) { + self.0[placeholder.0] + .set(node) + .map_err(|_| ()) + .expect("placeholder to be uninitialized"); + } + + pub fn take(&mut self, placeholder: Placeholder) -> Node { + self.0[placeholder.0].take().expect("initialized placeholder node") + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct TableId(u32); + +impl TableId { + pub fn get(self) -> u32 { + self.0 + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct LinkId(u32); + +#[derive(Debug)] +pub struct StackEntry { + pub loc: Location, + pub span: Span, + pub lang: Option, + pub kind: StackEntryKind, + pub nodes: Vec, +} + +#[derive(Clone, Debug)] +pub enum StackEntryKind { + Standard(TagKind), + Outline(OutlineCtx), + OutlineEntry(Packed), + Table(TableCtx), + TableCell(Packed), + List(ListCtx), + ListItemLabel, + ListItemBody, + BibEntry, + Figure(FigureCtx), + Formula(FigureCtx), + Link(LinkId, Packed), + /// The footnote reference in the text, contains the declaration location. + FootnoteRef(Location), + /// The footnote entry at the end of the page. Contains the [`Location`] of + /// the [`FootnoteElem`](typst_library::model::FootnoteElem). 
+ FootnoteEntry(Location), + Code(Option), +} + +impl StackEntryKind { + pub fn as_outline_mut(&mut self) -> Option<&mut OutlineCtx> { + if let Self::Outline(v) = self { Some(v) } else { None } + } + + pub fn as_table_mut(&mut self) -> Option<&mut TableCtx> { + if let Self::Table(v) = self { Some(v) } else { None } + } + + pub fn as_list_mut(&mut self) -> Option<&mut ListCtx> { + if let Self::List(v) = self { Some(v) } else { None } + } + + pub fn as_figure_mut(&mut self) -> Option<&mut FigureCtx> { + if let Self::Figure(v) = self { Some(v) } else { None } + } + + pub fn as_link(&self) -> Option<(LinkId, &Packed)> { + if let Self::Link(id, link) = self { Some((*id, link)) } else { None } + } + + pub fn bbox(&self) -> Option<&BBoxCtx> { + match self { + Self::Table(ctx) => Some(&ctx.bbox), + Self::Figure(ctx) => Some(&ctx.bbox), + Self::Formula(ctx) => Some(&ctx.bbox), + _ => None, + } + } + + pub fn bbox_mut(&mut self) -> Option<&mut BBoxCtx> { + match self { + Self::Table(ctx) => Some(&mut ctx.bbox), + Self::Figure(ctx) => Some(&mut ctx.bbox), + Self::Formula(ctx) => Some(&mut ctx.bbox), + _ => None, + } + } + + pub fn is_breakable(&self, is_pdf_ua: bool) -> bool { + match self { + StackEntryKind::Standard(tag) => match tag { + TagKind::Part(_) => !is_pdf_ua, + TagKind::Article(_) => !is_pdf_ua, + TagKind::Section(_) => !is_pdf_ua, + TagKind::Div(_) => !is_pdf_ua, + TagKind::BlockQuote(_) => !is_pdf_ua, + TagKind::Caption(_) => !is_pdf_ua, + TagKind::TOC(_) => false, + TagKind::TOCI(_) => false, + TagKind::Index(_) => false, + TagKind::P(_) => true, + TagKind::Hn(_) => !is_pdf_ua, + TagKind::L(_) => false, + TagKind::LI(_) => false, + TagKind::Lbl(_) => !is_pdf_ua, + TagKind::LBody(_) => !is_pdf_ua, + TagKind::Table(_) => false, + TagKind::TR(_) => false, + // TODO: disallow table/grid cells outside of tables/grids + TagKind::TH(_) => false, + TagKind::TD(_) => false, + TagKind::THead(_) => false, + TagKind::TBody(_) => false, + TagKind::TFoot(_) => false, + 
TagKind::Span(_) => true, + TagKind::InlineQuote(_) => !is_pdf_ua, + TagKind::Note(_) => !is_pdf_ua, + TagKind::Reference(_) => !is_pdf_ua, + TagKind::BibEntry(_) => !is_pdf_ua, + TagKind::Code(_) => !is_pdf_ua, + TagKind::Link(_) => !is_pdf_ua, + TagKind::Annot(_) => !is_pdf_ua, + TagKind::Figure(_) => !is_pdf_ua, + TagKind::Formula(_) => !is_pdf_ua, + TagKind::NonStruct(_) => !is_pdf_ua, + TagKind::Datetime(_) => !is_pdf_ua, + TagKind::Terms(_) => !is_pdf_ua, + TagKind::Title(_) => !is_pdf_ua, + }, + StackEntryKind::Outline(_) => false, + StackEntryKind::OutlineEntry(_) => false, + StackEntryKind::Table(_) => false, + StackEntryKind::TableCell(_) => false, + StackEntryKind::List(_) => false, + StackEntryKind::ListItemLabel => false, + StackEntryKind::ListItemBody => false, + StackEntryKind::BibEntry => false, + StackEntryKind::Figure(_) => false, + StackEntryKind::Formula(_) => false, + StackEntryKind::Link(..) => !is_pdf_ua, + StackEntryKind::FootnoteRef(_) => false, + StackEntryKind::FootnoteEntry(_) => false, + StackEntryKind::Code(_) => false, + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct FootnoteCtx { + /// Whether this footnote has been referenced inside the document. The + /// entry will be inserted inside the reading order after the first + /// reference. All other references will still have links to the footnote. + pub is_referenced: bool, + /// The nodes that make up the footnote entry. 
+ pub entry: Option, +} + +impl FootnoteCtx { + pub const fn new() -> Self { + Self { is_referenced: false, entry: None } + } +} + +/// Figure/Formula context +#[derive(Debug, Clone, PartialEq)] +pub struct FigureCtx { + pub alt: Option, + pub bbox: BBoxCtx, +} + +impl FigureCtx { + pub fn new(alt: Option) -> Self { + Self { alt, bbox: BBoxCtx::new() } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BBoxCtx { + pub rect: Option<(usize, Rect)>, + pub multi_page: bool, +} + +impl BBoxCtx { + pub fn new() -> Self { + Self { rect: None, multi_page: false } + } + + pub fn reset(&mut self) { + *self = Self::new(); + } + + /// Expand the bounding box with a `rect` relative to the current frame + /// context transform. + pub fn expand_frame(&mut self, fc: &FrameContext, rect: Rect) { + let Some(page_idx) = fc.page_idx else { return }; + if self.multi_page { + return; + } + let (idx, bbox) = self.rect.get_or_insert(( + page_idx, + Rect::new(Point::splat(Abs::inf()), Point::splat(-Abs::inf())), + )); + if *idx != page_idx { + self.multi_page = true; + self.rect = None; + return; + } + + let size = rect.size(); + for point in [ + rect.min, + rect.min + Point::with_x(size.x), + rect.min + Point::with_y(size.y), + rect.max, + ] { + let p = point.transform(fc.state().transform()); + bbox.min = bbox.min.min(p); + bbox.max = bbox.max.max(p); + } + } + + /// Expand the bounding box with a rectangle that's already transformed into + /// page coordinates. 
+ pub fn expand_page(&mut self, page_idx: usize, rect: Rect) { + if self.multi_page { + return; + } + let (idx, bbox) = self.rect.get_or_insert(( + page_idx, + Rect::new(Point::splat(Abs::inf()), Point::splat(-Abs::inf())), + )); + if *idx != page_idx { + self.multi_page = true; + self.rect = None; + return; + } + + bbox.min = bbox.min.min(rect.min); + bbox.max = bbox.max.max(rect.max); + } + + pub fn get(&self) -> Option { + let (page_idx, rect) = self.rect?; + let rect = kg::Rect::from_ltrb( + rect.min.x.to_f32(), + rect.min.y.to_f32(), + rect.max.x.to_f32(), + rect.max.y.to_f32(), + ) + .unwrap(); + Some(BBox::new(page_idx, rect)) + } +} diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 30d87e27b..5c7720e44 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -1,21 +1,16 @@ -use std::cell::OnceCell; -use std::collections::HashMap; use std::num::NonZeroU32; -use std::slice::SliceIndex; use ecow::EcoString; use krilla::configure::Validator; -use krilla::geom as kg; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, BBox, ContentTag, Identifier, ListNumbering, Node, SpanTag, Tag, - TagKind, TagTree, + ArtifactType, ContentTag, Identifier, ListNumbering, Node, SpanTag, Tag, TagKind, }; use typst_library::diag::{SourceResult, bail}; -use typst_library::foundations::{Content, LinkMarker, Packed}; +use typst_library::foundations::{Content, LinkMarker}; use typst_library::introspection::Location; -use typst_library::layout::{Abs, Point, Rect, RepeatElem}; +use typst_library::layout::{Rect, RepeatElem}; use typst_library::math::EquationElem; use typst_library::model::{ Destination, EnumElem, FigureCaption, FigureElem, FootnoteEntry, HeadingElem, @@ -33,13 +28,63 @@ use crate::tags::list::ListCtx; use crate::tags::outline::OutlineCtx; use crate::tags::table::TableCtx; use crate::tags::util::{PropertyOptRef, PropertyValCopied}; -use crate::util::AbsExt; +pub 
use context::*; + +mod context; mod list; mod outline; mod table; mod util; +#[derive(Debug, Clone, PartialEq)] +pub enum TagNode { + Group(TagGroup), + Leaf(Identifier), + /// Allows inserting a placeholder into the tag tree. + /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. + Placeholder(Placeholder), + FootnoteEntry(Location), +} + +impl TagNode { + pub fn group(tag: impl Into, contents: GroupContents) -> Self { + let lang = contents.lang.map(|l| l.as_str().to_string()); + let tag = tag + .into() + .with_lang(lang) + .with_location(Some(contents.span.into_raw())); + TagNode::Group(TagGroup { tag, nodes: contents.nodes }) + } + + /// A tag group not directly related to a typst element, generated to + /// accommodate the tag structure. + pub fn virtual_group(tag: impl Into, nodes: Vec) -> Self { + let tag = tag.into(); + TagNode::Group(TagGroup { tag, nodes }) + } + + pub fn empty_group(tag: impl Into) -> Self { + Self::virtual_group(tag, Vec::new()) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct TagGroup { + tag: TagKind, + nodes: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct GroupContents { + span: Span, + lang: Option, + nodes: Vec, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct Placeholder(usize); + pub fn handle_start( gc: &mut GlobalContext, surface: &mut Surface, @@ -511,567 +556,6 @@ pub fn update_bbox( } } -pub struct Tags { - /// The language of the first text item that has been encountered. - pub doc_lang: Option, - /// The intermediary stack of nested tag groups. - pub stack: TagStack, - /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. - pub placeholders: Placeholders, - /// Footnotes are inserted directly after the footenote reference in the - /// reading order. Because of some layouting bugs, the entry might appear - /// before the reference in the text, so we only resolve them once tags - /// for the whole document are generated. 
- pub footnotes: HashMap, - pub in_artifact: Option<(Location, ArtifactKind)>, - /// Used to group multiple link annotations using quad points. - link_id: LinkId, - /// Used to generate IDs referenced in table `Headers` attributes. - /// The IDs must be document wide unique. - table_id: TableId, - - /// The output. - pub tree: Vec, -} - -impl Tags { - pub fn new() -> Self { - Self { - doc_lang: None, - stack: TagStack::new(), - placeholders: Placeholders(Vec::new()), - footnotes: HashMap::new(), - in_artifact: None, - - link_id: LinkId(0), - table_id: TableId(0), - - tree: Vec::new(), - } - } - - pub fn push(&mut self, node: TagNode) { - if let Some(entry) = self.stack.last_mut() { - entry.nodes.push(node); - } else { - self.tree.push(node); - } - } - - pub fn extend(&mut self, nodes: impl IntoIterator) { - if let Some(entry) = self.stack.last_mut() { - entry.nodes.extend(nodes); - } else { - self.tree.extend(nodes); - } - } - - pub fn build_tree(&mut self) -> TagTree { - let children = std::mem::take(&mut self.tree) - .into_iter() - .map(|node| self.resolve_node(node)) - .collect::>(); - TagTree::from(children) - } - - /// Try to set the language of a parent tag, or the entire document. - /// If the language couldn't be set and is different from the existing one, - /// this will return `Some`, and the language should be specified on the - /// marked content directly. - pub fn try_set_lang(&mut self, lang: Lang) -> Option { - // Discard languages within artifacts. - if self.in_artifact.is_some() { - return None; - } - if self.doc_lang.is_none_or(|l| l == lang) { - self.doc_lang = Some(lang); - return None; - } - if let Some(last) = self.stack.last_mut() - && last.lang.is_none_or(|l| l == lang) - { - last.lang = Some(lang); - return None; - } - Some(lang) - } - - /// Resolves [`Placeholder`] nodes. 
- fn resolve_node(&mut self, node: TagNode) -> Node { - match node { - TagNode::Group(TagGroup { tag, nodes }) => { - let children = nodes - .into_iter() - .map(|node| self.resolve_node(node)) - .collect::>(); - Node::Group(krilla::tagging::TagGroup::with_children(tag, children)) - } - TagNode::Leaf(identifier) => Node::Leaf(identifier), - TagNode::Placeholder(placeholder) => self.placeholders.take(placeholder), - TagNode::FootnoteEntry(loc) => { - let node = (self.footnotes.remove(&loc)) - .and_then(|ctx| ctx.entry) - .expect("footnote"); - self.resolve_node(node) - } - } - } - - fn context_supports(&self, _tag: &StackEntryKind) -> bool { - // TODO: generate using: https://pdfa.org/resource/iso-ts-32005-hierarchical-inclusion-rules/ - true - } - - pub fn next_link_id(&mut self) -> LinkId { - self.link_id.0 += 1; - self.link_id - } - - fn next_table_id(&mut self) -> TableId { - self.table_id.0 += 1; - self.table_id - } -} - -#[derive(Debug)] -pub struct TagStack { - items: Vec, - /// The index of the topmost stack entry that has a bbox. 
- bbox_idx: Option, -} - -impl> std::ops::Index for TagStack { - type Output = I::Output; - - #[inline] - fn index(&self, index: I) -> &Self::Output { - std::ops::Index::index(&self.items, index) - } -} - -impl> std::ops::IndexMut for TagStack { - #[inline] - fn index_mut(&mut self, index: I) -> &mut Self::Output { - std::ops::IndexMut::index_mut(&mut self.items, index) - } -} - -impl TagStack { - pub fn new() -> Self { - Self { items: Vec::new(), bbox_idx: None } - } - - pub fn len(&self) -> usize { - self.items.len() - } - - pub fn last(&self) -> Option<&StackEntry> { - self.items.last() - } - - pub fn last_mut(&mut self) -> Option<&mut StackEntry> { - self.items.last_mut() - } - - pub fn iter(&self) -> std::slice::Iter { - self.items.iter() - } - - pub fn push(&mut self, entry: StackEntry) { - if entry.kind.bbox().is_some() { - self.bbox_idx = Some(self.len()); - } - self.items.push(entry); - } - - pub fn extend(&mut self, iter: impl IntoIterator) { - let start = self.len(); - self.items.extend(iter); - let last_bbox_offset = self.items[start..] - .iter() - .rposition(|entry| entry.kind.bbox().is_some()); - if let Some(offset) = last_bbox_offset { - self.bbox_idx = Some(start + offset); - } - } - - /// Remove the last stack entry if the predicate returns true. - /// This takes care of updating the parent bboxes. - pub fn pop_if( - &mut self, - mut predicate: impl FnMut(&mut StackEntry) -> bool, - ) -> Option { - let last = self.items.last_mut()?; - if predicate(last) { self.pop() } else { None } - } - - /// Remove the last stack entry. - /// This takes care of updating the parent bboxes. 
- pub fn pop(&mut self) -> Option { - let removed = self.items.pop()?; - - let Some(inner_bbox) = removed.kind.bbox() else { return Some(removed) }; - - self.bbox_idx = self.items.iter_mut().enumerate().rev().find_map(|(i, entry)| { - let outer_bbox = entry.kind.bbox_mut()?; - if let Some((page_idx, rect)) = inner_bbox.rect { - outer_bbox.expand_page(page_idx, rect); - } - Some(i) - }); - - Some(removed) - } - - pub fn parent(&mut self) -> Option<&mut StackEntryKind> { - self.items.last_mut().map(|e| &mut e.kind) - } - - pub fn parent_table(&mut self) -> Option<&mut TableCtx> { - self.parent()?.as_table_mut() - } - - pub fn parent_list(&mut self) -> Option<&mut ListCtx> { - self.parent()?.as_list_mut() - } - - pub fn parent_figure(&mut self) -> Option<&mut FigureCtx> { - self.parent()?.as_figure_mut() - } - - pub fn parent_outline(&mut self) -> Option<(&mut OutlineCtx, &mut Vec)> { - self.items.last_mut().and_then(|e| { - let ctx = e.kind.as_outline_mut()?; - Some((ctx, &mut e.nodes)) - }) - } - - pub fn find_parent_link( - &mut self, - ) -> Option<(LinkId, &Packed, &mut Vec)> { - self.items.iter_mut().rev().find_map(|e| { - let (link_id, link) = e.kind.as_link()?; - Some((link_id, link, &mut e.nodes)) - }) - } - - /// Finds the first parent that has a bounding box. 
- pub fn find_parent_bbox(&mut self) -> Option<&mut BBoxCtx> { - self.items[self.bbox_idx?].kind.bbox_mut() - } -} - -pub struct Placeholders(Vec>); - -impl Placeholders { - pub fn reserve(&mut self) -> Placeholder { - let idx = self.0.len(); - self.0.push(OnceCell::new()); - Placeholder(idx) - } - - pub fn init(&mut self, placeholder: Placeholder, node: Node) { - self.0[placeholder.0] - .set(node) - .map_err(|_| ()) - .expect("placeholder to be uninitialized"); - } - - pub fn take(&mut self, placeholder: Placeholder) -> Node { - self.0[placeholder.0].take().expect("initialized placeholder node") - } -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub struct TableId(u32); - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub struct LinkId(u32); - -#[derive(Debug)] -pub struct StackEntry { - pub loc: Location, - pub span: Span, - pub lang: Option, - pub kind: StackEntryKind, - pub nodes: Vec, -} - -#[derive(Clone, Debug)] -pub enum StackEntryKind { - Standard(TagKind), - Outline(OutlineCtx), - OutlineEntry(Packed), - Table(TableCtx), - TableCell(Packed), - List(ListCtx), - ListItemLabel, - ListItemBody, - BibEntry, - Figure(FigureCtx), - Formula(FigureCtx), - Link(LinkId, Packed), - /// The footnote reference in the text, contains the declaration location. - FootnoteRef(Location), - /// The footnote entry at the end of the page. Contains the [`Location`] of - /// the [`FootnoteElem`](typst_library::model::FootnoteElem). 
- FootnoteEntry(Location), - Code(Option), -} - -impl StackEntryKind { - pub fn as_outline_mut(&mut self) -> Option<&mut OutlineCtx> { - if let Self::Outline(v) = self { Some(v) } else { None } - } - - pub fn as_table_mut(&mut self) -> Option<&mut TableCtx> { - if let Self::Table(v) = self { Some(v) } else { None } - } - - pub fn as_list_mut(&mut self) -> Option<&mut ListCtx> { - if let Self::List(v) = self { Some(v) } else { None } - } - - pub fn as_figure_mut(&mut self) -> Option<&mut FigureCtx> { - if let Self::Figure(v) = self { Some(v) } else { None } - } - - pub fn as_link(&self) -> Option<(LinkId, &Packed)> { - if let Self::Link(id, link) = self { Some((*id, link)) } else { None } - } - - pub fn bbox(&self) -> Option<&BBoxCtx> { - match self { - Self::Table(ctx) => Some(&ctx.bbox), - Self::Figure(ctx) => Some(&ctx.bbox), - Self::Formula(ctx) => Some(&ctx.bbox), - _ => None, - } - } - - pub fn bbox_mut(&mut self) -> Option<&mut BBoxCtx> { - match self { - Self::Table(ctx) => Some(&mut ctx.bbox), - Self::Figure(ctx) => Some(&mut ctx.bbox), - Self::Formula(ctx) => Some(&mut ctx.bbox), - _ => None, - } - } - - fn is_breakable(&self, is_pdf_ua: bool) -> bool { - match self { - StackEntryKind::Standard(tag) => match tag { - TagKind::Part(_) => !is_pdf_ua, - TagKind::Article(_) => !is_pdf_ua, - TagKind::Section(_) => !is_pdf_ua, - TagKind::Div(_) => !is_pdf_ua, - TagKind::BlockQuote(_) => !is_pdf_ua, - TagKind::Caption(_) => !is_pdf_ua, - TagKind::TOC(_) => false, - TagKind::TOCI(_) => false, - TagKind::Index(_) => false, - TagKind::P(_) => true, - TagKind::Hn(_) => !is_pdf_ua, - TagKind::L(_) => false, - TagKind::LI(_) => false, - TagKind::Lbl(_) => !is_pdf_ua, - TagKind::LBody(_) => !is_pdf_ua, - TagKind::Table(_) => false, - TagKind::TR(_) => false, - // TODO: disallow table/grid cells outside of tables/grids - TagKind::TH(_) => false, - TagKind::TD(_) => false, - TagKind::THead(_) => false, - TagKind::TBody(_) => false, - TagKind::TFoot(_) => false, - 
TagKind::Span(_) => true, - TagKind::InlineQuote(_) => !is_pdf_ua, - TagKind::Note(_) => !is_pdf_ua, - TagKind::Reference(_) => !is_pdf_ua, - TagKind::BibEntry(_) => !is_pdf_ua, - TagKind::Code(_) => !is_pdf_ua, - TagKind::Link(_) => !is_pdf_ua, - TagKind::Annot(_) => !is_pdf_ua, - TagKind::Figure(_) => !is_pdf_ua, - TagKind::Formula(_) => !is_pdf_ua, - TagKind::NonStruct(_) => !is_pdf_ua, - TagKind::Datetime(_) => !is_pdf_ua, - TagKind::Terms(_) => !is_pdf_ua, - TagKind::Title(_) => !is_pdf_ua, - }, - StackEntryKind::Outline(_) => false, - StackEntryKind::OutlineEntry(_) => false, - StackEntryKind::Table(_) => false, - StackEntryKind::TableCell(_) => false, - StackEntryKind::List(_) => false, - StackEntryKind::ListItemLabel => false, - StackEntryKind::ListItemBody => false, - StackEntryKind::BibEntry => false, - StackEntryKind::Figure(_) => false, - StackEntryKind::Formula(_) => false, - StackEntryKind::Link(..) => !is_pdf_ua, - StackEntryKind::FootnoteRef(_) => false, - StackEntryKind::FootnoteEntry(_) => false, - StackEntryKind::Code(_) => false, - } - } -} - -#[derive(Debug, Clone, PartialEq)] -pub struct FootnoteCtx { - /// Whether this footenote has been referenced inside the document. The - /// entry will be inserted inside the reading order after the first - /// reference. All other references will still have links to the footnote. - is_referenced: bool, - /// The nodes that make up the footnote entry. 
- entry: Option, -} - -impl FootnoteCtx { - pub const fn new() -> Self { - Self { is_referenced: false, entry: None } - } -} - -/// Figure/Formula context -#[derive(Debug, Clone, PartialEq)] -pub struct FigureCtx { - alt: Option, - bbox: BBoxCtx, -} - -impl FigureCtx { - fn new(alt: Option) -> Self { - Self { alt, bbox: BBoxCtx::new() } - } -} - -#[derive(Debug, Clone, PartialEq)] -pub struct BBoxCtx { - rect: Option<(usize, Rect)>, - multi_page: bool, -} - -impl BBoxCtx { - pub fn new() -> Self { - Self { rect: None, multi_page: false } - } - - pub fn reset(&mut self) { - *self = Self::new(); - } - - /// Expand the bounding box with a `rect` relative to the current frame - /// context transform. - pub fn expand_frame(&mut self, fc: &FrameContext, rect: Rect) { - let Some(page_idx) = fc.page_idx else { return }; - if self.multi_page { - return; - } - let (idx, bbox) = self.rect.get_or_insert(( - page_idx, - Rect::new(Point::splat(Abs::inf()), Point::splat(-Abs::inf())), - )); - if *idx != page_idx { - self.multi_page = true; - self.rect = None; - return; - } - - let size = rect.size(); - for point in [ - rect.min, - rect.min + Point::with_x(size.x), - rect.min + Point::with_y(size.y), - rect.max, - ] { - let p = point.transform(fc.state().transform()); - bbox.min = bbox.min.min(p); - bbox.max = bbox.max.max(p); - } - } - - /// Expand the bounding box with a rectangle that's already transformed into - /// page coordinates. 
- pub fn expand_page(&mut self, page_idx: usize, rect: Rect) { - if self.multi_page { - return; - } - let (idx, bbox) = self.rect.get_or_insert(( - page_idx, - Rect::new(Point::splat(Abs::inf()), Point::splat(-Abs::inf())), - )); - if *idx != page_idx { - self.multi_page = true; - self.rect = None; - return; - } - - bbox.min = bbox.min.min(rect.min); - bbox.max = bbox.max.max(rect.max); - } - - pub fn get(&self) -> Option { - let (page_idx, rect) = self.rect?; - let rect = kg::Rect::from_ltrb( - rect.min.x.to_f32(), - rect.min.y.to_f32(), - rect.max.x.to_f32(), - rect.max.y.to_f32(), - ) - .unwrap(); - Some(BBox::new(page_idx, rect)) - } -} - -#[derive(Debug, Clone, PartialEq)] -pub enum TagNode { - Group(TagGroup), - Leaf(Identifier), - /// Allows inserting a placeholder into the tag tree. - /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. - Placeholder(Placeholder), - FootnoteEntry(Location), -} - -impl TagNode { - pub fn group(tag: impl Into, contents: GroupContents) -> Self { - let lang = contents.lang.map(|l| l.as_str().to_string()); - let tag = tag - .into() - .with_lang(lang) - .with_location(Some(contents.span.into_raw())); - TagNode::Group(TagGroup { tag, nodes: contents.nodes }) - } - - /// A tag group not directly related to a typst element, generated to - /// accomodate the tag structure. - pub fn virtual_group(tag: impl Into, nodes: Vec) -> Self { - let tag = tag.into(); - TagNode::Group(TagGroup { tag, nodes }) - } - - pub fn empty_group(tag: impl Into) -> Self { - Self::virtual_group(tag, Vec::new()) - } -} - -#[derive(Debug, Clone, PartialEq)] -pub struct TagGroup { - tag: TagKind, - nodes: Vec, -} - -#[derive(Debug, Clone, PartialEq)] -pub struct GroupContents { - span: Span, - lang: Option, - nodes: Vec, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct Placeholder(usize); - /// Automatically calls [`Surface::end_tagged`] when dropped. 
pub struct TagHandle<'a, 'b> { surface: &'b mut Surface<'a>, diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index c1d06810e..92abface1 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -311,7 +311,7 @@ fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool { fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { let mut buf = SmallVec::<[u8; 32]>::new(); - _ = write!(&mut buf, "{}x{x}y{y}", table_id.0); + _ = write!(&mut buf, "{}x{x}y{y}", table_id.get()); TagId::from(buf) }