From c5dbd85a814ab9010ea6cd5ce8c24a19d602feec Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 14 May 2025 00:06:06 +0200 Subject: [PATCH 01/76] feat: [draft] generate accessibility tag tree for headings skip-checks:true --- crates/typst-pdf/src/convert.rs | 52 +++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 645d56f11..94925756a 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -10,11 +10,12 @@ use krilla::error::KrillaError; use krilla::geom::PathBuilder; use krilla::page::{PageLabel, PageSettings}; use krilla::surface::Surface; +use krilla::tagging::{Node, SpanTag, Tag, TagGroup, TagTree}; use krilla::{Document, SerializeSettings}; use krilla_svg::render_svg_glyph; use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult}; -use typst_library::foundations::{NativeElement, Repr}; -use typst_library::introspection::Location; +use typst_library::foundations::{NativeElement, Repr, StyleChain}; +use typst_library::introspection::{self, Location}; use typst_library::layout::{ Abs, Frame, FrameItem, GroupItem, PagedDocument, Size, Transform, }; @@ -39,14 +40,16 @@ pub fn convert( typst_document: &PagedDocument, options: &PdfOptions, ) -> SourceResult> { + // HACK + let config = Configuration::new_with_validator(Validator::UA1); let settings = SerializeSettings { compress_content_streams: true, no_device_cs: true, ascii_compatible: false, xmp_metadata: true, cmyk_profile: None, - configuration: options.standards.config, - enable_tagging: false, + configuration: config, + enable_tagging: true, render_svg_glyph_fn: render_svg_glyph, }; @@ -54,6 +57,7 @@ pub fn convert( let page_index_converter = PageIndexConverter::new(typst_document, options); let named_destinations = collect_named_destinations(typst_document, &page_index_converter); + let mut gc = GlobalContext::new( typst_document, options, @@ 
-67,6 +71,12 @@ pub fn convert( document.set_outline(build_outline(&gc)); document.set_metadata(build_metadata(&gc)); + let mut tag_tree = TagTree::new(); + for tag in gc.tags.drain(..) { + tag_tree.push(tag); + } + document.set_tag_tree(tag_tree); + finish(document, gc, options.standards.config) } @@ -225,6 +235,8 @@ pub(crate) struct GlobalContext<'a> { /// The languages used throughout the document. pub(crate) languages: BTreeMap, pub(crate) page_index_converter: PageIndexConverter, + pub(crate) tag_stack: Vec, + pub(crate) tags: Vec, } impl<'a> GlobalContext<'a> { @@ -244,6 +256,8 @@ impl<'a> GlobalContext<'a> { image_spans: HashSet::new(), languages: BTreeMap::new(), page_index_converter, + tag_stack: Vec::new(), + tags: Vec::new(), } } } @@ -279,7 +293,35 @@ pub(crate) fn handle_frame( handle_image(gc, fc, image, *size, surface, *span)? } FrameItem::Link(d, s) => handle_link(fc, gc, d, *s), - FrameItem::Tag(_) => {} + FrameItem::Tag(introspection::Tag::Start(elem)) => { + let Some(heading) = elem.to_packed::() else { continue }; + let Some(loc) = heading.location() else { continue }; + + let level = heading.resolve_level(StyleChain::default()); + let name = heading.body.plain_text().to_string(); + let heading_id = surface + .start_tagged(krilla::tagging::ContentTag::Span(SpanTag::empty())); + let tag = match level.get() { + 1 => Tag::H1(Some(name)), + 2 => Tag::H2(Some(name)), + 3 => Tag::H3(Some(name)), + 4 => Tag::H4(Some(name)), + 5 => Tag::H5(Some(name)), + _ => Tag::H6(Some(name)), + }; + let mut tag_group = TagGroup::new(tag); + tag_group.push(Node::Leaf(heading_id)); + gc.tags.push(Node::Group(tag_group)); + + gc.tag_stack.push(loc); + } + FrameItem::Tag(introspection::Tag::End(loc, _)) => { + // FIXME: support or split up content tags that span multiple pages + if gc.tag_stack.last() == Some(loc) { + surface.end_tagged(); + gc.tag_stack.pop(); + } + } } fc.pop(); From ab7eea23f15e506505743c2016afce5f611b4f59 Mon Sep 17 00:00:00 2001 From: Tobias 
Schmitz Date: Tue, 27 May 2025 15:55:04 +0200 Subject: [PATCH 02/76] feat: [WIP] make more things locatable skip-checks:true --- crates/typst-library/src/html/mod.rs | 3 ++- .../src/introspection/introspector.rs | 2 ++ crates/typst-library/src/layout/grid/mod.rs | 9 +++++---- crates/typst-library/src/layout/hide.rs | 3 ++- crates/typst-library/src/layout/repeat.rs | 3 ++- crates/typst-library/src/math/mod.rs | 5 +++-- crates/typst-library/src/math/root.rs | 3 ++- crates/typst-library/src/model/cite.rs | 2 +- crates/typst-library/src/model/emph.rs | 3 ++- crates/typst-library/src/model/enum.rs | 5 +++-- crates/typst-library/src/model/figure.rs | 2 +- crates/typst-library/src/model/footnote.rs | 2 +- crates/typst-library/src/model/link.rs | 4 ++-- crates/typst-library/src/model/list.rs | 5 +++-- crates/typst-library/src/model/outline.rs | 2 +- crates/typst-library/src/model/par.rs | 2 +- crates/typst-library/src/model/strong.rs | 3 ++- crates/typst-library/src/model/table.rs | 14 +++++++------- crates/typst-library/src/model/terms.rs | 5 +++-- crates/typst-library/src/text/deco.rs | 9 +++++---- crates/typst-library/src/text/raw.rs | 4 +++- crates/typst-library/src/text/shift.rs | 5 +++-- crates/typst-library/src/visualize/image/mod.rs | 3 ++- 23 files changed, 58 insertions(+), 40 deletions(-) diff --git a/crates/typst-library/src/html/mod.rs b/crates/typst-library/src/html/mod.rs index 7fc8adecd..4eb800dd6 100644 --- a/crates/typst-library/src/html/mod.rs +++ b/crates/typst-library/src/html/mod.rs @@ -8,6 +8,7 @@ pub use self::dom::*; use ecow::EcoString; use crate::foundations::{elem, Content, Module, Scope}; +use crate::introspection::Locatable; /// Create a module with all HTML definitions. pub fn module() -> Module { @@ -40,7 +41,7 @@ pub fn module() -> Module { /// A div with _Typst content_ inside! /// ] /// ``` -#[elem(name = "elem")] +#[elem(name = "elem", Locatable)] pub struct HtmlElem { /// The element's tag. 
#[required] diff --git a/crates/typst-library/src/introspection/introspector.rs b/crates/typst-library/src/introspection/introspector.rs index d2ad0525b..a81a92035 100644 --- a/crates/typst-library/src/introspection/introspector.rs +++ b/crates/typst-library/src/introspection/introspector.rs @@ -388,6 +388,8 @@ impl IntrospectorBuilder { ); } + dbg!(elems.len()); + self.finalize(elems) } diff --git a/crates/typst-library/src/layout/grid/mod.rs b/crates/typst-library/src/layout/grid/mod.rs index 52621c647..c015c8a68 100644 --- a/crates/typst-library/src/layout/grid/mod.rs +++ b/crates/typst-library/src/layout/grid/mod.rs @@ -13,6 +13,7 @@ use crate::foundations::{ cast, elem, scope, Array, CastInfo, Content, Context, Fold, FromValue, Func, IntoValue, NativeElement, Packed, Reflect, Resolve, Show, Smart, StyleChain, Value, }; +use crate::introspection::Locatable; use crate::layout::{ Alignment, BlockElem, Length, OuterHAlignment, OuterVAlignment, Rel, Sides, Sizing, }; @@ -136,7 +137,7 @@ use crate::visualize::{Paint, Stroke}; /// /// Furthermore, strokes of a repeated grid header or footer will take /// precedence over regular cell strokes. -#[elem(scope, Show)] +#[elem(scope, Locatable, Show)] pub struct GridElem { /// The column sizes. /// @@ -462,7 +463,7 @@ impl TryFrom for GridItem { /// If `repeat` is set to `true`, the header will be repeated across pages. For /// an example, refer to the [`table.header`]($table.header) element and the /// [`grid.stroke`]($grid.stroke) parameter. -#[elem(name = "header", title = "Grid Header")] +#[elem(name = "header", title = "Grid Header", Locatable)] pub struct GridHeader { /// Whether this header should be repeated across pages. #[default(true)] @@ -490,7 +491,7 @@ pub struct GridHeader { /// itself on every page of the table. /// /// No other grid cells may be placed after the footer. 
-#[elem(name = "footer", title = "Grid Footer")] +#[elem(name = "footer", title = "Grid Footer", Locatable)] pub struct GridFooter { /// Whether this footer should be repeated across pages. #[default(true)] @@ -657,7 +658,7 @@ pub struct GridVLine { /// which allows you, for example, to apply styles based on a cell's position. /// Refer to the examples of the [`table.cell`]($table.cell) element to learn /// more about this. -#[elem(name = "cell", title = "Grid Cell", Show)] +#[elem(name = "cell", title = "Grid Cell", Locatable, Show)] pub struct GridCell { /// The cell's body. #[required] diff --git a/crates/typst-library/src/layout/hide.rs b/crates/typst-library/src/layout/hide.rs index eca33471a..dafff06cd 100644 --- a/crates/typst-library/src/layout/hide.rs +++ b/crates/typst-library/src/layout/hide.rs @@ -1,6 +1,7 @@ use crate::diag::SourceResult; use crate::engine::Engine; use crate::foundations::{elem, Content, Packed, Show, StyleChain}; +use crate::introspection::Locatable; /// Hides content without affecting layout. /// @@ -14,7 +15,7 @@ use crate::foundations::{elem, Content, Packed, Show, StyleChain}; /// Hello Jane \ /// #hide[Hello] Joe /// ``` -#[elem(Show)] +#[elem(Locatable, Show)] pub struct HideElem { /// The content to hide. #[required] diff --git a/crates/typst-library/src/layout/repeat.rs b/crates/typst-library/src/layout/repeat.rs index 9579f1856..ab042ceb1 100644 --- a/crates/typst-library/src/layout/repeat.rs +++ b/crates/typst-library/src/layout/repeat.rs @@ -1,6 +1,7 @@ use crate::diag::SourceResult; use crate::engine::Engine; use crate::foundations::{elem, Content, NativeElement, Packed, Show, StyleChain}; +use crate::introspection::Locatable; use crate::layout::{BlockElem, Length}; /// Repeats content to the available space. @@ -24,7 +25,7 @@ use crate::layout::{BlockElem, Length}; /// Berlin, the 22nd of December, 2022 /// ] /// ``` -#[elem(Show)] +#[elem(Locatable, Show)] pub struct RepeatElem { /// The content to repeat. 
#[required] diff --git a/crates/typst-library/src/math/mod.rs b/crates/typst-library/src/math/mod.rs index 2e6d42b13..5daa3e358 100644 --- a/crates/typst-library/src/math/mod.rs +++ b/crates/typst-library/src/math/mod.rs @@ -28,6 +28,7 @@ use typst_utils::singleton; use unicode_math_class::MathClass; use crate::foundations::{elem, Content, Module, NativeElement, Scope}; +use crate::introspection::Locatable; use crate::layout::{Em, HElem}; use crate::text::TextElem; @@ -109,7 +110,7 @@ pub fn module() -> Module { pub trait Mathy {} /// A math alignment point: `&`, `&&`. -#[elem(title = "Alignment Point", Mathy)] +#[elem(title = "Alignment Point", Mathy, Locatable)] pub struct AlignPointElem {} impl AlignPointElem { @@ -136,7 +137,7 @@ impl AlignPointElem { /// /// $x loves y and y loves 5$ /// ``` -#[elem(Mathy)] +#[elem(Mathy, Locatable)] pub struct ClassElem { /// The class to apply to the content. #[required] diff --git a/crates/typst-library/src/math/root.rs b/crates/typst-library/src/math/root.rs index ad111700b..15ef8c55d 100644 --- a/crates/typst-library/src/math/root.rs +++ b/crates/typst-library/src/math/root.rs @@ -1,6 +1,7 @@ use typst_syntax::Span; use crate::foundations::{elem, func, Content, NativeElement}; +use crate::introspection::Locatable; use crate::math::Mathy; /// A square root. @@ -22,7 +23,7 @@ pub fn sqrt( /// ```example /// $ root(3, x) $ /// ``` -#[elem(Mathy)] +#[elem(Mathy, Locatable)] pub struct RootElem { /// Which root of the radicand to take. #[positional] diff --git a/crates/typst-library/src/model/cite.rs b/crates/typst-library/src/model/cite.rs index 29497993d..7d118d59f 100644 --- a/crates/typst-library/src/model/cite.rs +++ b/crates/typst-library/src/model/cite.rs @@ -43,7 +43,7 @@ use crate::text::{Lang, Region, TextElem}; /// This function indirectly has dedicated syntax. [References]($ref) can be /// used to cite works from the bibliography. The label then corresponds to the /// citation key. 
-#[elem(Synthesize)] +#[elem(Locatable, Synthesize)] pub struct CiteElem { /// The citation key that identifies the entry in the bibliography that /// shall be cited, as a label. diff --git a/crates/typst-library/src/model/emph.rs b/crates/typst-library/src/model/emph.rs index 45097b340..73744381f 100644 --- a/crates/typst-library/src/model/emph.rs +++ b/crates/typst-library/src/model/emph.rs @@ -4,6 +4,7 @@ use crate::foundations::{ elem, Content, NativeElement, Packed, Show, StyleChain, TargetElem, }; use crate::html::{tag, HtmlElem}; +use crate::introspection::Locatable; use crate::text::{ItalicToggle, TextElem}; /// Emphasizes content by toggling italics. @@ -29,7 +30,7 @@ use crate::text::{ItalicToggle, TextElem}; /// This function also has dedicated syntax: To emphasize content, simply /// enclose it in underscores (`_`). Note that this only works at word /// boundaries. To emphasize part of a word, you have to use the function. -#[elem(title = "Emphasis", keywords = ["italic"], Show)] +#[elem(title = "Emphasis", keywords = ["italic"], Locatable, Show)] pub struct EmphElem { /// The content to emphasize. #[required] diff --git a/crates/typst-library/src/model/enum.rs b/crates/typst-library/src/model/enum.rs index f1f93702b..0b5211d5f 100644 --- a/crates/typst-library/src/model/enum.rs +++ b/crates/typst-library/src/model/enum.rs @@ -10,6 +10,7 @@ use crate::foundations::{ Styles, TargetElem, }; use crate::html::{attr, tag, HtmlElem}; +use crate::introspection::Locatable; use crate::layout::{Alignment, BlockElem, Em, HAlignment, Length, VAlignment, VElem}; use crate::model::{ ListItemLike, ListLike, Numbering, NumberingPattern, ParElem, ParbreakElem, @@ -71,7 +72,7 @@ use crate::model::{ /// Enumeration items can contain multiple paragraphs and other block-level /// content. All content that is indented more than an item's marker becomes /// part of that item. 
-#[elem(scope, title = "Numbered List", Show)] +#[elem(scope, title = "Numbered List", Locatable, Show)] pub struct EnumElem { /// Defines the default [spacing]($enum.spacing) of the enumeration. If it /// is `{false}`, the items are spaced apart with @@ -271,7 +272,7 @@ impl Show for Packed { } /// An enumeration item. -#[elem(name = "item", title = "Numbered List Item")] +#[elem(name = "item", title = "Numbered List Item", Locatable)] pub struct EnumItem { /// The item's number. #[positional] diff --git a/crates/typst-library/src/model/figure.rs b/crates/typst-library/src/model/figure.rs index bec667d6e..396567b73 100644 --- a/crates/typst-library/src/model/figure.rs +++ b/crates/typst-library/src/model/figure.rs @@ -473,7 +473,7 @@ impl Outlinable for Packed { /// caption: [A rectangle], /// ) /// ``` -#[elem(name = "caption", Synthesize, Show)] +#[elem(name = "caption", Locatable, Synthesize, Show)] pub struct FigureCaption { /// The caption's position in the figure. Either `{top}` or `{bottom}`. /// diff --git a/crates/typst-library/src/model/footnote.rs b/crates/typst-library/src/model/footnote.rs index dfa3933bb..773f67467 100644 --- a/crates/typst-library/src/model/footnote.rs +++ b/crates/typst-library/src/model/footnote.rs @@ -192,7 +192,7 @@ cast! { /// page run is a sequence of pages without an explicit pagebreak in between). /// For this reason, set and show rules for footnote entries should be defined /// before any page content, typically at the very start of the document. -#[elem(name = "entry", title = "Footnote Entry", Show, ShowSet)] +#[elem(name = "entry", title = "Footnote Entry", Locatable, Show, ShowSet)] pub struct FootnoteEntry { /// The footnote for this entry. Its location can be used to determine /// the footnote counter state. 
diff --git a/crates/typst-library/src/model/link.rs b/crates/typst-library/src/model/link.rs index ea85aa945..3d9dc5e55 100644 --- a/crates/typst-library/src/model/link.rs +++ b/crates/typst-library/src/model/link.rs @@ -9,7 +9,7 @@ use crate::foundations::{ StyleChain, Styles, TargetElem, }; use crate::html::{attr, tag, HtmlElem}; -use crate::introspection::Location; +use crate::introspection::{Locatable, Location}; use crate::layout::Position; use crate::text::TextElem; @@ -38,7 +38,7 @@ use crate::text::TextElem; /// # Syntax /// This function also has dedicated syntax: Text that starts with `http://` or /// `https://` is automatically turned into a link. -#[elem(Show)] +#[elem(Locatable, Show)] pub struct LinkElem { /// The destination the link points to. /// diff --git a/crates/typst-library/src/model/list.rs b/crates/typst-library/src/model/list.rs index 3c3afd338..a36be3cfc 100644 --- a/crates/typst-library/src/model/list.rs +++ b/crates/typst-library/src/model/list.rs @@ -7,6 +7,7 @@ use crate::foundations::{ Smart, StyleChain, Styles, TargetElem, Value, }; use crate::html::{tag, HtmlElem}; +use crate::introspection::Locatable; use crate::layout::{BlockElem, Em, Length, VElem}; use crate::model::{ParElem, ParbreakElem}; use crate::text::TextElem; @@ -42,7 +43,7 @@ use crate::text::TextElem; /// followed by a space to create a list item. A list item can contain multiple /// paragraphs and other block-level content. All content that is indented /// more than an item's marker becomes part of that item. -#[elem(scope, title = "Bullet List", Show)] +#[elem(scope, title = "Bullet List", Locatable, Show)] pub struct ListElem { /// Defines the default [spacing]($list.spacing) of the list. If it is /// `{false}`, the items are spaced apart with @@ -178,7 +179,7 @@ impl Show for Packed { } /// A bullet list item. 
-#[elem(name = "item", title = "Bullet List Item")] +#[elem(name = "item", title = "Bullet List Item", Locatable)] pub struct ListItem { /// The item's body. #[required] diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index 16a116146..7b5838d9f 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -364,7 +364,7 @@ pub trait Outlinable: Refable { /// With show-set and show rules on outline entries, you can richly customize /// the outline's appearance. See the /// [section on styling the outline]($outline/#styling-the-outline) for details. -#[elem(scope, name = "entry", title = "Outline Entry", Show)] +#[elem(scope, name = "entry", title = "Outline Entry", Locatable, Show)] pub struct OutlineEntry { /// The nesting level of this outline entry. Starts at `{1}` for top-level /// entries. diff --git a/crates/typst-library/src/model/par.rs b/crates/typst-library/src/model/par.rs index cf31b5195..ed4f333e4 100644 --- a/crates/typst-library/src/model/par.rs +++ b/crates/typst-library/src/model/par.rs @@ -93,7 +93,7 @@ use crate::model::Numbering; /// let $a$ be the smallest of the /// three integers. Then, we ... /// ``` -#[elem(scope, title = "Paragraph")] +#[elem(scope, title = "Paragraph", Locatable)] pub struct ParElem { /// The spacing between lines. /// diff --git a/crates/typst-library/src/model/strong.rs b/crates/typst-library/src/model/strong.rs index 16d04ba97..ba795b33f 100644 --- a/crates/typst-library/src/model/strong.rs +++ b/crates/typst-library/src/model/strong.rs @@ -4,6 +4,7 @@ use crate::foundations::{ elem, Content, NativeElement, Packed, Show, StyleChain, TargetElem, }; use crate::html::{tag, HtmlElem}; +use crate::introspection::Locatable; use crate::text::{TextElem, WeightDelta}; /// Strongly emphasizes content by increasing the font weight. @@ -24,7 +25,7 @@ use crate::text::{TextElem, WeightDelta}; /// simply enclose it in stars/asterisks (`*`). 
Note that this only works at /// word boundaries. To strongly emphasize part of a word, you have to use the /// function. -#[elem(title = "Strong Emphasis", keywords = ["bold", "weight"], Show)] +#[elem(title = "Strong Emphasis", keywords = ["bold", "weight"], Locatable, Show)] pub struct StrongElem { /// The delta to apply on the font weight. /// diff --git a/crates/typst-library/src/model/table.rs b/crates/typst-library/src/model/table.rs index dcc77b0dc..76ba500a2 100644 --- a/crates/typst-library/src/model/table.rs +++ b/crates/typst-library/src/model/table.rs @@ -10,7 +10,7 @@ use crate::foundations::{ TargetElem, }; use crate::html::{attr, tag, HtmlAttrs, HtmlElem, HtmlTag}; -use crate::introspection::Locator; +use crate::introspection::{Locatable, Locator}; use crate::layout::grid::resolve::{table_to_cellgrid, Cell, CellGrid, Entry}; use crate::layout::{ show_grid_cell, Abs, Alignment, BlockElem, Celled, GridCell, GridFooter, GridHLine, @@ -121,7 +121,7 @@ use crate::visualize::{Paint, Stroke}; /// [Robert], b, a, b, /// ) /// ``` -#[elem(scope, Show, LocalName, Figurable)] +#[elem(scope, Locatable, Show, LocalName, Figurable)] pub struct TableElem { /// The column sizes. See the [grid documentation]($grid) for more /// information on track sizing. @@ -531,7 +531,7 @@ impl TryFrom for TableItem { /// [7.34], [57], [2], /// ) /// ``` -#[elem(name = "header", title = "Table Header")] +#[elem(name = "header", title = "Table Header", Locatable)] pub struct TableHeader { /// Whether this header should be repeated across pages. #[default(true)] @@ -561,7 +561,7 @@ pub struct TableHeader { /// totals, or other information that should be visible on every page. /// /// No other table cells may be placed after the footer. -#[elem(name = "footer", title = "Table Footer")] +#[elem(name = "footer", title = "Table Footer", Locatable)] pub struct TableFooter { /// Whether this footer should be repeated across pages. 
#[default(true)] @@ -604,7 +604,7 @@ pub struct TableFooter { /// [19:00], [Day 1 Attendee Mixer], /// ) /// ``` -#[elem(name = "hline", title = "Table Horizontal Line")] +#[elem(name = "hline", title = "Table Horizontal Line", Locatable)] pub struct TableHLine { /// The row above which the horizontal line is placed (zero-indexed). /// Functions identically to the `y` field in [`grid.hline`]($grid.hline.y). @@ -649,7 +649,7 @@ pub struct TableHLine { /// use the [table's `stroke`]($table.stroke) field or [`table.cell`'s /// `stroke`]($table.cell.stroke) field instead if the line you want to place is /// part of all your tables' designs. -#[elem(name = "vline", title = "Table Vertical Line")] +#[elem(name = "vline", title = "Table Vertical Line", Locatable)] pub struct TableVLine { /// The column before which the horizontal line is placed (zero-indexed). /// Functions identically to the `x` field in [`grid.vline`]($grid.vline). @@ -770,7 +770,7 @@ pub struct TableVLine { /// [Vikram], [49], [Perseverance], /// ) /// ``` -#[elem(name = "cell", title = "Table Cell", Show)] +#[elem(name = "cell", title = "Table Cell", Locatable, Show)] pub struct TableCell { /// The cell's body. #[required] diff --git a/crates/typst-library/src/model/terms.rs b/crates/typst-library/src/model/terms.rs index 3df74cd9e..2679a9500 100644 --- a/crates/typst-library/src/model/terms.rs +++ b/crates/typst-library/src/model/terms.rs @@ -7,6 +7,7 @@ use crate::foundations::{ Styles, TargetElem, }; use crate::html::{tag, HtmlElem}; +use crate::introspection::Locatable; use crate::layout::{Em, HElem, Length, Sides, StackChild, StackElem, VElem}; use crate::model::{ListItemLike, ListLike, ParElem, ParbreakElem}; use crate::text::TextElem; @@ -27,7 +28,7 @@ use crate::text::TextElem; /// # Syntax /// This function also has dedicated syntax: Starting a line with a slash, /// followed by a term, a colon and a description creates a term list item. 
-#[elem(scope, title = "Term List", Show)] +#[elem(scope, title = "Term List", Locatable, Show)] pub struct TermsElem { /// Defines the default [spacing]($terms.spacing) of the term list. If it is /// `{false}`, the items are spaced apart with @@ -205,7 +206,7 @@ impl Show for Packed { } /// A term list item. -#[elem(name = "item", title = "Term List Item")] +#[elem(name = "item", title = "Term List Item", Locatable)] pub struct TermItem { /// The term described by the list item. #[required] diff --git a/crates/typst-library/src/text/deco.rs b/crates/typst-library/src/text/deco.rs index d745a48fd..3004d58af 100644 --- a/crates/typst-library/src/text/deco.rs +++ b/crates/typst-library/src/text/deco.rs @@ -6,6 +6,7 @@ use crate::foundations::{ elem, Content, NativeElement, Packed, Show, Smart, StyleChain, TargetElem, }; use crate::html::{attr, tag, HtmlElem}; +use crate::introspection::Locatable; use crate::layout::{Abs, Corners, Length, Rel, Sides}; use crate::text::{BottomEdge, BottomEdgeMetric, TextElem, TopEdge, TopEdgeMetric}; use crate::visualize::{Color, FixedStroke, Paint, Stroke}; @@ -16,7 +17,7 @@ use crate::visualize::{Color, FixedStroke, Paint, Stroke}; /// ```example /// This is #underline[important]. /// ``` -#[elem(Show)] +#[elem(Locatable, Show)] pub struct UnderlineElem { /// How to [stroke] the line. /// @@ -112,7 +113,7 @@ impl Show for Packed { /// ```example /// #overline[A line over text.] /// ``` -#[elem(Show)] +#[elem(Locatable, Show)] pub struct OverlineElem { /// How to [stroke] the line. /// @@ -211,7 +212,7 @@ impl Show for Packed { /// ```example /// This is #strike[not] relevant. /// ``` -#[elem(title = "Strikethrough", Show)] +#[elem(title = "Strikethrough", Locatable, Show)] pub struct StrikeElem { /// How to [stroke] the line. /// @@ -292,7 +293,7 @@ impl Show for Packed { /// ```example /// This is #highlight[important]. 
/// ``` -#[elem(Show)] +#[elem(Locatable, Show)] pub struct HighlightElem { /// The color to highlight the text with. /// diff --git a/crates/typst-library/src/text/raw.rs b/crates/typst-library/src/text/raw.rs index e1f4cf13d..4b10143e0 100644 --- a/crates/typst-library/src/text/raw.rs +++ b/crates/typst-library/src/text/raw.rs @@ -20,6 +20,7 @@ use crate::foundations::{ PlainText, Show, ShowSet, Smart, StyleChain, Styles, Synthesize, TargetElem, }; use crate::html::{tag, HtmlElem}; +use crate::introspection::Locatable; use crate::layout::{BlockBody, BlockElem, Em, HAlignment}; use crate::loading::{DataSource, Load}; use crate::model::{Figurable, ParElem}; @@ -78,6 +79,7 @@ use crate::World; scope, title = "Raw Text / Code", Synthesize, + Locatable, Show, ShowSet, LocalName, @@ -636,7 +638,7 @@ fn format_theme_error(error: syntect::LoadingError) -> LoadError { /// It allows you to access various properties of the line, such as the line /// number, the raw non-highlighted text, the highlighted text, and whether it /// is the first or last line of the raw block. -#[elem(name = "line", title = "Raw Text / Code Line", Show, PlainText)] +#[elem(name = "line", title = "Raw Text / Code Line", Locatable, Show, PlainText)] pub struct RawLine { /// The line number of the raw line inside of the raw block, starts at 1. 
#[required] diff --git a/crates/typst-library/src/text/shift.rs b/crates/typst-library/src/text/shift.rs index b7f3ed926..8596cdd37 100644 --- a/crates/typst-library/src/text/shift.rs +++ b/crates/typst-library/src/text/shift.rs @@ -4,6 +4,7 @@ use crate::foundations::{ elem, Content, NativeElement, Packed, Show, Smart, StyleChain, TargetElem, }; use crate::html::{tag, HtmlElem}; +use crate::introspection::Locatable; use crate::layout::{Em, Length}; use crate::text::{FontMetrics, TextElem, TextSize}; use ttf_parser::Tag; @@ -17,7 +18,7 @@ use typst_library::text::ScriptMetrics; /// ```example /// Revenue#sub[yearly] /// ``` -#[elem(title = "Subscript", Show)] +#[elem(title = "Subscript", Locatable, Show)] pub struct SubElem { /// Whether to create artificial subscripts by lowering and scaling down /// regular glyphs. @@ -95,7 +96,7 @@ impl Show for Packed { /// ```example /// 1#super[st] try! /// ``` -#[elem(title = "Superscript", Show)] +#[elem(title = "Superscript", Locatable, Show)] pub struct SuperElem { /// Whether to create artificial superscripts by raising and scaling down /// regular glyphs. 
diff --git a/crates/typst-library/src/visualize/image/mod.rs b/crates/typst-library/src/visualize/image/mod.rs index f5109798b..5b715b85c 100644 --- a/crates/typst-library/src/visualize/image/mod.rs +++ b/crates/typst-library/src/visualize/image/mod.rs @@ -21,6 +21,7 @@ use crate::foundations::{ cast, elem, func, scope, Bytes, Cast, Content, Derived, NativeElement, Packed, Show, Smart, StyleChain, }; +use crate::introspection::Locatable; use crate::layout::{BlockElem, Length, Rel, Sizing}; use crate::loading::{DataSource, Load, LoadSource, Loaded, Readable}; use crate::model::Figurable; @@ -44,7 +45,7 @@ use crate::text::LocalName; /// ], /// ) /// ``` -#[elem(scope, Show, LocalName, Figurable)] +#[elem(scope, Locatable, Show, LocalName, Figurable)] pub struct ImageElem { /// A [path]($syntax/#paths) to an image file or raw bytes making up an /// image in one of the supported [formats]($image.format). From c6b3b371b00403ae5d7ebd74c52809382f055bcb Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 22 May 2025 12:03:10 +0200 Subject: [PATCH 03/76] feat: [WIP] write tags skip-checks:true --- crates/typst-layout/src/pages/run.rs | 2 + crates/typst-pdf/src/convert.rs | 67 ++++++------ crates/typst-pdf/src/lib.rs | 1 + crates/typst-pdf/src/tags.rs | 149 +++++++++++++++++++++++++++ 4 files changed, 182 insertions(+), 37 deletions(-) create mode 100644 crates/typst-pdf/src/tags.rs diff --git a/crates/typst-layout/src/pages/run.rs b/crates/typst-layout/src/pages/run.rs index 6d2d29da5..e9e4e1105 100644 --- a/crates/typst-layout/src/pages/run.rs +++ b/crates/typst-layout/src/pages/run.rs @@ -185,6 +185,8 @@ fn layout_page_run_impl( )?; // Layouts a single marginal. + // TODO: add some sort of tag that indicates the marginals and use it to + // mark them as artifacts for PDF/UA. 
let mut layout_marginal = |content: &Option, area, align| { let Some(content) = content else { return Ok(None) }; let aligned = content.clone().styled(AlignElem::set_alignment(align)); diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 94925756a..cd165b124 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -10,11 +10,11 @@ use krilla::error::KrillaError; use krilla::geom::PathBuilder; use krilla::page::{PageLabel, PageSettings}; use krilla::surface::Surface; -use krilla::tagging::{Node, SpanTag, Tag, TagGroup, TagTree}; +use krilla::tagging::{ArtifactType, ContentTag, Node}; use krilla::{Document, SerializeSettings}; use krilla_svg::render_svg_glyph; use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult}; -use typst_library::foundations::{NativeElement, Repr, StyleChain}; +use typst_library::foundations::{NativeElement, Repr}; use typst_library::introspection::{self, Location}; use typst_library::layout::{ Abs, Frame, FrameItem, GroupItem, PagedDocument, Size, Transform, @@ -31,6 +31,7 @@ use crate::metadata::build_metadata; use crate::outline::build_outline; use crate::page::PageLabelExt; use crate::shape::handle_shape; +use crate::tags::{handle_close_tag, handle_open_tag, Tags}; use crate::text::handle_text; use crate::util::{convert_path, display_font, AbsExt, TransformExt}; use crate::PdfOptions; @@ -49,6 +50,8 @@ pub fn convert( xmp_metadata: true, cmyk_profile: None, configuration: config, + // TODO: Should we just set this to false? If set to `false` this will + // automatically be enabled if the `UA1` validator is used. enable_tagging: true, render_svg_glyph_fn: render_svg_glyph, }; @@ -70,12 +73,7 @@ pub fn convert( document.set_outline(build_outline(&gc)); document.set_metadata(build_metadata(&gc)); - - let mut tag_tree = TagTree::new(); - for tag in gc.tags.drain(..) 
{ - tag_tree.push(tag); - } - document.set_tag_tree(tag_tree); + document.set_tag_tree(gc.tags.take_tree()); finish(document, gc, options.standards.config) } @@ -115,6 +113,19 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul let mut surface = page.surface(); let mut fc = FrameContext::new(typst_page.frame.size()); + // Marked-content may not cross page boundaries: reopen tag + // that was closed at the end of the last page. + if let Some((_, _, nodes)) = gc.tags.stack.last_mut() { + let tag = if gc.tags.in_artifact { + ContentTag::Artifact(ArtifactType::Other) + } else { + ContentTag::Other + }; + // TODO: somehow avoid empty marked-content sequences + let id = surface.start_tagged(tag); + nodes.push(Node::Leaf(id)); + } + handle_frame( &mut fc, &typst_page.frame, @@ -123,6 +134,11 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul gc, )?; + // Marked-content may not cross page boundaries: close open tag. + if !gc.tags.stack.is_empty() { + surface.end_tagged(); + } + surface.finish(); for annotation in fc.annotations { @@ -235,8 +251,8 @@ pub(crate) struct GlobalContext<'a> { /// The languages used throughout the document. pub(crate) languages: BTreeMap, pub(crate) page_index_converter: PageIndexConverter, - pub(crate) tag_stack: Vec, - pub(crate) tags: Vec, + /// Tagged PDF context. 
+ pub(crate) tags: Tags, } impl<'a> GlobalContext<'a> { @@ -256,8 +272,8 @@ impl<'a> GlobalContext<'a> { image_spans: HashSet::new(), languages: BTreeMap::new(), page_index_converter, - tag_stack: Vec::new(), - tags: Vec::new(), + + tags: Tags::new(), } } } @@ -294,33 +310,10 @@ pub(crate) fn handle_frame( } FrameItem::Link(d, s) => handle_link(fc, gc, d, *s), FrameItem::Tag(introspection::Tag::Start(elem)) => { - let Some(heading) = elem.to_packed::() else { continue }; - let Some(loc) = heading.location() else { continue }; - - let level = heading.resolve_level(StyleChain::default()); - let name = heading.body.plain_text().to_string(); - let heading_id = surface - .start_tagged(krilla::tagging::ContentTag::Span(SpanTag::empty())); - let tag = match level.get() { - 1 => Tag::H1(Some(name)), - 2 => Tag::H2(Some(name)), - 3 => Tag::H3(Some(name)), - 4 => Tag::H4(Some(name)), - 5 => Tag::H5(Some(name)), - _ => Tag::H6(Some(name)), - }; - let mut tag_group = TagGroup::new(tag); - tag_group.push(Node::Leaf(heading_id)); - gc.tags.push(Node::Group(tag_group)); - - gc.tag_stack.push(loc); + handle_open_tag(gc, surface, elem) } FrameItem::Tag(introspection::Tag::End(loc, _)) => { - // FIXME: support or split up content tags that span multiple pages - if gc.tag_stack.last() == Some(loc) { - surface.end_tagged(); - gc.tag_stack.pop(); - } + handle_close_tag(gc, surface, loc); } } diff --git a/crates/typst-pdf/src/lib.rs b/crates/typst-pdf/src/lib.rs index 88c6ee552..c3835d247 100644 --- a/crates/typst-pdf/src/lib.rs +++ b/crates/typst-pdf/src/lib.rs @@ -9,6 +9,7 @@ mod outline; mod page; mod paint; mod shape; +mod tags; mod text; mod util; diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs new file mode 100644 index 000000000..70792dfe8 --- /dev/null +++ b/crates/typst-pdf/src/tags.rs @@ -0,0 +1,149 @@ +use krilla::surface::Surface; +use krilla::tagging::{ContentTag, Node, Tag, TagGroup, TagTree}; +use typst_library::foundations::{Content, StyleChain}; 
+use typst_library::introspection::Location; +use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry}; + +use crate::convert::GlobalContext; + +pub(crate) struct Tags { + /// The intermediary stack of nested tag groups. + pub(crate) stack: Vec<(Location, Tag, Vec)>, + pub(crate) in_artifact: bool, + + /// The output. + pub(crate) tree: TagTree, +} + +impl Tags { + pub(crate) fn new() -> Self { + Self { + stack: Vec::new(), + in_artifact: false, + tree: TagTree::new(), + } + } + + pub(crate) fn take_tree(&mut self) -> TagTree { + std::mem::take(&mut self.tree) + } + + pub(crate) fn context_supports(&self, tag: &Tag) -> bool { + let Some((_, parent, _)) = self.stack.last() else { return true }; + + use Tag::*; + + match parent { + Part => true, + Article => !matches!(tag, Article), + Section => true, + BlockQuote => todo!(), + Caption => todo!(), + TOC => matches!(tag, TOC | TOCI), + // TODO: NonStruct is allowed to but (currently?) not supported by krilla + TOCI => matches!(tag, TOC | Lbl | Reference | P), + Index => todo!(), + P => todo!(), + H1(_) => todo!(), + H2(_) => todo!(), + H3(_) => todo!(), + H4(_) => todo!(), + H5(_) => todo!(), + H6(_) => todo!(), + L(_list_numbering) => todo!(), + LI => todo!(), + Lbl => todo!(), + LBody => todo!(), + Table => todo!(), + TR => todo!(), + TH(_table_header_scope) => todo!(), + TD => todo!(), + THead => todo!(), + TBody => todo!(), + TFoot => todo!(), + InlineQuote => todo!(), + Note => todo!(), + Reference => todo!(), + BibEntry => todo!(), + Code => todo!(), + Link => todo!(), + Annot => todo!(), + Figure(_) => todo!(), + Formula(_) => todo!(), + Datetime => todo!(), + Terms => todo!(), + Title => todo!(), + } + } +} + +pub(crate) fn handle_open_tag( + gc: &mut GlobalContext, + surface: &mut Surface, + elem: &Content, +) { + if gc.tags.in_artifact { + return; + } + + let Some(loc) = elem.location() else { return }; + + let tag = if let Some(heading) = elem.to_packed::() { + let level = 
heading.resolve_level(StyleChain::default()); + let name = heading.body.plain_text().to_string(); + match level.get() { + 1 => Tag::H1(Some(name)), + 2 => Tag::H2(Some(name)), + 3 => Tag::H3(Some(name)), + 4 => Tag::H4(Some(name)), + 5 => Tag::H5(Some(name)), + // TODO: when targeting PDF 2.0 headings `> 6` are supported + _ => Tag::H6(Some(name)), + } + } else if let Some(_) = elem.to_packed::() { + Tag::TOC + } else if let Some(_outline_entry) = elem.to_packed::() { + Tag::TOCI + } else { + return; + }; + + if !gc.tags.context_supports(&tag) { + // TODO: error or warning? + } + + // close previous marked-content and open a nested tag. + if !gc.tags.stack.is_empty() { + surface.end_tagged(); + } + let content_id = surface.start_tagged(krilla::tagging::ContentTag::Other); + + gc.tags.stack.push((loc, tag, vec![Node::Leaf(content_id)])); +} + +pub(crate) fn handle_close_tag( + gc: &mut GlobalContext, + surface: &mut Surface, + loc: &Location, +) { + let Some((_, tag, nodes)) = gc.tags.stack.pop_if(|(l, ..)| l == loc) else { + return; + }; + // TODO: contstruct group directly from nodes + let mut tag_group = TagGroup::new(tag); + for node in nodes { + tag_group.push(node); + } + + surface.end_tagged(); + + if let Some((_, _, parent_nodes)) = gc.tags.stack.last_mut() { + parent_nodes.push(Node::Group(tag_group)); + + // TODO: somehow avoid empty marked-content sequences + let id = surface.start_tagged(ContentTag::Other); + parent_nodes.push(Node::Leaf(id)); + } else { + gc.tags.tree.push(Node::Group(tag_group)); + } +} From e8ea83751433b6041900c2bd0e163bd92ed80f9f Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 28 May 2025 15:08:47 +0200 Subject: [PATCH 04/76] feat: [WIP] include links in tag tree skip-checks:true --- .../src/introspection/introspector.rs | 2 - crates/typst-pdf/src/convert.rs | 22 +++-- crates/typst-pdf/src/link.rs | 25 ++++-- crates/typst-pdf/src/tags.rs | 90 +++++++++++++++---- 4 files changed, 107 insertions(+), 32 deletions(-) diff --git 
a/crates/typst-library/src/introspection/introspector.rs b/crates/typst-library/src/introspection/introspector.rs index a81a92035..d2ad0525b 100644 --- a/crates/typst-library/src/introspection/introspector.rs +++ b/crates/typst-library/src/introspection/introspector.rs @@ -388,8 +388,6 @@ impl IntrospectorBuilder { ); } - dbg!(elems.len()); - self.finalize(elems) } diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index cd165b124..d0980d1df 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -31,7 +31,7 @@ use crate::metadata::build_metadata; use crate::outline::build_outline; use crate::page::PageLabelExt; use crate::shape::handle_shape; -use crate::tags::{handle_close_tag, handle_open_tag, Tags}; +use crate::tags::{handle_close_tag, handle_open_tag, Placeholder, TagNode, Tags}; use crate::text::handle_text; use crate::util::{convert_path, display_font, AbsExt, TransformExt}; use crate::PdfOptions; @@ -42,6 +42,7 @@ pub fn convert( options: &PdfOptions, ) -> SourceResult> { // HACK + // let config = Configuration::new(); let config = Configuration::new_with_validator(Validator::UA1); let settings = SerializeSettings { compress_content_streams: true, @@ -73,7 +74,7 @@ pub fn convert( document.set_outline(build_outline(&gc)); document.set_metadata(build_metadata(&gc)); - document.set_tag_tree(gc.tags.take_tree()); + document.set_tag_tree(gc.tags.build_tree()); finish(document, gc, options.standards.config) } @@ -123,7 +124,7 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul }; // TODO: somehow avoid empty marked-content sequences let id = surface.start_tagged(tag); - nodes.push(Node::Leaf(id)); + nodes.push(TagNode::Leaf(id)); } handle_frame( @@ -141,8 +142,9 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul surface.finish(); - for annotation in fc.annotations { - page.add_annotation(annotation); + for (placeholder, annotation) in 
fc.annotations { + let annotation_id = page.add_tagged_annotation(annotation); + gc.tags.init_placeholder(placeholder, Node::Leaf(annotation_id)); } } } @@ -197,7 +199,7 @@ impl State { /// Context needed for converting a single frame. pub(crate) struct FrameContext { states: Vec, - annotations: Vec, + annotations: Vec<(Placeholder, Annotation)>, } impl FrameContext { @@ -224,8 +226,12 @@ impl FrameContext { self.states.last_mut().unwrap() } - pub(crate) fn push_annotation(&mut self, annotation: Annotation) { - self.annotations.push(annotation); + pub(crate) fn push_annotation( + &mut self, + placeholder: Placeholder, + annotation: Annotation, + ) { + self.annotations.push((placeholder, annotation)); } } diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index 64cb8f0a2..a792778dd 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -1,11 +1,12 @@ use krilla::action::{Action, LinkAction}; -use krilla::annotation::{LinkAnnotation, Target}; +use krilla::annotation::{Annotation, LinkAnnotation, Target}; use krilla::destination::XyzDestination; use krilla::geom::Rect; use typst_library::layout::{Abs, Point, Size}; use typst_library::model::Destination; use crate::convert::{FrameContext, GlobalContext}; +use crate::tags::TagNode; use crate::util::{AbsExt, PointExt}; pub(crate) fn handle_link( @@ -44,15 +45,23 @@ pub(crate) fn handle_link( // TODO: Support quad points. + let placeholder = gc.tags.reserve_placeholder(); + gc.tags.push(TagNode::Placeholder(placeholder)); + + // TODO: add some way to add alt text to annotations. 
+ // probably through [typst_layout::modifiers::FrameModifiers] let pos = match dest { Destination::Url(u) => { fc.push_annotation( - LinkAnnotation::new( - rect, - None, - Target::Action(Action::Link(LinkAction::new(u.to_string()))), - ) - .into(), + placeholder, + Annotation::new_link( + LinkAnnotation::new( + rect, + None, + Target::Action(Action::Link(LinkAction::new(u.to_string()))), + ), + Some(u.to_string()), + ), ); return; } @@ -62,6 +71,7 @@ pub(crate) fn handle_link( // If a named destination has been registered, it's already guaranteed to // not point to an excluded page. fc.push_annotation( + placeholder, LinkAnnotation::new( rect, None, @@ -81,6 +91,7 @@ pub(crate) fn handle_link( let page_index = pos.page.get() - 1; if let Some(index) = gc.page_index_converter.pdf_page_index(page_index) { fc.push_annotation( + placeholder, LinkAnnotation::new( rect, None, diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 70792dfe8..2c43c8495 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -1,5 +1,7 @@ +use std::cell::OnceCell; + use krilla::surface::Surface; -use krilla::tagging::{ContentTag, Node, Tag, TagGroup, TagTree}; +use krilla::tagging::{ContentTag, Identifier, Node, Tag, TagGroup, TagTree}; use typst_library::foundations::{Content, StyleChain}; use typst_library::introspection::Location; use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry}; @@ -8,24 +10,87 @@ use crate::convert::GlobalContext; pub(crate) struct Tags { /// The intermediary stack of nested tag groups. - pub(crate) stack: Vec<(Location, Tag, Vec)>, + pub(crate) stack: Vec<(Location, Tag, Vec)>, + pub(crate) placeholders: Vec>, pub(crate) in_artifact: bool, /// The output. - pub(crate) tree: TagTree, + pub(crate) tree: Vec, } +pub(crate) enum TagNode { + Group(Tag, Vec), + Leaf(Identifier), + /// Allows inserting a placeholder into the tag tree. + /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. 
+ Placeholder(Placeholder), +} + +#[derive(Clone, Copy)] +pub(crate) struct Placeholder(usize); + impl Tags { pub(crate) fn new() -> Self { Self { stack: Vec::new(), + placeholders: Vec::new(), in_artifact: false, - tree: TagTree::new(), + + tree: Vec::new(), } } - pub(crate) fn take_tree(&mut self) -> TagTree { - std::mem::take(&mut self.tree) + pub(crate) fn reserve_placeholder(&mut self) -> Placeholder { + let idx = self.placeholders.len(); + self.placeholders.push(OnceCell::new()); + Placeholder(idx) + } + + pub(crate) fn init_placeholder(&mut self, placeholder: Placeholder, node: Node) { + self.placeholders[placeholder.0] + .set(node) + .map_err(|_| ()) + .expect("placeholder to be uninitialized"); + } + + pub(crate) fn take_placeholder(&mut self, placeholder: Placeholder) -> Node { + self.placeholders[placeholder.0] + .take() + .expect("initialized placeholder node") + } + + pub(crate) fn push(&mut self, node: TagNode) { + if let Some((_, _, nodes)) = self.stack.last_mut() { + nodes.push(node); + } else { + self.tree.push(node); + } + } + + pub(crate) fn build_tree(&mut self) -> TagTree { + let mut tree = TagTree::new(); + let nodes = std::mem::take(&mut self.tree); + // PERF: collect into vec and construct TagTree directly from tag nodes. + for node in nodes.into_iter().map(|node| self.resolve_node(node)) { + tree.push(node); + } + tree + } + + /// Resolves [`Placeholder`] nodes. + fn resolve_node(&mut self, node: TagNode) -> Node { + match node { + TagNode::Group(tag, nodes) => { + let mut group = TagGroup::new(tag); + // PERF: collect into vec and construct TagTree directly from tag nodes. 
+ for node in nodes.into_iter().map(|node| self.resolve_node(node)) { + group.push(node); + } + Node::Group(group) + } + TagNode::Leaf(identifier) => Node::Leaf(identifier), + TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder), + } } pub(crate) fn context_supports(&self, tag: &Tag) -> bool { @@ -118,7 +183,7 @@ pub(crate) fn handle_open_tag( } let content_id = surface.start_tagged(krilla::tagging::ContentTag::Other); - gc.tags.stack.push((loc, tag, vec![Node::Leaf(content_id)])); + gc.tags.stack.push((loc, tag, vec![TagNode::Leaf(content_id)])); } pub(crate) fn handle_close_tag( @@ -129,21 +194,16 @@ pub(crate) fn handle_close_tag( let Some((_, tag, nodes)) = gc.tags.stack.pop_if(|(l, ..)| l == loc) else { return; }; - // TODO: contstruct group directly from nodes - let mut tag_group = TagGroup::new(tag); - for node in nodes { - tag_group.push(node); - } surface.end_tagged(); if let Some((_, _, parent_nodes)) = gc.tags.stack.last_mut() { - parent_nodes.push(Node::Group(tag_group)); + parent_nodes.push(TagNode::Group(tag, nodes)); // TODO: somehow avoid empty marked-content sequences let id = surface.start_tagged(ContentTag::Other); - parent_nodes.push(Node::Leaf(id)); + parent_nodes.push(TagNode::Leaf(id)); } else { - gc.tags.tree.push(Node::Group(tag_group)); + gc.tags.tree.push(TagNode::Group(tag, nodes)); } } From cc70a785ddea08375db23d26e2fd6df7f11b5e62 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 28 May 2025 17:47:35 +0200 Subject: [PATCH 05/76] feat: [WIP] allow specifying alt text for links skip-checks:true # Please enter the commit message for your changes. Lines starting # with '#' will be kept; you may remove them yourself if you want to. # An empty message aborts the commit. # # Date: Wed May 28 17:47:35 2025 +0200 # # On branch pdf-accessibility # Your branch and 'origin/pdf-accessibility' have diverged, # and have 11 and 5 different commits each, respectively. 
# # Changes to be committed: # modified: crates/typst-ide/src/jump.rs # modified: crates/typst-layout/src/flow/distribute.rs # modified: crates/typst-layout/src/modifiers.rs # modified: crates/typst-library/src/foundations/content.rs # modified: crates/typst-library/src/layout/frame.rs # modified: crates/typst-library/src/model/bibliography.rs # modified: crates/typst-library/src/model/footnote.rs # modified: crates/typst-library/src/model/link.rs # modified: crates/typst-library/src/model/outline.rs # modified: crates/typst-library/src/model/reference.rs # modified: crates/typst-pdf/src/convert.rs # modified: crates/typst-pdf/src/link.rs # modified: crates/typst-render/src/lib.rs # modified: crates/typst-svg/src/lib.rs # modified: tests/src/run.rs # --- crates/typst-ide/src/jump.rs | 2 +- crates/typst-layout/src/flow/distribute.rs | 2 +- crates/typst-layout/src/modifiers.rs | 6 +- .../typst-library/src/foundations/content.rs | 5 +- crates/typst-library/src/layout/frame.rs | 5 +- .../typst-library/src/model/bibliography.rs | 6 +- crates/typst-library/src/model/footnote.rs | 6 +- crates/typst-library/src/model/link.rs | 8 +- crates/typst-library/src/model/outline.rs | 24 ++++-- crates/typst-library/src/model/reference.rs | 3 +- crates/typst-pdf/src/convert.rs | 6 +- crates/typst-pdf/src/link.rs | 76 +++++++------------ crates/typst-render/src/lib.rs | 2 +- crates/typst-svg/src/lib.rs | 4 +- tests/src/run.rs | 2 +- 15 files changed, 84 insertions(+), 73 deletions(-) diff --git a/crates/typst-ide/src/jump.rs b/crates/typst-ide/src/jump.rs index b29bc4a48..0f9f84ff7 100644 --- a/crates/typst-ide/src/jump.rs +++ b/crates/typst-ide/src/jump.rs @@ -36,7 +36,7 @@ pub fn jump_from_click( ) -> Option { // Try to find a link first. 
for (pos, item) in frame.items() { - if let FrameItem::Link(dest, size) = item { + if let FrameItem::Link(_, dest, size) = item { if is_in_rect(*pos, *size, click) { return Some(match dest { Destination::Url(url) => Jump::Url(url.clone()), diff --git a/crates/typst-layout/src/flow/distribute.rs b/crates/typst-layout/src/flow/distribute.rs index f504d22e7..108a8d651 100644 --- a/crates/typst-layout/src/flow/distribute.rs +++ b/crates/typst-layout/src/flow/distribute.rs @@ -93,7 +93,7 @@ impl Item<'_, '_> { Self::Frame(frame, _) => { frame.size().is_zero() && frame.items().all(|(_, item)| { - matches!(item, FrameItem::Link(_, _) | FrameItem::Tag(_)) + matches!(item, FrameItem::Link(..) | FrameItem::Tag(_)) }) } Self::Placed(_, placed) => !placed.float, diff --git a/crates/typst-layout/src/modifiers.rs b/crates/typst-layout/src/modifiers.rs index b0371d63e..a7d882617 100644 --- a/crates/typst-layout/src/modifiers.rs +++ b/crates/typst-layout/src/modifiers.rs @@ -1,3 +1,4 @@ +use ecow::EcoString; use typst_library::foundations::StyleChain; use typst_library::layout::{Abs, Fragment, Frame, FrameItem, HideElem, Point, Sides}; use typst_library::model::{Destination, LinkElem, ParElem}; @@ -21,6 +22,7 @@ use typst_library::model::{Destination, LinkElem, ParElem}; pub struct FrameModifiers { /// A destination to link to. dest: Option, + alt: Option, /// Whether the contents of the frame should be hidden. hidden: bool, } @@ -28,8 +30,10 @@ pub struct FrameModifiers { impl FrameModifiers { /// Retrieve all modifications that should be applied per-frame. 
pub fn get_in(styles: StyleChain) -> Self { + // TODO: maybe verify that an alt text was provided here Self { dest: LinkElem::current_in(styles), + alt: LinkElem::alt_in(styles), hidden: HideElem::hidden_in(styles), } } @@ -102,7 +106,7 @@ fn modify_frame( pos.x -= outset.left; size += outset.sum_by_axis(); } - frame.push(pos, FrameItem::Link(dest.clone(), size)); + frame.push(pos, FrameItem::Link(modifiers.alt.clone(), dest.clone(), size)); } if modifiers.hidden { diff --git a/crates/typst-library/src/foundations/content.rs b/crates/typst-library/src/foundations/content.rs index 1855bb70b..278d49401 100644 --- a/crates/typst-library/src/foundations/content.rs +++ b/crates/typst-library/src/foundations/content.rs @@ -503,8 +503,9 @@ impl Content { } /// Link the content somewhere. - pub fn linked(self, dest: Destination) -> Self { - self.styled(LinkElem::set_current(Some(dest))) + pub fn linked(self, alt: Option, dest: Destination) -> Self { + self.styled(LinkElem::set_alt(alt)) + .styled(LinkElem::set_current(Some(dest))) } /// Set alignments for this content. diff --git a/crates/typst-library/src/layout/frame.rs b/crates/typst-library/src/layout/frame.rs index a26a7d0ef..5b9d1f1a7 100644 --- a/crates/typst-library/src/layout/frame.rs +++ b/crates/typst-library/src/layout/frame.rs @@ -4,6 +4,7 @@ use std::fmt::{self, Debug, Formatter}; use std::num::NonZeroUsize; use std::sync::Arc; +use ecow::EcoString; use typst_syntax::Span; use typst_utils::{LazyHash, Numeric}; @@ -473,7 +474,7 @@ pub enum FrameItem { /// An image and its size. Image(Image, Size, Span), /// An internal or external link to a destination. - Link(Destination, Size), + Link(Option, Destination, Size), /// An introspectable element that produced something within this frame. 
Tag(Tag), } @@ -485,7 +486,7 @@ impl Debug for FrameItem { Self::Text(text) => write!(f, "{text:?}"), Self::Shape(shape, _) => write!(f, "{shape:?}"), Self::Image(image, _, _) => write!(f, "{image:?}"), - Self::Link(dest, _) => write!(f, "Link({dest:?})"), + Self::Link(alt, dest, _) => write!(f, "Link({alt:?}, {dest:?})"), Self::Tag(tag) => write!(f, "{tag:?}"), } } diff --git a/crates/typst-library/src/model/bibliography.rs b/crates/typst-library/src/model/bibliography.rs index f56f5813e..75ed0fce2 100644 --- a/crates/typst-library/src/model/bibliography.rs +++ b/crates/typst-library/src/model/bibliography.rs @@ -877,7 +877,8 @@ impl<'a> Generator<'a> { renderer.display_elem_child(elem, &mut None, false)?; if let Some(location) = first_occurrences.get(item.key.as_str()) { let dest = Destination::Location(*location); - content = content.linked(dest); + // TODO: accept user supplied alt text + content = content.linked(None, dest); } StrResult::Ok(content) }) @@ -1012,7 +1013,8 @@ impl ElemRenderer<'_> { if let Some(hayagriva::ElemMeta::Entry(i)) = elem.meta { if let Some(location) = (self.link)(i) { let dest = Destination::Location(location); - content = content.linked(dest); + // TODO: accept user supplied alt text + content = content.linked(None, dest); } } diff --git a/crates/typst-library/src/model/footnote.rs b/crates/typst-library/src/model/footnote.rs index 773f67467..af6664cb9 100644 --- a/crates/typst-library/src/model/footnote.rs +++ b/crates/typst-library/src/model/footnote.rs @@ -147,7 +147,8 @@ impl Show for Packed { let sup = SuperElem::new(num).pack().spanned(span); let loc = loc.variant(1); // Add zero-width weak spacing to make the footnote "sticky". 
- Ok(HElem::hole().pack() + sup.linked(Destination::Location(loc))) + // TODO: accept user supplied alt text + Ok(HElem::hole().pack() + sup.linked(None, Destination::Location(loc))) } } @@ -296,7 +297,8 @@ impl Show for Packed { let sup = SuperElem::new(num) .pack() .spanned(span) - .linked(Destination::Location(loc)) + // TODO: accept user supplied alt text + .linked(None, Destination::Location(loc)) .located(loc.variant(1)); Ok(Content::sequence([ diff --git a/crates/typst-library/src/model/link.rs b/crates/typst-library/src/model/link.rs index 3d9dc5e55..d64192f29 100644 --- a/crates/typst-library/src/model/link.rs +++ b/crates/typst-library/src/model/link.rs @@ -40,6 +40,9 @@ use crate::text::TextElem; /// `https://` is automatically turned into a link. #[elem(Locatable, Show)] pub struct LinkElem { + /// A text describing the link. + pub alt: Option, + /// The destination the link points to. /// /// - To link to web pages, `dest` should be a valid URL string. If the URL @@ -123,12 +126,13 @@ impl Show for Packed { body } } else { + let alt = self.alt(styles); match &self.dest { - LinkTarget::Dest(dest) => body.linked(dest.clone()), + LinkTarget::Dest(dest) => body.linked(alt, dest.clone()), LinkTarget::Label(label) => { let elem = engine.introspector.query_label(*label).at(self.span())?; let dest = Destination::Location(elem.location().unwrap()); - body.clone().linked(dest) + body.clone().linked(alt, dest) } } }) diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index 7b5838d9f..11ecc23dd 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -2,6 +2,7 @@ use std::num::NonZeroUsize; use std::str::FromStr; use comemo::{Track, Tracked}; +use ecow::eco_format; use smallvec::SmallVec; use typst_syntax::Span; use typst_utils::{Get, NonZeroExt}; @@ -17,8 +18,7 @@ use crate::introspection::{ Counter, CounterKey, Introspector, Locatable, Location, Locator, LocatorLink, 
}; use crate::layout::{ - Abs, Axes, BlockBody, BlockElem, BoxElem, Dir, Em, Fr, HElem, Length, Region, Rel, - RepeatElem, Sides, + Abs, Axes, BlockBody, BlockElem, BoxElem, Dir, Em, Fr, HElem, Length, PageElem, Region, Rel, RepeatElem, Sides }; use crate::math::EquationElem; use crate::model::{Destination, HeadingElem, NumberingPattern, ParElem, Refable}; @@ -418,7 +418,17 @@ impl Show for Packed { let context = context.track(); let prefix = self.prefix(engine, context, span)?; - let inner = self.inner(engine, context, span)?; + let body = self.body().at(span)?; + let page = self.page(engine, context, span)?; + let alt = { + // TODO: accept user supplied alt text + let prefix = prefix.as_ref().map(|p| p.plain_text()).unwrap_or_default(); + let body = body.plain_text(); + let page_str = PageElem::local_name_in(styles); + let page_nr = page.plain_text(); + eco_format!("{prefix} {body} {page_str} {page_nr}") + }; + let inner = self.inner(engine, context, span, body, page)?; let block = if self.element.is::() { let body = prefix.unwrap_or_default() + inner; BlockElem::new() @@ -430,7 +440,7 @@ impl Show for Packed { }; let loc = self.element_location().at(span)?; - Ok(block.linked(Destination::Location(loc))) + Ok(block.linked(Some(alt), Destination::Location(loc))) } } @@ -568,6 +578,8 @@ impl OutlineEntry { engine: &mut Engine, context: Tracked, span: Span, + body: Content, + page: Content, ) -> SourceResult { let styles = context.styles().at(span)?; @@ -588,7 +600,7 @@ impl OutlineEntry { seq.push(TextElem::packed("\u{202B}")); } - seq.push(self.body().at(span)?); + seq.push(body); if rtl { // "Pop Directional Formatting" @@ -613,7 +625,7 @@ impl OutlineEntry { // Add the page number. The word joiner in front ensures that the page // number doesn't stand alone in its line. 
seq.push(TextElem::packed("\u{2060}")); - seq.push(self.page(engine, context, span)?); + seq.push(page); Ok(Content::sequence(seq)) } diff --git a/crates/typst-library/src/model/reference.rs b/crates/typst-library/src/model/reference.rs index 17f93b7c4..ca0e0f5e3 100644 --- a/crates/typst-library/src/model/reference.rs +++ b/crates/typst-library/src/model/reference.rs @@ -343,7 +343,8 @@ fn show_reference( content = supplement + TextElem::packed("\u{a0}") + content; } - Ok(content.linked(Destination::Location(loc))) + // TODO: accept user supplied alt text + Ok(content.linked(None, Destination::Location(loc))) } /// Turn a reference into a citation. diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index d0980d1df..aeb2cbf91 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -1,7 +1,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::num::NonZeroU64; -use ecow::{eco_format, EcoVec}; +use ecow::{eco_format, EcoString, EcoVec}; use krilla::annotation::Annotation; use krilla::configure::{Configuration, ValidationError, Validator}; use krilla::destination::{NamedDestination, XyzDestination}; @@ -314,7 +314,9 @@ pub(crate) fn handle_frame( FrameItem::Image(image, size, span) => { handle_image(gc, fc, image, *size, surface, *span)? 
} - FrameItem::Link(d, s) => handle_link(fc, gc, d, *s), + FrameItem::Link(alt, dest, size) => { + handle_link(fc, gc, alt.as_ref().map(EcoString::to_string), dest, *size) + } FrameItem::Tag(introspection::Tag::Start(elem)) => { handle_open_tag(gc, surface, elem) } diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index a792778dd..6dfefbc11 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -2,7 +2,7 @@ use krilla::action::{Action, LinkAction}; use krilla::annotation::{Annotation, LinkAnnotation, Target}; use krilla::destination::XyzDestination; use krilla::geom::Rect; -use typst_library::layout::{Abs, Point, Size}; +use typst_library::layout::{Abs, Point, Position, Size}; use typst_library::model::Destination; use crate::convert::{FrameContext, GlobalContext}; @@ -12,6 +12,7 @@ use crate::util::{AbsExt, PointExt}; pub(crate) fn handle_link( fc: &mut FrameContext, gc: &mut GlobalContext, + alt: Option, dest: &Destination, size: Size, ) { @@ -45,61 +46,42 @@ pub(crate) fn handle_link( // TODO: Support quad points. - let placeholder = gc.tags.reserve_placeholder(); - gc.tags.push(TagNode::Placeholder(placeholder)); - - // TODO: add some way to add alt text to annotations. - // probably through [typst_layout::modifiers::FrameModifiers] - let pos = match dest { + let target = match dest { Destination::Url(u) => { - fc.push_annotation( - placeholder, - Annotation::new_link( - LinkAnnotation::new( - rect, - None, - Target::Action(Action::Link(LinkAction::new(u.to_string()))), - ), - Some(u.to_string()), - ), - ); - return; + Target::Action(Action::Link(LinkAction::new(u.to_string()))) } - Destination::Position(p) => *p, + Destination::Position(p) => match pos_to_target(gc, *p) { + Some(target) => target, + None => return, + }, Destination::Location(loc) => { if let Some(nd) = gc.loc_to_names.get(loc) { // If a named destination has been registered, it's already guaranteed to // not point to an excluded page. 
- fc.push_annotation( - placeholder, - LinkAnnotation::new( - rect, - None, - Target::Destination(krilla::destination::Destination::Named( - nd.clone(), - )), - ) - .into(), - ); - return; + Target::Destination(krilla::destination::Destination::Named(nd.clone())) } else { - gc.document.introspector.position(*loc) + let pos = gc.document.introspector.position(*loc); + match pos_to_target(gc, pos) { + Some(target) => target, + None => return, + } } } }; - let page_index = pos.page.get() - 1; - if let Some(index) = gc.page_index_converter.pdf_page_index(page_index) { - fc.push_annotation( - placeholder, - LinkAnnotation::new( - rect, - None, - Target::Destination(krilla::destination::Destination::Xyz( - XyzDestination::new(index, pos.point.to_krilla()), - )), - ) - .into(), - ); - } + let placeholder = gc.tags.reserve_placeholder(); + gc.tags.push(TagNode::Placeholder(placeholder)); + + fc.push_annotation( + placeholder, + Annotation::new_link(LinkAnnotation::new(rect, None, target), alt), + ); +} + +fn pos_to_target(gc: &mut GlobalContext, pos: Position) -> Option { + let page_index = pos.page.get() - 1; + let index = gc.page_index_converter.pdf_page_index(page_index)?; + + let dest = XyzDestination::new(index, pos.point.to_krilla()); + Some(Target::Destination(krilla::destination::Destination::Xyz(dest))) } diff --git a/crates/typst-render/src/lib.rs b/crates/typst-render/src/lib.rs index f43cd019b..3ecae4bad 100644 --- a/crates/typst-render/src/lib.rs +++ b/crates/typst-render/src/lib.rs @@ -167,7 +167,7 @@ fn render_frame(canvas: &mut sk::Pixmap, state: State, frame: &Frame) { FrameItem::Image(image, size, _) => { image::render_image(canvas, state.pre_translate(*pos), image, *size); } - FrameItem::Link(_, _) => {} + FrameItem::Link(..) 
=> {} FrameItem::Tag(_) => {} } } diff --git a/crates/typst-svg/src/lib.rs b/crates/typst-svg/src/lib.rs index f4e81250f..91975ae37 100644 --- a/crates/typst-svg/src/lib.rs +++ b/crates/typst-svg/src/lib.rs @@ -207,7 +207,7 @@ impl SVGRenderer { for (pos, item) in frame.items() { // File size optimization. // TODO: SVGs could contain links, couldn't they? - if matches!(item, FrameItem::Link(_, _) | FrameItem::Tag(_)) { + if matches!(item, FrameItem::Link(..) | FrameItem::Tag(_)) { continue; } @@ -228,7 +228,7 @@ impl SVGRenderer { self.render_shape(state.pre_translate(*pos), shape) } FrameItem::Image(image, size, _) => self.render_image(image, size), - FrameItem::Link(_, _) => unreachable!(), + FrameItem::Link(..) => unreachable!(), FrameItem::Tag(_) => unreachable!(), }; diff --git a/tests/src/run.rs b/tests/src/run.rs index 1d93ba392..ce507d3c5 100644 --- a/tests/src/run.rs +++ b/tests/src/run.rs @@ -535,7 +535,7 @@ fn render_links(canvas: &mut sk::Pixmap, ts: sk::Transform, frame: &Frame) { let ts = ts.pre_concat(to_sk_transform(&group.transform)); render_links(canvas, ts, &group.frame); } - FrameItem::Link(_, size) => { + FrameItem::Link(_, _, size) => { let w = size.x.to_pt() as f32; let h = size.y.to_pt() as f32; let rect = sk::Rect::from_xywh(0.0, 0.0, w, h).unwrap(); From 19804305783ee47b0ccc2874c53781d1dff48711 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 2 Jun 2025 12:02:31 +0200 Subject: [PATCH 06/76] feat: mark artifacts --- crates/typst-layout/src/pages/finalize.rs | 39 +++++++- crates/typst-layout/src/pages/mod.rs | 10 +- crates/typst-layout/src/pages/run.rs | 2 - crates/typst-library/src/layout/page.rs | 24 ++++- crates/typst-library/src/model/outline.rs | 8 +- crates/typst-pdf/src/convert.rs | 40 ++------ crates/typst-pdf/src/tags.rs | 117 ++++++++++++++++++---- 7 files changed, 174 insertions(+), 66 deletions(-) diff --git a/crates/typst-layout/src/pages/finalize.rs b/crates/typst-layout/src/pages/finalize.rs index b16d95699..543dbb0ce 
100644 --- a/crates/typst-layout/src/pages/finalize.rs +++ b/crates/typst-layout/src/pages/finalize.rs @@ -1,7 +1,10 @@ use typst_library::diag::SourceResult; use typst_library::engine::Engine; -use typst_library::introspection::{ManualPageCounter, Tag}; -use typst_library::layout::{Frame, FrameItem, Page, Point}; +use typst_library::foundations::{Content, NativeElement}; +use typst_library::introspection::{ManualPageCounter, SplitLocator, Tag}; +use typst_library::layout::{ + ArtifactKind, ArtifactMarker, Frame, FrameItem, Page, Point, +}; use super::LayoutedPage; @@ -10,6 +13,7 @@ use super::LayoutedPage; /// physical page number, which is unknown during parallel layout. pub fn finalize( engine: &mut Engine, + locator: &mut SplitLocator, counter: &mut ManualPageCounter, tags: &mut Vec, LayoutedPage { @@ -45,10 +49,12 @@ pub fn finalize( // important as it affects the relative ordering of introspectable elements // and thus how counters resolve. if let Some(background) = background { - frame.push_frame(Point::zero(), background); + let tag = ArtifactMarker::new(ArtifactKind::Page).pack(); + push_tagged(engine, locator, &mut frame, Point::zero(), background, tag); } if let Some(header) = header { - frame.push_frame(Point::with_x(margin.left), header); + let tag = ArtifactMarker::new(ArtifactKind::Header).pack(); + push_tagged(engine, locator, &mut frame, Point::with_x(margin.left), header, tag); } // Add the inner contents. @@ -57,7 +63,8 @@ pub fn finalize( // Add the "after" marginals. 
if let Some(footer) = footer { let y = frame.height() - footer.height(); - frame.push_frame(Point::new(margin.left, y), footer); + let tag = ArtifactMarker::new(ArtifactKind::Footer).pack(); + push_tagged(engine, locator, &mut frame, Point::new(margin.left, y), footer, tag); } if let Some(foreground) = foreground { frame.push_frame(Point::zero(), foreground); @@ -72,3 +79,25 @@ pub fn finalize( Ok(Page { frame, fill, numbering, supplement, number }) } + +fn push_tagged( + engine: &mut Engine, + locator: &mut SplitLocator, + frame: &mut Frame, + mut pos: Point, + inner: Frame, + mut tag: Content, +) { + // TODO: use general PDF Tagged/Artifact element that wraps some content and + // is also available to the user. + let key = typst_utils::hash128(&tag); + let loc = locator.next_location(engine.introspector, key); + tag.set_location(loc); + frame.push(pos, FrameItem::Tag(Tag::Start(tag))); + + let height = inner.height(); + frame.push_frame(pos, inner); + + pos.y += height; + frame.push(pos, FrameItem::Tag(Tag::End(loc, key))); +} diff --git a/crates/typst-layout/src/pages/mod.rs b/crates/typst-layout/src/pages/mod.rs index 14dc0f3fb..a64fee4b3 100644 --- a/crates/typst-layout/src/pages/mod.rs +++ b/crates/typst-layout/src/pages/mod.rs @@ -123,17 +123,19 @@ fn layout_pages<'a>( Item::Run(..) 
=> { let layouted = runs.next().unwrap()?; for layouted in layouted { - let page = finalize(engine, &mut counter, &mut tags, layouted)?; + let page = + finalize(engine, locator, &mut counter, &mut tags, layouted)?; pages.push(page); } } - Item::Parity(parity, initial, locator) => { + Item::Parity(parity, initial, page_locator) => { if !parity.matches(pages.len()) { continue; } - let layouted = layout_blank_page(engine, locator.relayout(), *initial)?; - let page = finalize(engine, &mut counter, &mut tags, layouted)?; + let layouted = + layout_blank_page(engine, page_locator.relayout(), *initial)?; + let page = finalize(engine, locator, &mut counter, &mut tags, layouted)?; pages.push(page); } Item::Tags(items) => { diff --git a/crates/typst-layout/src/pages/run.rs b/crates/typst-layout/src/pages/run.rs index e9e4e1105..6d2d29da5 100644 --- a/crates/typst-layout/src/pages/run.rs +++ b/crates/typst-layout/src/pages/run.rs @@ -185,8 +185,6 @@ fn layout_page_run_impl( )?; // Layouts a single marginal. - // TODO: add some sort of tag that indicates the marginals and use it to - // mark them as artifacts for PDF/UA. 
let mut layout_marginal = |content: &Option, area, align| { let Some(content) = content else { return Ok(None) }; let aligned = content.clone().styled(AlignElem::set_alignment(align)); diff --git a/crates/typst-library/src/layout/page.rs b/crates/typst-library/src/layout/page.rs index 98afbd06f..b6fa5d0be 100644 --- a/crates/typst-library/src/layout/page.rs +++ b/crates/typst-library/src/layout/page.rs @@ -10,7 +10,7 @@ use crate::foundations::{ cast, elem, Args, AutoValue, Cast, Construct, Content, Dict, Fold, NativeElement, Set, Smart, Value, }; -use crate::introspection::Introspector; +use crate::introspection::{Introspector, Locatable}; use crate::layout::{ Abs, Alignment, FlushElem, Frame, HAlignment, Length, OuterVAlignment, Ratio, Rel, Sides, SpecificAlignment, @@ -451,6 +451,28 @@ impl PagebreakElem { } } +// HACK: this should probably not be an element +#[derive(Copy)] +#[elem(Construct, Locatable)] +pub struct ArtifactMarker { + #[internal] + #[required] + pub kind: ArtifactKind, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum ArtifactKind { + Header, + Footer, + Page, +} + +impl Construct for ArtifactMarker { + fn construct(_: &mut Engine, args: &mut Args) -> SourceResult { + bail!(args.span, "cannot be constructed manually"); + } +} + /// A finished document with metadata and page frames. 
#[derive(Debug, Default, Clone)] pub struct PagedDocument { diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index 11ecc23dd..bcdd15652 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -18,7 +18,8 @@ use crate::introspection::{ Counter, CounterKey, Introspector, Locatable, Location, Locator, LocatorLink, }; use crate::layout::{ - Abs, Axes, BlockBody, BlockElem, BoxElem, Dir, Em, Fr, HElem, Length, PageElem, Region, Rel, RepeatElem, Sides + Abs, Axes, BlockBody, BlockElem, BoxElem, Dir, Em, Fr, HElem, Length, PageElem, + Region, Rel, RepeatElem, Sides, }; use crate::math::EquationElem; use crate::model::{Destination, HeadingElem, NumberingPattern, ParElem, Refable}; @@ -426,9 +427,9 @@ impl Show for Packed { let body = body.plain_text(); let page_str = PageElem::local_name_in(styles); let page_nr = page.plain_text(); - eco_format!("{prefix} {body} {page_str} {page_nr}") + eco_format!("{prefix} \"{body}\", {page_str} {page_nr}") }; - let inner = self.inner(engine, context, span, body, page)?; + let inner = self.inner(context, span, body, page)?; let block = if self.element.is::() { let body = prefix.unwrap_or_default() + inner; BlockElem::new() @@ -575,7 +576,6 @@ impl OutlineEntry { #[func(contextual)] pub fn inner( &self, - engine: &mut Engine, context: Tracked, span: Span, body: Content, diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index aeb2cbf91..c53fd1e28 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -10,7 +10,6 @@ use krilla::error::KrillaError; use krilla::geom::PathBuilder; use krilla::page::{PageLabel, PageSettings}; use krilla::surface::Surface; -use krilla::tagging::{ArtifactType, ContentTag, Node}; use krilla::{Document, SerializeSettings}; use krilla_svg::render_svg_glyph; use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult}; @@ -31,7 +30,7 @@ use 
crate::metadata::build_metadata; use crate::outline::build_outline; use crate::page::PageLabelExt; use crate::shape::handle_shape; -use crate::tags::{handle_close_tag, handle_open_tag, Placeholder, TagNode, Tags}; +use crate::tags::{self, Placeholder, Tags}; use crate::text::handle_text; use crate::util::{convert_path, display_font, AbsExt, TransformExt}; use crate::PdfOptions; @@ -42,17 +41,15 @@ pub fn convert( options: &PdfOptions, ) -> SourceResult> { // HACK - // let config = Configuration::new(); let config = Configuration::new_with_validator(Validator::UA1); let settings = SerializeSettings { - compress_content_streams: true, + compress_content_streams: false, // true, no_device_cs: true, - ascii_compatible: false, + ascii_compatible: true, // false, xmp_metadata: true, cmyk_profile: None, - configuration: config, - // TODO: Should we just set this to false? If set to `false` this will - // automatically be enabled if the `UA1` validator is used. + configuration: config, // options.standards.config, + // TODO: allow opting out of tagging PDFs enable_tagging: true, render_svg_glyph_fn: render_svg_glyph, }; @@ -114,18 +111,7 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul let mut surface = page.surface(); let mut fc = FrameContext::new(typst_page.frame.size()); - // Marked-content may not cross page boundaries: reopen tag - // that was closed at the end of the last page. - if let Some((_, _, nodes)) = gc.tags.stack.last_mut() { - let tag = if gc.tags.in_artifact { - ContentTag::Artifact(ArtifactType::Other) - } else { - ContentTag::Other - }; - // TODO: somehow avoid empty marked-content sequences - let id = surface.start_tagged(tag); - nodes.push(TagNode::Leaf(id)); - } + tags::restart(gc, &mut surface); handle_frame( &mut fc, @@ -135,17 +121,11 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul gc, )?; - // Marked-content may not cross page boundaries: close open tag. 
- if !gc.tags.stack.is_empty() { - surface.end_tagged(); - } + tags::end_open(gc, &mut surface); surface.finish(); - for (placeholder, annotation) in fc.annotations { - let annotation_id = page.add_tagged_annotation(annotation); - gc.tags.init_placeholder(placeholder, Node::Leaf(annotation_id)); - } + tags::add_annotations(gc, &mut page, fc.annotations); } } @@ -318,10 +298,10 @@ pub(crate) fn handle_frame( handle_link(fc, gc, alt.as_ref().map(EcoString::to_string), dest, *size) } FrameItem::Tag(introspection::Tag::Start(elem)) => { - handle_open_tag(gc, surface, elem) + tags::handle_start(gc, surface, elem) } FrameItem::Tag(introspection::Tag::End(loc, _)) => { - handle_close_tag(gc, surface, loc); + tags::handle_end(gc, surface, loc); } } diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 2c43c8495..ae15674f4 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -1,9 +1,15 @@ use std::cell::OnceCell; +use std::ops::Deref; +use krilla::annotation::Annotation; +use krilla::page::Page; use krilla::surface::Surface; -use krilla::tagging::{ContentTag, Identifier, Node, Tag, TagGroup, TagTree}; +use krilla::tagging::{ + ArtifactType, ContentTag, Identifier, Node, Tag, TagGroup, TagTree, +}; use typst_library::foundations::{Content, StyleChain}; use typst_library::introspection::Location; +use typst_library::layout::{ArtifactKind, ArtifactMarker}; use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry}; use crate::convert::GlobalContext; @@ -12,7 +18,7 @@ pub(crate) struct Tags { /// The intermediary stack of nested tag groups. pub(crate) stack: Vec<(Location, Tag, Vec)>, pub(crate) placeholders: Vec>, - pub(crate) in_artifact: bool, + pub(crate) in_artifact: Option<(Location, ArtifactMarker)>, /// The output. 
pub(crate) tree: Vec, @@ -34,7 +40,7 @@ impl Tags { Self { stack: Vec::new(), placeholders: Vec::new(), - in_artifact: false, + in_artifact: None, tree: Vec::new(), } @@ -93,7 +99,16 @@ impl Tags { } } - pub(crate) fn context_supports(&self, tag: &Tag) -> bool { + /// Returns the current parent's list of children and whether it is the tree root. + fn parent_nodes(&mut self) -> (bool, &mut Vec) { + if let Some((_, _, parent_nodes)) = self.stack.last_mut() { + (false, parent_nodes) + } else { + (true, &mut self.tree) + } + } + + fn context_supports(&self, tag: &Tag) -> bool { let Some((_, parent, _)) = self.stack.last() else { return true }; use Tag::*; @@ -142,16 +157,57 @@ impl Tags { } } -pub(crate) fn handle_open_tag( +/// Marked-content may not cross page boundaries: restart tag that was still open +/// at the end of the last page. +pub(crate) fn restart(gc: &mut GlobalContext, surface: &mut Surface) { + // TODO: somehow avoid empty marked-content sequences + if let Some((_, marker)) = gc.tags.in_artifact { + start_artifact(gc, surface, marker.kind); + } else if let Some((_, _, nodes)) = gc.tags.stack.last_mut() { + let id = surface.start_tagged(ContentTag::Other); + nodes.push(TagNode::Leaf(id)); + } +} + +/// Marked-content may not cross page boundaries: end any open tag. +pub(crate) fn end_open(gc: &mut GlobalContext, surface: &mut Surface) { + if !gc.tags.stack.is_empty() || gc.tags.in_artifact.is_some() { + surface.end_tagged(); + } +} + +/// Add all annotations that were found in the page frame. 
+pub(crate) fn add_annotations( + gc: &mut GlobalContext, + page: &mut Page, + annotations: Vec<(Placeholder, Annotation)>, +) { + for (placeholder, annotation) in annotations { + let annotation_id = page.add_tagged_annotation(annotation); + gc.tags.init_placeholder(placeholder, Node::Leaf(annotation_id)); + } +} + +pub(crate) fn handle_start( gc: &mut GlobalContext, surface: &mut Surface, elem: &Content, ) { - if gc.tags.in_artifact { + if gc.tags.in_artifact.is_some() { + // Don't nest artifacts return; } - let Some(loc) = elem.location() else { return }; + let loc = elem.location().unwrap(); + + if let Some(marker) = elem.to_packed::() { + if !gc.tags.stack.is_empty() { + surface.end_tagged(); + } + start_artifact(gc, surface, marker.kind); + gc.tags.in_artifact = Some((loc, *marker.deref())); + return; + } let tag = if let Some(heading) = elem.to_packed::() { let level = heading.resolve_level(StyleChain::default()); @@ -181,29 +237,50 @@ pub(crate) fn handle_open_tag( if !gc.tags.stack.is_empty() { surface.end_tagged(); } - let content_id = surface.start_tagged(krilla::tagging::ContentTag::Other); - - gc.tags.stack.push((loc, tag, vec![TagNode::Leaf(content_id)])); + let id = surface.start_tagged(krilla::tagging::ContentTag::Other); + gc.tags.stack.push((loc, tag, vec![TagNode::Leaf(id)])); } -pub(crate) fn handle_close_tag( - gc: &mut GlobalContext, - surface: &mut Surface, - loc: &Location, -) { +pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: &Location) { + if let Some((l, _)) = &gc.tags.in_artifact { + if l == loc { + gc.tags.in_artifact = None; + surface.end_tagged(); + if let Some((_, _, nodes)) = gc.tags.stack.last_mut() { + let id = surface.start_tagged(ContentTag::Other); + nodes.push(TagNode::Leaf(id)); + } + } + return; + } + let Some((_, tag, nodes)) = gc.tags.stack.pop_if(|(l, ..)| l == loc) else { return; }; surface.end_tagged(); - if let Some((_, _, parent_nodes)) = gc.tags.stack.last_mut() { - 
parent_nodes.push(TagNode::Group(tag, nodes)); - + let (is_root, parent_nodes) = gc.tags.parent_nodes(); + parent_nodes.push(TagNode::Group(tag, nodes)); + if !is_root { // TODO: somehow avoid empty marked-content sequences let id = surface.start_tagged(ContentTag::Other); parent_nodes.push(TagNode::Leaf(id)); - } else { - gc.tags.tree.push(TagNode::Group(tag, nodes)); + } +} + +fn start_artifact(gc: &mut GlobalContext, surface: &mut Surface, kind: ArtifactKind) { + let ty = artifact_type(kind); + let id = surface.start_tagged(ContentTag::Artifact(ty)); + + let (_, parent_nodes) = gc.tags.parent_nodes(); + parent_nodes.push(TagNode::Leaf(id)); +} + +fn artifact_type(kind: ArtifactKind) -> ArtifactType { + match kind { + ArtifactKind::Header => ArtifactType::Header, + ArtifactKind::Footer => ArtifactType::Footer, + ArtifactKind::Page => ArtifactType::Page, } } From 9e2235dbd87e8f1f9bb76d79833139da8616f295 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 11 Jun 2025 16:10:34 +0200 Subject: [PATCH 07/76] feat: pdf.artifact element --- crates/typst-layout/src/pages/finalize.rs | 39 ++------------ crates/typst-layout/src/pages/mod.rs | 10 ++-- crates/typst-layout/src/pages/run.rs | 12 +++-- .../typst-library/src/foundations/content.rs | 7 +++ crates/typst-library/src/layout/page.rs | 24 +-------- crates/typst-library/src/pdf/accessibility.rs | 54 +++++++++++++++++++ crates/typst-library/src/pdf/mod.rs | 3 ++ crates/typst-pdf/src/tags.rs | 17 +++--- 8 files changed, 92 insertions(+), 74 deletions(-) create mode 100644 crates/typst-library/src/pdf/accessibility.rs diff --git a/crates/typst-layout/src/pages/finalize.rs b/crates/typst-layout/src/pages/finalize.rs index 543dbb0ce..b16d95699 100644 --- a/crates/typst-layout/src/pages/finalize.rs +++ b/crates/typst-layout/src/pages/finalize.rs @@ -1,10 +1,7 @@ use typst_library::diag::SourceResult; use typst_library::engine::Engine; -use typst_library::foundations::{Content, NativeElement}; -use 
typst_library::introspection::{ManualPageCounter, SplitLocator, Tag}; -use typst_library::layout::{ - ArtifactKind, ArtifactMarker, Frame, FrameItem, Page, Point, -}; +use typst_library::introspection::{ManualPageCounter, Tag}; +use typst_library::layout::{Frame, FrameItem, Page, Point}; use super::LayoutedPage; @@ -13,7 +10,6 @@ use super::LayoutedPage; /// physical page number, which is unknown during parallel layout. pub fn finalize( engine: &mut Engine, - locator: &mut SplitLocator, counter: &mut ManualPageCounter, tags: &mut Vec, LayoutedPage { @@ -49,12 +45,10 @@ pub fn finalize( // important as it affects the relative ordering of introspectable elements // and thus how counters resolve. if let Some(background) = background { - let tag = ArtifactMarker::new(ArtifactKind::Page).pack(); - push_tagged(engine, locator, &mut frame, Point::zero(), background, tag); + frame.push_frame(Point::zero(), background); } if let Some(header) = header { - let tag = ArtifactMarker::new(ArtifactKind::Header).pack(); - push_tagged(engine, locator, &mut frame, Point::with_x(margin.left), header, tag); + frame.push_frame(Point::with_x(margin.left), header); } // Add the inner contents. @@ -63,8 +57,7 @@ pub fn finalize( // Add the "after" marginals. if let Some(footer) = footer { let y = frame.height() - footer.height(); - let tag = ArtifactMarker::new(ArtifactKind::Footer).pack(); - push_tagged(engine, locator, &mut frame, Point::new(margin.left, y), footer, tag); + frame.push_frame(Point::new(margin.left, y), footer); } if let Some(foreground) = foreground { frame.push_frame(Point::zero(), foreground); @@ -79,25 +72,3 @@ pub fn finalize( Ok(Page { frame, fill, numbering, supplement, number }) } - -fn push_tagged( - engine: &mut Engine, - locator: &mut SplitLocator, - frame: &mut Frame, - mut pos: Point, - inner: Frame, - mut tag: Content, -) { - // TODO: use general PDF Tagged/Artifact element that wraps some content and - // is also available to the user. 
- let key = typst_utils::hash128(&tag); - let loc = locator.next_location(engine.introspector, key); - tag.set_location(loc); - frame.push(pos, FrameItem::Tag(Tag::Start(tag))); - - let height = inner.height(); - frame.push_frame(pos, inner); - - pos.y += height; - frame.push(pos, FrameItem::Tag(Tag::End(loc, key))); -} diff --git a/crates/typst-layout/src/pages/mod.rs b/crates/typst-layout/src/pages/mod.rs index a64fee4b3..14dc0f3fb 100644 --- a/crates/typst-layout/src/pages/mod.rs +++ b/crates/typst-layout/src/pages/mod.rs @@ -123,19 +123,17 @@ fn layout_pages<'a>( Item::Run(..) => { let layouted = runs.next().unwrap()?; for layouted in layouted { - let page = - finalize(engine, locator, &mut counter, &mut tags, layouted)?; + let page = finalize(engine, &mut counter, &mut tags, layouted)?; pages.push(page); } } - Item::Parity(parity, initial, page_locator) => { + Item::Parity(parity, initial, locator) => { if !parity.matches(pages.len()) { continue; } - let layouted = - layout_blank_page(engine, page_locator.relayout(), *initial)?; - let page = finalize(engine, locator, &mut counter, &mut tags, layouted)?; + let layouted = layout_blank_page(engine, locator.relayout(), *initial)?; + let page = finalize(engine, &mut counter, &mut tags, layouted)?; pages.push(page); } Item::Tags(items) => { diff --git a/crates/typst-layout/src/pages/run.rs b/crates/typst-layout/src/pages/run.rs index 6d2d29da5..233608386 100644 --- a/crates/typst-layout/src/pages/run.rs +++ b/crates/typst-layout/src/pages/run.rs @@ -13,6 +13,7 @@ use typst_library::layout::{ VAlignment, }; use typst_library::model::Numbering; +use typst_library::pdf::ArtifactKind; use typst_library::routines::{Pair, Routines}; use typst_library::text::{LocalName, TextElem}; use typst_library::visualize::Paint; @@ -200,6 +201,11 @@ fn layout_page_run_impl( // Layout marginals. 
let mut layouted = Vec::with_capacity(fragment.len()); + + let header = header.as_ref().map(|h| h.clone().artifact(ArtifactKind::Header)); + let footer = footer.as_ref().map(|f| f.clone().artifact(ArtifactKind::Footer)); + let background = background.as_ref().map(|b| b.clone().artifact(ArtifactKind::Page)); + for inner in fragment { let header_size = Size::new(inner.width(), margin.top - header_ascent); let footer_size = Size::new(inner.width(), margin.bottom - footer_descent); @@ -210,9 +216,9 @@ fn layout_page_run_impl( fill: fill.clone(), numbering: numbering.clone(), supplement: supplement.clone(), - header: layout_marginal(header, header_size, Alignment::BOTTOM)?, - footer: layout_marginal(footer, footer_size, Alignment::TOP)?, - background: layout_marginal(background, full_size, mid)?, + header: layout_marginal(&header, header_size, Alignment::BOTTOM)?, + footer: layout_marginal(&footer, footer_size, Alignment::TOP)?, + background: layout_marginal(&background, full_size, mid)?, foreground: layout_marginal(foreground, full_size, mid)?, margin, binding, diff --git a/crates/typst-library/src/foundations/content.rs b/crates/typst-library/src/foundations/content.rs index 278d49401..8cd46f0dd 100644 --- a/crates/typst-library/src/foundations/content.rs +++ b/crates/typst-library/src/foundations/content.rs @@ -22,6 +22,7 @@ use crate::foundations::{ use crate::introspection::Location; use crate::layout::{AlignElem, Alignment, Axes, Length, MoveElem, PadElem, Rel, Sides}; use crate::model::{Destination, EmphElem, LinkElem, StrongElem}; +use crate::pdf::{ArtifactElem, ArtifactKind}; use crate::text::UnderlineElem; /// A piece of document content. @@ -534,6 +535,12 @@ impl Content { .pack() .spanned(span) } + + /// Mark the content as an artifact of the given kind. 
+ pub fn artifact(self, kind: ArtifactKind) -> Self { + let span = self.span(); + ArtifactElem::new(self).with_kind(kind).pack().spanned(span) + } } #[scope] diff --git a/crates/typst-library/src/layout/page.rs b/crates/typst-library/src/layout/page.rs index b6fa5d0be..98afbd06f 100644 --- a/crates/typst-library/src/layout/page.rs +++ b/crates/typst-library/src/layout/page.rs @@ -10,7 +10,7 @@ use crate::foundations::{ cast, elem, Args, AutoValue, Cast, Construct, Content, Dict, Fold, NativeElement, Set, Smart, Value, }; -use crate::introspection::{Introspector, Locatable}; +use crate::introspection::Introspector; use crate::layout::{ Abs, Alignment, FlushElem, Frame, HAlignment, Length, OuterVAlignment, Ratio, Rel, Sides, SpecificAlignment, @@ -451,28 +451,6 @@ impl PagebreakElem { } } -// HACK: this should probably not be an element -#[derive(Copy)] -#[elem(Construct, Locatable)] -pub struct ArtifactMarker { - #[internal] - #[required] - pub kind: ArtifactKind, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum ArtifactKind { - Header, - Footer, - Page, -} - -impl Construct for ArtifactMarker { - fn construct(_: &mut Engine, args: &mut Args) -> SourceResult { - bail!(args.span, "cannot be constructed manually"); - } -} - /// A finished document with metadata and page frames. #[derive(Debug, Default, Clone)] pub struct PagedDocument { diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs new file mode 100644 index 000000000..586e2cbb1 --- /dev/null +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -0,0 +1,54 @@ +use typst_macros::{cast, elem}; + +use crate::diag::SourceResult; +use crate::engine::Engine; +use crate::foundations::{Content, Packed, Show, StyleChain}; +use crate::introspection::Locatable; + +// TODO: docs + +/// Mark content as a PDF artifact. +/// TODO: also use to mark html elements with `aria-hidden="true"`? 
+#[elem(Locatable, Show)] +pub struct ArtifactElem { + #[default(ArtifactKind::Other)] + pub kind: ArtifactKind, + + /// The content to mark as an artifact. + #[required] + pub body: Content, +} + +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +pub enum ArtifactKind { + /// Page header artifacts. + Header, + /// Page footer artifacts. + Footer, + /// Other page artifacts. + Page, + /// Other artifacts. + #[default] + Other, +} + +cast! { + ArtifactKind, + self => match self { + ArtifactKind::Header => "header".into_value(), + ArtifactKind::Footer => "footer".into_value(), + ArtifactKind::Page => "page".into_value(), + ArtifactKind::Other => "other".into_value(), + }, + "header" => Self::Header, + "footer" => Self::Footer, + "page" => Self::Page, + "other" => Self::Other, +} + +impl Show for Packed { + #[typst_macros::time(name = "underline", span = self.span())] + fn show(&self, _: &mut Engine, _: StyleChain) -> SourceResult { + Ok(self.body.clone()) + } +} diff --git a/crates/typst-library/src/pdf/mod.rs b/crates/typst-library/src/pdf/mod.rs index 786a36372..835cc69fe 100644 --- a/crates/typst-library/src/pdf/mod.rs +++ b/crates/typst-library/src/pdf/mod.rs @@ -1,7 +1,9 @@ //! PDF-specific functionality. 
+mod accessibility; mod embed; +pub use self::accessibility::*; pub use self::embed::*; use crate::foundations::{Module, Scope}; @@ -11,5 +13,6 @@ pub fn module() -> Module { let mut pdf = Scope::deduplicating(); pdf.start_category(crate::Category::Pdf); pdf.define_elem::(); + pdf.define_elem::(); Module::new("pdf", pdf) } diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index ae15674f4..d6415adeb 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -1,5 +1,4 @@ use std::cell::OnceCell; -use std::ops::Deref; use krilla::annotation::Annotation; use krilla::page::Page; @@ -9,8 +8,8 @@ use krilla::tagging::{ }; use typst_library::foundations::{Content, StyleChain}; use typst_library::introspection::Location; -use typst_library::layout::{ArtifactKind, ArtifactMarker}; use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry}; +use typst_library::pdf::{ArtifactElem, ArtifactKind}; use crate::convert::GlobalContext; @@ -18,7 +17,7 @@ pub(crate) struct Tags { /// The intermediary stack of nested tag groups. pub(crate) stack: Vec<(Location, Tag, Vec)>, pub(crate) placeholders: Vec>, - pub(crate) in_artifact: Option<(Location, ArtifactMarker)>, + pub(crate) in_artifact: Option<(Location, ArtifactKind)>, /// The output. pub(crate) tree: Vec, @@ -161,8 +160,8 @@ impl Tags { /// at the end of the last page. 
pub(crate) fn restart(gc: &mut GlobalContext, surface: &mut Surface) { // TODO: somehow avoid empty marked-content sequences - if let Some((_, marker)) = gc.tags.in_artifact { - start_artifact(gc, surface, marker.kind); + if let Some((_, kind)) = gc.tags.in_artifact { + start_artifact(gc, surface, kind); } else if let Some((_, _, nodes)) = gc.tags.stack.last_mut() { let id = surface.start_tagged(ContentTag::Other); nodes.push(TagNode::Leaf(id)); @@ -200,12 +199,13 @@ pub(crate) fn handle_start( let loc = elem.location().unwrap(); - if let Some(marker) = elem.to_packed::() { + if let Some(artifact) = elem.to_packed::() { if !gc.tags.stack.is_empty() { surface.end_tagged(); } - start_artifact(gc, surface, marker.kind); - gc.tags.in_artifact = Some((loc, *marker.deref())); + let kind = artifact.kind(StyleChain::default()); + start_artifact(gc, surface, kind); + gc.tags.in_artifact = Some((loc, kind)); return; } @@ -282,5 +282,6 @@ fn artifact_type(kind: ArtifactKind) -> ArtifactType { ArtifactKind::Header => ArtifactType::Header, ArtifactKind::Footer => ArtifactType::Footer, ArtifactKind::Page => ArtifactType::Page, + ArtifactKind::Other => ArtifactType::Other, } } From 6c686bd460d9db388edaeaf014e61621d6ebf661 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 20 Jun 2025 15:55:24 +0200 Subject: [PATCH 08/76] feat: write tags for links and use quadpoints in link annotations --- crates/typst-ide/src/jump.rs | 4 +- crates/typst-layout/src/modifiers.rs | 19 +- .../typst-library/src/foundations/content.rs | 41 ++++- crates/typst-library/src/layout/frame.rs | 8 +- .../typst-library/src/model/bibliography.rs | 4 +- crates/typst-library/src/model/footnote.rs | 4 +- crates/typst-library/src/model/link.rs | 9 +- crates/typst-library/src/model/outline.rs | 2 +- crates/typst-library/src/model/reference.rs | 2 +- crates/typst-pdf/src/convert.rs | 29 +-- crates/typst-pdf/src/image.rs | 8 - crates/typst-pdf/src/link.rs | 133 +++++++++----- crates/typst-pdf/src/tags.rs | 166 
+++++++++--------- tests/src/run.rs | 2 +- 14 files changed, 238 insertions(+), 193 deletions(-) diff --git a/crates/typst-ide/src/jump.rs b/crates/typst-ide/src/jump.rs index 0f9f84ff7..1c66cb785 100644 --- a/crates/typst-ide/src/jump.rs +++ b/crates/typst-ide/src/jump.rs @@ -36,9 +36,9 @@ pub fn jump_from_click( ) -> Option { // Try to find a link first. for (pos, item) in frame.items() { - if let FrameItem::Link(_, dest, size) = item { + if let FrameItem::Link(link, size) = item { if is_in_rect(*pos, *size, click) { - return Some(match dest { + return Some(match &link.dest { Destination::Url(url) => Jump::Url(url.clone()), Destination::Position(pos) => Jump::Position(*pos), Destination::Location(loc) => { diff --git a/crates/typst-layout/src/modifiers.rs b/crates/typst-layout/src/modifiers.rs index a7d882617..00d42e42c 100644 --- a/crates/typst-layout/src/modifiers.rs +++ b/crates/typst-layout/src/modifiers.rs @@ -1,7 +1,6 @@ -use ecow::EcoString; -use typst_library::foundations::StyleChain; +use typst_library::foundations::{LinkMarker, Packed, StyleChain}; use typst_library::layout::{Abs, Fragment, Frame, FrameItem, HideElem, Point, Sides}; -use typst_library::model::{Destination, LinkElem, ParElem}; +use typst_library::model::ParElem; /// Frame-level modifications resulting from styles that do not impose any /// layout structure. @@ -21,8 +20,7 @@ use typst_library::model::{Destination, LinkElem, ParElem}; #[derive(Debug, Clone)] pub struct FrameModifiers { /// A destination to link to. - dest: Option, - alt: Option, + link: Option>, /// Whether the contents of the frame should be hidden. 
hidden: bool, } @@ -32,8 +30,7 @@ impl FrameModifiers { pub fn get_in(styles: StyleChain) -> Self { // TODO: maybe verify that an alt text was provided here Self { - dest: LinkElem::current_in(styles), - alt: LinkElem::alt_in(styles), + link: LinkMarker::current_in(styles), hidden: HideElem::hidden_in(styles), } } @@ -98,7 +95,7 @@ fn modify_frame( modifiers: &FrameModifiers, link_box_outset: Option>, ) { - if let Some(dest) = &modifiers.dest { + if let Some(link) = &modifiers.link { let mut pos = Point::zero(); let mut size = frame.size(); if let Some(outset) = link_box_outset { @@ -106,7 +103,7 @@ fn modify_frame( pos.x -= outset.left; size += outset.sum_by_axis(); } - frame.push(pos, FrameItem::Link(modifiers.alt.clone(), dest.clone(), size)); + frame.push(pos, FrameItem::Link(link.clone(), size)); } if modifiers.hidden { @@ -133,8 +130,8 @@ where let reset; let outer = styles; let mut styles = styles; - if modifiers.dest.is_some() { - reset = LinkElem::set_current(None).wrap(); + if modifiers.link.is_some() { + reset = LinkMarker::set_current(None).wrap(); styles = outer.chain(&reset); } diff --git a/crates/typst-library/src/foundations/content.rs b/crates/typst-library/src/foundations/content.rs index 8cd46f0dd..518deca75 100644 --- a/crates/typst-library/src/foundations/content.rs +++ b/crates/typst-library/src/foundations/content.rs @@ -16,12 +16,12 @@ use crate::diag::{SourceResult, StrResult}; use crate::engine::Engine; use crate::foundations::{ elem, func, scope, ty, Context, Dict, Element, Fields, IntoValue, Label, - NativeElement, Recipe, RecipeIndex, Repr, Selector, Str, Style, StyleChain, Styles, - Value, + NativeElement, Recipe, RecipeIndex, Repr, Selector, Show, Str, Style, StyleChain, + Styles, Value, }; -use crate::introspection::Location; +use crate::introspection::{Locatable, Location}; use crate::layout::{AlignElem, Alignment, Axes, Length, MoveElem, PadElem, Rel, Sides}; -use crate::model::{Destination, EmphElem, LinkElem, StrongElem}; +use 
crate::model::{Destination, EmphElem, StrongElem}; use crate::pdf::{ArtifactElem, ArtifactKind}; use crate::text::UnderlineElem; @@ -504,9 +504,13 @@ impl Content { } /// Link the content somewhere. - pub fn linked(self, alt: Option, dest: Destination) -> Self { - self.styled(LinkElem::set_alt(alt)) - .styled(LinkElem::set_current(Some(dest))) + pub fn linked(self, dest: Destination, alt: Option) -> Self { + let span = self.span(); + let link = Packed::new(LinkMarker::new(self, dest, alt)); + link.clone() + .pack() + .spanned(span) + .styled(LinkMarker::set_current(Some(link))) } /// Set alignments for this content. @@ -988,6 +992,29 @@ pub trait PlainText { fn plain_text(&self, text: &mut EcoString); } +/// An element that associates the body of a link with the destination. +#[elem(Show, Locatable)] +pub struct LinkMarker { + /// The content. + #[required] + pub body: Content, + #[required] + pub dest: Destination, + #[required] + pub alt: Option, + + /// A link style that should be applied to elements. + #[internal] + #[ghost] + pub current: Option>, +} + +impl Show for Packed { + fn show(&self, _: &mut Engine, _: StyleChain) -> SourceResult { + Ok(self.body.clone()) + } +} + /// An error arising when trying to access a field of content. 
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] pub enum FieldAccessError { diff --git a/crates/typst-library/src/layout/frame.rs b/crates/typst-library/src/layout/frame.rs index 5b9d1f1a7..71bb9aa1b 100644 --- a/crates/typst-library/src/layout/frame.rs +++ b/crates/typst-library/src/layout/frame.rs @@ -4,14 +4,12 @@ use std::fmt::{self, Debug, Formatter}; use std::num::NonZeroUsize; use std::sync::Arc; -use ecow::EcoString; use typst_syntax::Span; use typst_utils::{LazyHash, Numeric}; -use crate::foundations::{cast, dict, Dict, Label, Value}; +use crate::foundations::{cast, dict, Dict, Label, LinkMarker, Packed, Value}; use crate::introspection::{Location, Tag}; use crate::layout::{Abs, Axes, FixedAlignment, Length, Point, Size, Transform}; -use crate::model::Destination; use crate::text::TextItem; use crate::visualize::{Color, Curve, FixedStroke, Geometry, Image, Paint, Shape}; @@ -474,7 +472,7 @@ pub enum FrameItem { /// An image and its size. Image(Image, Size, Span), /// An internal or external link to a destination. - Link(Option, Destination, Size), + Link(Packed, Size), /// An introspectable element that produced something within this frame. 
Tag(Tag), } @@ -486,7 +484,7 @@ impl Debug for FrameItem { Self::Text(text) => write!(f, "{text:?}"), Self::Shape(shape, _) => write!(f, "{shape:?}"), Self::Image(image, _, _) => write!(f, "{image:?}"), - Self::Link(alt, dest, _) => write!(f, "Link({alt:?}, {dest:?})"), + Self::Link(link, _) => write!(f, "Link({:?}, {:?})", link.dest, link.alt), Self::Tag(tag) => write!(f, "{tag:?}"), } } diff --git a/crates/typst-library/src/model/bibliography.rs b/crates/typst-library/src/model/bibliography.rs index 75ed0fce2..3b14c8cbf 100644 --- a/crates/typst-library/src/model/bibliography.rs +++ b/crates/typst-library/src/model/bibliography.rs @@ -878,7 +878,7 @@ impl<'a> Generator<'a> { if let Some(location) = first_occurrences.get(item.key.as_str()) { let dest = Destination::Location(*location); // TODO: accept user supplied alt text - content = content.linked(None, dest); + content = content.linked(dest, None); } StrResult::Ok(content) }) @@ -1014,7 +1014,7 @@ impl ElemRenderer<'_> { if let Some(location) = (self.link)(i) { let dest = Destination::Location(location); // TODO: accept user supplied alt text - content = content.linked(None, dest); + content = content.linked(dest, None); } } diff --git a/crates/typst-library/src/model/footnote.rs b/crates/typst-library/src/model/footnote.rs index af6664cb9..872827d90 100644 --- a/crates/typst-library/src/model/footnote.rs +++ b/crates/typst-library/src/model/footnote.rs @@ -148,7 +148,7 @@ impl Show for Packed { let loc = loc.variant(1); // Add zero-width weak spacing to make the footnote "sticky". 
// TODO: accept user supplied alt text - Ok(HElem::hole().pack() + sup.linked(None, Destination::Location(loc))) + Ok(HElem::hole().pack() + sup.linked(Destination::Location(loc), None)) } } @@ -298,7 +298,7 @@ impl Show for Packed { .pack() .spanned(span) // TODO: accept user supplied alt text - .linked(None, Destination::Location(loc)) + .linked(Destination::Location(loc), None) .located(loc.variant(1)); Ok(Content::sequence([ diff --git a/crates/typst-library/src/model/link.rs b/crates/typst-library/src/model/link.rs index d64192f29..34ce14877 100644 --- a/crates/typst-library/src/model/link.rs +++ b/crates/typst-library/src/model/link.rs @@ -91,11 +91,6 @@ pub struct LinkElem { _ => args.expect("body")?, })] pub body: Content, - - /// A destination style that should be applied to elements. - #[internal] - #[ghost] - pub current: Option, } impl LinkElem { @@ -128,11 +123,11 @@ impl Show for Packed { } else { let alt = self.alt(styles); match &self.dest { - LinkTarget::Dest(dest) => body.linked(alt, dest.clone()), + LinkTarget::Dest(dest) => body.linked(dest.clone(), alt), LinkTarget::Label(label) => { let elem = engine.introspector.query_label(*label).at(self.span())?; let dest = Destination::Location(elem.location().unwrap()); - body.clone().linked(alt, dest) + body.linked(dest, alt) } } }) diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index bcdd15652..e2c8650ca 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -441,7 +441,7 @@ impl Show for Packed { }; let loc = self.element_location().at(span)?; - Ok(block.linked(Some(alt), Destination::Location(loc))) + Ok(block.linked(Destination::Location(loc), Some(alt))) } } diff --git a/crates/typst-library/src/model/reference.rs b/crates/typst-library/src/model/reference.rs index ca0e0f5e3..b04c57c4a 100644 --- a/crates/typst-library/src/model/reference.rs +++ b/crates/typst-library/src/model/reference.rs @@ 
-344,7 +344,7 @@ fn show_reference( } // TODO: accept user supplied alt text - Ok(content.linked(None, Destination::Location(loc))) + Ok(content.linked(Destination::Location(loc), None)) } /// Turn a reference into a citation. diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index c53fd1e28..abdb4c1dd 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -1,8 +1,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::num::NonZeroU64; -use ecow::{eco_format, EcoString, EcoVec}; -use krilla::annotation::Annotation; +use ecow::{eco_format, EcoVec}; use krilla::configure::{Configuration, ValidationError, Validator}; use krilla::destination::{NamedDestination, XyzDestination}; use krilla::embed::EmbedError; @@ -25,12 +24,12 @@ use typst_syntax::Span; use crate::embed::embed_files; use crate::image::handle_image; -use crate::link::handle_link; +use crate::link::{handle_link, LinkAnnotation}; use crate::metadata::build_metadata; use crate::outline::build_outline; use crate::page::PageLabelExt; use crate::shape::handle_shape; -use crate::tags::{self, Placeholder, Tags}; +use crate::tags::{self, Tags}; use crate::text::handle_text; use crate::util::{convert_path, display_font, AbsExt, TransformExt}; use crate::PdfOptions; @@ -111,7 +110,7 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul let mut surface = page.surface(); let mut fc = FrameContext::new(typst_page.frame.size()); - tags::restart(gc, &mut surface); + tags::restart_open(gc, &mut surface); handle_frame( &mut fc, @@ -125,7 +124,7 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul surface.finish(); - tags::add_annotations(gc, &mut page, fc.annotations); + tags::add_annotations(gc, &mut page, fc.link_annotations); } } @@ -179,14 +178,14 @@ impl State { /// Context needed for converting a single frame. 
pub(crate) struct FrameContext { states: Vec, - annotations: Vec<(Placeholder, Annotation)>, + pub(crate) link_annotations: HashMap, } impl FrameContext { pub(crate) fn new(size: Size) -> Self { Self { states: vec![State::new(size)], - annotations: vec![], + link_annotations: HashMap::new(), } } @@ -205,14 +204,6 @@ impl FrameContext { pub(crate) fn state_mut(&mut self) -> &mut State { self.states.last_mut().unwrap() } - - pub(crate) fn push_annotation( - &mut self, - placeholder: Placeholder, - annotation: Annotation, - ) { - self.annotations.push((placeholder, annotation)); - } } /// Globally needed context for converting a typst document. @@ -294,14 +285,12 @@ pub(crate) fn handle_frame( FrameItem::Image(image, size, span) => { handle_image(gc, fc, image, *size, surface, *span)? } - FrameItem::Link(alt, dest, size) => { - handle_link(fc, gc, alt.as_ref().map(EcoString::to_string), dest, *size) - } + FrameItem::Link(link, size) => handle_link(fc, gc, link, *size), FrameItem::Tag(introspection::Tag::Start(elem)) => { tags::handle_start(gc, surface, elem) } FrameItem::Tag(introspection::Tag::End(loc, _)) => { - tags::handle_end(gc, surface, loc); + tags::handle_end(gc, surface, *loc); } } diff --git a/crates/typst-pdf/src/image.rs b/crates/typst-pdf/src/image.rs index 93bdb1950..0809ae046 100644 --- a/crates/typst-pdf/src/image.rs +++ b/crates/typst-pdf/src/image.rs @@ -30,10 +30,6 @@ pub(crate) fn handle_image( let interpolate = image.scaling() == Smart::Custom(ImageScaling::Smooth); - if let Some(alt) = image.alt() { - surface.start_alt_text(alt); - } - gc.image_spans.insert(span); match image.kind() { @@ -62,10 +58,6 @@ pub(crate) fn handle_image( } } - if image.alt().is_some() { - surface.end_alt_text(); - } - surface.pop(); surface.reset_location(); diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index 6dfefbc11..2d360cfc3 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -1,52 +1,33 @@ +use 
std::collections::hash_map::Entry; + +use ecow::EcoString; use krilla::action::{Action, LinkAction}; -use krilla::annotation::{Annotation, LinkAnnotation, Target}; +use krilla::annotation::Target; use krilla::destination::XyzDestination; -use krilla::geom::Rect; +use krilla::geom as kg; +use typst_library::foundations::LinkMarker; use typst_library::layout::{Abs, Point, Position, Size}; use typst_library::model::Destination; use crate::convert::{FrameContext, GlobalContext}; -use crate::tags::TagNode; +use crate::tags::{Placeholder, TagNode}; use crate::util::{AbsExt, PointExt}; +pub(crate) struct LinkAnnotation { + pub(crate) placeholder: Placeholder, + pub(crate) alt: Option, + pub(crate) rect: kg::Rect, + pub(crate) quad_points: Vec, + pub(crate) target: Target, +} + pub(crate) fn handle_link( fc: &mut FrameContext, gc: &mut GlobalContext, - alt: Option, - dest: &Destination, + link: &LinkMarker, size: Size, ) { - let mut min_x = Abs::inf(); - let mut min_y = Abs::inf(); - let mut max_x = -Abs::inf(); - let mut max_y = -Abs::inf(); - - let pos = Point::zero(); - - // Compute the bounding box of the transformed link. - for point in [ - pos, - pos + Point::with_x(size.x), - pos + Point::with_y(size.y), - pos + size.to_point(), - ] { - let t = point.transform(fc.state().transform()); - min_x.set_min(t.x); - min_y.set_min(t.y); - max_x.set_max(t.x); - max_y.set_max(t.y); - } - - let x1 = min_x.to_f32(); - let x2 = max_x.to_f32(); - let y1 = min_y.to_f32(); - let y2 = max_y.to_f32(); - - let rect = Rect::from_ltrb(x1, y1, x2, y2).unwrap(); - - // TODO: Support quad points. 
- - let target = match dest { + let target = match &link.dest { Destination::Url(u) => { Target::Action(Action::Link(LinkAction::new(u.to_string()))) } @@ -69,13 +50,81 @@ pub(crate) fn handle_link( } }; - let placeholder = gc.tags.reserve_placeholder(); - gc.tags.push(TagNode::Placeholder(placeholder)); + let entry = gc.tags.stack.last_mut().expect("a link parent"); + let link_id = entry.link_id.expect("a link parent"); - fc.push_annotation( - placeholder, - Annotation::new_link(LinkAnnotation::new(rect, None, target), alt), - ); + let rect = to_rect(fc, size); + let quadpoints = quadpoints(rect); + + match fc.link_annotations.entry(link_id) { + Entry::Occupied(occupied) => { + // Update the bounding box and add the quadpoints of an existing link annotation. + let annotation = occupied.into_mut(); + annotation.rect = bounding_rect(annotation.rect, rect); + annotation.quad_points.extend_from_slice(&quadpoints); + } + Entry::Vacant(vacant) => { + let placeholder = gc.tags.reserve_placeholder(); + gc.tags.push(TagNode::Placeholder(placeholder)); + + vacant.insert(LinkAnnotation { + placeholder, + rect, + quad_points: quadpoints.to_vec(), + alt: link.alt.as_ref().map(EcoString::to_string), + target, + }); + } + } +} + +// Compute the bounding box of the transformed link. 
+fn to_rect(fc: &FrameContext, size: Size) -> kg::Rect { + let mut min_x = Abs::inf(); + let mut min_y = Abs::inf(); + let mut max_x = -Abs::inf(); + let mut max_y = -Abs::inf(); + + let pos = Point::zero(); + + for point in [ + pos, + pos + Point::with_x(size.x), + pos + Point::with_y(size.y), + pos + size.to_point(), + ] { + let t = point.transform(fc.state().transform()); + min_x.set_min(t.x); + min_y.set_min(t.y); + max_x.set_max(t.x); + max_y.set_max(t.y); + } + + let x1 = min_x.to_f32(); + let x2 = max_x.to_f32(); + let y1 = min_y.to_f32(); + let y2 = max_y.to_f32(); + + kg::Rect::from_ltrb(x1, y1, x2, y2).unwrap() +} + +fn bounding_rect(a: kg::Rect, b: kg::Rect) -> kg::Rect { + kg::Rect::from_ltrb( + a.left().min(b.left()), + a.top().min(b.top()), + a.right().max(b.right()), + a.bottom().max(b.bottom()), + ) + .unwrap() +} + +fn quadpoints(rect: kg::Rect) -> [kg::Point; 4] { + [ + kg::Point::from_xy(rect.left(), rect.bottom()), + kg::Point::from_xy(rect.right(), rect.bottom()), + kg::Point::from_xy(rect.right(), rect.top()), + kg::Point::from_xy(rect.left(), rect.top()), + ] } fn pos_to_target(gc: &mut GlobalContext, pos: Position) -> Option { diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index d6415adeb..92d3bfe78 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -1,28 +1,43 @@ use std::cell::OnceCell; +use std::collections::HashMap; -use krilla::annotation::Annotation; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ ArtifactType, ContentTag, Identifier, Node, Tag, TagGroup, TagTree, }; -use typst_library::foundations::{Content, StyleChain}; +use typst_library::foundations::{Content, LinkMarker, StyleChain}; use typst_library::introspection::Location; -use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry}; +use typst_library::model::{ + Destination, HeadingElem, Outlinable, OutlineElem, OutlineEntry, +}; use typst_library::pdf::{ArtifactElem, ArtifactKind}; 
use crate::convert::GlobalContext; +use crate::link::LinkAnnotation; pub(crate) struct Tags { /// The intermediary stack of nested tag groups. - pub(crate) stack: Vec<(Location, Tag, Vec)>, + pub(crate) stack: Vec, + /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. pub(crate) placeholders: Vec>, pub(crate) in_artifact: Option<(Location, ArtifactKind)>, + pub(crate) link_id: LinkId, /// The output. pub(crate) tree: Vec, } +pub(crate) struct StackEntry { + pub(crate) loc: Location, + pub(crate) link_id: Option, + /// A list of tags that are wrapped around this tag when it is inserted into + /// the tag tree. + pub(crate) wrappers: Vec, + pub(crate) tag: Tag, + pub(crate) nodes: Vec, +} + pub(crate) enum TagNode { Group(Tag, Vec), Leaf(Identifier), @@ -31,6 +46,9 @@ pub(crate) enum TagNode { Placeholder(Placeholder), } +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct LinkId(u32); + #[derive(Clone, Copy)] pub(crate) struct Placeholder(usize); @@ -42,6 +60,7 @@ impl Tags { in_artifact: None, tree: Vec::new(), + link_id: LinkId(0), } } @@ -64,14 +83,20 @@ impl Tags { .expect("initialized placeholder node") } - pub(crate) fn push(&mut self, node: TagNode) { - if let Some((_, _, nodes)) = self.stack.last_mut() { - nodes.push(node); + /// Returns the current parent's list of children and the structure type ([Tag]). + /// In case of the document root the structure type will be `None`. + pub(crate) fn parent(&mut self) -> (Option<&mut Tag>, &mut Vec) { + if let Some(entry) = self.stack.last_mut() { + (Some(&mut entry.tag), &mut entry.nodes) } else { - self.tree.push(node); + (None, &mut self.tree) } } + pub(crate) fn push(&mut self, node: TagNode) { + self.parent().1.push(node); + } + pub(crate) fn build_tree(&mut self) -> TagTree { let mut tree = TagTree::new(); let nodes = std::mem::take(&mut self.tree); @@ -98,73 +123,26 @@ impl Tags { } } - /// Returns the current parent's list of children and whether it is the tree root. 
- fn parent_nodes(&mut self) -> (bool, &mut Vec) { - if let Some((_, _, parent_nodes)) = self.stack.last_mut() { - (false, parent_nodes) - } else { - (true, &mut self.tree) - } + fn context_supports(&self, _tag: &Tag) -> bool { + // TODO: generate using: https://pdfa.org/resource/iso-ts-32005-hierarchical-inclusion-rules/ + true } - fn context_supports(&self, tag: &Tag) -> bool { - let Some((_, parent, _)) = self.stack.last() else { return true }; - - use Tag::*; - - match parent { - Part => true, - Article => !matches!(tag, Article), - Section => true, - BlockQuote => todo!(), - Caption => todo!(), - TOC => matches!(tag, TOC | TOCI), - // TODO: NonStruct is allowed to but (currently?) not supported by krilla - TOCI => matches!(tag, TOC | Lbl | Reference | P), - Index => todo!(), - P => todo!(), - H1(_) => todo!(), - H2(_) => todo!(), - H3(_) => todo!(), - H4(_) => todo!(), - H5(_) => todo!(), - H6(_) => todo!(), - L(_list_numbering) => todo!(), - LI => todo!(), - Lbl => todo!(), - LBody => todo!(), - Table => todo!(), - TR => todo!(), - TH(_table_header_scope) => todo!(), - TD => todo!(), - THead => todo!(), - TBody => todo!(), - TFoot => todo!(), - InlineQuote => todo!(), - Note => todo!(), - Reference => todo!(), - BibEntry => todo!(), - Code => todo!(), - Link => todo!(), - Annot => todo!(), - Figure(_) => todo!(), - Formula(_) => todo!(), - Datetime => todo!(), - Terms => todo!(), - Title => todo!(), - } + fn next_link_id(&mut self) -> LinkId { + self.link_id.0 += 1; + self.link_id } } /// Marked-content may not cross page boundaries: restart tag that was still open /// at the end of the last page. 
-pub(crate) fn restart(gc: &mut GlobalContext, surface: &mut Surface) { +pub(crate) fn restart_open(gc: &mut GlobalContext, surface: &mut Surface) { // TODO: somehow avoid empty marked-content sequences if let Some((_, kind)) = gc.tags.in_artifact { start_artifact(gc, surface, kind); - } else if let Some((_, _, nodes)) = gc.tags.stack.last_mut() { + } else if let Some(entry) = gc.tags.stack.last_mut() { let id = surface.start_tagged(ContentTag::Other); - nodes.push(TagNode::Leaf(id)); + entry.nodes.push(TagNode::Leaf(id)); } } @@ -179,11 +157,16 @@ pub(crate) fn end_open(gc: &mut GlobalContext, surface: &mut Surface) { pub(crate) fn add_annotations( gc: &mut GlobalContext, page: &mut Page, - annotations: Vec<(Placeholder, Annotation)>, + annotations: HashMap, ) { - for (placeholder, annotation) in annotations { - let annotation_id = page.add_tagged_annotation(annotation); - gc.tags.init_placeholder(placeholder, Node::Leaf(annotation_id)); + for annotation in annotations.into_values() { + let LinkAnnotation { placeholder, alt, rect, quad_points, target } = annotation; + let annot = krilla::annotation::Annotation::new_link( + krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target), + alt, + ); + let annot_id = page.add_tagged_annotation(annot); + gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id)); } } @@ -209,8 +192,10 @@ pub(crate) fn handle_start( return; } + let mut link_id = None; + let mut wrappers = Vec::new(); let tag = if let Some(heading) = elem.to_packed::() { - let level = heading.resolve_level(StyleChain::default()); + let level = heading.level(); let name = heading.body.plain_text().to_string(); match level.get() { 1 => Tag::H1(Some(name)), @@ -223,8 +208,14 @@ pub(crate) fn handle_start( } } else if let Some(_) = elem.to_packed::() { Tag::TOC - } else if let Some(_outline_entry) = elem.to_packed::() { + } else if let Some(_) = elem.to_packed::() { Tag::TOCI + } else if let Some(link) = elem.to_packed::() { + link_id = 
Some(gc.tags.next_link_id()); + if let Destination::Position(_) | Destination::Location(_) = link.dest { + wrappers.push(Tag::Reference); + } + Tag::Link } else { return; }; @@ -234,35 +225,43 @@ pub(crate) fn handle_start( } // close previous marked-content and open a nested tag. - if !gc.tags.stack.is_empty() { - surface.end_tagged(); - } + end_open(gc, surface); let id = surface.start_tagged(krilla::tagging::ContentTag::Other); - gc.tags.stack.push((loc, tag, vec![TagNode::Leaf(id)])); + gc.tags.stack.push(StackEntry { + loc, + link_id, + wrappers, + tag, + nodes: vec![TagNode::Leaf(id)], + }); } -pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: &Location) { - if let Some((l, _)) = &gc.tags.in_artifact { +pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Location) { + if let Some((l, _)) = gc.tags.in_artifact { if l == loc { gc.tags.in_artifact = None; surface.end_tagged(); - if let Some((_, _, nodes)) = gc.tags.stack.last_mut() { + if let Some(entry) = gc.tags.stack.last_mut() { let id = surface.start_tagged(ContentTag::Other); - nodes.push(TagNode::Leaf(id)); + entry.nodes.push(TagNode::Leaf(id)); } } return; } - let Some((_, tag, nodes)) = gc.tags.stack.pop_if(|(l, ..)| l == loc) else { + let Some(entry) = gc.tags.stack.pop_if(|e| e.loc == loc) else { return; }; surface.end_tagged(); - let (is_root, parent_nodes) = gc.tags.parent_nodes(); - parent_nodes.push(TagNode::Group(tag, nodes)); - if !is_root { + let (parent_tag, parent_nodes) = gc.tags.parent(); + let mut node = TagNode::Group(entry.tag, entry.nodes); + for tag in entry.wrappers { + node = TagNode::Group(tag, vec![node]); + } + parent_nodes.push(node); + if parent_tag.is_some() { // TODO: somehow avoid empty marked-content sequences let id = surface.start_tagged(ContentTag::Other); parent_nodes.push(TagNode::Leaf(id)); @@ -273,8 +272,7 @@ fn start_artifact(gc: &mut GlobalContext, surface: &mut Surface, kind: ArtifactK let ty = 
artifact_type(kind); let id = surface.start_tagged(ContentTag::Artifact(ty)); - let (_, parent_nodes) = gc.tags.parent_nodes(); - parent_nodes.push(TagNode::Leaf(id)); + gc.tags.push(TagNode::Leaf(id)); } fn artifact_type(kind: ArtifactKind) -> ArtifactType { diff --git a/tests/src/run.rs b/tests/src/run.rs index ce507d3c5..1d93ba392 100644 --- a/tests/src/run.rs +++ b/tests/src/run.rs @@ -535,7 +535,7 @@ fn render_links(canvas: &mut sk::Pixmap, ts: sk::Transform, frame: &Frame) { let ts = ts.pre_concat(to_sk_transform(&group.transform)); render_links(canvas, ts, &group.frame); } - FrameItem::Link(_, _, size) => { + FrameItem::Link(_, size) => { let w = size.x.to_pt() as f32; let h = size.y.to_pt() as f32; let rect = sk::Rect::from_xywh(0.0, 0.0, w, h).unwrap(); From 00c3b62f1d01ffe2e7d827114ec866ca862a52cd Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 20 Jun 2025 15:59:43 +0200 Subject: [PATCH 09/76] feat: write tags for more elements --- crates/typst-library/src/layout/repeat.rs | 1 + crates/typst-library/src/model/outline.rs | 1 + crates/typst-library/src/pdf/accessibility.rs | 2 +- crates/typst-pdf/src/tags.rs | 28 ++++++++++++++++++- 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/crates/typst-library/src/layout/repeat.rs b/crates/typst-library/src/layout/repeat.rs index ab042ceb1..ffc149bb2 100644 --- a/crates/typst-library/src/layout/repeat.rs +++ b/crates/typst-library/src/layout/repeat.rs @@ -25,6 +25,7 @@ use crate::layout::{BlockElem, Length}; /// Berlin, the 22nd of December, 2022 /// ] /// ``` +// TODO: should this be a PDF artifact by default? #[elem(Locatable, Show)] pub struct RepeatElem { /// The content to repeat. 
diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index e2c8650ca..9db263be1 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -418,6 +418,7 @@ impl Show for Packed { let context = Context::new(None, Some(styles)); let context = context.track(); + // TODO: prefix should be wrapped in a `Lbl` structure element let prefix = self.prefix(engine, context, span)?; let body = self.body().at(span)?; let page = self.page(engine, context, span)?; diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index 586e2cbb1..a0a0bb950 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -47,7 +47,7 @@ cast! { } impl Show for Packed { - #[typst_macros::time(name = "underline", span = self.span())] + #[typst_macros::time(name = "pdf.artifact", span = self.span())] fn show(&self, _: &mut Engine, _: StyleChain) -> SourceResult { Ok(self.body.clone()) } diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 92d3bfe78..28dc9ddec 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -9,9 +9,11 @@ use krilla::tagging::{ use typst_library::foundations::{Content, LinkMarker, StyleChain}; use typst_library::introspection::Location; use typst_library::model::{ - Destination, HeadingElem, Outlinable, OutlineElem, OutlineEntry, + Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineElem, + OutlineEntry, }; use typst_library::pdf::{ArtifactElem, ArtifactKind}; +use typst_library::visualize::ImageElem; use crate::convert::GlobalContext; use crate::link::LinkAnnotation; @@ -210,6 +212,30 @@ pub(crate) fn handle_start( Tag::TOC } else if let Some(_) = elem.to_packed::() { Tag::TOCI + } else if let Some(_) = elem.to_packed::() { + let alt = None; // TODO + Tag::Figure(alt) + } else if let Some(image) = 
elem.to_packed::() { + let alt = image.alt(StyleChain::default()).map(|s| s.to_string()); + + end_open(gc, surface); + let id = surface.start_tagged(ContentTag::Other); + let mut node = TagNode::Leaf(id); + + if let Some(Tag::Figure(alt_text)) = gc.tags.parent().0 { + // HACK: set alt text of outer figure tag, if the contained image + // has alt text specified + if alt_text.is_none() { + *alt_text = alt; + } + } else { + node = TagNode::Group(Tag::Figure(alt), vec![node]); + } + gc.tags.push(node); + + return; + } else if let Some(_) = elem.to_packed::() { + Tag::Caption } else if let Some(link) = elem.to_packed::() { link_id = Some(gc.tags.next_link_id()); if let Destination::Position(_) | Destination::Location(_) = link.dest { From ac6b9d60088ba12114e22806616c75e06b77b786 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 20 Jun 2025 16:01:04 +0200 Subject: [PATCH 10/76] feat: pdf.tag function to manually create pdf tags --- crates/typst-library/src/pdf/accessibility.rs | 183 +++++++++++++++++- crates/typst-library/src/pdf/mod.rs | 1 + crates/typst-pdf/src/tags.rs | 10 +- 3 files changed, 191 insertions(+), 3 deletions(-) diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index a0a0bb950..f5210476d 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -1,3 +1,4 @@ +use ecow::EcoString; use typst_macros::{cast, elem}; use crate::diag::SourceResult; @@ -6,9 +7,189 @@ use crate::foundations::{Content, Packed, Show, StyleChain}; use crate::introspection::Locatable; // TODO: docs +#[elem(Locatable, Show)] +pub struct PdfTagElem { + #[default(PdfTagKind::NonStruct)] + pub kind: PdfTagKind, + + /// An alternate description. + pub alt: Option, + /// Exact replacement for this structure element and its children. + pub actual_text: Option, + /// The expanded form of an abbreviation/acronym. + pub expansion: Option, + + /// The content to tag. 
+ #[required] + pub body: Content, +} + +impl Show for Packed { + #[typst_macros::time(name = "pdf.tag", span = self.span())] + fn show(&self, _: &mut Engine, _: StyleChain) -> SourceResult { + Ok(self.body.clone()) + } +} + +// TODO: docs +/// PDF structure elements +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum PdfTagKind { + // grouping elements + /// (Part) + Part, + /// (Article) + Art, + /// (Section) + Sect, + /// (Division) + Div, + /// (Block quotation) + BlockQuote, + /// (Caption) + Caption, + /// (Table of contents) + TOC, + /// (Table of contents item) + TOCI, + /// (Index) + Index, + /// (Nonstructural element) + NonStruct, + /// (Private element) + Private, + + // paragraph like elements + /// (Heading) + H { title: Option }, + /// (Heading level 1) + H1 { title: Option }, + /// (Heading level 2) + H2 { title: Option }, + /// (Heading level 4) + H4 { title: Option }, + /// (Heading level 3) + H3 { title: Option }, + /// (Heading level 5) + H5 { title: Option }, + /// (Heading level 6) + H6 { title: Option }, + /// (Paragraph) + P, + + // list elements + /// (List) + L { numbering: ListNumbering }, + /// (List item) + LI, + /// (Label) + Lbl, + /// (List body) + LBody, + + // table elements + /// (Table) + Table, + /// (Table row) + TR, + /// (Table header) + TH { scope: TableHeaderScope }, + /// (Table data cell) + TD, + /// (Table header row group) + THead, + /// (Table body row group) + TBody, + /// (Table footer row group) + TFoot, + + // inline elements + /// (Span) + Span, + /// (Quotation) + Quote, + /// (Note) + Note, + /// (Reference) + Reference, + /// (Bibliography Entry) + BibEntry, + /// (Code) + Code, + /// (Link) + Link, + /// (Annotation) + Annot, + + /// (Ruby) + Ruby, + /// (Ruby base text) + RB, + /// (Ruby annotation text) + RT, + /// (Ruby punctuation) + RP, + + /// (Warichu) + Warichu, + /// (Warichu text) + WT, + /// (Warichu punctuation) + WP, + + /// (Figure) + Figure, + /// (Formula) + Formula, + /// (Form) + Form, 
+} + +cast! { + PdfTagKind, + self => match self { + PdfTagKind::Part => "part".into_value(), + _ => todo!(), + }, + "part" => Self::Part, + // TODO +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum ListNumbering { + /// No numbering. + None, + /// Solid circular bullets. + Disc, + /// Open circular bullets. + Circle, + /// Solid square bullets. + Square, + /// Decimal numbers. + Decimal, + /// Lowercase Roman numerals. + LowerRoman, + /// Uppercase Roman numerals. + UpperRoman, + /// Lowercase letters. + LowerAlpha, + /// Uppercase letters. + UpperAlpha, +} + +/// The scope of a table header cell. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum TableHeaderScope { + /// The header cell refers to the row. + Row, + /// The header cell refers to the column. + Column, + /// The header cell refers to both the row and the column. + Both, +} /// Mark content as a PDF artifact. -/// TODO: also use to mark html elements with `aria-hidden="true"`? +/// TODO: maybe generalize this and use it to mark html elements with `aria-hidden="true"`? 
#[elem(Locatable, Show)] pub struct ArtifactElem { #[default(ArtifactKind::Other)] diff --git a/crates/typst-library/src/pdf/mod.rs b/crates/typst-library/src/pdf/mod.rs index 835cc69fe..952e7fe32 100644 --- a/crates/typst-library/src/pdf/mod.rs +++ b/crates/typst-library/src/pdf/mod.rs @@ -13,6 +13,7 @@ pub fn module() -> Module { let mut pdf = Scope::deduplicating(); pdf.start_category(crate::Category::Pdf); pdf.define_elem::(); + pdf.define_elem::(); pdf.define_elem::(); Module::new("pdf", pdf) } diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 28dc9ddec..94219b1a1 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -12,7 +12,7 @@ use typst_library::model::{ Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineElem, OutlineEntry, }; -use typst_library::pdf::{ArtifactElem, ArtifactKind}; +use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; use typst_library::visualize::ImageElem; use crate::convert::GlobalContext; @@ -196,7 +196,13 @@ pub(crate) fn handle_start( let mut link_id = None; let mut wrappers = Vec::new(); - let tag = if let Some(heading) = elem.to_packed::() { + let tag = if let Some(pdf_tag) = elem.to_packed::() { + let kind = pdf_tag.kind(StyleChain::default()); + match kind { + PdfTagKind::Part => Tag::Part, + _ => todo!(), + } + } else if let Some(heading) = elem.to_packed::() { let level = heading.level(); let name = heading.body.plain_text().to_string(); match level.get() { From 8075f551e269876339e6412730e7835029b70616 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 23 Jun 2025 18:42:13 +0200 Subject: [PATCH 11/76] feat: use local krilla version --- Cargo.lock | 2 - Cargo.toml | 4 +- crates/typst-pdf/src/convert.rs | 10 +++++ crates/typst-pdf/src/tags.rs | 66 ++++++++++++++++----------------- 4 files changed, 44 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 550c4141a..0ad90fb38 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -1367,7 +1367,6 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" -source = "git+https://github.com/LaurenzV/krilla?rev=20c14fe#20c14fefee5002566b3d6668b338bbe2168784e7" dependencies = [ "base64", "bumpalo", @@ -1395,7 +1394,6 @@ dependencies = [ [[package]] name = "krilla-svg" version = "0.1.0" -source = "git+https://github.com/LaurenzV/krilla?rev=20c14fe#20c14fefee5002566b3d6668b338bbe2168784e7" dependencies = [ "flate2", "fontdb", diff --git a/Cargo.toml b/Cargo.toml index 6cc59ee89..7029c389d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,8 +73,8 @@ image = { version = "0.25.5", default-features = false, features = ["png", "jpeg indexmap = { version = "2", features = ["serde"] } infer = { version = "0.19.0", default-features = false } kamadak-exif = "0.6" -krilla = { git = "https://github.com/LaurenzV/krilla", rev = "20c14fe", default-features = false, features = ["raster-images", "comemo", "rayon"] } -krilla-svg = { git = "https://github.com/LaurenzV/krilla", rev = "20c14fe" } +krilla = { path = "../krilla/crates/krilla", default-features = false, features = ["raster-images", "comemo", "rayon"] } +krilla-svg = { path = "../krilla/crates/krilla-svg" } kurbo = "0.11" libfuzzer-sys = "0.4" lipsum = "0.9" diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index abdb4c1dd..6f62cff58 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -588,6 +588,16 @@ fn convert_error( "{prefix} missing document date"; hint: "set the date of the document" ), + ValidationError::DuplicateTagId(_id, loc) => { + // TODO: display the id and better error message + let span = to_span(*loc); + error!(span, "{prefix} duplicate tag id") + } + ValidationError::UnknownHeaderTagId(_id, loc) => { + // TODO: display the id and better error message + let span = to_span(*loc); + error!(span, "{prefix} unknown header tag id") + } } } diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs 
index 94219b1a1..e36c15ef0 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -4,7 +4,8 @@ use std::collections::HashMap; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, ContentTag, Identifier, Node, Tag, TagGroup, TagTree, + ArtifactType, ContentTag, Identifier, Node, Tag, TagBuilder, TagGroup, TagKind, + TagTree, }; use typst_library::foundations::{Content, LinkMarker, StyleChain}; use typst_library::introspection::Location; @@ -100,25 +101,22 @@ impl Tags { } pub(crate) fn build_tree(&mut self) -> TagTree { - let mut tree = TagTree::new(); - let nodes = std::mem::take(&mut self.tree); - // PERF: collect into vec and construct TagTree directly from tag nodes. - for node in nodes.into_iter().map(|node| self.resolve_node(node)) { - tree.push(node); - } - tree + let children = std::mem::take(&mut self.tree) + .into_iter() + .map(|node| self.resolve_node(node)) + .collect::>(); + TagTree::from(children) } /// Resolves [`Placeholder`] nodes. fn resolve_node(&mut self, node: TagNode) -> Node { match node { TagNode::Group(tag, nodes) => { - let mut group = TagGroup::new(tag); - // PERF: collect into vec and construct TagTree directly from tag nodes. 
- for node in nodes.into_iter().map(|node| self.resolve_node(node)) { - group.push(node); - } - Node::Group(group) + let children = nodes + .into_iter() + .map(|node| self.resolve_node(node)) + .collect::>(); + Node::Group(TagGroup::with_children(tag, children)) } TagNode::Leaf(identifier) => Node::Leaf(identifier), TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder), @@ -196,31 +194,31 @@ pub(crate) fn handle_start( let mut link_id = None; let mut wrappers = Vec::new(); - let tag = if let Some(pdf_tag) = elem.to_packed::() { + let tag: Tag = if let Some(pdf_tag) = elem.to_packed::() { let kind = pdf_tag.kind(StyleChain::default()); match kind { - PdfTagKind::Part => Tag::Part, + PdfTagKind::Part => TagKind::Part.into(), _ => todo!(), } } else if let Some(heading) = elem.to_packed::() { let level = heading.level(); let name = heading.body.plain_text().to_string(); match level.get() { - 1 => Tag::H1(Some(name)), - 2 => Tag::H2(Some(name)), - 3 => Tag::H3(Some(name)), - 4 => Tag::H4(Some(name)), - 5 => Tag::H5(Some(name)), + 1 => TagKind::H1(Some(name)).into(), + 2 => TagKind::H2(Some(name)).into(), + 3 => TagKind::H3(Some(name)).into(), + 4 => TagKind::H4(Some(name)).into(), + 5 => TagKind::H5(Some(name)).into(), // TODO: when targeting PDF 2.0 headings `> 6` are supported - _ => Tag::H6(Some(name)), + _ => TagKind::H6(Some(name)).into(), } } else if let Some(_) = elem.to_packed::() { - Tag::TOC + TagKind::TOC.into() } else if let Some(_) = elem.to_packed::() { - Tag::TOCI + TagKind::TOCI.into() } else if let Some(_) = elem.to_packed::() { let alt = None; // TODO - Tag::Figure(alt) + TagKind::Figure.with_alt_text(alt) } else if let Some(image) = elem.to_packed::() { let alt = image.alt(StyleChain::default()).map(|s| s.to_string()); @@ -228,26 +226,26 @@ pub(crate) fn handle_start( let id = surface.start_tagged(ContentTag::Other); let mut node = TagNode::Leaf(id); - if let Some(Tag::Figure(alt_text)) = gc.tags.parent().0 { - // HACK: set alt text 
of outer figure tag, if the contained image - // has alt text specified - if alt_text.is_none() { - *alt_text = alt; + if let Some(parent) = gc.tags.parent().0 { + if parent.kind == TagKind::Figure && parent.alt_text.is_none() { + // HACK: set alt text of outer figure tag, if the contained image + // has alt text specified + parent.alt_text = alt; } } else { - node = TagNode::Group(Tag::Figure(alt), vec![node]); + node = TagNode::Group(TagKind::Figure.with_alt_text(alt), vec![node]); } gc.tags.push(node); return; } else if let Some(_) = elem.to_packed::() { - Tag::Caption + TagKind::Caption.into() } else if let Some(link) = elem.to_packed::() { link_id = Some(gc.tags.next_link_id()); if let Destination::Position(_) | Destination::Location(_) = link.dest { - wrappers.push(Tag::Reference); + wrappers.push(TagKind::Reference.into()); } - Tag::Link + TagKind::Link.into() } else { return; }; From 8231439b11d5543f3e63949c42003218d2677ec5 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Tue, 24 Jun 2025 12:45:42 +0200 Subject: [PATCH 12/76] feat: generate tags for tables --- crates/typst-library/src/model/table.rs | 4 + crates/typst-pdf/src/link.rs | 6 +- crates/typst-pdf/src/tags.rs | 199 ++++++++++++++++++------ 3 files changed, 162 insertions(+), 47 deletions(-) diff --git a/crates/typst-library/src/model/table.rs b/crates/typst-library/src/model/table.rs index 76ba500a2..a120423b3 100644 --- a/crates/typst-library/src/model/table.rs +++ b/crates/typst-library/src/model/table.rs @@ -1,6 +1,7 @@ use std::num::{NonZeroU32, NonZeroUsize}; use std::sync::Arc; +use ecow::EcoString; use typst_utils::NonZeroExt; use crate::diag::{bail, HintedStrResult, HintedString, SourceResult}; @@ -237,6 +238,9 @@ pub struct TableElem { #[default(Celled::Value(Sides::splat(Some(Abs::pt(5.0).into()))))] pub inset: Celled>>>, + // TODO: docs + pub summary: Option, + /// The contents of the table cells, plus any extra table lines specified /// with the [`table.hline`]($table.hline) and /// 
[`table.vline`]($table.vline) elements. diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index 2d360cfc3..d489df781 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -10,7 +10,7 @@ use typst_library::layout::{Abs, Point, Position, Size}; use typst_library::model::Destination; use crate::convert::{FrameContext, GlobalContext}; -use crate::tags::{Placeholder, TagNode}; +use crate::tags::{Placeholder, StackEntryKind, TagNode}; use crate::util::{AbsExt, PointExt}; pub(crate) struct LinkAnnotation { @@ -51,7 +51,9 @@ pub(crate) fn handle_link( }; let entry = gc.tags.stack.last_mut().expect("a link parent"); - let link_id = entry.link_id.expect("a link parent"); + let StackEntryKind::Link(link_id, _) = entry.kind else { + unreachable!("expected a link parent") + }; let rect = to_rect(fc, size); let quadpoints = quadpoints(rect); diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index e36c15ef0..0446eebce 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -1,17 +1,18 @@ use std::cell::OnceCell; use std::collections::HashMap; +use ecow::EcoString; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, ContentTag, Identifier, Node, Tag, TagBuilder, TagGroup, TagKind, - TagTree, + ArtifactType, ContentTag, Identifier, Node, TableCellSpan, TableDataCell, + TableHeaderCell, TableHeaderScope, Tag, TagBuilder, TagGroup, TagKind, TagTree, }; -use typst_library::foundations::{Content, LinkMarker, StyleChain}; +use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain}; use typst_library::introspection::Location; use typst_library::model::{ Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineElem, - OutlineEntry, + OutlineEntry, TableCell, TableElem, TableHLine, TableVLine, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; use typst_library::visualize::ImageElem; @@ -33,14 
+34,71 @@ pub(crate) struct Tags { pub(crate) struct StackEntry { pub(crate) loc: Location, - pub(crate) link_id: Option, - /// A list of tags that are wrapped around this tag when it is inserted into - /// the tag tree. - pub(crate) wrappers: Vec, - pub(crate) tag: Tag, + pub(crate) kind: StackEntryKind, pub(crate) nodes: Vec, } +pub(crate) enum StackEntryKind { + Standard(Tag), + Link(LinkId, Packed), + Table(TableCtx), + TableCell(Packed), +} + +pub(crate) struct TableCtx { + table: Packed, + rows: Vec, Tag, Vec)>>>, +} + +impl TableCtx { + fn insert(&mut self, cell: Packed, nodes: Vec) { + let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let rowspan = cell.rowspan(StyleChain::default()).get(); + let colspan = cell.colspan(StyleChain::default()).get(); + + // TODO: possibly set internal field on TableCell when resolving + // the cell grid. + let is_header = false; + let span = TableCellSpan { rows: rowspan as i32, cols: colspan as i32 }; + let tag = if is_header { + let scope = TableHeaderScope::Column; // TODO + TagKind::TH(TableHeaderCell::new(scope).with_span(span)) + } else { + TagKind::TD(TableDataCell::new().with_span(span)) + }; + + let required_height = y + rowspan; + if self.rows.len() < required_height { + self.rows.resize_with(required_height, Vec::new); + } + + let required_width = x + colspan; + let row = &mut self.rows[y]; + if row.len() < required_width { + row.resize_with(required_width, || None); + } + + row[x] = Some((cell, tag.into(), nodes)); + } + + fn build_table(self, mut nodes: Vec) -> Vec { + // Table layouting ensures that there are no overlapping cells, and that + // any gaps left by the user are filled with empty cells. 
+ for row in self.rows.into_iter() { + let mut row_nodes = Vec::new(); + for (_, tag, nodes) in row.into_iter().flatten() { + row_nodes.push(TagNode::Group(tag, nodes)); + } + + // TODO: generate `THead`, `TBody`, and `TFoot` + nodes.push(TagNode::Group(TagKind::TR.into(), row_nodes)); + } + + nodes + } +} + pub(crate) enum TagNode { Group(Tag, Vec), Leaf(Identifier), @@ -86,11 +144,15 @@ impl Tags { .expect("initialized placeholder node") } + pub(crate) fn is_root(&self) -> bool { + self.stack.is_empty() + } + /// Returns the current parent's list of children and the structure type ([Tag]). /// In case of the document root the structure type will be `None`. - pub(crate) fn parent(&mut self) -> (Option<&mut Tag>, &mut Vec) { + pub(crate) fn parent(&mut self) -> (Option<&mut StackEntryKind>, &mut Vec) { if let Some(entry) = self.stack.last_mut() { - (Some(&mut entry.tag), &mut entry.nodes) + (Some(&mut entry.kind), &mut entry.nodes) } else { (None, &mut self.tree) } @@ -123,7 +185,7 @@ impl Tags { } } - fn context_supports(&self, _tag: &Tag) -> bool { + fn context_supports(&self, _tag: &StackEntryKind) -> bool { // TODO: generate using: https://pdfa.org/resource/iso-ts-32005-hierarchical-inclusion-rules/ true } @@ -138,8 +200,8 @@ impl Tags { /// at the end of the last page. 
pub(crate) fn restart_open(gc: &mut GlobalContext, surface: &mut Surface) { // TODO: somehow avoid empty marked-content sequences - if let Some((_, kind)) = gc.tags.in_artifact { - start_artifact(gc, surface, kind); + if let Some((loc, kind)) = gc.tags.in_artifact { + start_artifact(gc, surface, loc, kind); } else if let Some(entry) = gc.tags.stack.last_mut() { let id = surface.start_tagged(ContentTag::Other); entry.nodes.push(TagNode::Leaf(id)); @@ -183,17 +245,12 @@ pub(crate) fn handle_start( let loc = elem.location().unwrap(); if let Some(artifact) = elem.to_packed::() { - if !gc.tags.stack.is_empty() { - surface.end_tagged(); - } + end_open(gc, surface); let kind = artifact.kind(StyleChain::default()); - start_artifact(gc, surface, kind); - gc.tags.in_artifact = Some((loc, kind)); + start_artifact(gc, surface, loc, kind); return; } - let mut link_id = None; - let mut wrappers = Vec::new(); let tag: Tag = if let Some(pdf_tag) = elem.to_packed::() { let kind = pdf_tag.kind(StyleChain::default()); match kind { @@ -226,11 +283,13 @@ pub(crate) fn handle_start( let id = surface.start_tagged(ContentTag::Other); let mut node = TagNode::Leaf(id); - if let Some(parent) = gc.tags.parent().0 { + if let Some(StackEntryKind::Standard(parent)) = gc.tags.parent().0 { if parent.kind == TagKind::Figure && parent.alt_text.is_none() { // HACK: set alt text of outer figure tag, if the contained image // has alt text specified parent.alt_text = alt; + } else { + node = TagNode::Group(TagKind::Figure.with_alt_text(alt), vec![node]); } } else { node = TagNode::Group(TagKind::Figure.with_alt_text(alt), vec![node]); @@ -241,29 +300,47 @@ pub(crate) fn handle_start( } else if let Some(_) = elem.to_packed::() { TagKind::Caption.into() } else if let Some(link) = elem.to_packed::() { - link_id = Some(gc.tags.next_link_id()); - if let Destination::Position(_) | Destination::Location(_) = link.dest { - wrappers.push(TagKind::Reference.into()); - } - TagKind::Link.into() + let link_id = 
gc.tags.next_link_id(); + push_stack(gc, surface, loc, StackEntryKind::Link(link_id, link.clone())); + return; + } else if let Some(table) = elem.to_packed::() { + let ctx = TableCtx { table: table.clone(), rows: Vec::new() }; + push_stack(gc, surface, loc, StackEntryKind::Table(ctx)); + return; + } else if let Some(cell) = elem.to_packed::() { + push_stack(gc, surface, loc, StackEntryKind::TableCell(cell.clone())); + return; + } else if let Some(_) = elem.to_packed::() { + end_open(gc, surface); + start_artifact(gc, surface, loc, ArtifactKind::Other); + return; + } else if let Some(_) = elem.to_packed::() { + end_open(gc, surface); + start_artifact(gc, surface, loc, ArtifactKind::Other); + return; } else { return; }; - if !gc.tags.context_supports(&tag) { + push_stack(gc, surface, loc, StackEntryKind::Standard(tag)); +} + +fn push_stack( + gc: &mut GlobalContext, + surface: &mut Surface, + loc: Location, + kind: StackEntryKind, +) { + if !gc.tags.context_supports(&kind) { // TODO: error or warning? } // close previous marked-content and open a nested tag. 
end_open(gc, surface); let id = surface.start_tagged(krilla::tagging::ContentTag::Other); - gc.tags.stack.push(StackEntry { - loc, - link_id, - wrappers, - tag, - nodes: vec![TagNode::Leaf(id)], - }); + gc.tags + .stack + .push(StackEntry { loc, kind, nodes: vec![TagNode::Leaf(id)] }); } pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Location) { @@ -285,24 +362,56 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc surface.end_tagged(); - let (parent_tag, parent_nodes) = gc.tags.parent(); - let mut node = TagNode::Group(entry.tag, entry.nodes); - for tag in entry.wrappers { - node = TagNode::Group(tag, vec![node]); - } - parent_nodes.push(node); - if parent_tag.is_some() { + let node = match entry.kind { + StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes), + StackEntryKind::Link(_, link) => { + let alt = link.alt.as_ref().map(EcoString::to_string); + let tag = TagKind::Link.with_alt_text(alt); + let mut node = TagNode::Group(tag, entry.nodes); + // Wrap link in reference tag, if it's not a url. 
+ if let Destination::Position(_) | Destination::Location(_) = link.dest { + node = TagNode::Group(TagKind::Reference.into(), vec![node]); + } + node + } + StackEntryKind::Table(ctx) => { + let summary = ctx.table.summary(StyleChain::default()).map(EcoString::into); + let nodes = ctx.build_table(entry.nodes); + TagNode::Group(TagKind::Table(summary).into(), nodes) + } + StackEntryKind::TableCell(cell) => { + let parent = gc.tags.stack.last_mut().expect("table"); + let StackEntryKind::Table(table_ctx) = &mut parent.kind else { + unreachable!("expected table") + }; + + table_ctx.insert(cell, entry.nodes); + + // TODO: somehow avoid empty marked-content sequences + let id = surface.start_tagged(ContentTag::Other); + gc.tags.push(TagNode::Leaf(id)); + return; + } + }; + + gc.tags.push(node); + if !gc.tags.is_root() { // TODO: somehow avoid empty marked-content sequences let id = surface.start_tagged(ContentTag::Other); - parent_nodes.push(TagNode::Leaf(id)); + gc.tags.push(TagNode::Leaf(id)); } } -fn start_artifact(gc: &mut GlobalContext, surface: &mut Surface, kind: ArtifactKind) { +fn start_artifact( + gc: &mut GlobalContext, + surface: &mut Surface, + loc: Location, + kind: ArtifactKind, +) { let ty = artifact_type(kind); let id = surface.start_tagged(ContentTag::Artifact(ty)); - gc.tags.push(TagNode::Leaf(id)); + gc.tags.in_artifact = Some((loc, kind)); } fn artifact_type(kind: ArtifactKind) -> ArtifactType { From e6341c0fe495b03603a2139b3826ff6f21470f0b Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 25 Jun 2025 14:51:25 +0200 Subject: [PATCH 13/76] fix: avoid empty marked-content sequences --- crates/typst-pdf/src/convert.rs | 20 ++--- crates/typst-pdf/src/image.rs | 3 + crates/typst-pdf/src/shape.rs | 5 +- crates/typst-pdf/src/tags.rs | 144 +++++++++++++------------------- crates/typst-pdf/src/text.rs | 5 +- 5 files changed, 77 insertions(+), 100 deletions(-) diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 
6f62cff58..372fd2f3a 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -13,7 +13,7 @@ use krilla::{Document, SerializeSettings}; use krilla_svg::render_svg_glyph; use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult}; use typst_library::foundations::{NativeElement, Repr}; -use typst_library::introspection::{self, Location}; +use typst_library::introspection::{Location, Tag}; use typst_library::layout::{ Abs, Frame, FrameItem, GroupItem, PagedDocument, Size, Transform, }; @@ -110,8 +110,6 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul let mut surface = page.surface(); let mut fc = FrameContext::new(typst_page.frame.size()); - tags::restart_open(gc, &mut surface); - handle_frame( &mut fc, &typst_page.frame, @@ -120,8 +118,6 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul gc, )?; - tags::end_open(gc, &mut surface); - surface.finish(); tags::add_annotations(gc, &mut page, fc.link_annotations); @@ -286,12 +282,8 @@ pub(crate) fn handle_frame( handle_image(gc, fc, image, *size, surface, *span)? 
} FrameItem::Link(link, size) => handle_link(fc, gc, link, *size), - FrameItem::Tag(introspection::Tag::Start(elem)) => { - tags::handle_start(gc, surface, elem) - } - FrameItem::Tag(introspection::Tag::End(loc, _)) => { - tags::handle_end(gc, surface, *loc); - } + FrameItem::Tag(Tag::Start(elem)) => tags::handle_start(gc, elem), + FrameItem::Tag(Tag::End(loc, _)) => tags::handle_end(gc, *loc), } fc.pop(); @@ -306,7 +298,7 @@ pub(crate) fn handle_group( fc: &mut FrameContext, group: &GroupItem, surface: &mut Surface, - context: &mut GlobalContext, + gc: &mut GlobalContext, ) -> SourceResult<()> { fc.push(); fc.state_mut().pre_concat(group.transform); @@ -322,10 +314,12 @@ pub(crate) fn handle_group( .and_then(|p| p.transform(fc.state().transform.to_krilla())); if let Some(clip_path) = &clip_path { + let mut handle = tags::start_marked(gc, surface); + let surface = handle.surface(); surface.push_clip_path(clip_path, &krilla::paint::FillRule::NonZero); } - handle_frame(fc, &group.frame, None, surface, context)?; + handle_frame(fc, &group.frame, None, surface, gc)?; if clip_path.is_some() { surface.pop(); diff --git a/crates/typst-pdf/src/image.rs b/crates/typst-pdf/src/image.rs index 0809ae046..41d0aa3d8 100644 --- a/crates/typst-pdf/src/image.rs +++ b/crates/typst-pdf/src/image.rs @@ -14,6 +14,7 @@ use typst_library::visualize::{ use typst_syntax::Span; use crate::convert::{FrameContext, GlobalContext}; +use crate::tags; use crate::util::{SizeExt, TransformExt}; #[typst_macros::time(name = "handle image")] @@ -32,6 +33,8 @@ pub(crate) fn handle_image( gc.image_spans.insert(span); + let mut handle = tags::start_marked(gc, surface); + let surface = handle.surface(); match image.kind() { ImageKind::Raster(raster) => { let (exif_transform, new_size) = exif_transform(raster, size); diff --git a/crates/typst-pdf/src/shape.rs b/crates/typst-pdf/src/shape.rs index 5b9232dbe..3b52939da 100644 --- a/crates/typst-pdf/src/shape.rs +++ b/crates/typst-pdf/src/shape.rs @@ -5,8 
+5,8 @@ use typst_library::visualize::{Geometry, Shape}; use typst_syntax::Span; use crate::convert::{FrameContext, GlobalContext}; -use crate::paint; use crate::util::{convert_path, AbsExt, TransformExt}; +use crate::{paint, tags}; #[typst_macros::time(name = "handle shape")] pub(crate) fn handle_shape( @@ -16,6 +16,9 @@ pub(crate) fn handle_shape( gc: &mut GlobalContext, span: Span, ) -> SourceResult<()> { + let mut handle = tags::start_marked(gc, surface); + let surface = handle.surface(); + surface.set_location(span.into_raw().get()); surface.push_transform(&fc.state().transform().to_krilla()); diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 0446eebce..26776d090 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -45,6 +45,16 @@ pub(crate) enum StackEntryKind { TableCell(Packed), } +impl StackEntryKind { + pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> { + if let Self::Standard(v) = self { + Some(v) + } else { + None + } + } +} + pub(crate) struct TableCtx { table: Packed, rows: Vec, Tag, Vec)>>>, @@ -144,10 +154,6 @@ impl Tags { .expect("initialized placeholder node") } - pub(crate) fn is_root(&self) -> bool { - self.stack.is_empty() - } - /// Returns the current parent's list of children and the structure type ([Tag]). /// In case of the document root the structure type will be `None`. pub(crate) fn parent(&mut self) -> (Option<&mut StackEntryKind>, &mut Vec) { @@ -196,25 +202,40 @@ impl Tags { } } -/// Marked-content may not cross page boundaries: restart tag that was still open -/// at the end of the last page. 
-pub(crate) fn restart_open(gc: &mut GlobalContext, surface: &mut Surface) { - // TODO: somehow avoid empty marked-content sequences - if let Some((loc, kind)) = gc.tags.in_artifact { - start_artifact(gc, surface, loc, kind); - } else if let Some(entry) = gc.tags.stack.last_mut() { - let id = surface.start_tagged(ContentTag::Other); - entry.nodes.push(TagNode::Leaf(id)); +/// Automatically calls [`Surface::end_tagged`] when dropped. +pub(crate) struct TagHandle<'a, 'b> { + surface: &'b mut Surface<'a>, +} + +impl Drop for TagHandle<'_, '_> { + fn drop(&mut self) { + self.surface.end_tagged(); } } -/// Marked-content may not cross page boundaries: end any open tag. -pub(crate) fn end_open(gc: &mut GlobalContext, surface: &mut Surface) { - if !gc.tags.stack.is_empty() || gc.tags.in_artifact.is_some() { - surface.end_tagged(); +impl<'a> TagHandle<'a, '_> { + pub(crate) fn surface<'c>(&'c mut self) -> &'c mut Surface<'a> { + &mut self.surface } } +/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] +/// when dropped. +pub(crate) fn start_marked<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, +) -> TagHandle<'a, 'b> { + let content = if let Some((_, kind)) = gc.tags.in_artifact { + let ty = artifact_type(kind); + ContentTag::Artifact(ty) + } else { + ContentTag::Other + }; + let id = surface.start_tagged(content); + gc.tags.push(TagNode::Leaf(id)); + TagHandle { surface } +} + /// Add all annotations that were found in the page frame. 
pub(crate) fn add_annotations( gc: &mut GlobalContext, @@ -232,11 +253,7 @@ pub(crate) fn add_annotations( } } -pub(crate) fn handle_start( - gc: &mut GlobalContext, - surface: &mut Surface, - elem: &Content, -) { +pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { if gc.tags.in_artifact.is_some() { // Don't nest artifacts return; @@ -245,9 +262,8 @@ pub(crate) fn handle_start( let loc = elem.location().unwrap(); if let Some(artifact) = elem.to_packed::() { - end_open(gc, surface); let kind = artifact.kind(StyleChain::default()); - start_artifact(gc, surface, loc, kind); + start_artifact(gc, loc, kind); return; } @@ -279,79 +295,55 @@ pub(crate) fn handle_start( } else if let Some(image) = elem.to_packed::() { let alt = image.alt(StyleChain::default()).map(|s| s.to_string()); - end_open(gc, surface); - let id = surface.start_tagged(ContentTag::Other); - let mut node = TagNode::Leaf(id); - - if let Some(StackEntryKind::Standard(parent)) = gc.tags.parent().0 { - if parent.kind == TagKind::Figure && parent.alt_text.is_none() { - // HACK: set alt text of outer figure tag, if the contained image - // has alt text specified - parent.alt_text = alt; - } else { - node = TagNode::Group(TagKind::Figure.with_alt_text(alt), vec![node]); - } + let figure_tag = (gc.tags.parent().0) + .and_then(|parent| parent.as_standard_mut()) + .filter(|tag| tag.kind == TagKind::Figure && tag.alt_text.is_none()); + if let Some(figure_tag) = figure_tag { + // HACK: set alt text of outer figure tag, if the contained image + // has alt text specified + figure_tag.alt_text = alt; + return; } else { - node = TagNode::Group(TagKind::Figure.with_alt_text(alt), vec![node]); + TagKind::Figure.with_alt_text(alt) } - gc.tags.push(node); - - return; } else if let Some(_) = elem.to_packed::() { TagKind::Caption.into() } else if let Some(link) = elem.to_packed::() { let link_id = gc.tags.next_link_id(); - push_stack(gc, surface, loc, StackEntryKind::Link(link_id, link.clone())); + 
push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone())); return; } else if let Some(table) = elem.to_packed::() { let ctx = TableCtx { table: table.clone(), rows: Vec::new() }; - push_stack(gc, surface, loc, StackEntryKind::Table(ctx)); + push_stack(gc, loc, StackEntryKind::Table(ctx)); return; } else if let Some(cell) = elem.to_packed::() { - push_stack(gc, surface, loc, StackEntryKind::TableCell(cell.clone())); + push_stack(gc, loc, StackEntryKind::TableCell(cell.clone())); return; } else if let Some(_) = elem.to_packed::() { - end_open(gc, surface); - start_artifact(gc, surface, loc, ArtifactKind::Other); + start_artifact(gc, loc, ArtifactKind::Other); return; } else if let Some(_) = elem.to_packed::() { - end_open(gc, surface); - start_artifact(gc, surface, loc, ArtifactKind::Other); + start_artifact(gc, loc, ArtifactKind::Other); return; } else { return; }; - push_stack(gc, surface, loc, StackEntryKind::Standard(tag)); + push_stack(gc, loc, StackEntryKind::Standard(tag)); } -fn push_stack( - gc: &mut GlobalContext, - surface: &mut Surface, - loc: Location, - kind: StackEntryKind, -) { +fn push_stack(gc: &mut GlobalContext, loc: Location, kind: StackEntryKind) { if !gc.tags.context_supports(&kind) { // TODO: error or warning? } - // close previous marked-content and open a nested tag. 
- end_open(gc, surface); - let id = surface.start_tagged(krilla::tagging::ContentTag::Other); - gc.tags - .stack - .push(StackEntry { loc, kind, nodes: vec![TagNode::Leaf(id)] }); + gc.tags.stack.push(StackEntry { loc, kind, nodes: Vec::new() }); } -pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Location) { +pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { if let Some((l, _)) = gc.tags.in_artifact { if l == loc { gc.tags.in_artifact = None; - surface.end_tagged(); - if let Some(entry) = gc.tags.stack.last_mut() { - let id = surface.start_tagged(ContentTag::Other); - entry.nodes.push(TagNode::Leaf(id)); - } } return; } @@ -360,8 +352,6 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc return; }; - surface.end_tagged(); - let node = match entry.kind { StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes), StackEntryKind::Link(_, link) => { @@ -387,30 +377,14 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc table_ctx.insert(cell, entry.nodes); - // TODO: somehow avoid empty marked-content sequences - let id = surface.start_tagged(ContentTag::Other); - gc.tags.push(TagNode::Leaf(id)); return; } }; gc.tags.push(node); - if !gc.tags.is_root() { - // TODO: somehow avoid empty marked-content sequences - let id = surface.start_tagged(ContentTag::Other); - gc.tags.push(TagNode::Leaf(id)); - } } -fn start_artifact( - gc: &mut GlobalContext, - surface: &mut Surface, - loc: Location, - kind: ArtifactKind, -) { - let ty = artifact_type(kind); - let id = surface.start_tagged(ContentTag::Artifact(ty)); - gc.tags.push(TagNode::Leaf(id)); +fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) { gc.tags.in_artifact = Some((loc, kind)); } diff --git a/crates/typst-pdf/src/text.rs b/crates/typst-pdf/src/text.rs index 9876927d0..9a12de969 100644 --- a/crates/typst-pdf/src/text.rs +++ b/crates/typst-pdf/src/text.rs @@ -11,8 +11,8 @@ use 
typst_library::visualize::FillRule; use typst_syntax::Span; use crate::convert::{FrameContext, GlobalContext}; -use crate::paint; use crate::util::{display_font, AbsExt, TransformExt}; +use crate::{paint, tags}; #[typst_macros::time(name = "handle text")] pub(crate) fn handle_text( @@ -23,6 +23,9 @@ pub(crate) fn handle_text( ) -> SourceResult<()> { *gc.languages.entry(t.lang).or_insert(0) += t.glyphs.len(); + let mut handle = tags::start_marked(gc, surface); + let surface = handle.surface(); + let font = convert_font(gc, t.font.clone())?; let fill = paint::convert_fill( gc, From 2d6e3b615109c9949f1268ad87b291380e35a708 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 25 Jun 2025 14:54:16 +0200 Subject: [PATCH 14/76] refactor: derive(Cast) for ArtifactKind --- crates/typst-library/src/pdf/accessibility.rs | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index f5210476d..086a172bf 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -1,5 +1,5 @@ use ecow::EcoString; -use typst_macros::{cast, elem}; +use typst_macros::{cast, elem, Cast}; use crate::diag::SourceResult; use crate::engine::Engine; @@ -200,7 +200,7 @@ pub struct ArtifactElem { pub body: Content, } -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, Cast)] pub enum ArtifactKind { /// Page header artifacts. Header, @@ -213,20 +213,6 @@ pub enum ArtifactKind { Other, } -cast! 
{ - ArtifactKind, - self => match self { - ArtifactKind::Header => "header".into_value(), - ArtifactKind::Footer => "footer".into_value(), - ArtifactKind::Page => "page".into_value(), - ArtifactKind::Other => "other".into_value(), - }, - "header" => Self::Header, - "footer" => Self::Footer, - "page" => Self::Page, - "other" => Self::Other, -} - impl Show for Packed { #[typst_macros::time(name = "pdf.artifact", span = self.span())] fn show(&self, _: &mut Engine, _: StyleChain) -> SourceResult { From 4894a227d2fa96e88237f651ea34a2ca00d781ed Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 25 Jun 2025 15:08:08 +0200 Subject: [PATCH 15/76] refactor: revert some changes to FrameItem::Link --- crates/typst-ide/src/jump.rs | 4 ++-- crates/typst-layout/src/modifiers.rs | 17 ++++++++--------- crates/typst-library/src/foundations/content.rs | 12 +++--------- crates/typst-library/src/layout/frame.rs | 7 ++++--- crates/typst-library/src/model/link.rs | 5 +++++ crates/typst-pdf/src/convert.rs | 2 +- crates/typst-pdf/src/link.rs | 12 ++++++------ 7 files changed, 29 insertions(+), 30 deletions(-) diff --git a/crates/typst-ide/src/jump.rs b/crates/typst-ide/src/jump.rs index 1c66cb785..b29bc4a48 100644 --- a/crates/typst-ide/src/jump.rs +++ b/crates/typst-ide/src/jump.rs @@ -36,9 +36,9 @@ pub fn jump_from_click( ) -> Option { // Try to find a link first. 
for (pos, item) in frame.items() { - if let FrameItem::Link(link, size) = item { + if let FrameItem::Link(dest, size) = item { if is_in_rect(*pos, *size, click) { - return Some(match &link.dest { + return Some(match dest { Destination::Url(url) => Jump::Url(url.clone()), Destination::Position(pos) => Jump::Position(*pos), Destination::Location(loc) => { diff --git a/crates/typst-layout/src/modifiers.rs b/crates/typst-layout/src/modifiers.rs index 00d42e42c..b0371d63e 100644 --- a/crates/typst-layout/src/modifiers.rs +++ b/crates/typst-layout/src/modifiers.rs @@ -1,6 +1,6 @@ -use typst_library::foundations::{LinkMarker, Packed, StyleChain}; +use typst_library::foundations::StyleChain; use typst_library::layout::{Abs, Fragment, Frame, FrameItem, HideElem, Point, Sides}; -use typst_library::model::ParElem; +use typst_library::model::{Destination, LinkElem, ParElem}; /// Frame-level modifications resulting from styles that do not impose any /// layout structure. @@ -20,7 +20,7 @@ use typst_library::model::ParElem; #[derive(Debug, Clone)] pub struct FrameModifiers { /// A destination to link to. - link: Option>, + dest: Option, /// Whether the contents of the frame should be hidden. hidden: bool, } @@ -28,9 +28,8 @@ pub struct FrameModifiers { impl FrameModifiers { /// Retrieve all modifications that should be applied per-frame. 
pub fn get_in(styles: StyleChain) -> Self { - // TODO: maybe verify that an alt text was provided here Self { - link: LinkMarker::current_in(styles), + dest: LinkElem::current_in(styles), hidden: HideElem::hidden_in(styles), } } @@ -95,7 +94,7 @@ fn modify_frame( modifiers: &FrameModifiers, link_box_outset: Option>, ) { - if let Some(link) = &modifiers.link { + if let Some(dest) = &modifiers.dest { let mut pos = Point::zero(); let mut size = frame.size(); if let Some(outset) = link_box_outset { @@ -103,7 +102,7 @@ fn modify_frame( pos.x -= outset.left; size += outset.sum_by_axis(); } - frame.push(pos, FrameItem::Link(link.clone(), size)); + frame.push(pos, FrameItem::Link(dest.clone(), size)); } if modifiers.hidden { @@ -130,8 +129,8 @@ where let reset; let outer = styles; let mut styles = styles; - if modifiers.link.is_some() { - reset = LinkMarker::set_current(None).wrap(); + if modifiers.dest.is_some() { + reset = LinkElem::set_current(None).wrap(); styles = outer.chain(&reset); } diff --git a/crates/typst-library/src/foundations/content.rs b/crates/typst-library/src/foundations/content.rs index 518deca75..5ecf514a2 100644 --- a/crates/typst-library/src/foundations/content.rs +++ b/crates/typst-library/src/foundations/content.rs @@ -21,7 +21,7 @@ use crate::foundations::{ }; use crate::introspection::{Locatable, Location}; use crate::layout::{AlignElem, Alignment, Axes, Length, MoveElem, PadElem, Rel, Sides}; -use crate::model::{Destination, EmphElem, StrongElem}; +use crate::model::{Destination, EmphElem, LinkElem, StrongElem}; use crate::pdf::{ArtifactElem, ArtifactKind}; use crate::text::UnderlineElem; @@ -506,11 +506,10 @@ impl Content { /// Link the content somewhere. 
pub fn linked(self, dest: Destination, alt: Option) -> Self { let span = self.span(); - let link = Packed::new(LinkMarker::new(self, dest, alt)); - link.clone() + LinkMarker::new(self, dest.clone(), alt) .pack() .spanned(span) - .styled(LinkMarker::set_current(Some(link))) + .styled(LinkElem::set_current(Some(dest))) } /// Set alignments for this content. @@ -1002,11 +1001,6 @@ pub struct LinkMarker { pub dest: Destination, #[required] pub alt: Option, - - /// A link style that should be applied to elements. - #[internal] - #[ghost] - pub current: Option>, } impl Show for Packed { diff --git a/crates/typst-library/src/layout/frame.rs b/crates/typst-library/src/layout/frame.rs index 71bb9aa1b..a26a7d0ef 100644 --- a/crates/typst-library/src/layout/frame.rs +++ b/crates/typst-library/src/layout/frame.rs @@ -7,9 +7,10 @@ use std::sync::Arc; use typst_syntax::Span; use typst_utils::{LazyHash, Numeric}; -use crate::foundations::{cast, dict, Dict, Label, LinkMarker, Packed, Value}; +use crate::foundations::{cast, dict, Dict, Label, Value}; use crate::introspection::{Location, Tag}; use crate::layout::{Abs, Axes, FixedAlignment, Length, Point, Size, Transform}; +use crate::model::Destination; use crate::text::TextItem; use crate::visualize::{Color, Curve, FixedStroke, Geometry, Image, Paint, Shape}; @@ -472,7 +473,7 @@ pub enum FrameItem { /// An image and its size. Image(Image, Size, Span), /// An internal or external link to a destination. - Link(Packed, Size), + Link(Destination, Size), /// An introspectable element that produced something within this frame. 
Tag(Tag), } @@ -484,7 +485,7 @@ impl Debug for FrameItem { Self::Text(text) => write!(f, "{text:?}"), Self::Shape(shape, _) => write!(f, "{shape:?}"), Self::Image(image, _, _) => write!(f, "{image:?}"), - Self::Link(link, _) => write!(f, "Link({:?}, {:?})", link.dest, link.alt), + Self::Link(dest, _) => write!(f, "Link({dest:?})"), Self::Tag(tag) => write!(f, "{tag:?}"), } } diff --git a/crates/typst-library/src/model/link.rs b/crates/typst-library/src/model/link.rs index 34ce14877..f42032878 100644 --- a/crates/typst-library/src/model/link.rs +++ b/crates/typst-library/src/model/link.rs @@ -91,6 +91,11 @@ pub struct LinkElem { _ => args.expect("body")?, })] pub body: Content, + + /// A destination style that should be applied to elements. + #[internal] + #[ghost] + pub current: Option, } impl LinkElem { diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 372fd2f3a..38abc8fd1 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -281,7 +281,7 @@ pub(crate) fn handle_frame( FrameItem::Image(image, size, span) => { handle_image(gc, fc, image, *size, surface, *span)? 
} - FrameItem::Link(link, size) => handle_link(fc, gc, link, *size), + FrameItem::Link(dest, size) => handle_link(fc, gc, dest, *size), FrameItem::Tag(Tag::Start(elem)) => tags::handle_start(gc, elem), FrameItem::Tag(Tag::End(loc, _)) => tags::handle_end(gc, *loc), } diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index d489df781..e0df6a58d 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -5,7 +5,6 @@ use krilla::action::{Action, LinkAction}; use krilla::annotation::Target; use krilla::destination::XyzDestination; use krilla::geom as kg; -use typst_library::foundations::LinkMarker; use typst_library::layout::{Abs, Point, Position, Size}; use typst_library::model::Destination; @@ -24,10 +23,10 @@ pub(crate) struct LinkAnnotation { pub(crate) fn handle_link( fc: &mut FrameContext, gc: &mut GlobalContext, - link: &LinkMarker, + dest: &Destination, size: Size, ) { - let target = match &link.dest { + let target = match dest { Destination::Url(u) => { Target::Action(Action::Link(LinkAction::new(u.to_string()))) } @@ -51,14 +50,15 @@ pub(crate) fn handle_link( }; let entry = gc.tags.stack.last_mut().expect("a link parent"); - let StackEntryKind::Link(link_id, _) = entry.kind else { + let StackEntryKind::Link(link_id, link) = &entry.kind else { unreachable!("expected a link parent") }; + let alt = link.alt.as_ref().map(EcoString::to_string); let rect = to_rect(fc, size); let quadpoints = quadpoints(rect); - match fc.link_annotations.entry(link_id) { + match fc.link_annotations.entry(*link_id) { Entry::Occupied(occupied) => { // Update the bounding box and add the quadpoints of an existing link annotation. 
let annotation = occupied.into_mut(); @@ -73,7 +73,7 @@ pub(crate) fn handle_link( placeholder, rect, quad_points: quadpoints.to_vec(), - alt: link.alt.as_ref().map(EcoString::to_string), + alt, target, }); } From 0d35ae28ad84ba18114ed7ba4ec0bf620ae3d942 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 25 Jun 2025 16:22:08 +0200 Subject: [PATCH 16/76] feat: add cli args for PDF/UA-1 standard and to disable tagging --- crates/typst-cli/src/args.rs | 10 ++++++++++ crates/typst-cli/src/compile.rs | 5 +++++ crates/typst-pdf/src/convert.rs | 11 ++++------- crates/typst-pdf/src/lib.rs | 9 +++++++++ 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/crates/typst-cli/src/args.rs b/crates/typst-cli/src/args.rs index c3fd541ad..cb708028e 100644 --- a/crates/typst-cli/src/args.rs +++ b/crates/typst-cli/src/args.rs @@ -246,6 +246,13 @@ pub struct CompileArgs { #[arg(long = "pdf-standard", value_delimiter = ',')] pub pdf_standard: Vec, + /// By default, even when not producing a `PDF/UA-1` document, a tagged PDF + /// document is written to provide a baseline of accessibility. In some + /// circumstances (for example when trying to reduce the size of a document) + /// it can be desirable to disable tagged PDF. + #[arg(long = "disable-pdf-tags")] + pub disable_pdf_tags: bool, + /// The PPI (pixels per inch) to use for PNG export. #[arg(long = "ppi", default_value_t = 144.0)] pub ppi: f32, @@ -506,6 +513,9 @@ pub enum PdfStandard { /// PDF/A-4e. #[value(name = "a-4e")] A_4e, + /// PDF/UA-1. + #[value(name = "ua-1")] + Ua_1, } display_possible_values!(PdfStandard); diff --git a/crates/typst-cli/src/compile.rs b/crates/typst-cli/src/compile.rs index 207bb7d09..8e420eecf 100644 --- a/crates/typst-cli/src/compile.rs +++ b/crates/typst-cli/src/compile.rs @@ -65,6 +65,8 @@ pub struct CompileConfig { pub open: Option>, /// A list of standards the PDF should conform to. pub pdf_standards: PdfStandards, + /// Whether to write PDF (accessibility) tags. 
+ pub disable_pdf_tags: bool, /// A path to write a Makefile rule describing the current compilation. pub make_deps: Option, /// The PPI (pixels per inch) to use for PNG export. @@ -150,6 +152,7 @@ impl CompileConfig { output_format, pages, pdf_standards, + disable_pdf_tags: args.disable_pdf_tags, creation_timestamp: args.world.creation_timestamp, make_deps: args.make_deps.clone(), ppi: args.ppi, @@ -291,6 +294,7 @@ fn export_pdf(document: &PagedDocument, config: &CompileConfig) -> SourceResult< timestamp, page_ranges: config.pages.clone(), standards: config.pdf_standards.clone(), + disable_tags: config.disable_pdf_tags, }; let buffer = typst_pdf::pdf(document, &options)?; config @@ -773,6 +777,7 @@ impl From for typst_pdf::PdfStandard { PdfStandard::A_4 => typst_pdf::PdfStandard::A_4, PdfStandard::A_4f => typst_pdf::PdfStandard::A_4f, PdfStandard::A_4e => typst_pdf::PdfStandard::A_4e, + PdfStandard::Ua_1 => typst_pdf::PdfStandard::Ua_1, } } } diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 38abc8fd1..3f24494bb 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -39,17 +39,14 @@ pub fn convert( typst_document: &PagedDocument, options: &PdfOptions, ) -> SourceResult> { - // HACK - let config = Configuration::new_with_validator(Validator::UA1); let settings = SerializeSettings { - compress_content_streams: false, // true, + compress_content_streams: true, no_device_cs: true, - ascii_compatible: true, // false, + ascii_compatible: false, xmp_metadata: true, cmyk_profile: None, - configuration: config, // options.standards.config, - // TODO: allow opting out of tagging PDFs - enable_tagging: true, + configuration: options.standards.config, + enable_tagging: !options.disable_tags, render_svg_glyph_fn: render_svg_glyph, }; diff --git a/crates/typst-pdf/src/lib.rs b/crates/typst-pdf/src/lib.rs index c3835d247..45805b07b 100644 --- a/crates/typst-pdf/src/lib.rs +++ b/crates/typst-pdf/src/lib.rs @@ -54,6 
+54,11 @@ pub struct PdfOptions<'a> { pub page_ranges: Option, /// A list of PDF standards that Typst will enforce conformance with. pub standards: PdfStandards, + /// By default, even when not producing a `PDF/UA-1` document, a tagged PDF + /// document is written to provide a baseline of accessibility. In some + /// circumstances, for example when trying to reduce the size of a document, + /// it can be desirable to disable tagged PDF. + pub disable_tags: bool, } /// Encapsulates a list of compatible PDF standards. @@ -105,6 +110,7 @@ impl PdfStandards { PdfStandard::A_4 => set_validator(Validator::A4)?, PdfStandard::A_4f => set_validator(Validator::A4F)?, PdfStandard::A_4e => set_validator(Validator::A4E)?, + PdfStandard::Ua_1 => set_validator(Validator::UA1)?, } } @@ -188,4 +194,7 @@ pub enum PdfStandard { /// PDF/A-4e. #[serde(rename = "a-4e")] A_4e, + /// PDF/UA-1. + #[serde(rename = "ua-1")] + Ua_1, } From 5bd9accb9ce286ab94a115f68c371d930417cf3e Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 25 Jun 2025 16:43:31 +0200 Subject: [PATCH 17/76] feat: always write alt text in marked content sequence for images --- crates/typst-pdf/src/image.rs | 4 ++- crates/typst-pdf/src/tags.rs | 50 ++++++++++++++++++++++++----------- 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/crates/typst-pdf/src/image.rs b/crates/typst-pdf/src/image.rs index 41d0aa3d8..832d3e15c 100644 --- a/crates/typst-pdf/src/image.rs +++ b/crates/typst-pdf/src/image.rs @@ -4,6 +4,7 @@ use std::sync::{Arc, OnceLock}; use image::{DynamicImage, EncodableLayout, GenericImageView, Rgba}; use krilla::image::{BitsPerComponent, CustomImage, ImageColorspace}; use krilla::surface::Surface; +use krilla::tagging::SpanTag; use krilla_svg::{SurfaceExt, SvgSettings}; use typst_library::diag::{bail, SourceResult}; use typst_library::foundations::Smart; @@ -33,7 +34,8 @@ pub(crate) fn handle_image( gc.image_spans.insert(span); - let mut handle = tags::start_marked(gc, surface); + let mut 
handle = + tags::start_span(gc, surface, SpanTag::empty().with_alt_text(image.alt())); let surface = handle.surface(); match image.kind() { ImageKind::Raster(raster) => { diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 26776d090..8b27f7f58 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -5,7 +5,7 @@ use ecow::EcoString; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, ContentTag, Identifier, Node, TableCellSpan, TableDataCell, + ArtifactType, ContentTag, Identifier, Node, SpanTag, TableCellSpan, TableDataCell, TableHeaderCell, TableHeaderScope, Tag, TagBuilder, TagGroup, TagKind, TagTree, }; use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain}; @@ -156,16 +156,16 @@ impl Tags { /// Returns the current parent's list of children and the structure type ([Tag]). /// In case of the document root the structure type will be `None`. - pub(crate) fn parent(&mut self) -> (Option<&mut StackEntryKind>, &mut Vec) { - if let Some(entry) = self.stack.last_mut() { - (Some(&mut entry.kind), &mut entry.nodes) - } else { - (None, &mut self.tree) - } + pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> { + self.stack.last_mut().map(|e| &mut e.kind) } pub(crate) fn push(&mut self, node: TagNode) { - self.parent().1.push(node); + if let Some(entry) = self.stack.last_mut() { + entry.nodes.push(node); + } else { + self.tree.push(node); + } } pub(crate) fn build_tree(&mut self) -> TagTree { @@ -224,12 +224,30 @@ impl<'a> TagHandle<'a, '_> { pub(crate) fn start_marked<'a, 'b>( gc: &mut GlobalContext, surface: &'b mut Surface<'a>, +) -> TagHandle<'a, 'b> { + start_content(gc, surface, ContentTag::Other) +} + +/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] +/// when dropped. 
+pub(crate) fn start_span<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, + span: SpanTag, +) -> TagHandle<'a, 'b> { + start_content(gc, surface, ContentTag::Span(span)) +} + +fn start_content<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, + content: ContentTag, ) -> TagHandle<'a, 'b> { let content = if let Some((_, kind)) = gc.tags.in_artifact { let ty = artifact_type(kind); ContentTag::Artifact(ty) } else { - ContentTag::Other + content }; let id = surface.start_tagged(content); gc.tags.push(TagNode::Leaf(id)); @@ -295,13 +313,15 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { } else if let Some(image) = elem.to_packed::() { let alt = image.alt(StyleChain::default()).map(|s| s.to_string()); - let figure_tag = (gc.tags.parent().0) - .and_then(|parent| parent.as_standard_mut()) - .filter(|tag| tag.kind == TagKind::Figure && tag.alt_text.is_none()); + let figure_tag = (gc.tags.parent()) + .and_then(StackEntryKind::as_standard_mut) + .filter(|tag| tag.kind == TagKind::Figure); if let Some(figure_tag) = figure_tag { - // HACK: set alt text of outer figure tag, if the contained image - // has alt text specified - figure_tag.alt_text = alt; + if figure_tag.alt_text.is_none() { + // HACK: set alt text of outer figure tag, if the contained image + // has alt text specified + figure_tag.alt_text = alt; + } return; } else { TagKind::Figure.with_alt_text(alt) From 612aa8fc5375c77e671ceb99e5809833f38968f0 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 25 Jun 2025 17:39:50 +0200 Subject: [PATCH 18/76] fix: mark table gutter and fill as artifacts --- crates/typst-pdf/src/tags.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 8b27f7f58..713163bbd 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -12,7 +12,7 @@ use typst_library::foundations::{Content, LinkMarker, Packed, 
StyleChain}; use typst_library::introspection::Location; use typst_library::model::{ Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineElem, - OutlineEntry, TableCell, TableElem, TableHLine, TableVLine, + OutlineEntry, TableCell, TableElem, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; use typst_library::visualize::ImageElem; @@ -246,6 +246,10 @@ fn start_content<'a, 'b>( let content = if let Some((_, kind)) = gc.tags.in_artifact { let ty = artifact_type(kind); ContentTag::Artifact(ty) + } else if let Some(StackEntryKind::Table(_)) = gc.tags.stack.last().map(|e| &e.kind) { + // Mark any direct child of a table as an aritfact. Any real content + // will be wrapped inside a `TableCell`. + ContentTag::Artifact(ArtifactType::Other) } else { content }; @@ -339,12 +343,6 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { } else if let Some(cell) = elem.to_packed::() { push_stack(gc, loc, StackEntryKind::TableCell(cell.clone())); return; - } else if let Some(_) = elem.to_packed::() { - start_artifact(gc, loc, ArtifactKind::Other); - return; - } else if let Some(_) = elem.to_packed::() { - start_artifact(gc, loc, ArtifactKind::Other); - return; } else { return; }; From 6717a184149e7a0af83fc3c5eda53d6a73308ea1 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 25 Jun 2025 17:46:36 +0200 Subject: [PATCH 19/76] feat: mark RepeatElem as artifact --- crates/typst-library/src/layout/repeat.rs | 1 - crates/typst-pdf/src/tags.rs | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/typst-library/src/layout/repeat.rs b/crates/typst-library/src/layout/repeat.rs index ffc149bb2..ab042ceb1 100644 --- a/crates/typst-library/src/layout/repeat.rs +++ b/crates/typst-library/src/layout/repeat.rs @@ -25,7 +25,6 @@ use crate::layout::{BlockElem, Length}; /// Berlin, the 22nd of December, 2022 /// ] /// ``` -// TODO: should this be a PDF artifact by deafult? 
#[elem(Locatable, Show)] pub struct RepeatElem { /// The content to repeat. diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 713163bbd..4795edef8 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -10,6 +10,7 @@ use krilla::tagging::{ }; use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain}; use typst_library::introspection::Location; +use typst_library::layout::RepeatElem; use typst_library::model::{ Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineElem, OutlineEntry, TableCell, TableElem, @@ -287,6 +288,9 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { let kind = artifact.kind(StyleChain::default()); start_artifact(gc, loc, kind); return; + } else if let Some(_) = elem.to_packed::() { + start_artifact(gc, loc, ArtifactKind::Other); + return; } let tag: Tag = if let Some(pdf_tag) = elem.to_packed::() { From 09b2cd6de51610a44bdf9f000b74a83273d279c2 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 25 Jun 2025 17:47:05 +0200 Subject: [PATCH 20/76] docs: fixup some comments --- crates/typst-library/src/pdf/accessibility.rs | 3 ++- crates/typst-pdf/src/tags.rs | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index 086a172bf..a5df131d6 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -192,10 +192,11 @@ pub enum TableHeaderScope { /// TODO: maybe generalize this and use it to mark html elements with `aria-hidden="true"`? #[elem(Locatable, Show)] pub struct ArtifactElem { + /// The artifact kind. #[default(ArtifactKind::Other)] pub kind: ArtifactKind, - /// The content to underline. + /// The content that is an artifact. 
#[required] pub body: Content, } diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 4795edef8..b03279a5b 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -325,9 +325,8 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { .and_then(StackEntryKind::as_standard_mut) .filter(|tag| tag.kind == TagKind::Figure); if let Some(figure_tag) = figure_tag { + // Set alt text of outer figure tag, if not present. if figure_tag.alt_text.is_none() { - // HACK: set alt text of outer figure tag, if the contained image - // has alt text specified figure_tag.alt_text = alt; } return; From d6307831dd78b4ac5c2d6498b2874387331ec36f Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 26 Jun 2025 17:11:08 +0200 Subject: [PATCH 21/76] feat: hierarchical outline tags --- crates/typst-pdf/src/tags.rs | 97 ++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 5 deletions(-) diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index b03279a5b..942335ab8 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -42,6 +42,8 @@ pub(crate) struct StackEntry { pub(crate) enum StackEntryKind { Standard(Tag), Link(LinkId, Packed), + Outline(OutlineCtx), + OutlineEntry(Packed), Table(TableCtx), TableCell(Packed), } @@ -56,12 +58,82 @@ impl StackEntryKind { } } +pub(crate) struct OutlineCtx { + stack: Vec, +} + +pub(crate) struct OutlineSection { + entries: Vec, +} + +impl OutlineSection { + const fn new() -> Self { + OutlineSection { entries: Vec::new() } + } + + fn push(&mut self, entry: TagNode) { + self.entries.push(entry); + } + + fn into_tag(self) -> TagNode { + TagNode::Group(TagKind::TOC.into(), self.entries) + } +} + +impl OutlineCtx { + fn new() -> Self { + Self { stack: Vec::new() } + } + + fn insert( + &mut self, + outline_nodes: &mut Vec, + entry: Packed, + nodes: Vec, + ) { + let expected_len = entry.level.get() - 1; + if self.stack.len() < 
expected_len { + self.stack.resize_with(expected_len, || OutlineSection::new()); + } else { + while self.stack.len() > expected_len { + self.finish_section(outline_nodes); + } + } + + let section_entry = TagNode::Group(TagKind::TOCI.into(), nodes); + self.push(outline_nodes, section_entry); + } + + fn finish_section(&mut self, outline_nodes: &mut Vec) { + let sub_section = self.stack.pop().unwrap().into_tag(); + self.push(outline_nodes, sub_section); + } + + fn push(&mut self, outline_nodes: &mut Vec, entry: TagNode) { + match self.stack.last_mut() { + Some(section) => section.push(entry), + None => outline_nodes.push(entry), + } + } + + fn build_outline(mut self, mut outline_nodes: Vec) -> Vec { + while self.stack.len() > 0 { + self.finish_section(&mut outline_nodes); + } + outline_nodes + } +} + pub(crate) struct TableCtx { table: Packed, rows: Vec, Tag, Vec)>>>, } impl TableCtx { + fn new(table: Packed) -> Self { + Self { table: table.clone(), rows: Vec::new() } + } + fn insert(&mut self, cell: Packed, nodes: Vec) { let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); @@ -312,9 +384,11 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { _ => TagKind::H6(Some(name)).into(), } } else if let Some(_) = elem.to_packed::() { - TagKind::TOC.into() - } else if let Some(_) = elem.to_packed::() { - TagKind::TOCI.into() + push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new())); + return; + } else if let Some(entry) = elem.to_packed::() { + push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone())); + return; } else if let Some(_) = elem.to_packed::() { let alt = None; // TODO TagKind::Figure.with_alt_text(alt) @@ -340,8 +414,7 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone())); return; } else if let Some(table) = elem.to_packed::() { - let ctx = TableCtx { table: 
table.clone(), rows: Vec::new() }; - push_stack(gc, loc, StackEntryKind::Table(ctx)); + push_stack(gc, loc, StackEntryKind::Table(TableCtx::new(table.clone()))); return; } else if let Some(cell) = elem.to_packed::() { push_stack(gc, loc, StackEntryKind::TableCell(cell.clone())); @@ -385,6 +458,20 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { } node } + StackEntryKind::Outline(ctx) => { + let nodes = ctx.build_outline(entry.nodes); + TagNode::Group(TagKind::TOC.into(), nodes) + } + StackEntryKind::OutlineEntry(outline_entry) => { + let parent = gc.tags.stack.last_mut().expect("outline"); + let StackEntryKind::Outline(outline_ctx) = &mut parent.kind else { + unreachable!("expected outline") + }; + + outline_ctx.insert(&mut parent.nodes, outline_entry, entry.nodes); + + return; + } StackEntryKind::Table(ctx) => { let summary = ctx.table.summary(StyleChain::default()).map(EcoString::into); let nodes = ctx.build_table(entry.nodes); From 76d09b567345c0eb8f51622064a998e4bcdde416 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 26 Jun 2025 17:38:25 +0200 Subject: [PATCH 22/76] fix: only use link annotation quadpoints when exporting a PDF/UA-1 document --- crates/typst-pdf/src/convert.rs | 18 ++++++++++++++++-- crates/typst-pdf/src/link.rs | 25 ++++++++++++++----------- crates/typst-pdf/src/tags.rs | 8 ++++---- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 3f24494bb..a8a7e88b2 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -171,14 +171,14 @@ impl State { /// Context needed for converting a single frame. 
pub(crate) struct FrameContext { states: Vec, - pub(crate) link_annotations: HashMap, + link_annotations: Vec, } impl FrameContext { pub(crate) fn new(size: Size) -> Self { Self { states: vec![State::new(size)], - link_annotations: HashMap::new(), + link_annotations: Vec::new(), } } @@ -197,6 +197,20 @@ impl FrameContext { pub(crate) fn state_mut(&mut self) -> &mut State { self.states.last_mut().unwrap() } + + pub(crate) fn get_link_annotation( + &mut self, + link_id: tags::LinkId, + ) -> Option<&mut LinkAnnotation> { + self.link_annotations + .iter_mut() + .rev() + .find(|annot| annot.id == link_id) + } + + pub(crate) fn push_link_annotation(&mut self, annotation: LinkAnnotation) { + self.link_annotations.push(annotation); + } } /// Globally needed context for converting a typst document. diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index e0df6a58d..32949068b 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -1,18 +1,18 @@ -use std::collections::hash_map::Entry; - use ecow::EcoString; use krilla::action::{Action, LinkAction}; use krilla::annotation::Target; +use krilla::configure::Validator; use krilla::destination::XyzDestination; use krilla::geom as kg; use typst_library::layout::{Abs, Point, Position, Size}; use typst_library::model::Destination; use crate::convert::{FrameContext, GlobalContext}; -use crate::tags::{Placeholder, StackEntryKind, TagNode}; +use crate::tags::{self, Placeholder, StackEntryKind, TagNode}; use crate::util::{AbsExt, PointExt}; pub(crate) struct LinkAnnotation { + pub(crate) id: tags::LinkId, pub(crate) placeholder: Placeholder, pub(crate) alt: Option, pub(crate) rect: kg::Rect, @@ -50,7 +50,7 @@ pub(crate) fn handle_link( }; let entry = gc.tags.stack.last_mut().expect("a link parent"); - let StackEntryKind::Link(link_id, link) = &entry.kind else { + let StackEntryKind::Link(link_id, ref link) = entry.kind else { unreachable!("expected a link parent") }; let alt = 
link.alt.as_ref().map(EcoString::to_string); @@ -58,18 +58,21 @@ pub(crate) fn handle_link( let rect = to_rect(fc, size); let quadpoints = quadpoints(rect); - match fc.link_annotations.entry(*link_id) { - Entry::Occupied(occupied) => { - // Update the bounding box and add the quadpoints of an existing link annotation. - let annotation = occupied.into_mut(); + // Unfortunately quadpoints still aren't well supported by most PDF readers, + // even by acrobat. Which is understandable since they were only introduced + // in PDF 1.6 (2005) /s + let should_use_quadpoints = gc.options.standards.config.validator() == Validator::UA1; + match fc.get_link_annotation(link_id) { + Some(annotation) if should_use_quadpoints => { + // Update the bounding box and add the quadpoints to an existing link annotation. annotation.rect = bounding_rect(annotation.rect, rect); annotation.quad_points.extend_from_slice(&quadpoints); } - Entry::Vacant(vacant) => { + _ => { let placeholder = gc.tags.reserve_placeholder(); gc.tags.push(TagNode::Placeholder(placeholder)); - - vacant.insert(LinkAnnotation { + fc.push_link_annotation(LinkAnnotation { + id: link_id, placeholder, rect, quad_points: quadpoints.to_vec(), diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 942335ab8..d65e898c8 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -1,5 +1,4 @@ use std::cell::OnceCell; -use std::collections::HashMap; use ecow::EcoString; use krilla::page::Page; @@ -335,10 +334,11 @@ fn start_content<'a, 'b>( pub(crate) fn add_annotations( gc: &mut GlobalContext, page: &mut Page, - annotations: HashMap, + annotations: Vec, ) { - for annotation in annotations.into_values() { - let LinkAnnotation { placeholder, alt, rect, quad_points, target } = annotation; + for annotation in annotations.into_iter() { + let LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } = + annotation; let annot = krilla::annotation::Annotation::new_link( 
krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target), alt, From 6ebe85d67877254683e8038ad7fbf262fd495636 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 26 Jun 2025 18:50:43 +0200 Subject: [PATCH 23/76] fix: don't include outline title in TOC hierarchy --- crates/typst-library/src/model/outline.rs | 19 ++++++++++++++++++- crates/typst-pdf/src/tags.rs | 4 ++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index 9db263be1..1e177130d 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -273,6 +273,7 @@ impl Show for Packed { let depth = self.depth(styles).unwrap_or(NonZeroUsize::MAX); // Build the outline entries. + let mut entries = vec![]; for elem in elems { let Some(outlinable) = elem.with::() else { bail!(span, "cannot outline {}", elem.func().name()); @@ -281,10 +282,13 @@ impl Show for Packed { let level = outlinable.level(); if outlinable.outlined() && level <= depth { let entry = OutlineEntry::new(level, elem); - seq.push(entry.pack().spanned(span)); + entries.push(entry.pack().spanned(span)); } } + // Wrap the entries into a marker for pdf tagging. + seq.push(OutlineBody::new(Content::sequence(entries)).pack()); + Ok(Content::sequence(seq)) } } @@ -307,6 +311,19 @@ impl LocalName for Packed { const KEY: &'static str = "outline"; } +/// Only used to mark +#[elem(Locatable, Show)] +pub struct OutlineBody { + #[required] + body: Content, +} + +impl Show for Packed { + fn show(&self, _: &mut Engine, _: StyleChain) -> SourceResult { + Ok(self.body.clone()) + } +} + /// Defines how an outline is indented. 
#[derive(Debug, Clone, PartialEq, Hash)] pub enum OutlineIndent { diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index d65e898c8..8e41ea2ff 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -11,7 +11,7 @@ use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain}; use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ - Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineElem, + Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody, OutlineEntry, TableCell, TableElem, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; @@ -383,7 +383,7 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { // TODO: when targeting PDF 2.0 headings `> 6` are supported _ => TagKind::H6(Some(name)).into(), } - } else if let Some(_) = elem.to_packed::() { + } else if let Some(_) = elem.to_packed::() { push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new())); return; } else if let Some(entry) = elem.to_packed::() { From 605681d4356a66f0d05b0d0036419db374852f8c Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 26 Jun 2025 20:44:04 +0200 Subject: [PATCH 24/76] refactor: move link tagging code --- crates/typst-pdf/src/tags.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 8e41ea2ff..5b5963946 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -40,11 +40,11 @@ pub(crate) struct StackEntry { pub(crate) enum StackEntryKind { Standard(Tag), - Link(LinkId, Packed), Outline(OutlineCtx), OutlineEntry(Packed), Table(TableCtx), TableCell(Packed), + Link(LinkId, Packed), } impl StackEntryKind { @@ -409,16 +409,16 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { } } else if let Some(_) = elem.to_packed::() { 
TagKind::Caption.into() - } else if let Some(link) = elem.to_packed::() { - let link_id = gc.tags.next_link_id(); - push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone())); - return; } else if let Some(table) = elem.to_packed::() { push_stack(gc, loc, StackEntryKind::Table(TableCtx::new(table.clone()))); return; } else if let Some(cell) = elem.to_packed::() { push_stack(gc, loc, StackEntryKind::TableCell(cell.clone())); return; + } else if let Some(link) = elem.to_packed::() { + let link_id = gc.tags.next_link_id(); + push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone())); + return; } else { return; }; @@ -448,16 +448,6 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { let node = match entry.kind { StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes), - StackEntryKind::Link(_, link) => { - let alt = link.alt.as_ref().map(EcoString::to_string); - let tag = TagKind::Link.with_alt_text(alt); - let mut node = TagNode::Group(tag, entry.nodes); - // Wrap link in reference tag, if it's not a url. - if let Destination::Position(_) | Destination::Location(_) = link.dest { - node = TagNode::Group(TagKind::Reference.into(), vec![node]); - } - node - } StackEntryKind::Outline(ctx) => { let nodes = ctx.build_outline(entry.nodes); TagNode::Group(TagKind::TOC.into(), nodes) @@ -487,6 +477,16 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { return; } + StackEntryKind::Link(_, link) => { + let alt = link.alt.as_ref().map(EcoString::to_string); + let tag = TagKind::Link.with_alt_text(alt); + let mut node = TagNode::Group(tag, entry.nodes); + // Wrap link in reference tag, if it's not a url. 
+ if let Destination::Position(_) | Destination::Location(_) = link.dest { + node = TagNode::Group(TagKind::Reference.into(), vec![node]); + } + node + } }; gc.tags.push(node); From bfcf2bd4cced0b3f08b9b00af9bb14b824aabb13 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 27 Jun 2025 14:20:58 +0200 Subject: [PATCH 25/76] feat: support headings with level >= 7 --- crates/typst-pdf/src/tags.rs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 5b5963946..8a7e1362c 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -1,4 +1,5 @@ use std::cell::OnceCell; +use std::num::NonZeroU32; use ecow::EcoString; use krilla::page::Page; @@ -372,17 +373,9 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { _ => todo!(), } } else if let Some(heading) = elem.to_packed::() { - let level = heading.level(); + let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX); let name = heading.body.plain_text().to_string(); - match level.get() { - 1 => TagKind::H1(Some(name)).into(), - 2 => TagKind::H2(Some(name)).into(), - 3 => TagKind::H3(Some(name)).into(), - 4 => TagKind::H4(Some(name)).into(), - 5 => TagKind::H5(Some(name)).into(), - // TODO: when targeting PDF 2.0 headings `> 6` are supported - _ => TagKind::H6(Some(name)).into(), - } + TagKind::Hn(level, Some(name)).into() } else if let Some(_) = elem.to_packed::() { push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new())); return; From 3404fecd36d58731d27b9bddbbdb50fe64b37f77 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Sat, 28 Jun 2025 18:22:30 +0200 Subject: [PATCH 26/76] feat: tag table headers and footers --- .../typst-library/src/layout/grid/resolve.rs | 25 ++- crates/typst-library/src/model/table.rs | 22 +++ crates/typst-pdf/src/tags.rs | 182 +++++++++++++++--- 3 files changed, 200 insertions(+), 29 deletions(-) diff --git 
a/crates/typst-library/src/layout/grid/resolve.rs b/crates/typst-library/src/layout/grid/resolve.rs index baf6b7383..0de5a6b9c 100644 --- a/crates/typst-library/src/layout/grid/resolve.rs +++ b/crates/typst-library/src/layout/grid/resolve.rs @@ -22,6 +22,7 @@ use typst_syntax::Span; use typst_utils::NonZeroExt; use crate::introspection::SplitLocator; +use crate::model::TableCellKind; /// Convert a grid to a cell grid. #[typst_macros::time(span = elem.span())] @@ -217,6 +218,7 @@ impl ResolvableCell for Packed { breakable: bool, locator: Locator<'a>, styles: StyleChain, + kind: Smart, ) -> Cell<'a> { let cell = &mut *self; let colspan = cell.colspan(styles); @@ -224,6 +226,8 @@ impl ResolvableCell for Packed { let breakable = cell.breakable(styles).unwrap_or(breakable); let fill = cell.fill(styles).unwrap_or_else(|| fill.clone()); + let kind = cell.kind(styles).or(kind); + let cell_stroke = cell.stroke(styles); let stroke_overridden = cell_stroke.as_ref().map(|side| matches!(side, Some(Some(_)))); @@ -267,6 +271,7 @@ impl ResolvableCell for Packed { }), ); cell.push_breakable(Smart::Custom(breakable)); + cell.push_kind(kind); Cell { body: self.pack(), locator, @@ -312,6 +317,7 @@ impl ResolvableCell for Packed { breakable: bool, locator: Locator<'a>, styles: StyleChain, + _: Smart, ) -> Cell<'a> { let cell = &mut *self; let colspan = cell.colspan(styles); @@ -522,6 +528,7 @@ pub trait ResolvableCell { breakable: bool, locator: Locator<'a>, styles: StyleChain, + kind: Smart, ) -> Cell<'a>; /// Returns this cell's column override. @@ -1206,8 +1213,12 @@ impl<'x> CellGridResolver<'_, '_, 'x> { // a non-empty row. let mut first_available_row = 0; + let mut cell_kind: Smart = Smart::Auto; + let (header_footer_items, simple_item) = match child { ResolvableGridChild::Header { repeat, level, span, items, .. 
} => { + cell_kind = Smart::Custom(TableCellKind::Header); + row_group_data = Some(RowGroupData { range: None, span, @@ -1239,6 +1250,8 @@ impl<'x> CellGridResolver<'_, '_, 'x> { bail!(span, "cannot have more than one footer"); } + cell_kind = Smart::Custom(TableCellKind::Footer); + row_group_data = Some(RowGroupData { range: None, span, @@ -1447,7 +1460,7 @@ impl<'x> CellGridResolver<'_, '_, 'x> { // Let's resolve the cell so it can determine its own fields // based on its final position. - let cell = self.resolve_cell(cell, x, y, rowspan, cell_span)?; + let cell = self.resolve_cell(cell, x, y, rowspan, cell_span, cell_kind)?; if largest_index >= resolved_cells.len() { // Ensure the length of the vector of resolved cells is @@ -1542,6 +1555,10 @@ impl<'x> CellGridResolver<'_, '_, 'x> { // and footers without having to loop through them each time. // Cells themselves, unfortunately, still have to. assert!(resolved_cells[*local_auto_index].is_none()); + let kind = match row_group.kind { + RowGroupKind::Header => TableCellKind::Header, + RowGroupKind::Footer => TableCellKind::Header, + }; resolved_cells[*local_auto_index] = Some(Entry::Cell(self.resolve_cell( T::default(), @@ -1549,6 +1566,7 @@ impl<'x> CellGridResolver<'_, '_, 'x> { first_available_row, 1, Span::detached(), + Smart::Custom(kind), )?)); group_start..group_end @@ -1673,6 +1691,9 @@ impl<'x> CellGridResolver<'_, '_, 'x> { y, 1, Span::detached(), + // FIXME: empty cells will within header and footer rows + // will prevent row group tags. 
+ Smart::Auto, )?)) } }) @@ -1918,6 +1939,7 @@ impl<'x> CellGridResolver<'_, '_, 'x> { y: usize, rowspan: usize, cell_span: Span, + kind: Smart, ) -> SourceResult> where T: ResolvableCell + Default, @@ -1954,6 +1976,7 @@ impl<'x> CellGridResolver<'_, '_, 'x> { breakable, self.locator.next(&cell_span), self.styles, + kind, )) } } diff --git a/crates/typst-library/src/model/table.rs b/crates/typst-library/src/model/table.rs index a120423b3..b10bfb002 100644 --- a/crates/typst-library/src/model/table.rs +++ b/crates/typst-library/src/model/table.rs @@ -2,6 +2,7 @@ use std::num::{NonZeroU32, NonZeroUsize}; use std::sync::Arc; use ecow::EcoString; +use typst_macros::Cast; use typst_utils::NonZeroExt; use crate::diag::{bail, HintedStrResult, HintedString, SourceResult}; @@ -810,6 +811,12 @@ pub struct TableCell { #[fold] pub stroke: Sides>>>, + // TODO: feature gate + pub kind: Smart, + + // TODO: feature gate + pub header_scope: Smart, + /// Whether rows spanned by this cell can be placed in different pages. 
/// When equal to `{auto}`, a cell spanning only fixed-size rows is /// unbreakable, while a cell spanning at least one `{auto}`-sized row is @@ -847,3 +854,18 @@ impl From for TableCell { value.unpack::().unwrap_or_else(Self::new) } } + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum TableHeaderScope { + Both, + Column, + Row, +} + +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum TableCellKind { + Header, + Footer, + #[default] + Data, +} diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 8a7e1362c..911278e15 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -1,19 +1,19 @@ use std::cell::OnceCell; -use std::num::NonZeroU32; +use std::num::{NonZeroU32, NonZeroUsize}; use ecow::EcoString; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ ArtifactType, ContentTag, Identifier, Node, SpanTag, TableCellSpan, TableDataCell, - TableHeaderCell, TableHeaderScope, Tag, TagBuilder, TagGroup, TagKind, TagTree, + TableHeaderCell, Tag, TagBuilder, TagGroup, TagKind, TagTree, }; -use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain}; +use typst_library::foundations::{Content, LinkMarker, Packed, Smart, StyleChain}; use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody, - OutlineEntry, TableCell, TableElem, + OutlineEntry, TableCell, TableCellKind, TableElem, TableHeaderScope, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; use typst_library::visualize::ImageElem; @@ -126,7 +126,42 @@ impl OutlineCtx { pub(crate) struct TableCtx { table: Packed, - rows: Vec, Tag, Vec)>>>, + rows: Vec>, +} + +#[derive(Clone, Default)] +enum GridCell { + Cell(TableCtxCell), + Spanned(usize, usize), + #[default] + Missing, +} + +impl GridCell { + fn as_cell(&self) -> 
Option<&TableCtxCell> { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } + + fn into_cell(self) -> Option { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } +} + +#[derive(Clone)] +struct TableCtxCell { + rowspan: NonZeroUsize, + colspan: NonZeroUsize, + kind: TableCellKind, + header_scope: Smart, + nodes: Vec, } impl TableCtx { @@ -137,51 +172,134 @@ impl TableCtx { fn insert(&mut self, cell: Packed, nodes: Vec) { let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); - let rowspan = cell.rowspan(StyleChain::default()).get(); - let colspan = cell.colspan(StyleChain::default()).get(); + let rowspan = cell.rowspan(StyleChain::default()); + let colspan = cell.colspan(StyleChain::default()); + let kind = cell.kind(StyleChain::default()); + let header_scope = cell.header_scope(StyleChain::default()); - // TODO: possibly set internal field on TableCell when resolving - // the cell grid. - let is_header = false; - let span = TableCellSpan { rows: rowspan as i32, cols: colspan as i32 }; - let tag = if is_header { - let scope = TableHeaderScope::Column; // TODO - TagKind::TH(TableHeaderCell::new(scope).with_span(span)) - } else { - TagKind::TD(TableDataCell::new().with_span(span)) + // The explicit cell kind takes precedence, but if it is `auto` and a + // scope was specified, make this a header cell. + let kind = match (kind, header_scope) { + (Smart::Custom(kind), _) => kind, + (Smart::Auto, Smart::Custom(_)) => TableCellKind::Header, + (Smart::Auto, Smart::Auto) => TableCellKind::Data, }; - let required_height = y + rowspan; + // Extend the table grid to fit this cell. 
+ let required_height = y + rowspan.get(); + let required_width = x + colspan.get(); if self.rows.len() < required_height { - self.rows.resize_with(required_height, Vec::new); + self.rows + .resize(required_height, vec![GridCell::Missing; required_width]); } - - let required_width = x + colspan; let row = &mut self.rows[y]; if row.len() < required_width { - row.resize_with(required_width, || None); + row.resize_with(required_width, || GridCell::Missing); } - row[x] = Some((cell, tag.into(), nodes)); + // Store references to the cell for all spanned cells. + for i in y..y + rowspan.get() { + for j in x..x + colspan.get() { + self.rows[i][j] = GridCell::Spanned(x, y); + } + } + + self.rows[y][x] = + GridCell::Cell(TableCtxCell { rowspan, colspan, kind, header_scope, nodes }); } fn build_table(self, mut nodes: Vec) -> Vec { // Table layouting ensures that there are no overlapping cells, and that // any gaps left by the user are filled with empty cells. - for row in self.rows.into_iter() { - let mut row_nodes = Vec::new(); - for (_, tag, nodes) in row.into_iter().flatten() { - row_nodes.push(TagNode::Group(tag, nodes)); + + // Only generate row groups such as `THead`, `TFoot`, and `TBody` if + // there are no rows with mixed cell kinds. 
+ let mut mixed_row_kinds = false; + let row_kinds = (self.rows.iter()) + .map(|row| { + row.iter() + .filter_map(|cell| match cell { + GridCell::Cell(cell) => Some(cell), + &GridCell::Spanned(x, y) => self.rows[y][x].as_cell(), + GridCell::Missing => None, + }) + .map(|cell| cell.kind) + .reduce(|a, b| { + if a != b { + mixed_row_kinds = true; + } + a + }) + .expect("tables must have at least one column") + }) + .collect::>(); + + let Some(mut chunk_kind) = row_kinds.first().copied() else { + return nodes; + }; + let mut row_chunk = Vec::new(); + for (row, row_kind) in self.rows.into_iter().zip(row_kinds) { + let row_nodes = row + .into_iter() + .filter_map(|cell| { + let cell = cell.into_cell()?; + let span = TableCellSpan { + rows: cell.rowspan.get() as i32, + cols: cell.colspan.get() as i32, + }; + let tag = match cell.kind { + TableCellKind::Header => { + let scope = match cell.header_scope { + Smart::Custom(scope) => table_header_scope(scope), + Smart::Auto => krilla::tagging::TableHeaderScope::Column, + }; + TagKind::TH(TableHeaderCell::new(scope).with_span(span)) + } + TableCellKind::Footer | TableCellKind::Data => { + TagKind::TD(TableDataCell::new().with_span(span)) + } + }; + + Some(TagNode::Group(tag.into(), cell.nodes)) + }) + .collect(); + + let row = TagNode::Group(TagKind::TR.into(), row_nodes); + + // Push the `TR` tags directly. + if mixed_row_kinds { + nodes.push(row); + continue; } - // TODO: generate `THead`, `TBody`, and `TFoot` - nodes.push(TagNode::Group(TagKind::TR.into(), row_nodes)); + // Generate row groups. 
+ if row_kind != chunk_kind { + let tag = match chunk_kind { + TableCellKind::Header => TagKind::THead, + TableCellKind::Footer => TagKind::TFoot, + TableCellKind::Data => TagKind::TBody, + }; + nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk))); + + chunk_kind = row_kind; + } + row_chunk.push(row); + } + + if !row_chunk.is_empty() { + let tag = match chunk_kind { + TableCellKind::Header => TagKind::THead, + TableCellKind::Footer => TagKind::TFoot, + TableCellKind::Data => TagKind::TBody, + }; + nodes.push(TagNode::Group(tag.into(), row_chunk)); } nodes } } +#[derive(Clone)] pub(crate) enum TagNode { Group(Tag, Vec), Leaf(Identifier), @@ -489,6 +607,14 @@ fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) { gc.tags.in_artifact = Some((loc, kind)); } +fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope { + match scope { + TableHeaderScope::Both => krilla::tagging::TableHeaderScope::Both, + TableHeaderScope::Column => krilla::tagging::TableHeaderScope::Column, + TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row, + } +} + fn artifact_type(kind: ArtifactKind) -> ArtifactType { match kind { ArtifactKind::Header => ArtifactType::Header, From 773efb5572c0eb18163001359c4e4f06d4dba5f2 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Tue, 1 Jul 2025 16:09:41 +0200 Subject: [PATCH 27/76] fix: bug due to table cell start tags in grid layout code --- crates/typst-layout/src/grid/layouter.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/typst-layout/src/grid/layouter.rs b/crates/typst-layout/src/grid/layouter.rs index 42fe38dbe..78057f106 100644 --- a/crates/typst-layout/src/grid/layouter.rs +++ b/crates/typst-layout/src/grid/layouter.rs @@ -1284,10 +1284,18 @@ impl<'a> GridLayouter<'a> { if let Some([first, rest @ ..]) = frames.get(measurement_data.frames_in_previous_regions..) 
{ + // HACK: reconsider if this is the right decision + fn is_empty_frame(frame: &Frame) -> bool { + !frame.items().any(|(_, item)| match item { + FrameItem::Group(group) => is_empty_frame(&group.frame), + FrameItem::Tag(_) => false, + _ => true, + }) + } if can_skip && breakable - && first.is_empty() - && rest.iter().any(|frame| !frame.is_empty()) + && is_empty_frame(first) + && rest.iter().any(|frame| !is_empty_frame(frame)) { return Ok(None); } From 746926c7da8187e784120043fe93e96ebd691754 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Tue, 1 Jul 2025 17:47:18 +0200 Subject: [PATCH 28/76] fix: ignore repeated table headers/footers in tag tree --- crates/typst-pdf/src/tags.rs | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 911278e15..815b752e6 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -169,6 +169,15 @@ impl TableCtx { Self { table: table.clone(), rows: Vec::new() } } + fn contains(&self, cell: &Packed) -> bool { + let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); + + let Some(row) = self.rows.get(y) else { return false }; + let Some(cell) = row.get(x) else { return false }; + !matches!(cell, GridCell::Missing) + } + fn insert(&mut self, cell: Packed, nodes: Vec) { let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); @@ -230,7 +239,7 @@ impl TableCtx { } a }) - .expect("tables must have at least one column") + .unwrap_or(TableCellKind::Data) }) .collect::>(); @@ -524,7 +533,20 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { push_stack(gc, loc, StackEntryKind::Table(TableCtx::new(table.clone()))); return; } else if let Some(cell) = elem.to_packed::() { - push_stack(gc, loc, 
StackEntryKind::TableCell(cell.clone())); + let parent = gc.tags.stack.last_mut().expect("table"); + let StackEntryKind::Table(table_ctx) = &mut parent.kind else { + unreachable!("expected table") + }; + + // Only repeated table headers and footer cells are layed out multiple + // times. Mark duplicate headers as artifacts, since they have no + // semantic meaning in the tag tree, which doesn't use page breaks for + // it's semantic structure. + if table_ctx.contains(cell) { + start_artifact(gc, loc, ArtifactKind::Other); + } else { + push_stack(gc, loc, StackEntryKind::TableCell(cell.clone())); + } return; } else if let Some(link) = elem.to_packed::() { let link_id = gc.tags.next_link_id(); From 50cd81ee1f65ce8fdfa1897991bb4770e671e93c Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 2 Jul 2025 23:44:44 +0200 Subject: [PATCH 29/76] feat: generate headers attribute table cells - fix marking repeated headers/footers as artifacts - fix table row grouping with empty cells --- Cargo.lock | 5 +- .../typst-library/src/layout/grid/resolve.rs | 23 +- crates/typst-library/src/model/table.rs | 72 ++++- crates/typst-library/src/pdf/accessibility.rs | 12 +- crates/typst-pdf/src/tags.rs | 258 ++++++++++++++---- 5 files changed, 279 insertions(+), 91 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0ad90fb38..4c92cf823 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1384,6 +1384,7 @@ dependencies = [ "rustybuzz", "siphasher", "skrifa", + "smallvec", "subsetter", "tiny-skia-path", "xmp-writer", @@ -2449,9 +2450,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "spin" diff --git a/crates/typst-library/src/layout/grid/resolve.rs b/crates/typst-library/src/layout/grid/resolve.rs index 
0de5a6b9c..49f9e0edd 100644 --- a/crates/typst-library/src/layout/grid/resolve.rs +++ b/crates/typst-library/src/layout/grid/resolve.rs @@ -22,7 +22,7 @@ use typst_syntax::Span; use typst_utils::NonZeroExt; use crate::introspection::SplitLocator; -use crate::model::TableCellKind; +use crate::model::{TableCellKind, TableHeaderScope}; /// Convert a grid to a cell grid. #[typst_macros::time(span = elem.span())] @@ -1213,11 +1213,13 @@ impl<'x> CellGridResolver<'_, '_, 'x> { // a non-empty row. let mut first_available_row = 0; - let mut cell_kind: Smart = Smart::Auto; + // The cell kind is currently only used for tagged PDF. + let cell_kind; let (header_footer_items, simple_item) = match child { - ResolvableGridChild::Header { repeat, level, span, items, .. } => { - cell_kind = Smart::Custom(TableCellKind::Header); + ResolvableGridChild::Header { repeat, level, span, items } => { + cell_kind = + Smart::Custom(TableCellKind::Header(level, TableHeaderScope::Column)); row_group_data = Some(RowGroupData { range: None, @@ -1245,7 +1247,7 @@ impl<'x> CellGridResolver<'_, '_, 'x> { (Some(items), None) } - ResolvableGridChild::Footer { repeat, span, items, .. } => { + ResolvableGridChild::Footer { repeat, span, items } => { if footer.is_some() { bail!(span, "cannot have more than one footer"); } @@ -1270,6 +1272,8 @@ impl<'x> CellGridResolver<'_, '_, 'x> { (Some(items), None) } ResolvableGridChild::Item(item) => { + cell_kind = Smart::Custom(TableCellKind::Data); + if matches!(item, ResolvableGridItem::Cell(_)) { *at_least_one_cell = true; } @@ -1556,8 +1560,11 @@ impl<'x> CellGridResolver<'_, '_, 'x> { // Cells themselves, unfortunately, still have to. 
assert!(resolved_cells[*local_auto_index].is_none()); let kind = match row_group.kind { - RowGroupKind::Header => TableCellKind::Header, - RowGroupKind::Footer => TableCellKind::Header, + RowGroupKind::Header => TableCellKind::Header( + NonZeroU32::ONE, + TableHeaderScope::default(), + ), + RowGroupKind::Footer => TableCellKind::Footer, }; resolved_cells[*local_auto_index] = Some(Entry::Cell(self.resolve_cell( @@ -1691,8 +1698,6 @@ impl<'x> CellGridResolver<'_, '_, 'x> { y, 1, Span::detached(), - // FIXME: empty cells will within header and footer rows - // will prevent row group tags. Smart::Auto, )?)) } diff --git a/crates/typst-library/src/model/table.rs b/crates/typst-library/src/model/table.rs index b10bfb002..f8fe76918 100644 --- a/crates/typst-library/src/model/table.rs +++ b/crates/typst-library/src/model/table.rs @@ -8,8 +8,8 @@ use typst_utils::NonZeroExt; use crate::diag::{bail, HintedStrResult, HintedString, SourceResult}; use crate::engine::Engine; use crate::foundations::{ - cast, elem, scope, Content, NativeElement, Packed, Show, Smart, StyleChain, - TargetElem, + cast, dict, elem, scope, Content, Dict, NativeElement, Packed, Show, Smart, + StyleChain, TargetElem, }; use crate::html::{attr, tag, HtmlAttrs, HtmlElem, HtmlTag}; use crate::introspection::{Locatable, Locator}; @@ -814,9 +814,6 @@ pub struct TableCell { // TODO: feature gate pub kind: Smart, - // TODO: feature gate - pub header_scope: Smart, - /// Whether rows spanned by this cell can be placed in different pages. 
/// When equal to `{auto}`, a cell spanning only fixed-size rows is /// unbreakable, while a cell spanning at least one `{auto}`-sized row is @@ -855,17 +852,64 @@ impl From for TableCell { } } -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] -pub enum TableHeaderScope { - Both, - Column, - Row, -} - -#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Cast)] +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] pub enum TableCellKind { - Header, + Header(NonZeroU32, TableHeaderScope), Footer, #[default] Data, } + +cast! { + TableCellKind, + self => match self { + Self::Header(level, scope) => dict! { "level" => level, "scope" => scope }.into_value(), + Self::Footer => "footer".into_value(), + Self::Data => "data".into_value(), + }, + "header" => Self::Header(NonZeroU32::ONE, TableHeaderScope::default()), + "footer" => Self::Footer, + "data" => Self::Data, + mut dict: Dict => { + // TODO: have a `pdf.header` function instead? + #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] + enum HeaderKind { + Header, + } + dict.take("kind")?.cast::()?; + let level = dict.take("level").ok().map(|v| v.cast()).transpose()?; + let scope = dict.take("scope").ok().map(|v| v.cast()).transpose()?; + dict.finish(&["kind", "level", "scope"])?; + Self::Header(level.unwrap_or(NonZeroU32::ONE), scope.unwrap_or_default()) + }, +} + +/// The scope of a table header cell. +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum TableHeaderScope { + /// The header cell refers to both the row and the column. + Both, + /// The header cell refers to the column. + #[default] + Column, + /// The header cell refers to the row. 
+ Row, +} + +impl TableHeaderScope { + pub fn refers_to_column(&self) -> bool { + match self { + TableHeaderScope::Both => true, + TableHeaderScope::Column => true, + TableHeaderScope::Row => false, + } + } + + pub fn refers_to_row(&self) -> bool { + match self { + TableHeaderScope::Both => true, + TableHeaderScope::Column => false, + TableHeaderScope::Row => true, + } + } +} diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index a5df131d6..7ec52f8cb 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -5,6 +5,7 @@ use crate::diag::SourceResult; use crate::engine::Engine; use crate::foundations::{Content, Packed, Show, StyleChain}; use crate::introspection::Locatable; +use crate::model::TableHeaderScope; // TODO: docs #[elem(Locatable, Show)] @@ -177,17 +178,6 @@ pub enum ListNumbering { UpperAlpha, } -/// The scope of a table header cell. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum TableHeaderScope { - /// The header cell refers to the row. - Row, - /// The header cell refers to the column. - Column, - /// The header cell refers to both the row and the column. - Both, -} - /// Mark content as a PDF artifact. /// TODO: maybe generalize this and use it to mark html elements with `aria-hidden="true"`? 
#[elem(Locatable, Show)] diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags.rs index 815b752e6..9f49024f1 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags.rs @@ -5,8 +5,8 @@ use ecow::EcoString; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, ContentTag, Identifier, Node, SpanTag, TableCellSpan, TableDataCell, - TableHeaderCell, Tag, TagBuilder, TagGroup, TagKind, TagTree, + ArtifactType, ContentTag, Identifier, Node, SpanTag, TableCellHeaders, TableCellSpan, + TableDataCell, TableHeaderCell, Tag, TagBuilder, TagGroup, TagId, TagKind, TagTree, }; use typst_library::foundations::{Content, LinkMarker, Packed, Smart, StyleChain}; use typst_library::introspection::Location; @@ -27,12 +27,22 @@ pub(crate) struct Tags { /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. pub(crate) placeholders: Vec>, pub(crate) in_artifact: Option<(Location, ArtifactKind)>, + /// Used to group multiple link annotations using quad points. pub(crate) link_id: LinkId, + /// Used to generate IDs referenced in table `Headers` attributes. + /// The IDs must be document wide unique. + pub(crate) table_id: TableId, /// The output. 
pub(crate) tree: Vec, } +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct TableId(u32); + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct LinkId(u32); + pub(crate) struct StackEntry { pub(crate) loc: Location, pub(crate) kind: StackEntryKind, @@ -125,6 +135,7 @@ impl OutlineCtx { } pub(crate) struct TableCtx { + id: TableId, table: Packed, rows: Vec>, } @@ -146,6 +157,14 @@ impl GridCell { } } + fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } + fn into_cell(self) -> Option { if let Self::Cell(v) = self { Some(v) @@ -157,25 +176,56 @@ impl GridCell { #[derive(Clone)] struct TableCtxCell { + x: u32, + y: u32, rowspan: NonZeroUsize, colspan: NonZeroUsize, - kind: TableCellKind, - header_scope: Smart, + kind: Smart, + headers: TableCellHeaders, nodes: Vec, } +impl TableCtxCell { + fn unwrap_kind(&self) -> TableCellKind { + self.kind.unwrap_or_else(|| unreachable!()) + } +} + impl TableCtx { - fn new(table: Packed) -> Self { - Self { table: table.clone(), rows: Vec::new() } + fn new(id: TableId, table: Packed) -> Self { + Self { id, table: table.clone(), rows: Vec::new() } + } + + fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> { + let cell = self.rows.get(y)?.get(x)?; + self.resolve_cell(cell) + } + + fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut TableCtxCell> { + let cell = self.rows.get_mut(y)?.get_mut(x)?; + match cell { + GridCell::Cell(cell) => { + // HACK: Workaround for the second mutable borrow when resolving + // the spanned cell. 
+ Some(unsafe { std::mem::transmute(cell) }) + } + &mut GridCell::Spanned(x, y) => self.rows[y][x].as_cell_mut(), + GridCell::Missing => None, + } } fn contains(&self, cell: &Packed) -> bool { let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); + self.get(x, y).is_some() + } - let Some(row) = self.rows.get(y) else { return false }; - let Some(cell) = row.get(x) else { return false }; - !matches!(cell, GridCell::Missing) + fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> { + match cell { + GridCell::Cell(cell) => Some(cell), + &GridCell::Spanned(x, y) => self.rows[y][x].as_cell(), + GridCell::Missing => None, + } } fn insert(&mut self, cell: Packed, nodes: Vec) { @@ -184,15 +234,6 @@ impl TableCtx { let rowspan = cell.rowspan(StyleChain::default()); let colspan = cell.colspan(StyleChain::default()); let kind = cell.kind(StyleChain::default()); - let header_scope = cell.header_scope(StyleChain::default()); - - // The explicit cell kind takes precedence, but if it is `auto` and a - // scope was specified, make this a header cell. - let kind = match (kind, header_scope) { - (Smart::Custom(kind), _) => kind, - (Smart::Auto, Smart::Custom(_)) => TableCellKind::Header, - (Smart::Auto, Smart::Auto) => TableCellKind::Data, - }; // Extend the table grid to fit this cell. let required_height = y + rowspan.get(); @@ -213,39 +254,80 @@ impl TableCtx { } } - self.rows[y][x] = - GridCell::Cell(TableCtxCell { rowspan, colspan, kind, header_scope, nodes }); + self.rows[y][x] = GridCell::Cell(TableCtxCell { + x: x as u32, + y: y as u32, + rowspan, + colspan, + kind, + headers: TableCellHeaders::NONE, + nodes, + }); } - fn build_table(self, mut nodes: Vec) -> Vec { + fn build_table(mut self, mut nodes: Vec) -> Vec { // Table layouting ensures that there are no overlapping cells, and that // any gaps left by the user are filled with empty cells. 
+ if self.rows.is_empty() { + return nodes; + } + let height = self.rows.len(); + let width = self.rows[0].len(); // Only generate row groups such as `THead`, `TFoot`, and `TBody` if // there are no rows with mixed cell kinds. - let mut mixed_row_kinds = false; + let mut gen_row_groups = true; let row_kinds = (self.rows.iter()) .map(|row| { row.iter() - .filter_map(|cell| match cell { - GridCell::Cell(cell) => Some(cell), - &GridCell::Spanned(x, y) => self.rows[y][x].as_cell(), - GridCell::Missing => None, - }) + .filter_map(|cell| self.resolve_cell(cell)) .map(|cell| cell.kind) - .reduce(|a, b| { - if a != b { - mixed_row_kinds = true; + .fold(Smart::Auto, |a, b| { + if let Smart::Custom(TableCellKind::Header(_, scope)) = b { + gen_row_groups &= scope == TableHeaderScope::Column; } - a + if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) { + gen_row_groups &= a == b; + } + a.or(b) }) .unwrap_or(TableCellKind::Data) }) .collect::>(); - let Some(mut chunk_kind) = row_kinds.first().copied() else { - return nodes; - }; + // Fixup all missing cell kinds. + for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) { + let default_kind = + if gen_row_groups { row_kind } else { TableCellKind::Data }; + for cell in row.iter_mut() { + let Some(cell) = cell.as_cell_mut() else { continue }; + cell.kind = cell.kind.or(Smart::Custom(default_kind)); + } + } + + // Explicitly set the headers attribute for cells. 
+ for x in 0..width { + let mut column_header = None; + for y in 0..height { + self.resolve_cell_headers( + (x, y), + &mut column_header, + TableHeaderScope::refers_to_column, + ); + } + } + for y in 0..height { + let mut row_header = None; + for x in 0..width { + self.resolve_cell_headers( + (x, y), + &mut row_header, + TableHeaderScope::refers_to_row, + ); + } + } + + let mut chunk_kind = row_kinds[0]; let mut row_chunk = Vec::new(); for (row, row_kind) in self.rows.into_iter().zip(row_kinds) { let row_nodes = row @@ -253,38 +335,44 @@ impl TableCtx { .filter_map(|cell| { let cell = cell.into_cell()?; let span = TableCellSpan { - rows: cell.rowspan.get() as i32, - cols: cell.colspan.get() as i32, + rows: cell.rowspan.try_into().unwrap(), + cols: cell.colspan.try_into().unwrap(), }; - let tag = match cell.kind { - TableCellKind::Header => { - let scope = match cell.header_scope { - Smart::Custom(scope) => table_header_scope(scope), - Smart::Auto => krilla::tagging::TableHeaderScope::Column, - }; - TagKind::TH(TableHeaderCell::new(scope).with_span(span)) - } - TableCellKind::Footer | TableCellKind::Data => { - TagKind::TD(TableDataCell::new().with_span(span)) + let tag = match cell.unwrap_kind() { + TableCellKind::Header(_, scope) => { + let id = table_cell_id(self.id, cell.x, cell.y); + let scope = table_header_scope(scope); + TagKind::TH( + TableHeaderCell::new(scope) + .with_span(span) + .with_headers(cell.headers), + ) + .with_id(Some(id)) } + TableCellKind::Footer | TableCellKind::Data => TagKind::TD( + TableDataCell::new() + .with_span(span) + .with_headers(cell.headers), + ) + .into(), }; - Some(TagNode::Group(tag.into(), cell.nodes)) + Some(TagNode::Group(tag, cell.nodes)) }) .collect(); let row = TagNode::Group(TagKind::TR.into(), row_nodes); // Push the `TR` tags directly. - if mixed_row_kinds { + if !gen_row_groups { nodes.push(row); continue; } // Generate row groups. 
- if row_kind != chunk_kind { + if !should_group_rows(chunk_kind, row_kind) { let tag = match chunk_kind { - TableCellKind::Header => TagKind::THead, + TableCellKind::Header(..) => TagKind::THead, TableCellKind::Footer => TagKind::TFoot, TableCellKind::Data => TagKind::TBody, }; @@ -297,7 +385,7 @@ impl TableCtx { if !row_chunk.is_empty() { let tag = match chunk_kind { - TableCellKind::Header => TagKind::THead, + TableCellKind::Header(..) => TagKind::THead, TableCellKind::Footer => TagKind::TFoot, TableCellKind::Data => TagKind::TBody, }; @@ -306,6 +394,56 @@ impl TableCtx { nodes } + + fn resolve_cell_headers( + &mut self, + (x, y): (usize, usize), + current_header: &mut Option<(NonZeroU32, TagId)>, + refers_to_dir: F, + ) where + F: Fn(&TableHeaderScope) -> bool, + { + let table_id = self.id; + let Some(cell) = self.get_mut(x, y) else { return }; + + if let Some((prev_level, cell_id)) = current_header.clone() { + // The `Headers` attribute is also set for parent headers. + let mut is_parent_header = true; + if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { + if refers_to_dir(&scope) { + is_parent_header = prev_level < level; + } + } + + if is_parent_header && !cell.headers.ids.contains(&cell_id) { + cell.headers.ids.push(cell_id.clone()); + } + } + + if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { + if refers_to_dir(&scope) { + let tag_id = table_cell_id(table_id, x as u32, y as u32); + *current_header = Some((level, tag_id)); + } + } + } +} + +fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool { + match (a, b) { + (TableCellKind::Header(..), TableCellKind::Header(..)) => true, + (TableCellKind::Footer, TableCellKind::Footer) => true, + (TableCellKind::Data, TableCellKind::Data) => true, + (_, _) => false, + } +} + +fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { + let mut bytes = [0; 12]; + bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes()); + bytes[4..8].copy_from_slice(&x.to_ne_bytes()); + 
bytes[8..12].copy_from_slice(&y.to_ne_bytes()); + TagId::from_bytes(&bytes) } #[derive(Clone)] @@ -317,9 +455,6 @@ pub(crate) enum TagNode { Placeholder(Placeholder), } -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub(crate) struct LinkId(u32); - #[derive(Clone, Copy)] pub(crate) struct Placeholder(usize); @@ -332,6 +467,7 @@ impl Tags { tree: Vec::new(), link_id: LinkId(0), + table_id: TableId(0), } } @@ -400,6 +536,11 @@ impl Tags { self.link_id.0 += 1; self.link_id } + + fn next_table_id(&mut self) -> TableId { + self.table_id.0 += 1; + self.table_id + } } /// Automatically calls [`Surface::end_tagged`] when dropped. @@ -530,7 +671,9 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { } else if let Some(_) = elem.to_packed::() { TagKind::Caption.into() } else if let Some(table) = elem.to_packed::() { - push_stack(gc, loc, StackEntryKind::Table(TableCtx::new(table.clone()))); + let table_id = gc.tags.next_table_id(); + let ctx = TableCtx::new(table_id, table.clone()); + push_stack(gc, loc, StackEntryKind::Table(ctx)); return; } else if let Some(cell) = elem.to_packed::() { let parent = gc.tags.stack.last_mut().expect("table"); @@ -543,6 +686,11 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { // semantic meaning in the tag tree, which doesn't use page breaks for // it's semantic structure. if table_ctx.contains(cell) { + // TODO: currently the first layouted cell is picked to be part of + // the tag tree, for repeating footers this will be the cell on the + // first page. Maybe it should be the cell on the last page, but that + // would require more changes in the layouting code, or a pre-pass + // on the frames to figure out if there are other footers following. 
start_artifact(gc, loc, ArtifactKind::Other); } else { push_stack(gc, loc, StackEntryKind::TableCell(cell.clone())); From 377dc87325795943f0c0dc6ca1047d2d40f3c264 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 3 Jul 2025 11:22:22 +0200 Subject: [PATCH 30/76] refactor: split up pdf tagging code into multiple modules --- crates/typst-pdf/src/{tags.rs => tags/mod.rs} | 833 +++++------------- crates/typst-pdf/src/tags/outline.rs | 74 ++ crates/typst-pdf/src/tags/table.rs | 330 +++++++ 3 files changed, 630 insertions(+), 607 deletions(-) rename crates/typst-pdf/src/{tags.rs => tags/mod.rs} (51%) create mode 100644 crates/typst-pdf/src/tags/outline.rs create mode 100644 crates/typst-pdf/src/tags/table.rs diff --git a/crates/typst-pdf/src/tags.rs b/crates/typst-pdf/src/tags/mod.rs similarity index 51% rename from crates/typst-pdf/src/tags.rs rename to crates/typst-pdf/src/tags/mod.rs index 9f49024f1..99b52d555 100644 --- a/crates/typst-pdf/src/tags.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -1,621 +1,30 @@ use std::cell::OnceCell; -use std::num::{NonZeroU32, NonZeroUsize}; +use std::num::NonZeroU32; use ecow::EcoString; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, ContentTag, Identifier, Node, SpanTag, TableCellHeaders, TableCellSpan, - TableDataCell, TableHeaderCell, Tag, TagBuilder, TagGroup, TagId, TagKind, TagTree, + ArtifactType, ContentTag, Identifier, Node, SpanTag, Tag, TagBuilder, TagGroup, + TagKind, TagTree, }; -use typst_library::foundations::{Content, LinkMarker, Packed, Smart, StyleChain}; +use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain}; use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody, - OutlineEntry, TableCell, TableCellKind, TableElem, TableHeaderScope, + OutlineEntry, TableCell, TableElem, }; use typst_library::pdf::{ArtifactElem, 
ArtifactKind, PdfTagElem, PdfTagKind}; use typst_library::visualize::ImageElem; use crate::convert::GlobalContext; use crate::link::LinkAnnotation; +use crate::tags::outline::OutlineCtx; +use crate::tags::table::TableCtx; -pub(crate) struct Tags { - /// The intermediary stack of nested tag groups. - pub(crate) stack: Vec, - /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. - pub(crate) placeholders: Vec>, - pub(crate) in_artifact: Option<(Location, ArtifactKind)>, - /// Used to group multiple link annotations using quad points. - pub(crate) link_id: LinkId, - /// Used to generate IDs referenced in table `Headers` attributes. - /// The IDs must be document wide unique. - pub(crate) table_id: TableId, - - /// The output. - pub(crate) tree: Vec, -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub(crate) struct TableId(u32); - -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub(crate) struct LinkId(u32); - -pub(crate) struct StackEntry { - pub(crate) loc: Location, - pub(crate) kind: StackEntryKind, - pub(crate) nodes: Vec, -} - -pub(crate) enum StackEntryKind { - Standard(Tag), - Outline(OutlineCtx), - OutlineEntry(Packed), - Table(TableCtx), - TableCell(Packed), - Link(LinkId, Packed), -} - -impl StackEntryKind { - pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> { - if let Self::Standard(v) = self { - Some(v) - } else { - None - } - } -} - -pub(crate) struct OutlineCtx { - stack: Vec, -} - -pub(crate) struct OutlineSection { - entries: Vec, -} - -impl OutlineSection { - const fn new() -> Self { - OutlineSection { entries: Vec::new() } - } - - fn push(&mut self, entry: TagNode) { - self.entries.push(entry); - } - - fn into_tag(self) -> TagNode { - TagNode::Group(TagKind::TOC.into(), self.entries) - } -} - -impl OutlineCtx { - fn new() -> Self { - Self { stack: Vec::new() } - } - - fn insert( - &mut self, - outline_nodes: &mut Vec, - entry: Packed, - nodes: Vec, - ) { - let expected_len = entry.level.get() - 1; - if self.stack.len() 
< expected_len { - self.stack.resize_with(expected_len, || OutlineSection::new()); - } else { - while self.stack.len() > expected_len { - self.finish_section(outline_nodes); - } - } - - let section_entry = TagNode::Group(TagKind::TOCI.into(), nodes); - self.push(outline_nodes, section_entry); - } - - fn finish_section(&mut self, outline_nodes: &mut Vec) { - let sub_section = self.stack.pop().unwrap().into_tag(); - self.push(outline_nodes, sub_section); - } - - fn push(&mut self, outline_nodes: &mut Vec, entry: TagNode) { - match self.stack.last_mut() { - Some(section) => section.push(entry), - None => outline_nodes.push(entry), - } - } - - fn build_outline(mut self, mut outline_nodes: Vec) -> Vec { - while self.stack.len() > 0 { - self.finish_section(&mut outline_nodes); - } - outline_nodes - } -} - -pub(crate) struct TableCtx { - id: TableId, - table: Packed, - rows: Vec>, -} - -#[derive(Clone, Default)] -enum GridCell { - Cell(TableCtxCell), - Spanned(usize, usize), - #[default] - Missing, -} - -impl GridCell { - fn as_cell(&self) -> Option<&TableCtxCell> { - if let Self::Cell(v) = self { - Some(v) - } else { - None - } - } - - fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> { - if let Self::Cell(v) = self { - Some(v) - } else { - None - } - } - - fn into_cell(self) -> Option { - if let Self::Cell(v) = self { - Some(v) - } else { - None - } - } -} - -#[derive(Clone)] -struct TableCtxCell { - x: u32, - y: u32, - rowspan: NonZeroUsize, - colspan: NonZeroUsize, - kind: Smart, - headers: TableCellHeaders, - nodes: Vec, -} - -impl TableCtxCell { - fn unwrap_kind(&self) -> TableCellKind { - self.kind.unwrap_or_else(|| unreachable!()) - } -} - -impl TableCtx { - fn new(id: TableId, table: Packed) -> Self { - Self { id, table: table.clone(), rows: Vec::new() } - } - - fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> { - let cell = self.rows.get(y)?.get(x)?; - self.resolve_cell(cell) - } - - fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut 
TableCtxCell> { - let cell = self.rows.get_mut(y)?.get_mut(x)?; - match cell { - GridCell::Cell(cell) => { - // HACK: Workaround for the second mutable borrow when resolving - // the spanned cell. - Some(unsafe { std::mem::transmute(cell) }) - } - &mut GridCell::Spanned(x, y) => self.rows[y][x].as_cell_mut(), - GridCell::Missing => None, - } - } - - fn contains(&self, cell: &Packed) -> bool { - let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); - let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); - self.get(x, y).is_some() - } - - fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> { - match cell { - GridCell::Cell(cell) => Some(cell), - &GridCell::Spanned(x, y) => self.rows[y][x].as_cell(), - GridCell::Missing => None, - } - } - - fn insert(&mut self, cell: Packed, nodes: Vec) { - let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); - let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); - let rowspan = cell.rowspan(StyleChain::default()); - let colspan = cell.colspan(StyleChain::default()); - let kind = cell.kind(StyleChain::default()); - - // Extend the table grid to fit this cell. - let required_height = y + rowspan.get(); - let required_width = x + colspan.get(); - if self.rows.len() < required_height { - self.rows - .resize(required_height, vec![GridCell::Missing; required_width]); - } - let row = &mut self.rows[y]; - if row.len() < required_width { - row.resize_with(required_width, || GridCell::Missing); - } - - // Store references to the cell for all spanned cells. 
- for i in y..y + rowspan.get() { - for j in x..x + colspan.get() { - self.rows[i][j] = GridCell::Spanned(x, y); - } - } - - self.rows[y][x] = GridCell::Cell(TableCtxCell { - x: x as u32, - y: y as u32, - rowspan, - colspan, - kind, - headers: TableCellHeaders::NONE, - nodes, - }); - } - - fn build_table(mut self, mut nodes: Vec) -> Vec { - // Table layouting ensures that there are no overlapping cells, and that - // any gaps left by the user are filled with empty cells. - if self.rows.is_empty() { - return nodes; - } - let height = self.rows.len(); - let width = self.rows[0].len(); - - // Only generate row groups such as `THead`, `TFoot`, and `TBody` if - // there are no rows with mixed cell kinds. - let mut gen_row_groups = true; - let row_kinds = (self.rows.iter()) - .map(|row| { - row.iter() - .filter_map(|cell| self.resolve_cell(cell)) - .map(|cell| cell.kind) - .fold(Smart::Auto, |a, b| { - if let Smart::Custom(TableCellKind::Header(_, scope)) = b { - gen_row_groups &= scope == TableHeaderScope::Column; - } - if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) { - gen_row_groups &= a == b; - } - a.or(b) - }) - .unwrap_or(TableCellKind::Data) - }) - .collect::>(); - - // Fixup all missing cell kinds. - for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) { - let default_kind = - if gen_row_groups { row_kind } else { TableCellKind::Data }; - for cell in row.iter_mut() { - let Some(cell) = cell.as_cell_mut() else { continue }; - cell.kind = cell.kind.or(Smart::Custom(default_kind)); - } - } - - // Explicitly set the headers attribute for cells. 
- for x in 0..width { - let mut column_header = None; - for y in 0..height { - self.resolve_cell_headers( - (x, y), - &mut column_header, - TableHeaderScope::refers_to_column, - ); - } - } - for y in 0..height { - let mut row_header = None; - for x in 0..width { - self.resolve_cell_headers( - (x, y), - &mut row_header, - TableHeaderScope::refers_to_row, - ); - } - } - - let mut chunk_kind = row_kinds[0]; - let mut row_chunk = Vec::new(); - for (row, row_kind) in self.rows.into_iter().zip(row_kinds) { - let row_nodes = row - .into_iter() - .filter_map(|cell| { - let cell = cell.into_cell()?; - let span = TableCellSpan { - rows: cell.rowspan.try_into().unwrap(), - cols: cell.colspan.try_into().unwrap(), - }; - let tag = match cell.unwrap_kind() { - TableCellKind::Header(_, scope) => { - let id = table_cell_id(self.id, cell.x, cell.y); - let scope = table_header_scope(scope); - TagKind::TH( - TableHeaderCell::new(scope) - .with_span(span) - .with_headers(cell.headers), - ) - .with_id(Some(id)) - } - TableCellKind::Footer | TableCellKind::Data => TagKind::TD( - TableDataCell::new() - .with_span(span) - .with_headers(cell.headers), - ) - .into(), - }; - - Some(TagNode::Group(tag, cell.nodes)) - }) - .collect(); - - let row = TagNode::Group(TagKind::TR.into(), row_nodes); - - // Push the `TR` tags directly. - if !gen_row_groups { - nodes.push(row); - continue; - } - - // Generate row groups. - if !should_group_rows(chunk_kind, row_kind) { - let tag = match chunk_kind { - TableCellKind::Header(..) => TagKind::THead, - TableCellKind::Footer => TagKind::TFoot, - TableCellKind::Data => TagKind::TBody, - }; - nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk))); - - chunk_kind = row_kind; - } - row_chunk.push(row); - } - - if !row_chunk.is_empty() { - let tag = match chunk_kind { - TableCellKind::Header(..) 
=> TagKind::THead, - TableCellKind::Footer => TagKind::TFoot, - TableCellKind::Data => TagKind::TBody, - }; - nodes.push(TagNode::Group(tag.into(), row_chunk)); - } - - nodes - } - - fn resolve_cell_headers( - &mut self, - (x, y): (usize, usize), - current_header: &mut Option<(NonZeroU32, TagId)>, - refers_to_dir: F, - ) where - F: Fn(&TableHeaderScope) -> bool, - { - let table_id = self.id; - let Some(cell) = self.get_mut(x, y) else { return }; - - if let Some((prev_level, cell_id)) = current_header.clone() { - // The `Headers` attribute is also set for parent headers. - let mut is_parent_header = true; - if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { - if refers_to_dir(&scope) { - is_parent_header = prev_level < level; - } - } - - if is_parent_header && !cell.headers.ids.contains(&cell_id) { - cell.headers.ids.push(cell_id.clone()); - } - } - - if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { - if refers_to_dir(&scope) { - let tag_id = table_cell_id(table_id, x as u32, y as u32); - *current_header = Some((level, tag_id)); - } - } - } -} - -fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool { - match (a, b) { - (TableCellKind::Header(..), TableCellKind::Header(..)) => true, - (TableCellKind::Footer, TableCellKind::Footer) => true, - (TableCellKind::Data, TableCellKind::Data) => true, - (_, _) => false, - } -} - -fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { - let mut bytes = [0; 12]; - bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes()); - bytes[4..8].copy_from_slice(&x.to_ne_bytes()); - bytes[8..12].copy_from_slice(&y.to_ne_bytes()); - TagId::from_bytes(&bytes) -} - -#[derive(Clone)] -pub(crate) enum TagNode { - Group(Tag, Vec), - Leaf(Identifier), - /// Allows inserting a placeholder into the tag tree. - /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. 
- Placeholder(Placeholder), -} - -#[derive(Clone, Copy)] -pub(crate) struct Placeholder(usize); - -impl Tags { - pub(crate) fn new() -> Self { - Self { - stack: Vec::new(), - placeholders: Vec::new(), - in_artifact: None, - - tree: Vec::new(), - link_id: LinkId(0), - table_id: TableId(0), - } - } - - pub(crate) fn reserve_placeholder(&mut self) -> Placeholder { - let idx = self.placeholders.len(); - self.placeholders.push(OnceCell::new()); - Placeholder(idx) - } - - pub(crate) fn init_placeholder(&mut self, placeholder: Placeholder, node: Node) { - self.placeholders[placeholder.0] - .set(node) - .map_err(|_| ()) - .expect("placeholder to be uninitialized"); - } - - pub(crate) fn take_placeholder(&mut self, placeholder: Placeholder) -> Node { - self.placeholders[placeholder.0] - .take() - .expect("initialized placeholder node") - } - - /// Returns the current parent's list of children and the structure type ([Tag]). - /// In case of the document root the structure type will be `None`. - pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> { - self.stack.last_mut().map(|e| &mut e.kind) - } - - pub(crate) fn push(&mut self, node: TagNode) { - if let Some(entry) = self.stack.last_mut() { - entry.nodes.push(node); - } else { - self.tree.push(node); - } - } - - pub(crate) fn build_tree(&mut self) -> TagTree { - let children = std::mem::take(&mut self.tree) - .into_iter() - .map(|node| self.resolve_node(node)) - .collect::>(); - TagTree::from(children) - } - - /// Resolves [`Placeholder`] nodes. 
- fn resolve_node(&mut self, node: TagNode) -> Node { - match node { - TagNode::Group(tag, nodes) => { - let children = nodes - .into_iter() - .map(|node| self.resolve_node(node)) - .collect::>(); - Node::Group(TagGroup::with_children(tag, children)) - } - TagNode::Leaf(identifier) => Node::Leaf(identifier), - TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder), - } - } - - fn context_supports(&self, _tag: &StackEntryKind) -> bool { - // TODO: generate using: https://pdfa.org/resource/iso-ts-32005-hierarchical-inclusion-rules/ - true - } - - fn next_link_id(&mut self) -> LinkId { - self.link_id.0 += 1; - self.link_id - } - - fn next_table_id(&mut self) -> TableId { - self.table_id.0 += 1; - self.table_id - } -} - -/// Automatically calls [`Surface::end_tagged`] when dropped. -pub(crate) struct TagHandle<'a, 'b> { - surface: &'b mut Surface<'a>, -} - -impl Drop for TagHandle<'_, '_> { - fn drop(&mut self) { - self.surface.end_tagged(); - } -} - -impl<'a> TagHandle<'a, '_> { - pub(crate) fn surface<'c>(&'c mut self) -> &'c mut Surface<'a> { - &mut self.surface - } -} - -/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] -/// when dropped. -pub(crate) fn start_marked<'a, 'b>( - gc: &mut GlobalContext, - surface: &'b mut Surface<'a>, -) -> TagHandle<'a, 'b> { - start_content(gc, surface, ContentTag::Other) -} - -/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] -/// when dropped. 
-pub(crate) fn start_span<'a, 'b>( - gc: &mut GlobalContext, - surface: &'b mut Surface<'a>, - span: SpanTag, -) -> TagHandle<'a, 'b> { - start_content(gc, surface, ContentTag::Span(span)) -} - -fn start_content<'a, 'b>( - gc: &mut GlobalContext, - surface: &'b mut Surface<'a>, - content: ContentTag, -) -> TagHandle<'a, 'b> { - let content = if let Some((_, kind)) = gc.tags.in_artifact { - let ty = artifact_type(kind); - ContentTag::Artifact(ty) - } else if let Some(StackEntryKind::Table(_)) = gc.tags.stack.last().map(|e| &e.kind) { - // Mark any direct child of a table as an aritfact. Any real content - // will be wrapped inside a `TableCell`. - ContentTag::Artifact(ArtifactType::Other) - } else { - content - }; - let id = surface.start_tagged(content); - gc.tags.push(TagNode::Leaf(id)); - TagHandle { surface } -} - -/// Add all annotations that were found in the page frame. -pub(crate) fn add_annotations( - gc: &mut GlobalContext, - page: &mut Page, - annotations: Vec, -) { - for annotation in annotations.into_iter() { - let LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } = - annotation; - let annot = krilla::annotation::Annotation::new_link( - krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target), - alt, - ); - let annot_id = page.add_tagged_annotation(annot); - gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id)); - } -} +mod outline; +mod table; pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { if gc.tags.in_artifact.is_some() { @@ -773,16 +182,226 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { gc.tags.push(node); } -fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) { - gc.tags.in_artifact = Some((loc, kind)); +/// Add all annotations that were found in the page frame. 
+pub(crate) fn add_annotations( + gc: &mut GlobalContext, + page: &mut Page, + annotations: Vec, +) { + for annotation in annotations.into_iter() { + let LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } = + annotation; + let annot = krilla::annotation::Annotation::new_link( + krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target), + alt, + ); + let annot_id = page.add_tagged_annotation(annot); + gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id)); + } } -fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope { - match scope { - TableHeaderScope::Both => krilla::tagging::TableHeaderScope::Both, - TableHeaderScope::Column => krilla::tagging::TableHeaderScope::Column, - TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row, +pub(crate) struct Tags { + /// The intermediary stack of nested tag groups. + pub(crate) stack: Vec, + /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. + pub(crate) placeholders: Vec>, + pub(crate) in_artifact: Option<(Location, ArtifactKind)>, + /// Used to group multiple link annotations using quad points. + pub(crate) link_id: LinkId, + /// Used to generate IDs referenced in table `Headers` attributes. + /// The IDs must be document wide unique. + pub(crate) table_id: TableId, + + /// The output. 
+ pub(crate) tree: Vec, +} + +impl Tags { + pub(crate) fn new() -> Self { + Self { + stack: Vec::new(), + placeholders: Vec::new(), + in_artifact: None, + + tree: Vec::new(), + link_id: LinkId(0), + table_id: TableId(0), + } } + + pub(crate) fn reserve_placeholder(&mut self) -> Placeholder { + let idx = self.placeholders.len(); + self.placeholders.push(OnceCell::new()); + Placeholder(idx) + } + + pub(crate) fn init_placeholder(&mut self, placeholder: Placeholder, node: Node) { + self.placeholders[placeholder.0] + .set(node) + .map_err(|_| ()) + .expect("placeholder to be uninitialized"); + } + + pub(crate) fn take_placeholder(&mut self, placeholder: Placeholder) -> Node { + self.placeholders[placeholder.0] + .take() + .expect("initialized placeholder node") + } + + /// Returns the current parent's list of children and the structure type ([Tag]). + /// In case of the document root the structure type will be `None`. + pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> { + self.stack.last_mut().map(|e| &mut e.kind) + } + + pub(crate) fn push(&mut self, node: TagNode) { + if let Some(entry) = self.stack.last_mut() { + entry.nodes.push(node); + } else { + self.tree.push(node); + } + } + + pub(crate) fn build_tree(&mut self) -> TagTree { + let children = std::mem::take(&mut self.tree) + .into_iter() + .map(|node| self.resolve_node(node)) + .collect::>(); + TagTree::from(children) + } + + /// Resolves [`Placeholder`] nodes. 
+ fn resolve_node(&mut self, node: TagNode) -> Node { + match node { + TagNode::Group(tag, nodes) => { + let children = nodes + .into_iter() + .map(|node| self.resolve_node(node)) + .collect::>(); + Node::Group(TagGroup::with_children(tag, children)) + } + TagNode::Leaf(identifier) => Node::Leaf(identifier), + TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder), + } + } + + fn context_supports(&self, _tag: &StackEntryKind) -> bool { + // TODO: generate using: https://pdfa.org/resource/iso-ts-32005-hierarchical-inclusion-rules/ + true + } + + fn next_link_id(&mut self) -> LinkId { + self.link_id.0 += 1; + self.link_id + } + + fn next_table_id(&mut self) -> TableId { + self.table_id.0 += 1; + self.table_id + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct TableId(u32); + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct LinkId(u32); + +pub(crate) struct StackEntry { + pub(crate) loc: Location, + pub(crate) kind: StackEntryKind, + pub(crate) nodes: Vec, +} + +pub(crate) enum StackEntryKind { + Standard(Tag), + Outline(OutlineCtx), + OutlineEntry(Packed), + Table(TableCtx), + TableCell(Packed), + Link(LinkId, Packed), +} + +impl StackEntryKind { + pub(crate) fn as_standard_mut(&mut self) -> Option<&mut Tag> { + if let Self::Standard(v) = self { + Some(v) + } else { + None + } + } +} + +#[derive(Clone)] +pub(crate) enum TagNode { + Group(Tag, Vec), + Leaf(Identifier), + /// Allows inserting a placeholder into the tag tree. + /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. + Placeholder(Placeholder), +} + +#[derive(Clone, Copy)] +pub(crate) struct Placeholder(usize); + +/// Automatically calls [`Surface::end_tagged`] when dropped. 
+pub(crate) struct TagHandle<'a, 'b> { + surface: &'b mut Surface<'a>, +} + +impl Drop for TagHandle<'_, '_> { + fn drop(&mut self) { + self.surface.end_tagged(); + } +} + +impl<'a> TagHandle<'a, '_> { + pub(crate) fn surface<'c>(&'c mut self) -> &'c mut Surface<'a> { + self.surface + } +} + +/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] +/// when dropped. +pub(crate) fn start_marked<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, +) -> TagHandle<'a, 'b> { + start_content(gc, surface, ContentTag::Other) +} + +/// Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] +/// when dropped. +pub(crate) fn start_span<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, + span: SpanTag, +) -> TagHandle<'a, 'b> { + start_content(gc, surface, ContentTag::Span(span)) +} + +fn start_content<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, + content: ContentTag, +) -> TagHandle<'a, 'b> { + let content = if let Some((_, kind)) = gc.tags.in_artifact { + let ty = artifact_type(kind); + ContentTag::Artifact(ty) + } else if let Some(StackEntryKind::Table(_)) = gc.tags.stack.last().map(|e| &e.kind) { + // Mark any direct child of a table as an aritfact. Any real content + // will be wrapped inside a `TableCell`. 
+ ContentTag::Artifact(ArtifactType::Other) + } else { + content + }; + let id = surface.start_tagged(content); + gc.tags.push(TagNode::Leaf(id)); + TagHandle { surface } +} + +fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) { + gc.tags.in_artifact = Some((loc, kind)); } fn artifact_type(kind: ArtifactKind) -> ArtifactType { diff --git a/crates/typst-pdf/src/tags/outline.rs b/crates/typst-pdf/src/tags/outline.rs new file mode 100644 index 000000000..9fbeb8dcb --- /dev/null +++ b/crates/typst-pdf/src/tags/outline.rs @@ -0,0 +1,74 @@ +use krilla::tagging::TagKind; +use typst_library::foundations::Packed; +use typst_library::model::OutlineEntry; + +use crate::tags::TagNode; + +pub(crate) struct OutlineCtx { + stack: Vec, +} + +impl OutlineCtx { + pub(crate) fn new() -> Self { + Self { stack: Vec::new() } + } + + pub(crate) fn insert( + &mut self, + outline_nodes: &mut Vec, + entry: Packed, + nodes: Vec, + ) { + let expected_len = entry.level.get() - 1; + if self.stack.len() < expected_len { + self.stack.resize_with(expected_len, OutlineSection::new); + } else { + while self.stack.len() > expected_len { + self.finish_section(outline_nodes); + } + } + + let section_entry = TagNode::Group(TagKind::TOCI.into(), nodes); + self.push(outline_nodes, section_entry); + } + + fn finish_section(&mut self, outline_nodes: &mut Vec) { + let sub_section = self.stack.pop().unwrap().into_tag(); + self.push(outline_nodes, sub_section); + } + + fn push(&mut self, outline_nodes: &mut Vec, entry: TagNode) { + match self.stack.last_mut() { + Some(section) => section.push(entry), + None => outline_nodes.push(entry), + } + } + + pub(crate) fn build_outline( + mut self, + mut outline_nodes: Vec, + ) -> Vec { + while !self.stack.is_empty() { + self.finish_section(&mut outline_nodes); + } + outline_nodes + } +} + +pub(crate) struct OutlineSection { + entries: Vec, +} + +impl OutlineSection { + const fn new() -> Self { + OutlineSection { entries: Vec::new() } + } + + 
fn push(&mut self, entry: TagNode) { + self.entries.push(entry); + } + + fn into_tag(self) -> TagNode { + TagNode::Group(TagKind::TOC.into(), self.entries) + } +} diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs new file mode 100644 index 000000000..240da4c33 --- /dev/null +++ b/crates/typst-pdf/src/tags/table.rs @@ -0,0 +1,330 @@ +use std::num::{NonZeroU32, NonZeroUsize}; + +use krilla::tagging::{ + TableCellHeaders, TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, + TagKind, +}; +use typst_library::foundations::{Packed, Smart, StyleChain}; +use typst_library::model::{TableCell, TableCellKind, TableElem, TableHeaderScope}; + +use crate::tags::{TableId, TagNode}; + +pub(crate) struct TableCtx { + pub(crate) id: TableId, + pub(crate) table: Packed, + rows: Vec>, +} + +impl TableCtx { + pub(crate) fn new(id: TableId, table: Packed) -> Self { + Self { id, table: table.clone(), rows: Vec::new() } + } + + fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> { + let cell = self.rows.get(y)?.get(x)?; + self.resolve_cell(cell) + } + + fn get_mut(&mut self, x: usize, y: usize) -> Option<&mut TableCtxCell> { + let cell = self.rows.get_mut(y)?.get_mut(x)?; + match cell { + GridCell::Cell(cell) => { + // HACK: Workaround for the second mutable borrow when resolving + // the spanned cell. 
+ Some(unsafe { std::mem::transmute(cell) }) + } + &mut GridCell::Spanned(x, y) => self.rows[y][x].as_cell_mut(), + GridCell::Missing => None, + } + } + + pub(crate) fn contains(&self, cell: &Packed) -> bool { + let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); + self.get(x, y).is_some() + } + + fn resolve_cell<'a>(&'a self, cell: &'a GridCell) -> Option<&'a TableCtxCell> { + match cell { + GridCell::Cell(cell) => Some(cell), + &GridCell::Spanned(x, y) => self.rows[y][x].as_cell(), + GridCell::Missing => None, + } + } + + pub(crate) fn insert(&mut self, cell: Packed, nodes: Vec) { + let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); + let rowspan = cell.rowspan(StyleChain::default()); + let colspan = cell.colspan(StyleChain::default()); + let kind = cell.kind(StyleChain::default()); + + // Extend the table grid to fit this cell. + let required_height = y + rowspan.get(); + let required_width = x + colspan.get(); + if self.rows.len() < required_height { + self.rows + .resize(required_height, vec![GridCell::Missing; required_width]); + } + let row = &mut self.rows[y]; + if row.len() < required_width { + row.resize_with(required_width, || GridCell::Missing); + } + + // Store references to the cell for all spanned cells. + for i in y..y + rowspan.get() { + for j in x..x + colspan.get() { + self.rows[i][j] = GridCell::Spanned(x, y); + } + } + + self.rows[y][x] = GridCell::Cell(TableCtxCell { + x: x as u32, + y: y as u32, + rowspan, + colspan, + kind, + headers: TableCellHeaders::NONE, + nodes, + }); + } + + pub(crate) fn build_table(mut self, mut nodes: Vec) -> Vec { + // Table layouting ensures that there are no overlapping cells, and that + // any gaps left by the user are filled with empty cells. 
+ if self.rows.is_empty() { + return nodes; + } + let height = self.rows.len(); + let width = self.rows[0].len(); + + // Only generate row groups such as `THead`, `TFoot`, and `TBody` if + // there are no rows with mixed cell kinds. + let mut gen_row_groups = true; + let row_kinds = (self.rows.iter()) + .map(|row| { + row.iter() + .filter_map(|cell| self.resolve_cell(cell)) + .map(|cell| cell.kind) + .fold(Smart::Auto, |a, b| { + if let Smart::Custom(TableCellKind::Header(_, scope)) = b { + gen_row_groups &= scope == TableHeaderScope::Column; + } + if let (Smart::Custom(a), Smart::Custom(b)) = (a, b) { + gen_row_groups &= a == b; + } + a.or(b) + }) + .unwrap_or(TableCellKind::Data) + }) + .collect::>(); + + // Fixup all missing cell kinds. + for (row, row_kind) in self.rows.iter_mut().zip(row_kinds.iter().copied()) { + let default_kind = + if gen_row_groups { row_kind } else { TableCellKind::Data }; + for cell in row.iter_mut() { + let Some(cell) = cell.as_cell_mut() else { continue }; + cell.kind = cell.kind.or(Smart::Custom(default_kind)); + } + } + + // Explicitly set the headers attribute for cells. 
+ for x in 0..width { + let mut column_header = None; + for y in 0..height { + self.resolve_cell_headers( + (x, y), + &mut column_header, + TableHeaderScope::refers_to_column, + ); + } + } + for y in 0..height { + let mut row_header = None; + for x in 0..width { + self.resolve_cell_headers( + (x, y), + &mut row_header, + TableHeaderScope::refers_to_row, + ); + } + } + + let mut chunk_kind = row_kinds[0]; + let mut row_chunk = Vec::new(); + for (row, row_kind) in self.rows.into_iter().zip(row_kinds) { + let row_nodes = row + .into_iter() + .filter_map(|cell| { + let cell = cell.into_cell()?; + let span = TableCellSpan { + rows: cell.rowspan.try_into().unwrap(), + cols: cell.colspan.try_into().unwrap(), + }; + let tag = match cell.unwrap_kind() { + TableCellKind::Header(_, scope) => { + let id = table_cell_id(self.id, cell.x, cell.y); + let scope = table_header_scope(scope); + TagKind::TH( + TableHeaderCell::new(scope) + .with_span(span) + .with_headers(cell.headers), + ) + .with_id(Some(id)) + } + TableCellKind::Footer | TableCellKind::Data => TagKind::TD( + TableDataCell::new() + .with_span(span) + .with_headers(cell.headers), + ) + .into(), + }; + + Some(TagNode::Group(tag, cell.nodes)) + }) + .collect(); + + let row = TagNode::Group(TagKind::TR.into(), row_nodes); + + // Push the `TR` tags directly. + if !gen_row_groups { + nodes.push(row); + continue; + } + + // Generate row groups. + if !should_group_rows(chunk_kind, row_kind) { + let tag = match chunk_kind { + TableCellKind::Header(..) => TagKind::THead, + TableCellKind::Footer => TagKind::TFoot, + TableCellKind::Data => TagKind::TBody, + }; + nodes.push(TagNode::Group(tag.into(), std::mem::take(&mut row_chunk))); + + chunk_kind = row_kind; + } + row_chunk.push(row); + } + + if !row_chunk.is_empty() { + let tag = match chunk_kind { + TableCellKind::Header(..) 
=> TagKind::THead, + TableCellKind::Footer => TagKind::TFoot, + TableCellKind::Data => TagKind::TBody, + }; + nodes.push(TagNode::Group(tag.into(), row_chunk)); + } + + nodes + } + + fn resolve_cell_headers( + &mut self, + (x, y): (usize, usize), + current_header: &mut Option<(NonZeroU32, TagId)>, + refers_to_dir: F, + ) where + F: Fn(&TableHeaderScope) -> bool, + { + let table_id = self.id; + let Some(cell) = self.get_mut(x, y) else { return }; + + if let Some((prev_level, cell_id)) = current_header.clone() { + // The `Headers` attribute is also set for parent headers. + let mut is_parent_header = true; + if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { + if refers_to_dir(&scope) { + is_parent_header = prev_level < level; + } + } + + if is_parent_header && !cell.headers.ids.contains(&cell_id) { + cell.headers.ids.push(cell_id.clone()); + } + } + + if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { + if refers_to_dir(&scope) { + let tag_id = table_cell_id(table_id, x as u32, y as u32); + *current_header = Some((level, tag_id)); + } + } + } +} + +#[derive(Clone, Default)] +enum GridCell { + Cell(TableCtxCell), + Spanned(usize, usize), + #[default] + Missing, +} + +impl GridCell { + fn as_cell(&self) -> Option<&TableCtxCell> { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } + + fn as_cell_mut(&mut self) -> Option<&mut TableCtxCell> { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } + + fn into_cell(self) -> Option { + if let Self::Cell(v) = self { + Some(v) + } else { + None + } + } +} + +#[derive(Clone)] +struct TableCtxCell { + x: u32, + y: u32, + rowspan: NonZeroUsize, + colspan: NonZeroUsize, + kind: Smart, + headers: TableCellHeaders, + nodes: Vec, +} + +impl TableCtxCell { + fn unwrap_kind(&self) -> TableCellKind { + self.kind.unwrap_or_else(|| unreachable!()) + } +} + +fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool { + match (a, b) { + (TableCellKind::Header(..), 
TableCellKind::Header(..)) => true, + (TableCellKind::Footer, TableCellKind::Footer) => true, + (TableCellKind::Data, TableCellKind::Data) => true, + (_, _) => false, + } +} + +fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { + let mut bytes = [0; 12]; + bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes()); + bytes[4..8].copy_from_slice(&x.to_ne_bytes()); + bytes[8..12].copy_from_slice(&y.to_ne_bytes()); + TagId::from_bytes(&bytes) +} + +fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope { + match scope { + TableHeaderScope::Both => krilla::tagging::TableHeaderScope::Both, + TableHeaderScope::Column => krilla::tagging::TableHeaderScope::Column, + TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row, + } +} From 0bc39338a1bec42e8b64c3b259a960bc9604d372 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 3 Jul 2025 13:56:10 +0200 Subject: [PATCH 31/76] fix: handle some edge cases instead of panicking --- crates/typst-pdf/src/convert.rs | 2 +- crates/typst-pdf/src/link.rs | 5 +- crates/typst-pdf/src/tags/mod.rs | 127 ++++++++++++++++++++--------- crates/typst-pdf/src/tags/table.rs | 4 +- 4 files changed, 94 insertions(+), 44 deletions(-) diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index a8a7e88b2..eadcc6274 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -293,7 +293,7 @@ pub(crate) fn handle_frame( handle_image(gc, fc, image, *size, surface, *span)? 
} FrameItem::Link(dest, size) => handle_link(fc, gc, dest, *size), - FrameItem::Tag(Tag::Start(elem)) => tags::handle_start(gc, elem), + FrameItem::Tag(Tag::Start(elem)) => tags::handle_start(gc, elem)?, FrameItem::Tag(Tag::End(loc, _)) => tags::handle_end(gc, *loc), } diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index 32949068b..eef5421cc 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -8,7 +8,7 @@ use typst_library::layout::{Abs, Point, Position, Size}; use typst_library::model::Destination; use crate::convert::{FrameContext, GlobalContext}; -use crate::tags::{self, Placeholder, StackEntryKind, TagNode}; +use crate::tags::{self, Placeholder, TagNode}; use crate::util::{AbsExt, PointExt}; pub(crate) struct LinkAnnotation { @@ -49,8 +49,7 @@ pub(crate) fn handle_link( } }; - let entry = gc.tags.stack.last_mut().expect("a link parent"); - let StackEntryKind::Link(link_id, ref link) = entry.kind else { + let Some((link_id, link)) = gc.tags.find_parent_link() else { unreachable!("expected a link parent") }; let alt = link.alt.as_ref().map(EcoString::to_string); diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 99b52d555..1cff7f92e 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -2,12 +2,14 @@ use std::cell::OnceCell; use std::num::NonZeroU32; use ecow::EcoString; +use krilla::configure::Validator; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, ContentTag, Identifier, Node, SpanTag, Tag, TagBuilder, TagGroup, - TagKind, TagTree, + ArtifactType, ContentTag, Identifier, Node, SpanTag, TableDataCell, Tag, TagBuilder, + TagGroup, TagKind, TagTree, }; +use typst_library::diag::SourceResult; use typst_library::foundations::{Content, LinkMarker, Packed, StyleChain}; use typst_library::introspection::Location; use typst_library::layout::RepeatElem; @@ -26,21 +28,21 @@ use 
crate::tags::table::TableCtx; mod outline; mod table; -pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { +pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) -> SourceResult<()> { if gc.tags.in_artifact.is_some() { // Don't nest artifacts - return; + return Ok(()); } - let loc = elem.location().unwrap(); + let loc = elem.location().expect("elem to be locatable"); if let Some(artifact) = elem.to_packed::() { let kind = artifact.kind(StyleChain::default()); start_artifact(gc, loc, kind); - return; + return Ok(()); } else if let Some(_) = elem.to_packed::() { start_artifact(gc, loc, ArtifactKind::Other); - return; + return Ok(()); } let tag: Tag = if let Some(pdf_tag) = elem.to_packed::() { @@ -54,11 +56,11 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { let name = heading.body.plain_text().to_string(); TagKind::Hn(level, Some(name)).into() } else if let Some(_) = elem.to_packed::() { - push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new())); - return; + push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()))?; + return Ok(()); } else if let Some(entry) = elem.to_packed::() { - push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone())); - return; + push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone()))?; + return Ok(()); } else if let Some(_) = elem.to_packed::() { let alt = None; // TODO TagKind::Figure.with_alt_text(alt) @@ -73,7 +75,7 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { if figure_tag.alt_text.is_none() { figure_tag.alt_text = alt; } - return; + return Ok(()); } else { TagKind::Figure.with_alt_text(alt) } @@ -82,19 +84,16 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { } else if let Some(table) = elem.to_packed::() { let table_id = gc.tags.next_table_id(); let ctx = TableCtx::new(table_id, table.clone()); - push_stack(gc, loc, StackEntryKind::Table(ctx)); - return; + push_stack(gc, loc, StackEntryKind::Table(ctx))?; + 
return Ok(()); } else if let Some(cell) = elem.to_packed::() { - let parent = gc.tags.stack.last_mut().expect("table"); - let StackEntryKind::Table(table_ctx) = &mut parent.kind else { - unreachable!("expected table") - }; + let table_ctx = gc.tags.parent_table(); // Only repeated table headers and footer cells are layed out multiple // times. Mark duplicate headers as artifacts, since they have no // semantic meaning in the tag tree, which doesn't use page breaks for // it's semantic structure. - if table_ctx.contains(cell) { + if table_ctx.is_some_and(|ctx| ctx.contains(cell)) { // TODO: currently the first layouted cell is picked to be part of // the tag tree, for repeating footers this will be the cell on the // first page. Maybe it should be the cell on the last page, but that @@ -102,26 +101,38 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) { // on the frames to figure out if there are other footers following. start_artifact(gc, loc, ArtifactKind::Other); } else { - push_stack(gc, loc, StackEntryKind::TableCell(cell.clone())); + push_stack(gc, loc, StackEntryKind::TableCell(cell.clone()))?; } - return; + return Ok(()); } else if let Some(link) = elem.to_packed::() { let link_id = gc.tags.next_link_id(); - push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone())); - return; + push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone()))?; + return Ok(()); } else { - return; + return Ok(()); }; - push_stack(gc, loc, StackEntryKind::Standard(tag)); + push_stack(gc, loc, StackEntryKind::Standard(tag))?; + + Ok(()) } -fn push_stack(gc: &mut GlobalContext, loc: Location, kind: StackEntryKind) { +fn push_stack( + gc: &mut GlobalContext, + loc: Location, + kind: StackEntryKind, +) -> SourceResult<()> { if !gc.tags.context_supports(&kind) { - // TODO: error or warning? 
+ if gc.options.standards.config.validator() == Validator::UA1 { + // TODO: error + } else { + // TODO: warning + } } gc.tags.stack.push(StackEntry { loc, kind, nodes: Vec::new() }); + + Ok(()) } pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { @@ -143,13 +154,20 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { TagNode::Group(TagKind::TOC.into(), nodes) } StackEntryKind::OutlineEntry(outline_entry) => { - let parent = gc.tags.stack.last_mut().expect("outline"); - let StackEntryKind::Outline(outline_ctx) = &mut parent.kind else { - unreachable!("expected outline") + let parent = gc.tags.stack.last_mut().and_then(|parent| { + let ctx = parent.kind.as_outline_mut()?; + Some((&mut parent.nodes, ctx)) + }); + let Some((parent_nodes, outline_ctx)) = parent else { + // PDF/UA compliance of the structure hierarchy is checked + // elsewhere. While this doesn't make a lot of sense, just + // avoid crashing here. + let tag = TagKind::TOCI.into(); + gc.tags.push(TagNode::Group(tag, entry.nodes)); + return; }; - outline_ctx.insert(&mut parent.nodes, outline_entry, entry.nodes); - + outline_ctx.insert(parent_nodes, outline_entry, entry.nodes); return; } StackEntryKind::Table(ctx) => { @@ -158,13 +176,16 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { TagNode::Group(TagKind::Table(summary).into(), nodes) } StackEntryKind::TableCell(cell) => { - let parent = gc.tags.stack.last_mut().expect("table"); - let StackEntryKind::Table(table_ctx) = &mut parent.kind else { - unreachable!("expected table") + let Some(table_ctx) = gc.tags.parent_table() else { + // PDF/UA compliance of the structure hierarchy is checked + // elsewhere. While this doesn't make a lot of sense, just + // avoid crashing here. 
+ let tag = TagKind::TD(TableDataCell::new()).into(); + gc.tags.push(TagNode::Group(tag, entry.nodes)); + return; }; table_ctx.insert(cell, entry.nodes); - return; } StackEntryKind::Link(_, link) => { @@ -248,12 +269,18 @@ impl Tags { .expect("initialized placeholder node") } - /// Returns the current parent's list of children and the structure type ([Tag]). - /// In case of the document root the structure type will be `None`. pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> { self.stack.last_mut().map(|e| &mut e.kind) } + pub(crate) fn parent_table(&mut self) -> Option<&mut TableCtx> { + self.parent()?.as_table_mut() + } + + pub(crate) fn find_parent_link(&self) -> Option<(LinkId, &Packed)> { + self.stack.iter().rev().find_map(|entry| entry.kind.as_link()) + } + pub(crate) fn push(&mut self, node: TagNode) { if let Some(entry) = self.stack.last_mut() { entry.nodes.push(node); @@ -330,6 +357,30 @@ impl StackEntryKind { None } } + + pub(crate) fn as_outline_mut(&mut self) -> Option<&mut OutlineCtx> { + if let Self::Outline(v) = self { + Some(v) + } else { + None + } + } + + pub(crate) fn as_table_mut(&mut self) -> Option<&mut TableCtx> { + if let Self::Table(v) = self { + Some(v) + } else { + None + } + } + + pub(crate) fn as_link(&self) -> Option<(LinkId, &Packed)> { + if let Self::Link(id, link) = self { + Some((*id, link)) + } else { + None + } + } } #[derive(Clone)] diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index 240da4c33..ad67c4846 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -159,8 +159,8 @@ impl TableCtx { .filter_map(|cell| { let cell = cell.into_cell()?; let span = TableCellSpan { - rows: cell.rowspan.try_into().unwrap(), - cols: cell.colspan.try_into().unwrap(), + rows: cell.rowspan.try_into().unwrap_or(NonZeroU32::MAX), + cols: cell.colspan.try_into().unwrap_or(NonZeroU32::MAX), }; let tag = match cell.unwrap_kind() { TableCellKind::Header(_, scope) => 
{ From f324accff908e8ffaae8d00eacd3040d18b0ff1e Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 3 Jul 2025 13:56:29 +0200 Subject: [PATCH 32/76] feat: generate paragraphs --- crates/typst-pdf/src/tags/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 1cff7f92e..a1a2c5e98 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -15,7 +15,7 @@ use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody, - OutlineEntry, TableCell, TableElem, + OutlineEntry, ParElem, TableCell, TableElem, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; use typst_library::visualize::ImageElem; @@ -108,6 +108,8 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) -> SourceResu let link_id = gc.tags.next_link_id(); push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone()))?; return Ok(()); + } else if let Some(_) = elem.to_packed::() { + TagKind::P.into() } else { return Ok(()); }; From 7892a8c726aac142dc599c2ba0825241932b2c7c Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 3 Jul 2025 14:12:15 +0200 Subject: [PATCH 33/76] chore: update krilla --- crates/typst-pdf/src/tags/table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index ad67c4846..26df629fc 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -318,7 +318,7 @@ fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes()); bytes[4..8].copy_from_slice(&x.to_ne_bytes()); bytes[8..12].copy_from_slice(&y.to_ne_bytes()); - TagId::from_bytes(&bytes) + TagId::from_slice(&bytes) } fn table_header_scope(scope: TableHeaderScope) 
-> krilla::tagging::TableHeaderScope { From 8e10356234788248b617ff5c38622b05bbf9bea4 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 3 Jul 2025 14:14:00 +0200 Subject: [PATCH 34/76] refactor: use krilla as git dependency --- Cargo.lock | 2 ++ Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c92cf823..8e84af212 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1367,6 +1367,7 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2897351d6eeb139675b5e7e2765fe6f082e26efd" dependencies = [ "base64", "bumpalo", @@ -1395,6 +1396,7 @@ dependencies = [ [[package]] name = "krilla-svg" version = "0.1.0" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2897351d6eeb139675b5e7e2765fe6f082e26efd" dependencies = [ "flate2", "fontdb", diff --git a/Cargo.toml b/Cargo.toml index 7029c389d..42e334ce5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,8 +73,8 @@ image = { version = "0.25.5", default-features = false, features = ["png", "jpeg indexmap = { version = "2", features = ["serde"] } infer = { version = "0.19.0", default-features = false } kamadak-exif = "0.6" -krilla = { path = "../krilla/crates/krilla", default-features = false, features = ["raster-images", "comemo", "rayon"] } -krilla-svg = { path = "../krilla/crates/krilla-svg" } +krilla = { git = "https://github.com/saecki/krilla", branch = "tag-attributes", default-features = false, features = ["raster-images", "comemo", "rayon"] } +krilla-svg = { git = "https://github.com/saecki/krilla", branch = "tag-attributes" } kurbo = "0.11" libfuzzer-sys = "0.4" lipsum = "0.9" From 254aadccfc67f207b46d3865bd9805fc9924b1c0 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 3 Jul 2025 15:57:37 +0200 Subject: [PATCH 35/76] docs: fix comment --- crates/typst-library/src/model/outline.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index 1e177130d..adca57830 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -311,7 +311,7 @@ impl LocalName for Packed { const KEY: &'static str = "outline"; } -/// Only used to mark +/// Only used to delimit the outline in tagged PDF. #[elem(Locatable, Show)] pub struct OutlineBody { #[required] From 3d4d5489349f0d13707903462d0f245e19b54298 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 3 Jul 2025 18:38:54 +0200 Subject: [PATCH 36/76] feat: [WIP] generate alt text for ref elements --- .../typst-library/src/model/bibliography.rs | 4 +-- crates/typst-library/src/model/footnote.rs | 4 +-- crates/typst-library/src/model/outline.rs | 36 +++++++++++++------ crates/typst-library/src/model/reference.rs | 9 +++-- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/crates/typst-library/src/model/bibliography.rs b/crates/typst-library/src/model/bibliography.rs index 3b14c8cbf..ec9d66774 100644 --- a/crates/typst-library/src/model/bibliography.rs +++ b/crates/typst-library/src/model/bibliography.rs @@ -877,7 +877,7 @@ impl<'a> Generator<'a> { renderer.display_elem_child(elem, &mut None, false)?; if let Some(location) = first_occurrences.get(item.key.as_str()) { let dest = Destination::Location(*location); - // TODO: accept user supplied alt text + // TODO(accessibility): generate alt text content = content.linked(dest, None); } StrResult::Ok(content) @@ -1013,7 +1013,7 @@ impl ElemRenderer<'_> { if let Some(hayagriva::ElemMeta::Entry(i)) = elem.meta { if let Some(location) = (self.link)(i) { let dest = Destination::Location(location); - // TODO: accept user supplied alt text + // TODO(accessibility): generate alt text content = content.linked(dest, None); } } diff --git a/crates/typst-library/src/model/footnote.rs b/crates/typst-library/src/model/footnote.rs index 872827d90..f1eeb0c3e 100644 --- 
a/crates/typst-library/src/model/footnote.rs +++ b/crates/typst-library/src/model/footnote.rs @@ -147,7 +147,7 @@ impl Show for Packed { let sup = SuperElem::new(num).pack().spanned(span); let loc = loc.variant(1); // Add zero-width weak spacing to make the footnote "sticky". - // TODO: accept user supplied alt text + // TODO(accessibility): generate alt text Ok(HElem::hole().pack() + sup.linked(Destination::Location(loc), None)) } } @@ -297,7 +297,7 @@ impl Show for Packed { let sup = SuperElem::new(num) .pack() .spanned(span) - // TODO: accept user supplied alt text + // TODO(accessibility): generate alt text .linked(Destination::Location(loc), None) .located(loc.variant(1)); diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index adca57830..8e2f18da6 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -2,7 +2,7 @@ use std::num::NonZeroUsize; use std::str::FromStr; use comemo::{Track, Tracked}; -use ecow::eco_format; +use ecow::{eco_format, EcoString}; use smallvec::SmallVec; use typst_syntax::Span; use typst_utils::{Get, NonZeroExt}; @@ -23,7 +23,7 @@ use crate::layout::{ }; use crate::math::EquationElem; use crate::model::{Destination, HeadingElem, NumberingPattern, ParElem, Refable}; -use crate::text::{LocalName, SpaceElem, TextElem}; +use crate::text::{LocalName, SmartQuoteElem, SmartQuotes, SpaceElem, TextElem}; /// A table of contents, figures, or other elements. 
/// @@ -435,18 +435,11 @@ impl Show for Packed { let context = Context::new(None, Some(styles)); let context = context.track(); - // TODO: prefix should be wrapped in a `Lbl` structure element + // TODO(accessibility): prefix should be wrapped in a `Lbl` structure element let prefix = self.prefix(engine, context, span)?; let body = self.body().at(span)?; let page = self.page(engine, context, span)?; - let alt = { - // TODO: accept user supplied alt text - let prefix = prefix.as_ref().map(|p| p.plain_text()).unwrap_or_default(); - let body = body.plain_text(); - let page_str = PageElem::local_name_in(styles); - let page_nr = page.plain_text(); - eco_format!("{prefix} \"{body}\", {page_str} {page_nr}") - }; + let alt = alt_text(styles, &prefix, &body, &page); let inner = self.inner(context, span, body, page)?; let block = if self.element.is::() { let body = prefix.unwrap_or_default() + inner; @@ -704,6 +697,27 @@ cast! { v: Content => v.unpack::().map_err(|_| "expected outline entry")? } +fn alt_text( + styles: StyleChain, + prefix: &Option, + body: &Content, + page: &Content, +) -> EcoString { + let prefix = prefix.as_ref().map(|p| p.plain_text()).unwrap_or_default(); + let body = body.plain_text(); + let page_str = PageElem::local_name_in(styles); + let page_nr = page.plain_text(); + let quotes = SmartQuotes::get( + SmartQuoteElem::quotes_in(styles), + TextElem::lang_in(styles), + TextElem::region_in(styles), + SmartQuoteElem::alternative_in(styles), + ); + let open = quotes.double_open; + let close = quotes.double_close; + eco_format!("{prefix} {open}{body}{close} {page_str} {page_nr}",) +} + /// Measures the width of a prefix. 
fn measure_prefix( engine: &mut Engine, diff --git a/crates/typst-library/src/model/reference.rs b/crates/typst-library/src/model/reference.rs index b04c57c4a..2b35a826e 100644 --- a/crates/typst-library/src/model/reference.rs +++ b/crates/typst-library/src/model/reference.rs @@ -338,13 +338,18 @@ fn show_reference( Smart::Custom(Some(supplement)) => supplement.resolve(engine, styles, [elem])?, }; + let alt = { + let supplement = supplement.plain_text(); + let numbering = numbers.plain_text(); + eco_format!("{supplement} {numbering}",) + }; + let mut content = numbers; if !supplement.is_empty() { content = supplement + TextElem::packed("\u{a0}") + content; } - // TODO: accept user supplied alt text - Ok(content.linked(Destination::Location(loc), None)) + Ok(content.linked(Destination::Location(loc), Some(alt))) } /// Turn a reference into a citation. From 4dceb7f5efb0319359b25d887fd47ebe680c1d4b Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 4 Jul 2025 10:37:46 +0200 Subject: [PATCH 37/76] refactor: update krilla --- Cargo.lock | 4 ++-- crates/typst-pdf/src/convert.rs | 2 +- crates/typst-pdf/src/tags/table.rs | 7 +++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8e84af212..7a0daeb6b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1367,7 +1367,7 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2897351d6eeb139675b5e7e2765fe6f082e26efd" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#a13326781d3959896a468b79dd80741bd3ab2398" dependencies = [ "base64", "bumpalo", @@ -1396,7 +1396,7 @@ dependencies = [ [[package]] name = "krilla-svg" version = "0.1.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2897351d6eeb139675b5e7e2765fe6f082e26efd" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#a13326781d3959896a468b79dd80741bd3ab2398" dependencies = [ "flate2", "fontdb", 
diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index eadcc6274..19f2a096e 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -598,7 +598,7 @@ fn convert_error( let span = to_span(*loc); error!(span, "{prefix} duplicate tag id") } - ValidationError::UnknownHeaderTagId(_id, loc) => { + ValidationError::UnknownTagId(_id, loc) => { // TODO: display the id and better error message let span = to_span(*loc); error!(span, "{prefix} unknown header tag id") diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index 26df629fc..57effd02e 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -1,8 +1,7 @@ use std::num::{NonZeroU32, NonZeroUsize}; use krilla::tagging::{ - TableCellHeaders, TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, - TagKind, + TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, TagIdRefs, TagKind, }; use typst_library::foundations::{Packed, Smart, StyleChain}; use typst_library::model::{TableCell, TableCellKind, TableElem, TableHeaderScope}; @@ -84,7 +83,7 @@ impl TableCtx { rowspan, colspan, kind, - headers: TableCellHeaders::NONE, + headers: TagIdRefs::NONE, nodes, }); } @@ -294,7 +293,7 @@ struct TableCtxCell { rowspan: NonZeroUsize, colspan: NonZeroUsize, kind: Smart, - headers: TableCellHeaders, + headers: TagIdRefs, nodes: Vec, } From 157e0fa1427d604efb0754da9237f0abcf35c479 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 4 Jul 2025 15:56:39 +0200 Subject: [PATCH 38/76] fix: generate cell id with correct indices --- crates/typst-pdf/src/tags/table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index 57effd02e..eb13a1391 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -245,7 +245,7 @@ impl TableCtx { if let TableCellKind::Header(level, scope) 
= cell.unwrap_kind() { if refers_to_dir(&scope) { - let tag_id = table_cell_id(table_id, x as u32, y as u32); + let tag_id = table_cell_id(table_id, cell.x, cell.y); *current_header = Some((level, tag_id)); } } From 58c6729df40c303e429986e312472c1999c3dfbd Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 7 Jul 2025 10:52:20 +0200 Subject: [PATCH 39/76] feat: generate human readable table cell IDs in almost all real-world cases these IDs require less memory than the binary IDs used before, and they also require less storage in PDF files, since binary data is encoded in hex escape sequences, taking up 4 bytes per byte of data. --- Cargo.lock | 5 +++-- crates/typst-pdf/Cargo.toml | 1 + crates/typst-pdf/src/tags/table.rs | 10 +++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a0daeb6b..96ae7eec0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1367,7 +1367,7 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#a13326781d3959896a468b79dd80741bd3ab2398" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2556c404d19746f9385b4a4e26e910d9e625c1db" dependencies = [ "base64", "bumpalo", @@ -1396,7 +1396,7 @@ dependencies = [ [[package]] name = "krilla-svg" version = "0.1.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#a13326781d3959896a468b79dd80741bd3ab2398" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2556c404d19746f9385b4a4e26e910d9e625c1db" dependencies = [ "flate2", "fontdb", @@ -3139,6 +3139,7 @@ dependencies = [ "krilla", "krilla-svg", "serde", + "smallvec", "typst-assets", "typst-library", "typst-macros", diff --git a/crates/typst-pdf/Cargo.toml b/crates/typst-pdf/Cargo.toml index 5745d0530..b96a28029 100644 --- a/crates/typst-pdf/Cargo.toml +++ b/crates/typst-pdf/Cargo.toml @@ -27,6 +27,7 @@ infer = { workspace = true } krilla = { workspace = true } 
krilla-svg = { workspace = true } serde = { workspace = true } +smallvec = { workspace = true } [lints] workspace = true diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index eb13a1391..c27f95a12 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -1,8 +1,10 @@ +use std::io::Write as _; use std::num::{NonZeroU32, NonZeroUsize}; use krilla::tagging::{ TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, TagIdRefs, TagKind, }; +use smallvec::SmallVec; use typst_library::foundations::{Packed, Smart, StyleChain}; use typst_library::model::{TableCell, TableCellKind, TableElem, TableHeaderScope}; @@ -313,11 +315,9 @@ fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool { } fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { - let mut bytes = [0; 12]; - bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes()); - bytes[4..8].copy_from_slice(&x.to_ne_bytes()); - bytes[8..12].copy_from_slice(&y.to_ne_bytes()); - TagId::from_slice(&bytes) + let mut buf = SmallVec::new(); + _ = write!(&mut buf, "{}x{x}y{y}", table_id.0); + TagId::from_smallvec(buf) } fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope { From b0d3c2dca4871e716241e09734340f45b1b2cd7a Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 7 Jul 2025 12:28:53 +0200 Subject: [PATCH 40/76] test: table header id generation --- Cargo.lock | 28 +++- Cargo.toml | 1 + crates/typst-pdf/Cargo.toml | 4 + crates/typst-pdf/src/tags/mod.rs | 18 +-- crates/typst-pdf/src/tags/outline.rs | 7 +- crates/typst-pdf/src/tags/table.rs | 203 ++++++++++++++++++++++++--- 6 files changed, 224 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 96ae7eec0..6ab36ee3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -592,6 +592,12 @@ dependencies = [ "syn", ] +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + [[package]] name = "dirs" version = "6.0.0" @@ -1367,7 +1373,7 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2556c404d19746f9385b4a4e26e910d9e625c1db" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#736d8b7e2c9c43d3fcf8b6bf31fb1a179605cab9" dependencies = [ "base64", "bumpalo", @@ -1396,7 +1402,7 @@ dependencies = [ [[package]] name = "krilla-svg" version = "0.1.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2556c404d19746f9385b4a4e26e910d9e625c1db" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#736d8b7e2c9c43d3fcf8b6bf31fb1a179605cab9" dependencies = [ "flate2", "fontdb", @@ -1982,6 +1988,16 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "pretty_assertions" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" +dependencies = [ + "diff", + "yansi", +] + [[package]] name = "proc-macro2" version = "1.0.93" @@ -3131,6 +3147,7 @@ dependencies = [ name = "typst-pdf" version = "0.13.1" dependencies = [ + "az", "bytemuck", "comemo", "ecow", @@ -3138,6 +3155,7 @@ dependencies = [ "infer", "krilla", "krilla-svg", + "pretty_assertions", "serde", "smallvec", "typst-assets", @@ -3802,6 +3820,12 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "yoke" version = "0.7.5" diff --git a/Cargo.toml b/Cargo.toml index 42e334ce5..b9aefde09 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -92,6 +92,7 @@ phf = { version = "0.11", features = ["macros"] } pixglyph = "0.6" png = "0.17" portable-atomic = "1.6" +pretty_assertions = "1.4.1" proc-macro2 = 
"1" pulldown-cmark = "0.9" qcms = "0.3.0" diff --git a/crates/typst-pdf/Cargo.toml b/crates/typst-pdf/Cargo.toml index b96a28029..e9a987dbf 100644 --- a/crates/typst-pdf/Cargo.toml +++ b/crates/typst-pdf/Cargo.toml @@ -19,6 +19,7 @@ typst-macros = { workspace = true } typst-syntax = { workspace = true } typst-timing = { workspace = true } typst-utils = { workspace = true } +az = { workspace = true } bytemuck = { workspace = true } comemo = { workspace = true } ecow = { workspace = true } @@ -29,5 +30,8 @@ krilla-svg = { workspace = true } serde = { workspace = true } smallvec = { workspace = true } +[dev-dependencies] +pretty_assertions = { workspace = true } + [lints] workspace = true diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index a1a2c5e98..5aa4d3904 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -83,7 +83,8 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) -> SourceResu TagKind::Caption.into() } else if let Some(table) = elem.to_packed::() { let table_id = gc.tags.next_table_id(); - let ctx = TableCtx::new(table_id, table.clone()); + let summary = table.summary(StyleChain::default()).map(EcoString::into); + let ctx = TableCtx::new(table_id, summary); push_stack(gc, loc, StackEntryKind::Table(ctx))?; return Ok(()); } else if let Some(cell) = elem.to_packed::() { @@ -151,10 +152,7 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { let node = match entry.kind { StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes), - StackEntryKind::Outline(ctx) => { - let nodes = ctx.build_outline(entry.nodes); - TagNode::Group(TagKind::TOC.into(), nodes) - } + StackEntryKind::Outline(ctx) => ctx.build_outline(entry.nodes), StackEntryKind::OutlineEntry(outline_entry) => { let parent = gc.tags.stack.last_mut().and_then(|parent| { let ctx = parent.kind.as_outline_mut()?; @@ -172,11 +170,7 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) 
{ outline_ctx.insert(parent_nodes, outline_entry, entry.nodes); return; } - StackEntryKind::Table(ctx) => { - let summary = ctx.table.summary(StyleChain::default()).map(EcoString::into); - let nodes = ctx.build_table(entry.nodes); - TagNode::Group(TagKind::Table(summary).into(), nodes) - } + StackEntryKind::Table(ctx) => ctx.build_table(entry.nodes), StackEntryKind::TableCell(cell) => { let Some(table_ctx) = gc.tags.parent_table() else { // PDF/UA compliance of the structure hierarchy is checked @@ -385,7 +379,7 @@ impl StackEntryKind { } } -#[derive(Clone)] +#[derive(Debug, Clone, Eq, PartialEq)] pub(crate) enum TagNode { Group(Tag, Vec), Leaf(Identifier), @@ -394,7 +388,7 @@ pub(crate) enum TagNode { Placeholder(Placeholder), } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub(crate) struct Placeholder(usize); /// Automatically calls [`Surface::end_tagged`] when dropped. diff --git a/crates/typst-pdf/src/tags/outline.rs b/crates/typst-pdf/src/tags/outline.rs index 9fbeb8dcb..946ad6168 100644 --- a/crates/typst-pdf/src/tags/outline.rs +++ b/crates/typst-pdf/src/tags/outline.rs @@ -44,14 +44,11 @@ impl OutlineCtx { } } - pub(crate) fn build_outline( - mut self, - mut outline_nodes: Vec, - ) -> Vec { + pub(crate) fn build_outline(mut self, mut outline_nodes: Vec) -> TagNode { while !self.stack.is_empty() { self.finish_section(&mut outline_nodes); } - outline_nodes + TagNode::Group(TagKind::TOC.into(), outline_nodes) } } diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index c27f95a12..21387ad5d 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -1,24 +1,25 @@ use std::io::Write as _; -use std::num::{NonZeroU32, NonZeroUsize}; +use std::num::NonZeroU32; +use az::SaturatingAs; use krilla::tagging::{ TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, TagIdRefs, TagKind, }; use smallvec::SmallVec; use typst_library::foundations::{Packed, Smart, 
StyleChain}; -use typst_library::model::{TableCell, TableCellKind, TableElem, TableHeaderScope}; +use typst_library::model::{TableCell, TableCellKind, TableHeaderScope}; use crate::tags::{TableId, TagNode}; pub(crate) struct TableCtx { pub(crate) id: TableId, - pub(crate) table: Packed, + pub(crate) summary: Option, rows: Vec>, } impl TableCtx { - pub(crate) fn new(id: TableId, table: Packed) -> Self { - Self { id, table: table.clone(), rows: Vec::new() } + pub(crate) fn new(id: TableId, summary: Option) -> Self { + Self { id, summary, rows: Vec::new() } } fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> { @@ -80,21 +81,21 @@ impl TableCtx { } self.rows[y][x] = GridCell::Cell(TableCtxCell { - x: x as u32, - y: y as u32, - rowspan, - colspan, + x: x.saturating_as(), + y: y.saturating_as(), + rowspan: rowspan.try_into().unwrap_or(NonZeroU32::MAX), + colspan: rowspan.try_into().unwrap_or(NonZeroU32::MAX), kind, headers: TagIdRefs::NONE, nodes, }); } - pub(crate) fn build_table(mut self, mut nodes: Vec) -> Vec { + pub(crate) fn build_table(mut self, mut nodes: Vec) -> TagNode { // Table layouting ensures that there are no overlapping cells, and that // any gaps left by the user are filled with empty cells. 
if self.rows.is_empty() { - return nodes; + return TagNode::Group(TagKind::Table(self.summary).into(), nodes); } let height = self.rows.len(); let width = self.rows[0].len(); @@ -159,10 +160,7 @@ impl TableCtx { .into_iter() .filter_map(|cell| { let cell = cell.into_cell()?; - let span = TableCellSpan { - rows: cell.rowspan.try_into().unwrap_or(NonZeroU32::MAX), - cols: cell.colspan.try_into().unwrap_or(NonZeroU32::MAX), - }; + let span = TableCellSpan { rows: cell.rowspan, cols: cell.colspan }; let tag = match cell.unwrap_kind() { TableCellKind::Header(_, scope) => { let id = table_cell_id(self.id, cell.x, cell.y); @@ -217,7 +215,7 @@ impl TableCtx { nodes.push(TagNode::Group(tag.into(), row_chunk)); } - nodes + TagNode::Group(TagKind::Table(self.summary).into(), nodes) } fn resolve_cell_headers( @@ -292,8 +290,8 @@ impl GridCell { struct TableCtxCell { x: u32, y: u32, - rowspan: NonZeroUsize, - colspan: NonZeroUsize, + rowspan: NonZeroU32, + colspan: NonZeroU32, kind: Smart, headers: TagIdRefs, nodes: Vec, @@ -327,3 +325,172 @@ fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderSc TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row, } } + +#[cfg(test)] +mod tests { + use pretty_assertions::assert_eq; + use typst_library::foundations::Content; + + use super::*; + + #[track_caller] + fn test(table: TableCtx, exp_tag: TagNode) { + let tag = table.build_table(Vec::new()); + assert_eq!(tag, exp_tag); + } + + #[track_caller] + fn table(cells: [TableCell; SIZE]) -> TableCtx { + let mut table = TableCtx::new(TableId(324), Some("summary".into())); + for cell in cells { + table.insert(Packed::new(cell), Vec::new()); + } + + table + } + + #[track_caller] + fn header_cell(x: usize, y: usize, level: u32, scope: TableHeaderScope) -> TableCell { + TableCell::new(Content::default()) + .with_x(Smart::Custom(x)) + .with_y(Smart::Custom(y)) + .with_kind(Smart::Custom(TableCellKind::Header( + NonZeroU32::new(level).unwrap(), + scope, + ))) + } 
+ + fn cell(x: usize, y: usize) -> TableCell { + TableCell::new(Content::default()) + .with_x(Smart::Custom(x)) + .with_y(Smart::Custom(y)) + .with_kind(Smart::Custom(TableCellKind::Data)) + } + + fn table_tag(nodes: [TagNode; SIZE]) -> TagNode { + let tag = TagKind::Table(Some("summary".into())); + TagNode::Group(tag.into(), nodes.into()) + } + + fn header(nodes: [TagNode; SIZE]) -> TagNode { + TagNode::Group(TagKind::THead.into(), nodes.into()) + } + + fn body(nodes: [TagNode; SIZE]) -> TagNode { + TagNode::Group(TagKind::TBody.into(), nodes.into()) + } + + fn row(nodes: [TagNode; SIZE]) -> TagNode { + TagNode::Group(TagKind::TR.into(), nodes.into()) + } + + fn header_cell_tag( + x: u32, + y: u32, + scope: TableHeaderScope, + headers: [(u32, u32); SIZE], + ) -> TagNode { + let scope = table_header_scope(scope); + let id = table_cell_id(TableId(324), x, y); + let ids = headers + .map(|(x, y)| table_cell_id(TableId(324), x, y)) + .into_iter() + .collect(); + TagNode::Group( + TagKind::TH(TableHeaderCell::new(scope).with_headers(TagIdRefs { ids })) + .with_id(Some(id)), + Vec::new(), + ) + } + + fn cell_tag(headers: [(u32, u32); SIZE]) -> TagNode { + let ids = headers + .map(|(x, y)| table_cell_id(TableId(324), x, y)) + .into_iter() + .collect(); + TagNode::Group( + TagKind::TD(TableDataCell::new().with_headers(TagIdRefs { ids })).into(), + Vec::new(), + ) + } + + #[test] + fn simple_table() { + #[rustfmt::skip] + let table = table([ + header_cell(0, 0, 1, TableHeaderScope::Column), + header_cell(1, 0, 1, TableHeaderScope::Column), + header_cell(2, 0, 1, TableHeaderScope::Column), + + cell(0, 1), + cell(1, 1), + cell(2, 1), + + cell(0, 2), + cell(1, 2), + cell(2, 2), + ]); + + #[rustfmt::skip] + let tag = table_tag([ + header([row([ + header_cell_tag(0, 0, TableHeaderScope::Column, []), + header_cell_tag(1, 0, TableHeaderScope::Column, []), + header_cell_tag(2, 0, TableHeaderScope::Column, []), + ])]), + body([ + row([ + cell_tag([(0, 0)]), + cell_tag([(1, 0)]), + 
cell_tag([(2, 0)]), + ]), + row([ + cell_tag([(0, 0)]), + cell_tag([(1, 0)]), + cell_tag([(2, 0)]), + ]), + ]), + ]); + + test(table, tag); + } + + #[test] + fn header_row_and_column() { + #[rustfmt::skip] + let table = table([ + header_cell(0, 0, 1, TableHeaderScope::Column), + header_cell(1, 0, 1, TableHeaderScope::Column), + header_cell(2, 0, 1, TableHeaderScope::Column), + + header_cell(0, 1, 1, TableHeaderScope::Row), + cell(1, 1), + cell(2, 1), + + header_cell(0, 2, 1, TableHeaderScope::Row), + cell(1, 2), + cell(2, 2), + ]); + + #[rustfmt::skip] + let tag = table_tag([ + row([ + header_cell_tag(0, 0, TableHeaderScope::Column, []), + header_cell_tag(1, 0, TableHeaderScope::Column, []), + header_cell_tag(2, 0, TableHeaderScope::Column, []), + ]), + row([ + header_cell_tag(0, 1, TableHeaderScope::Row, [(0, 0)]), + cell_tag([(1, 0), (0, 1)]), + cell_tag([(2, 0), (0, 1)]), + ]), + row([ + header_cell_tag(0, 2, TableHeaderScope::Row, [(0, 0)]), + cell_tag([(1, 0), (0, 2)]), + cell_tag([(2, 0), (0, 2)]), + ]), + ]); + + test(table, tag); + } +} From 7d5b9a716f48df3f7e9ca35839defcc2ee131b33 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 7 Jul 2025 12:30:56 +0200 Subject: [PATCH 41/76] feat: wrap table cell content in a paragraph --- crates/typst-pdf/src/tags/table.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index 21387ad5d..35c8e1b01 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -180,7 +180,10 @@ impl TableCtx { .into(), }; - Some(TagNode::Group(tag, cell.nodes)) + // Wrap content in a paragraph. + // TODO: maybe avoid nested paragraphs? 
+ let par = TagNode::Group(TagKind::P.into(), cell.nodes); + Some(TagNode::Group(tag, vec![par])) }) .collect(); @@ -399,7 +402,7 @@ mod tests { TagNode::Group( TagKind::TH(TableHeaderCell::new(scope).with_headers(TagIdRefs { ids })) .with_id(Some(id)), - Vec::new(), + vec![TagNode::Group(TagKind::P.into(), Vec::new())], ) } @@ -410,7 +413,7 @@ mod tests { .collect(); TagNode::Group( TagKind::TD(TableDataCell::new().with_headers(TagIdRefs { ids })).into(), - Vec::new(), + vec![TagNode::Group(TagKind::P.into(), Vec::new())], ) } From 2445bb436117e8602b95a5c9706e5917a93c1cbd Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Tue, 8 Jul 2025 11:28:35 +0200 Subject: [PATCH 42/76] fix: table header hierarchy resolution --- crates/typst-pdf/src/tags/table.rs | 217 +++++++++++++++++++++-------- 1 file changed, 156 insertions(+), 61 deletions(-) diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index 35c8e1b01..d75808149 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -133,7 +133,7 @@ impl TableCtx { // Explicitly set the headers attribute for cells. for x in 0..width { - let mut column_header = None; + let mut column_header = Vec::new(); for y in 0..height { self.resolve_cell_headers( (x, y), @@ -143,7 +143,7 @@ impl TableCtx { } } for y in 0..height { - let mut row_header = None; + let mut row_header = Vec::new(); for x in 0..width { self.resolve_cell_headers( (x, y), @@ -224,7 +224,7 @@ impl TableCtx { fn resolve_cell_headers( &mut self, (x, y): (usize, usize), - current_header: &mut Option<(NonZeroU32, TagId)>, + current_header: &mut Vec<(NonZeroU32, TagId)>, refers_to_dir: F, ) where F: Fn(&TableHeaderScope) -> bool, @@ -232,26 +232,24 @@ impl TableCtx { let table_id = self.id; let Some(cell) = self.get_mut(x, y) else { return }; - if let Some((prev_level, cell_id)) = current_header.clone() { - // The `Headers` attribute is also set for parent headers. 
- let mut is_parent_header = true; - if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { - if refers_to_dir(&scope) { - is_parent_header = prev_level < level; - } - } + let mut new_header = None; + if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { + if refers_to_dir(&scope) { + // Remove all headers that are the same or a lower level. + while current_header.pop_if(|(l, _)| *l >= level).is_some() {} - if is_parent_header && !cell.headers.ids.contains(&cell_id) { + let tag_id = table_cell_id(table_id, cell.x, cell.y); + new_header = Some((level, tag_id)); + } + } + + if let Some((_, cell_id)) = current_header.last() { + if !cell.headers.ids.contains(&cell_id) { cell.headers.ids.push(cell_id.clone()); } } - if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { - if refers_to_dir(&scope) { - let tag_id = table_cell_id(table_id, cell.x, cell.y); - *current_header = Some((level, tag_id)); - } - } + current_header.extend(new_header); } } @@ -339,7 +337,7 @@ mod tests { #[track_caller] fn test(table: TableCtx, exp_tag: TagNode) { let tag = table.build_table(Vec::new()); - assert_eq!(tag, exp_tag); + assert_eq!(exp_tag, tag); } #[track_caller] @@ -348,12 +346,15 @@ mod tests { for cell in cells { table.insert(Packed::new(cell), Vec::new()); } - table } #[track_caller] - fn header_cell(x: usize, y: usize, level: u32, scope: TableHeaderScope) -> TableCell { + fn header_cell( + (x, y): (usize, usize), + level: u32, + scope: TableHeaderScope, + ) -> TableCell { TableCell::new(Content::default()) .with_x(Smart::Custom(x)) .with_y(Smart::Custom(y)) @@ -363,6 +364,14 @@ mod tests { ))) } + #[track_caller] + fn footer_cell(x: usize, y: usize) -> TableCell { + TableCell::new(Content::default()) + .with_x(Smart::Custom(x)) + .with_y(Smart::Custom(y)) + .with_kind(Smart::Custom(TableCellKind::Footer)) + } + fn cell(x: usize, y: usize) -> TableCell { TableCell::new(Content::default()) .with_x(Smart::Custom(x)) @@ -370,26 +379,36 @@ mod tests { 
.with_kind(Smart::Custom(TableCellKind::Data)) } + fn empty_cell(x: usize, y: usize) -> TableCell { + TableCell::new(Content::default()) + .with_x(Smart::Custom(x)) + .with_y(Smart::Custom(y)) + .with_kind(Smart::Auto) + } + fn table_tag(nodes: [TagNode; SIZE]) -> TagNode { let tag = TagKind::Table(Some("summary".into())); TagNode::Group(tag.into(), nodes.into()) } - fn header(nodes: [TagNode; SIZE]) -> TagNode { + fn thead(nodes: [TagNode; SIZE]) -> TagNode { TagNode::Group(TagKind::THead.into(), nodes.into()) } - fn body(nodes: [TagNode; SIZE]) -> TagNode { + fn tbody(nodes: [TagNode; SIZE]) -> TagNode { TagNode::Group(TagKind::TBody.into(), nodes.into()) } - fn row(nodes: [TagNode; SIZE]) -> TagNode { + fn tfoot(nodes: [TagNode; SIZE]) -> TagNode { + TagNode::Group(TagKind::TFoot.into(), nodes.into()) + } + + fn trow(nodes: [TagNode; SIZE]) -> TagNode { TagNode::Group(TagKind::TR.into(), nodes.into()) } - fn header_cell_tag( - x: u32, - y: u32, + fn th( + (x, y): (u32, u32), scope: TableHeaderScope, headers: [(u32, u32); SIZE], ) -> TagNode { @@ -406,7 +425,7 @@ mod tests { ) } - fn cell_tag(headers: [(u32, u32); SIZE]) -> TagNode { + fn td(headers: [(u32, u32); SIZE]) -> TagNode { let ids = headers .map(|(x, y)| table_cell_id(TableId(324), x, y)) .into_iter() @@ -421,9 +440,9 @@ mod tests { fn simple_table() { #[rustfmt::skip] let table = table([ - header_cell(0, 0, 1, TableHeaderScope::Column), - header_cell(1, 0, 1, TableHeaderScope::Column), - header_cell(2, 0, 1, TableHeaderScope::Column), + header_cell((0, 0), 1, TableHeaderScope::Column), + header_cell((1, 0), 1, TableHeaderScope::Column), + header_cell((2, 0), 1, TableHeaderScope::Column), cell(0, 1), cell(1, 1), @@ -436,21 +455,21 @@ mod tests { #[rustfmt::skip] let tag = table_tag([ - header([row([ - header_cell_tag(0, 0, TableHeaderScope::Column, []), - header_cell_tag(1, 0, TableHeaderScope::Column, []), - header_cell_tag(2, 0, TableHeaderScope::Column, []), + thead([trow([ + th((0, 0), 
TableHeaderScope::Column, []), + th((1, 0), TableHeaderScope::Column, []), + th((2, 0), TableHeaderScope::Column, []), ])]), - body([ - row([ - cell_tag([(0, 0)]), - cell_tag([(1, 0)]), - cell_tag([(2, 0)]), + tbody([ + trow([ + td([(0, 0)]), + td([(1, 0)]), + td([(2, 0)]), ]), - row([ - cell_tag([(0, 0)]), - cell_tag([(1, 0)]), - cell_tag([(2, 0)]), + trow([ + td([(0, 0)]), + td([(1, 0)]), + td([(2, 0)]), ]), ]), ]); @@ -462,35 +481,111 @@ mod tests { fn header_row_and_column() { #[rustfmt::skip] let table = table([ - header_cell(0, 0, 1, TableHeaderScope::Column), - header_cell(1, 0, 1, TableHeaderScope::Column), - header_cell(2, 0, 1, TableHeaderScope::Column), + header_cell((0, 0), 1, TableHeaderScope::Column), + header_cell((1, 0), 1, TableHeaderScope::Column), + header_cell((2, 0), 1, TableHeaderScope::Column), - header_cell(0, 1, 1, TableHeaderScope::Row), + header_cell((0, 1), 1, TableHeaderScope::Row), cell(1, 1), cell(2, 1), - header_cell(0, 2, 1, TableHeaderScope::Row), + header_cell((0, 2), 1, TableHeaderScope::Row), cell(1, 2), cell(2, 2), ]); #[rustfmt::skip] let tag = table_tag([ - row([ - header_cell_tag(0, 0, TableHeaderScope::Column, []), - header_cell_tag(1, 0, TableHeaderScope::Column, []), - header_cell_tag(2, 0, TableHeaderScope::Column, []), + trow([ + th((0, 0), TableHeaderScope::Column, []), + th((1, 0), TableHeaderScope::Column, []), + th((2, 0), TableHeaderScope::Column, []), ]), - row([ - header_cell_tag(0, 1, TableHeaderScope::Row, [(0, 0)]), - cell_tag([(1, 0), (0, 1)]), - cell_tag([(2, 0), (0, 1)]), + trow([ + th((0, 1), TableHeaderScope::Row, [(0, 0)]), + td([(1, 0), (0, 1)]), + td([(2, 0), (0, 1)]), ]), - row([ - header_cell_tag(0, 2, TableHeaderScope::Row, [(0, 0)]), - cell_tag([(1, 0), (0, 2)]), - cell_tag([(2, 0), (0, 2)]), + trow([ + th((0, 2), TableHeaderScope::Row, [(0, 0)]), + td([(1, 0), (0, 2)]), + td([(2, 0), (0, 2)]), + ]), + ]); + + test(table, tag); + } + + #[test] + fn complex_tables() { + #[rustfmt::skip] + let table 
= table([ + header_cell((0, 0), 1, TableHeaderScope::Column), + header_cell((1, 0), 1, TableHeaderScope::Column), + header_cell((2, 0), 1, TableHeaderScope::Column), + + header_cell((0, 1), 2, TableHeaderScope::Column), + header_cell((1, 1), 2, TableHeaderScope::Column), + header_cell((2, 1), 2, TableHeaderScope::Column), + + cell(0, 2), + empty_cell(1, 2), // the type of empty cells is inferred from the row + cell(2, 2), + + header_cell((0, 3), 2, TableHeaderScope::Column), + header_cell((1, 3), 2, TableHeaderScope::Column), + empty_cell(2, 3), // the type of empty cells is inferred from the row + + cell(0, 4), + cell(1, 4), + empty_cell(2, 4), + + empty_cell(0, 5), // the type of empty cells is inferred from the row + footer_cell(1, 5), + footer_cell(2, 5), + ]); + + #[rustfmt::skip] + let tag = table_tag([ + thead([ + trow([ + th((0, 0), TableHeaderScope::Column, []), + th((1, 0), TableHeaderScope::Column, []), + th((2, 0), TableHeaderScope::Column, []), + ]), + trow([ + th((0, 1), TableHeaderScope::Column, [(0, 0)]), + th((1, 1), TableHeaderScope::Column, [(1, 0)]), + th((2, 1), TableHeaderScope::Column, [(2, 0)]), + ]), + ]), + tbody([ + trow([ + td([(0, 1)]), + td([(1, 1)]), + td([(2, 1)]), + ]), + ]), + thead([ + trow([ + th((0, 3), TableHeaderScope::Column, [(0, 0)]), + th((1, 3), TableHeaderScope::Column, [(1, 0)]), + th((2, 3), TableHeaderScope::Column, [(2, 0)]), + ]), + ]), + tbody([ + trow([ + td([(0, 3)]), + td([(1, 3)]), + td([(2, 3)]), + ]), + ]), + tfoot([ + trow([ + td([(0, 3)]), + td([(1, 3)]), + td([(2, 3)]), + ]), ]), ]); From 070a0faf5c69b6a03b4b043b94bb4984367c1428 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Tue, 8 Jul 2025 14:14:21 +0200 Subject: [PATCH 43/76] fixup! 
test: table header id generation --- crates/typst-pdf/src/tags/table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index d75808149..177de9624 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -84,7 +84,7 @@ impl TableCtx { x: x.saturating_as(), y: y.saturating_as(), rowspan: rowspan.try_into().unwrap_or(NonZeroU32::MAX), - colspan: rowspan.try_into().unwrap_or(NonZeroU32::MAX), + colspan: colspan.try_into().unwrap_or(NonZeroU32::MAX), kind, headers: TagIdRefs::NONE, nodes, From edd213074f476b283374c4e6bcb39ede5cf17e39 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Tue, 8 Jul 2025 14:14:37 +0200 Subject: [PATCH 44/76] refactor: remove general api to set cell kind and add pdf.(header|data)-cell --- .../typst-library/src/layout/grid/resolve.rs | 4 +- crates/typst-library/src/model/table.rs | 71 ++---------------- crates/typst-library/src/pdf/accessibility.rs | 74 ++++++++++++++++++- crates/typst-library/src/pdf/mod.rs | 2 + crates/typst-pdf/src/tags/mod.rs | 2 +- crates/typst-pdf/src/tags/table.rs | 9 ++- 6 files changed, 86 insertions(+), 76 deletions(-) diff --git a/crates/typst-library/src/layout/grid/resolve.rs b/crates/typst-library/src/layout/grid/resolve.rs index 49f9e0edd..0724280b0 100644 --- a/crates/typst-library/src/layout/grid/resolve.rs +++ b/crates/typst-library/src/layout/grid/resolve.rs @@ -22,7 +22,7 @@ use typst_syntax::Span; use typst_utils::NonZeroExt; use crate::introspection::SplitLocator; -use crate::model::{TableCellKind, TableHeaderScope}; +use crate::pdf::{TableCellKind, TableHeaderScope}; /// Convert a grid to a cell grid. 
#[typst_macros::time(span = elem.span())] @@ -226,7 +226,7 @@ impl ResolvableCell for Packed { let breakable = cell.breakable(styles).unwrap_or(breakable); let fill = cell.fill(styles).unwrap_or_else(|| fill.clone()); - let kind = cell.kind(styles).or(kind); + let kind = cell.kind().copied().unwrap_or_default().or(kind); let cell_stroke = cell.stroke(styles); let stroke_overridden = diff --git a/crates/typst-library/src/model/table.rs b/crates/typst-library/src/model/table.rs index f8fe76918..7aacf07fa 100644 --- a/crates/typst-library/src/model/table.rs +++ b/crates/typst-library/src/model/table.rs @@ -2,14 +2,13 @@ use std::num::{NonZeroU32, NonZeroUsize}; use std::sync::Arc; use ecow::EcoString; -use typst_macros::Cast; use typst_utils::NonZeroExt; use crate::diag::{bail, HintedStrResult, HintedString, SourceResult}; use crate::engine::Engine; use crate::foundations::{ - cast, dict, elem, scope, Content, Dict, NativeElement, Packed, Show, Smart, - StyleChain, TargetElem, + cast, elem, scope, Content, NativeElement, Packed, Show, Smart, StyleChain, + TargetElem, }; use crate::html::{attr, tag, HtmlAttrs, HtmlElem, HtmlTag}; use crate::introspection::{Locatable, Locator}; @@ -20,6 +19,7 @@ use crate::layout::{ TrackSizings, }; use crate::model::Figurable; +use crate::pdf::TableCellKind; use crate::text::LocalName; use crate::visualize::{Paint, Stroke}; @@ -811,7 +811,8 @@ pub struct TableCell { #[fold] pub stroke: Sides>>>, - // TODO: feature gate + #[internal] + #[synthesized] pub kind: Smart, /// Whether rows spanned by this cell can be placed in different pages. @@ -851,65 +852,3 @@ impl From for TableCell { value.unpack::().unwrap_or_else(Self::new) } } - -#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] -pub enum TableCellKind { - Header(NonZeroU32, TableHeaderScope), - Footer, - #[default] - Data, -} - -cast! { - TableCellKind, - self => match self { - Self::Header(level, scope) => dict! 
{ "level" => level, "scope" => scope }.into_value(), - Self::Footer => "footer".into_value(), - Self::Data => "data".into_value(), - }, - "header" => Self::Header(NonZeroU32::ONE, TableHeaderScope::default()), - "footer" => Self::Footer, - "data" => Self::Data, - mut dict: Dict => { - // TODO: have a `pdf.header` function instead? - #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] - enum HeaderKind { - Header, - } - dict.take("kind")?.cast::()?; - let level = dict.take("level").ok().map(|v| v.cast()).transpose()?; - let scope = dict.take("scope").ok().map(|v| v.cast()).transpose()?; - dict.finish(&["kind", "level", "scope"])?; - Self::Header(level.unwrap_or(NonZeroU32::ONE), scope.unwrap_or_default()) - }, -} - -/// The scope of a table header cell. -#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Cast)] -pub enum TableHeaderScope { - /// The header cell refers to both the row and the column. - Both, - /// The header cell refers to the column. - #[default] - Column, - /// The header cell refers to the row. 
- Row, -} - -impl TableHeaderScope { - pub fn refers_to_column(&self) -> bool { - match self { - TableHeaderScope::Both => true, - TableHeaderScope::Column => true, - TableHeaderScope::Row => false, - } - } - - pub fn refers_to_row(&self) -> bool { - match self { - TableHeaderScope::Both => true, - TableHeaderScope::Column => false, - TableHeaderScope::Row => true, - } - } -} diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index 7ec52f8cb..9399c1c60 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -1,11 +1,14 @@ +use std::num::NonZeroU32; + use ecow::EcoString; -use typst_macros::{cast, elem, Cast}; +use typst_macros::{cast, elem, func, Cast}; +use typst_utils::NonZeroExt; use crate::diag::SourceResult; use crate::engine::Engine; -use crate::foundations::{Content, Packed, Show, StyleChain}; +use crate::foundations::{Content, NativeElement, Packed, Show, Smart, StyleChain}; use crate::introspection::Locatable; -use crate::model::TableHeaderScope; +use crate::model::TableCell; // TODO: docs #[elem(Locatable, Show)] @@ -210,3 +213,68 @@ impl Show for Packed { Ok(self.body.clone()) } } + +// TODO: feature gate +/// Explicity define this cell as a header cell. +#[func] +pub fn header_cell( + #[named] + #[default(NonZeroU32::ONE)] + level: NonZeroU32, + #[named] + #[default] + scope: TableHeaderScope, + /// The table cell. + cell: TableCell, +) -> Content { + cell.with_kind(Smart::Custom(TableCellKind::Header(level, scope))) + .pack() +} + +// TODO: feature gate +/// Explicity define this cell as a data cell. +#[func] +pub fn data_cell( + /// The table cell. 
+ cell: TableCell, +) -> Content { + cell.with_kind(Smart::Custom(TableCellKind::Data)).pack() +} + +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] +pub enum TableCellKind { + Header(NonZeroU32, TableHeaderScope), + Footer, + #[default] + Data, +} + +/// The scope of a table header cell. +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum TableHeaderScope { + /// The header cell refers to both the row and the column. + Both, + /// The header cell refers to the column. + #[default] + Column, + /// The header cell refers to the row. + Row, +} + +impl TableHeaderScope { + pub fn refers_to_column(&self) -> bool { + match self { + TableHeaderScope::Both => true, + TableHeaderScope::Column => true, + TableHeaderScope::Row => false, + } + } + + pub fn refers_to_row(&self) -> bool { + match self { + TableHeaderScope::Both => true, + TableHeaderScope::Column => false, + TableHeaderScope::Row => true, + } + } +} diff --git a/crates/typst-library/src/pdf/mod.rs b/crates/typst-library/src/pdf/mod.rs index 952e7fe32..8a0d40b9c 100644 --- a/crates/typst-library/src/pdf/mod.rs +++ b/crates/typst-library/src/pdf/mod.rs @@ -15,5 +15,7 @@ pub fn module() -> Module { pdf.define_elem::(); pdf.define_elem::(); pdf.define_elem::(); + pdf.define_func::(); + pdf.define_func::(); Module::new("pdf", pdf) } diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 5aa4d3904..fd9e9394a 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -181,7 +181,7 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { return; }; - table_ctx.insert(cell, entry.nodes); + table_ctx.insert(&cell, entry.nodes); return; } StackEntryKind::Link(_, link) => { diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index 177de9624..fc3ab8a5d 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -7,7 +7,8 @@ use krilla::tagging::{ }; 
use smallvec::SmallVec; use typst_library::foundations::{Packed, Smart, StyleChain}; -use typst_library::model::{TableCell, TableCellKind, TableHeaderScope}; +use typst_library::model::TableCell; +use typst_library::pdf::{TableCellKind, TableHeaderScope}; use crate::tags::{TableId, TagNode}; @@ -54,12 +55,12 @@ impl TableCtx { } } - pub(crate) fn insert(&mut self, cell: Packed, nodes: Vec) { + pub(crate) fn insert(&mut self, cell: &TableCell, nodes: Vec) { let x = cell.x(StyleChain::default()).unwrap_or_else(|| unreachable!()); let y = cell.y(StyleChain::default()).unwrap_or_else(|| unreachable!()); let rowspan = cell.rowspan(StyleChain::default()); let colspan = cell.colspan(StyleChain::default()); - let kind = cell.kind(StyleChain::default()); + let kind = cell.kind().copied().expect("kind to be set after layouting"); // Extend the table grid to fit this cell. let required_height = y + rowspan.get(); @@ -344,7 +345,7 @@ mod tests { fn table(cells: [TableCell; SIZE]) -> TableCtx { let mut table = TableCtx::new(TableId(324), Some("summary".into())); for cell in cells { - table.insert(Packed::new(cell), Vec::new()); + table.insert(&cell, Vec::new()); } table } From 8998676acb70d46f0a138b89bdff1f7781c662ab Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 9 Jul 2025 01:08:34 +0200 Subject: [PATCH 45/76] feat: group artifacts span one artifact tag across all content inside an artifact --- crates/typst-pdf/src/convert.rs | 8 +++-- crates/typst-pdf/src/tags/mod.rs | 53 +++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 06f076f9a..09f3c2c5f 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -107,6 +107,8 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul let mut surface = page.surface(); let mut fc = FrameContext::new(typst_page.frame.size()); + tags::page_start(gc, &mut surface); 
+ handle_frame( &mut fc, &typst_page.frame, @@ -115,6 +117,8 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul gc, )?; + tags::page_end(gc, &mut surface); + surface.finish(); tags::add_annotations(gc, &mut page, fc.link_annotations); @@ -293,8 +297,8 @@ pub(crate) fn handle_frame( handle_image(gc, fc, image, *size, surface, *span)? } FrameItem::Link(dest, size) => handle_link(fc, gc, dest, *size), - FrameItem::Tag(Tag::Start(elem)) => tags::handle_start(gc, elem)?, - FrameItem::Tag(Tag::End(loc, _)) => tags::handle_end(gc, *loc), + FrameItem::Tag(Tag::Start(elem)) => tags::handle_start(gc, surface, elem)?, + FrameItem::Tag(Tag::End(loc, _)) => tags::handle_end(gc, surface, *loc), } fc.pop(); diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 59768ee07..1e82ee8b1 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -31,7 +31,11 @@ use crate::tags::table::TableCtx; mod outline; mod table; -pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) -> SourceResult<()> { +pub(crate) fn handle_start( + gc: &mut GlobalContext, + surface: &mut Surface, + elem: &Content, +) -> SourceResult<()> { if gc.tags.in_artifact.is_some() { // Don't nest artifacts return Ok(()); @@ -41,10 +45,10 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) -> SourceResu if let Some(artifact) = elem.to_packed::() { let kind = artifact.kind.get(StyleChain::default()); - start_artifact(gc, loc, kind); + start_artifact(gc, surface, loc, kind); return Ok(()); } else if let Some(_) = elem.to_packed::() { - start_artifact(gc, loc, ArtifactKind::Other); + start_artifact(gc, surface, loc, ArtifactKind::Other); return Ok(()); } @@ -103,7 +107,7 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) -> SourceResu // first page. 
Maybe it should be the cell on the last page, but that // would require more changes in the layouting code, or a pre-pass // on the frames to figure out if there are other footers following. - start_artifact(gc, loc, ArtifactKind::Other); + start_artifact(gc, surface, loc, ArtifactKind::Other); } else { push_stack(gc, loc, StackEntryKind::TableCell(cell.clone()))?; } @@ -141,9 +145,10 @@ fn push_stack( Ok(()) } -pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { +pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Location) { if let Some((l, _)) = gc.tags.in_artifact { if l == loc { + surface.end_tagged(); gc.tags.in_artifact = None; } return; @@ -202,6 +207,20 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) { gc.tags.push(node); } +pub(crate) fn page_start(gc: &mut GlobalContext, surface: &mut Surface) { + if let Some((_, kind)) = gc.tags.in_artifact { + let ty = artifact_type(kind); + let id = surface.start_tagged(ContentTag::Artifact(ty)); + gc.tags.push(TagNode::Leaf(id)); + } +} + +pub(crate) fn page_end(gc: &mut GlobalContext, surface: &mut Surface) { + if gc.tags.in_artifact.is_some() { + surface.end_tagged(); + } +} + /// Add all annotations that were found in the page frame. pub(crate) fn add_annotations( gc: &mut GlobalContext, @@ -397,11 +416,16 @@ pub(crate) struct Placeholder(usize); /// Automatically calls [`Surface::end_tagged`] when dropped. pub(crate) struct TagHandle<'a, 'b> { surface: &'b mut Surface<'a>, + /// Whether this tag handle started the marked content sequence, and should + /// thus end it when it is dropped. 
+ started: bool, } impl Drop for TagHandle<'_, '_> { fn drop(&mut self) { - self.surface.end_tagged(); + if self.started { + self.surface.end_tagged(); + } } } @@ -435,9 +459,8 @@ fn start_content<'a, 'b>( surface: &'b mut Surface<'a>, content: ContentTag, ) -> TagHandle<'a, 'b> { - let content = if let Some((_, kind)) = gc.tags.in_artifact { - let ty = artifact_type(kind); - ContentTag::Artifact(ty) + let content = if gc.tags.in_artifact.is_some() { + return TagHandle { surface, started: false }; } else if let Some(StackEntryKind::Table(_)) = gc.tags.stack.last().map(|e| &e.kind) { // Mark any direct child of a table as an aritfact. Any real content // will be wrapped inside a `TableCell`. @@ -447,10 +470,18 @@ fn start_content<'a, 'b>( }; let id = surface.start_tagged(content); gc.tags.push(TagNode::Leaf(id)); - TagHandle { surface } + TagHandle { surface, started: true } } -fn start_artifact(gc: &mut GlobalContext, loc: Location, kind: ArtifactKind) { +fn start_artifact( + gc: &mut GlobalContext, + surface: &mut Surface, + loc: Location, + kind: ArtifactKind, +) { + let ty = artifact_type(kind); + let id = surface.start_tagged(ContentTag::Artifact(ty)); + gc.tags.push(TagNode::Leaf(id)); gc.tags.in_artifact = Some((loc, kind)); } From 08719237c2877c5a3337b888bbc84c42fddcbdb0 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 9 Jul 2025 01:19:57 +0200 Subject: [PATCH 46/76] feat!: for now don't generate paragraphs --- crates/typst-pdf/src/tags/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 1e82ee8b1..20b31e1e4 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -18,7 +18,7 @@ use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody, - OutlineEntry, ParElem, TableCell, TableElem, + 
OutlineEntry, TableCell, TableElem, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; use typst_library::visualize::ImageElem; @@ -116,8 +116,6 @@ pub(crate) fn handle_start( let link_id = gc.tags.next_link_id(); push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone()))?; return Ok(()); - } else if let Some(_) = elem.to_packed::() { - TagKind::P.into() } else { return Ok(()); }; From df10cb8570defebda79f000c9ea4bd16eff81102 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 9 Jul 2025 02:09:28 +0200 Subject: [PATCH 47/76] feat: default to the url if no alt text is specified for a link --- crates/typst-library/src/model/link.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/typst-library/src/model/link.rs b/crates/typst-library/src/model/link.rs index 4d121e3c7..6203436e6 100644 --- a/crates/typst-library/src/model/link.rs +++ b/crates/typst-library/src/model/link.rs @@ -128,7 +128,10 @@ impl Show for Packed { } else { let alt = self.alt.get_cloned(styles); match &self.dest { - LinkTarget::Dest(dest) => body.linked(dest.clone(), alt), + LinkTarget::Dest(dest) => { + let url = || dest.as_url().map(|url| url.clone().into_inner()); + body.linked(dest.clone(), alt.or_else(url)) + } LinkTarget::Label(label) => { let elem = engine.introspector.query_label(*label).at(self.span())?; let dest = Destination::Location(elem.location().unwrap()); @@ -190,7 +193,15 @@ pub enum Destination { Location(Location), } -impl Destination {} +impl Destination { + pub fn as_url(&self) -> Option<&Url> { + if let Self::Url(v) = self { + Some(v) + } else { + None + } + } +} impl Repr for Destination { fn repr(&self) -> EcoString { From 0a0830ff93d7c92c3fe5bf882dbe0dd70e43b1aa Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 10 Jul 2025 14:35:41 +0200 Subject: [PATCH 48/76] fix: update krilla --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock 
index 6ab36ee3f..c68d357ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1373,7 +1373,7 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#736d8b7e2c9c43d3fcf8b6bf31fb1a179605cab9" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#f4149ade70a23c38dcbe2db22a604a4714456b2a" dependencies = [ "base64", "bumpalo", @@ -1402,7 +1402,7 @@ dependencies = [ [[package]] name = "krilla-svg" version = "0.1.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#736d8b7e2c9c43d3fcf8b6bf31fb1a179605cab9" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#f4149ade70a23c38dcbe2db22a604a4714456b2a" dependencies = [ "flate2", "fontdb", From e0074d6e390248eb5933de29cb50f5f64f56f91a Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 11 Jul 2025 14:45:45 +0200 Subject: [PATCH 49/76] refactor: make TableCell::kind #[parse] instead of #[synthesized] --- crates/typst-library/src/layout/grid/resolve.rs | 4 ++-- crates/typst-library/src/model/table.rs | 2 +- crates/typst-pdf/src/tags/table.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/typst-library/src/layout/grid/resolve.rs b/crates/typst-library/src/layout/grid/resolve.rs index 0fc2cc13d..d25dc949a 100644 --- a/crates/typst-library/src/layout/grid/resolve.rs +++ b/crates/typst-library/src/layout/grid/resolve.rs @@ -226,7 +226,7 @@ impl ResolvableCell for Packed { let breakable = cell.breakable.get(styles).unwrap_or(breakable); let fill = cell.fill.get_cloned(styles).unwrap_or_else(|| fill.clone()); - let kind = cell.kind.unwrap_or_default().or(kind); + let kind = cell.kind.get(styles).or(kind); let cell_stroke = cell.stroke.resolve(styles); let stroke_overridden = @@ -271,7 +271,7 @@ impl ResolvableCell for Packed { }), ); cell.breakable.set(Smart::Custom(breakable)); - cell.kind = Some(kind); + cell.kind.set(kind); Cell { body: self.pack(), locator, diff --git 
a/crates/typst-library/src/model/table.rs b/crates/typst-library/src/model/table.rs index 7a638083d..578397b58 100644 --- a/crates/typst-library/src/model/table.rs +++ b/crates/typst-library/src/model/table.rs @@ -688,7 +688,7 @@ pub struct TableCell { pub stroke: Sides>>>, #[internal] - #[synthesized] + #[parse(Some(Smart::Auto))] pub kind: Smart, /// Whether rows spanned by this cell can be placed in different pages. diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index b1137820e..0bde00c94 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -60,7 +60,7 @@ impl TableCtx { let y = cell.y.get(StyleChain::default()).unwrap_or_else(|| unreachable!()); let rowspan = cell.rowspan.get(StyleChain::default()); let colspan = cell.colspan.get(StyleChain::default()); - let kind = cell.kind.expect("kind to be set after layouting"); + let kind = cell.kind.get(StyleChain::default()); // Extend the table grid to fit this cell. 
let required_height = y + rowspan.get(); From e3c0855a2bc5e971716986c3d6148b3cc1858fbc Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Sat, 12 Jul 2025 14:15:13 +0200 Subject: [PATCH 50/76] fix: update krilla --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1ef8c3d19..072dd595c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1373,7 +1373,7 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#f4149ade70a23c38dcbe2db22a604a4714456b2a" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#5ae27ecab2f74d7a5e58b962f04c85bc2662602e" dependencies = [ "base64", "bumpalo", @@ -1402,7 +1402,7 @@ dependencies = [ [[package]] name = "krilla-svg" version = "0.1.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#f4149ade70a23c38dcbe2db22a604a4714456b2a" +source = "git+https://github.com/saecki/krilla?branch=tag-attributes#5ae27ecab2f74d7a5e58b962f04c85bc2662602e" dependencies = [ "flate2", "fontdb", From a4957248135410d4b66b3ee17b17a4808f7783a8 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Sat, 12 Jul 2025 20:02:28 +0200 Subject: [PATCH 51/76] feat: mark all shapes as artifacts --- crates/typst-pdf/src/shape.rs | 3 +- crates/typst-pdf/src/tags/mod.rs | 85 +++++++++++++++++++------------- 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/crates/typst-pdf/src/shape.rs b/crates/typst-pdf/src/shape.rs index 3b52939da..8bff87f72 100644 --- a/crates/typst-pdf/src/shape.rs +++ b/crates/typst-pdf/src/shape.rs @@ -1,6 +1,7 @@ use krilla::geom::{Path, PathBuilder, Rect}; use krilla::surface::Surface; use typst_library::diag::SourceResult; +use typst_library::pdf::ArtifactKind; use typst_library::visualize::{Geometry, Shape}; use typst_syntax::Span; @@ -16,7 +17,7 @@ pub(crate) fn handle_shape( gc: &mut GlobalContext, span: Span, ) -> SourceResult<()> { - let mut handle = 
tags::start_marked(gc, surface); + let mut handle = tags::start_artifact(gc, surface, ArtifactKind::Other); let surface = handle.surface(); surface.set_location(span.into_raw().get()); diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 20b31e1e4..b12d9b601 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -45,10 +45,10 @@ pub(crate) fn handle_start( if let Some(artifact) = elem.to_packed::() { let kind = artifact.kind.get(StyleChain::default()); - start_artifact(gc, surface, loc, kind); + push_artifact(gc, surface, loc, kind); return Ok(()); } else if let Some(_) = elem.to_packed::() { - start_artifact(gc, surface, loc, ArtifactKind::Other); + push_artifact(gc, surface, loc, ArtifactKind::Other); return Ok(()); } @@ -107,7 +107,7 @@ pub(crate) fn handle_start( // first page. Maybe it should be the cell on the last page, but that // would require more changes in the layouting code, or a pre-pass // on the frames to figure out if there are other footers following. 
- start_artifact(gc, surface, loc, ArtifactKind::Other); + push_artifact(gc, surface, loc, ArtifactKind::Other); } else { push_stack(gc, loc, StackEntryKind::TableCell(cell.clone()))?; } @@ -125,29 +125,10 @@ pub(crate) fn handle_start( Ok(()) } -fn push_stack( - gc: &mut GlobalContext, - loc: Location, - kind: StackEntryKind, -) -> SourceResult<()> { - if !gc.tags.context_supports(&kind) { - if gc.options.standards.config.validator() == Validator::UA1 { - // TODO: error - } else { - // TODO: warning - } - } - - gc.tags.stack.push(StackEntry { loc, kind, nodes: Vec::new() }); - - Ok(()) -} - pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Location) { if let Some((l, _)) = gc.tags.in_artifact { if l == loc { - surface.end_tagged(); - gc.tags.in_artifact = None; + pop_artifact(gc, surface); } return; } @@ -205,6 +186,41 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc gc.tags.push(node); } +fn push_stack( + gc: &mut GlobalContext, + loc: Location, + kind: StackEntryKind, +) -> SourceResult<()> { + if !gc.tags.context_supports(&kind) { + if gc.options.standards.config.validator() == Validator::UA1 { + // TODO: error + } else { + // TODO: warning + } + } + + gc.tags.stack.push(StackEntry { loc, kind, nodes: Vec::new() }); + + Ok(()) +} + +fn push_artifact( + gc: &mut GlobalContext, + surface: &mut Surface, + loc: Location, + kind: ArtifactKind, +) { + let ty = artifact_type(kind); + let id = surface.start_tagged(ContentTag::Artifact(ty)); + gc.tags.push(TagNode::Leaf(id)); + gc.tags.in_artifact = Some((loc, kind)); +} + +fn pop_artifact(gc: &mut GlobalContext, surface: &mut Surface) { + surface.end_tagged(); + gc.tags.in_artifact = None; +} + pub(crate) fn page_start(gc: &mut GlobalContext, surface: &mut Surface) { if let Some((_, kind)) = gc.tags.in_artifact { let ty = artifact_type(kind); @@ -452,6 +468,17 @@ pub(crate) fn start_span<'a, 'b>( start_content(gc, surface, ContentTag::Span(span)) } +/// 
Returns a [`TagHandle`] that automatically calls [`Surface::end_tagged`] +/// when dropped. +pub(crate) fn start_artifact<'a, 'b>( + gc: &mut GlobalContext, + surface: &'b mut Surface<'a>, + kind: ArtifactKind, +) -> TagHandle<'a, 'b> { + let ty = artifact_type(kind); + start_content(gc, surface, ContentTag::Artifact(ty)) +} + fn start_content<'a, 'b>( gc: &mut GlobalContext, surface: &'b mut Surface<'a>, @@ -471,18 +498,6 @@ fn start_content<'a, 'b>( TagHandle { surface, started: true } } -fn start_artifact( - gc: &mut GlobalContext, - surface: &mut Surface, - loc: Location, - kind: ArtifactKind, -) { - let ty = artifact_type(kind); - let id = surface.start_tagged(ContentTag::Artifact(ty)); - gc.tags.push(TagNode::Leaf(id)); - gc.tags.in_artifact = Some((loc, kind)); -} - fn artifact_type(kind: ArtifactKind) -> ArtifactType { match kind { ArtifactKind::Header => ArtifactType::Header, From eb9a3359d5ef84dc9819e081baf850c77e8eb666 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Sun, 13 Jul 2025 17:27:02 +0200 Subject: [PATCH 52/76] feat: generate tags for lists and enums --- crates/typst-layout/src/lists.rs | 15 +++- crates/typst-layout/src/rules.rs | 20 +++-- crates/typst-library/src/model/outline.rs | 7 -- crates/typst-library/src/pdf/accessibility.rs | 40 +++++++++ crates/typst-pdf/src/tags/list.rs | 85 +++++++++++++++++++ crates/typst-pdf/src/tags/mod.rs | 72 +++++++++++++--- 6 files changed, 209 insertions(+), 30 deletions(-) create mode 100644 crates/typst-pdf/src/tags/list.rs diff --git a/crates/typst-layout/src/lists.rs b/crates/typst-layout/src/lists.rs index adb793fb9..5fb9337ff 100644 --- a/crates/typst-layout/src/lists.rs +++ b/crates/typst-layout/src/lists.rs @@ -7,6 +7,7 @@ use typst_library::introspection::Locator; use typst_library::layout::grid::resolve::{Cell, CellGrid}; use typst_library::layout::{Axes, Fragment, HAlignment, Regions, Sizing, VAlignment}; use typst_library::model::{EnumElem, ListElem, Numbering, ParElem, ParbreakElem}; +use 
typst_library::pdf::PdfMarkerTag; use typst_library::text::TextElem; use crate::grid::GridLayouter; @@ -48,12 +49,16 @@ pub fn layout_list( if !tight { body += ParbreakElem::shared(); } + let body = body.set(ListElem::depth, Depth(1)); cells.push(Cell::new(Content::empty(), locator.next(&()))); - cells.push(Cell::new(marker.clone(), locator.next(&marker.span()))); + cells.push(Cell::new( + PdfMarkerTag::ListItemLabel(marker.clone()), + locator.next(&marker.span()), + )); cells.push(Cell::new(Content::empty(), locator.next(&()))); cells.push(Cell::new( - body.set(ListElem::depth, Depth(1)), + PdfMarkerTag::ListItemBody(body), locator.next(&item.body.span()), )); } @@ -142,11 +147,13 @@ pub fn layout_enum( body += ParbreakElem::shared(); } + let body = body.set(EnumElem::parents, smallvec![number]); + cells.push(Cell::new(Content::empty(), locator.next(&()))); - cells.push(Cell::new(resolved, locator.next(&()))); + cells.push(Cell::new(PdfMarkerTag::ListItemLabel(resolved), locator.next(&()))); cells.push(Cell::new(Content::empty(), locator.next(&()))); cells.push(Cell::new( - body.set(EnumElem::parents, smallvec![number]), + PdfMarkerTag::ListItemBody(body), locator.next(&item.body.span()), )); number = diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index 97c8c11ea..8d3e6da4f 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -20,11 +20,12 @@ use typst_library::math::EquationElem; use typst_library::model::{ Attribution, BibliographyElem, CiteElem, CiteGroup, CslSource, Destination, EmphElem, EnumElem, FigureCaption, FigureElem, FootnoteElem, FootnoteEntry, HeadingElem, - LinkElem, LinkTarget, ListElem, Outlinable, OutlineBody, OutlineElem, OutlineEntry, - ParElem, ParbreakElem, QuoteElem, RefElem, StrongElem, TableCell, TableElem, - TermsElem, Works, + LinkElem, LinkTarget, ListElem, Outlinable, OutlineElem, OutlineEntry, ParElem, + ParbreakElem, QuoteElem, RefElem, StrongElem, TableCell, 
TableElem, TermsElem, Works, +}; +use typst_library::pdf::{ + ArtifactElem, EmbedElem, PdfMarkerTag, PdfMarkerTagKind, PdfTagElem, }; -use typst_library::pdf::{ArtifactElem, EmbedElem, PdfTagElem}; use typst_library::text::{ DecoLine, Decoration, HighlightElem, ItalicToggle, LinebreakElem, LocalName, OverlineElem, RawElem, RawLine, ScriptKind, ShiftSettings, Smallcaps, SmallcapsElem, @@ -56,7 +57,6 @@ pub fn register(rules: &mut NativeRuleMap) { rules.register(Paged, FOOTNOTE_RULE); rules.register(Paged, FOOTNOTE_ENTRY_RULE); rules.register(Paged, OUTLINE_RULE); - rules.register(Paged, OUTLINE_BODY_RULE); rules.register(Paged, OUTLINE_ENTRY_RULE); rules.register(Paged, REF_RULE); rules.register(Paged, CITE_GROUP_RULE); @@ -108,6 +108,7 @@ pub fn register(rules: &mut NativeRuleMap) { rules.register(Paged, EMBED_RULE); rules.register(Paged, PDF_TAG_RULE); rules.register(Paged, PDF_ARTIFACT_RULE); + rules.register(Paged, PDF_MARKER_TAG_RULE); } const STRONG_RULE: ShowFn = |elem, _, styles| { @@ -466,13 +467,14 @@ const OUTLINE_RULE: ShowFn = |elem, engine, styles| { } // Wrap the entries into a marker for pdf tagging. 
- seq.push(OutlineBody::new(Content::sequence(entries)).pack()); + seq.push( + PdfMarkerTag::new(PdfMarkerTagKind::OutlineBody, Content::sequence(entries)) + .pack(), + ); Ok(Content::sequence(seq)) }; -const OUTLINE_BODY_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); - const OUTLINE_ENTRY_RULE: ShowFn = |elem, engine, styles| { let span = elem.span(); let context = Context::new(None, Some(styles)); @@ -931,3 +933,5 @@ const EMBED_RULE: ShowFn = |_, _, _| Ok(Content::empty()); const PDF_TAG_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); const PDF_ARTIFACT_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); + +const PDF_MARKER_TAG_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index 2dbd33cc2..9421c9a4e 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -266,13 +266,6 @@ impl LocalName for Packed { const KEY: &'static str = "outline"; } -/// Only used to delimit the outline in tagged PDF. -#[elem(Locatable)] -pub struct OutlineBody { - #[required] - pub body: Content, -} - /// Defines how an outline is indented. #[derive(Debug, Clone, PartialEq, Hash)] pub enum OutlineIndent { diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index 16a57e537..e4213542b 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -262,3 +262,43 @@ impl TableHeaderScope { } } } + +// Used to delimit content for tagged PDF. +#[elem(Locatable)] +pub struct PdfMarkerTag { + #[required] + pub kind: PdfMarkerTagKind, + #[required] + pub body: Content, +} + +macro_rules! 
pdf_marker_tag { + ($(#[doc = $doc:expr] $variant:ident,)+) => { + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Cast)] + pub enum PdfMarkerTagKind { + $( + #[doc = $doc] + $variant + ),+ + } + + impl PdfMarkerTag { + $( + #[doc = $doc] + #[allow(non_snake_case)] + pub fn $variant(body: Content) -> Content { + Self::new(PdfMarkerTagKind::$variant, body).pack() + } + )+ + } + } +} + +pdf_marker_tag! { + /// `TOC` + OutlineBody, + /// `Lbl` (marker) of the list item + ListItemLabel, + /// `LBody` of the enum item + ListItemBody, +} diff --git a/crates/typst-pdf/src/tags/list.rs b/crates/typst-pdf/src/tags/list.rs new file mode 100644 index 000000000..99f1254cc --- /dev/null +++ b/crates/typst-pdf/src/tags/list.rs @@ -0,0 +1,85 @@ +use krilla::tagging::{ListNumbering, TagKind}; + +use crate::tags::TagNode; + +pub(crate) struct ListCtx { + numbering: ListNumbering, + items: Vec, +} + +struct ListItem { + label: Vec, + body: Option>, + sub_list: Option, +} + +impl ListCtx { + pub(crate) fn new(numbering: ListNumbering) -> Self { + Self { numbering, items: Vec::new() } + } + + pub(crate) fn push_label(&mut self, nodes: Vec) { + self.items.push(ListItem { label: nodes, body: None, sub_list: None }); + } + + pub(crate) fn push_body(&mut self, mut nodes: Vec) { + let item = self.items.last_mut().expect("ListItemLabel"); + + // Nested lists are expected to have the following structure: + // + // Typst code + // ``` + // - a + // - b + // - c + // - d + // - e + // ``` + // + // Structure tree + // ``` + // + //
  <LI> + // <Lbl> `-` + // <LBody> `a` + //
  <LI> + // <Lbl> `-` + // <LBody> `b` + // <L> + //
  <LI> + // <Lbl> `-` + // <LBody> `c` + //
  <LI> + // <Lbl> `-` + // <LBody> `d` + //
  • + // `-` + // `d` + // ``` + // + // So move the nested list out of the list item. + if let [_, TagNode::Group(tag, _)] = nodes.as_slice() { + if matches!(tag.kind, TagKind::L(_)) { + item.sub_list = nodes.pop(); + } + } + + item.body = Some(nodes); + } + + pub(crate) fn build_list(self, mut nodes: Vec) -> TagNode { + for item in self.items.into_iter() { + nodes.push(TagNode::Group( + TagKind::LI.into(), + vec![ + TagNode::Group(TagKind::Lbl.into(), item.label), + TagNode::Group(TagKind::LBody.into(), item.body.unwrap_or_default()), + ], + )); + if let Some(sub_list) = item.sub_list { + nodes.push(sub_list); + } + } + TagNode::Group(TagKind::L(self.numbering).into(), nodes) + } +} diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index b12d9b601..d7fe24f78 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -6,8 +6,8 @@ use krilla::configure::Validator; use krilla::page::Page; use krilla::surface::Surface; use krilla::tagging::{ - ArtifactType, ContentTag, Identifier, Node, SpanTag, TableDataCell, Tag, TagBuilder, - TagGroup, TagKind, TagTree, + ArtifactType, ContentTag, Identifier, ListNumbering, Node, SpanTag, TableDataCell, + Tag, TagBuilder, TagGroup, TagKind, TagTree, }; use typst_library::diag::SourceResult; use typst_library::foundations::{ @@ -17,17 +17,21 @@ use typst_library::foundations::{ use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ - Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineBody, + Destination, EnumElem, FigureCaption, FigureElem, HeadingElem, ListElem, Outlinable, OutlineEntry, TableCell, TableElem, }; -use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind}; +use typst_library::pdf::{ + ArtifactElem, ArtifactKind, PdfMarkerTag, PdfMarkerTagKind, PdfTagElem, PdfTagKind, +}; use typst_library::visualize::ImageElem; use crate::convert::GlobalContext; use 
crate::link::LinkAnnotation; +use crate::tags::list::ListCtx; use crate::tags::outline::OutlineCtx; use crate::tags::table::TableCtx; +mod list; mod outline; mod table; @@ -58,16 +62,32 @@ pub(crate) fn handle_start( PdfTagKind::Part => TagKind::Part.into(), _ => todo!(), } - } else if let Some(heading) = elem.to_packed::() { - let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX); - let name = heading.body.plain_text().to_string(); - TagKind::Hn(level, Some(name)).into() - } else if let Some(_) = elem.to_packed::() { - push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()))?; - return Ok(()); + } else if let Some(tag) = elem.to_packed::() { + match tag.kind { + PdfMarkerTagKind::OutlineBody => { + push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()))?; + return Ok(()); + } + PdfMarkerTagKind::ListItemLabel => { + push_stack(gc, loc, StackEntryKind::ListItemLabel)?; + return Ok(()); + } + PdfMarkerTagKind::ListItemBody => { + push_stack(gc, loc, StackEntryKind::ListItemBody)?; + return Ok(()); + } + } } else if let Some(entry) = elem.to_packed::() { push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone()))?; return Ok(()); + } else if let Some(_list) = elem.to_packed::() { + let numbering = ListNumbering::Circle; // TODO: infer numbering from `list.marker` + push_stack(gc, loc, StackEntryKind::List(ListCtx::new(numbering)))?; + return Ok(()); + } else if let Some(_enumeration) = elem.to_packed::() { + let numbering = ListNumbering::Decimal; // TODO: infer numbering from `enum.numbering` + push_stack(gc, loc, StackEntryKind::List(ListCtx::new(numbering)))?; + return Ok(()); } else if let Some(_) = elem.to_packed::() { let alt = None; // TODO TagKind::Figure.with_alt_text(alt) @@ -112,6 +132,10 @@ pub(crate) fn handle_start( push_stack(gc, loc, StackEntryKind::TableCell(cell.clone()))?; } return Ok(()); + } else if let Some(heading) = elem.to_packed::() { + let level = heading.level().try_into().unwrap_or(NonZeroU32::MAX); + let 
name = heading.body.plain_text().to_string(); + TagKind::Hn(level, Some(name)).into() } else if let Some(link) = elem.to_packed::() { let link_id = gc.tags.next_link_id(); push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone()))?; @@ -171,6 +195,17 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc table_ctx.insert(&cell, entry.nodes); return; } + StackEntryKind::List(list) => list.build_list(entry.nodes), + StackEntryKind::ListItemLabel => { + let list_ctx = gc.tags.parent_list().expect("parent list"); + list_ctx.push_label(entry.nodes); + return; + } + StackEntryKind::ListItemBody => { + let list_ctx = gc.tags.parent_list().expect("parent list"); + list_ctx.push_body(entry.nodes); + return; + } StackEntryKind::Link(_, link) => { let alt = link.alt.as_ref().map(EcoString::to_string); let tag = TagKind::Link.with_alt_text(alt); @@ -309,6 +344,10 @@ impl Tags { self.parent()?.as_table_mut() } + pub(crate) fn parent_list(&mut self) -> Option<&mut ListCtx> { + self.parent()?.as_list_mut() + } + pub(crate) fn find_parent_link(&self) -> Option<(LinkId, &Packed)> { self.stack.iter().rev().find_map(|entry| entry.kind.as_link()) } @@ -378,6 +417,9 @@ pub(crate) enum StackEntryKind { OutlineEntry(Packed), Table(TableCtx), TableCell(Packed), + List(ListCtx), + ListItemLabel, + ListItemBody, Link(LinkId, Packed), } @@ -406,6 +448,14 @@ impl StackEntryKind { } } + pub(crate) fn as_list_mut(&mut self) -> Option<&mut ListCtx> { + if let Self::List(v) = self { + Some(v) + } else { + None + } + } + pub(crate) fn as_link(&self) -> Option<(LinkId, &Packed)> { if let Self::Link(id, link) = self { Some((*id, link)) From 484f633e27a41f58bdb56768524fb68c79773356 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Sun, 13 Jul 2025 17:30:37 +0200 Subject: [PATCH 53/76] chore: remove left over file from merge --- crates/typst-library/src/html/mod.rs | 87 ---------------------------- 1 file changed, 87 deletions(-) delete mode 100644 
crates/typst-library/src/html/mod.rs diff --git a/crates/typst-library/src/html/mod.rs b/crates/typst-library/src/html/mod.rs deleted file mode 100644 index 301c160ba..000000000 --- a/crates/typst-library/src/html/mod.rs +++ /dev/null @@ -1,87 +0,0 @@ -//! HTML output. - -mod dom; -mod typed; - -pub use self::dom::*; - -use ecow::EcoString; - -use crate::foundations::{elem, Content, Module, Scope}; -use crate::introspection::Locatable; - -/// Create a module with all HTML definitions. -pub fn module() -> Module { - let mut html = Scope::deduplicating(); - html.start_category(crate::Category::Html); - html.define_elem::(); - html.define_elem::(); - self::typed::define(&mut html); - Module::new("html", html) -} - -/// An HTML element that can contain Typst content. -/// -/// Typst's HTML export automatically generates the appropriate tags for most -/// elements. However, sometimes, it is desirable to retain more control. For -/// example, when using Typst to generate your blog, you could use this function -/// to wrap each article in an `
    ` tag. -/// -/// Typst is aware of what is valid HTML. A tag and its attributes must form -/// syntactically valid HTML. Some tags, like `meta` do not accept content. -/// Hence, you must not provide a body for them. We may add more checks in the -/// future, so be sure that you are generating valid HTML when using this -/// function. -/// -/// Normally, Typst will generate `html`, `head`, and `body` tags for you. If -/// you instead create them with this function, Typst will omit its own tags. -/// -/// ```typ -/// #html.elem("div", attrs: (style: "background: aqua"))[ -/// A div with _Typst content_ inside! -/// ] -/// ``` -#[elem(name = "elem", Locatable)] -pub struct HtmlElem { - /// The element's tag. - #[required] - pub tag: HtmlTag, - - /// The element's HTML attributes. - pub attrs: HtmlAttrs, - - /// The contents of the HTML element. - /// - /// The body can be arbitrary Typst content. - #[positional] - pub body: Option, -} - -impl HtmlElem { - /// Add an attribute to the element. - pub fn with_attr(mut self, attr: HtmlAttr, value: impl Into) -> Self { - self.attrs - .as_option_mut() - .get_or_insert_with(Default::default) - .push(attr, value); - self - } -} - -/// An element that lays out its content as an inline SVG. -/// -/// Sometimes, converting Typst content to HTML is not desirable. This can be -/// the case for plots and other content that relies on positioning and styling -/// to convey its message. -/// -/// This function allows you to use the Typst layout engine that would also be -/// used for PDF, SVG, and PNG export to render a part of your document exactly -/// how it would appear when exported in one of these formats. It embeds the -/// content as an inline SVG. -#[elem] -pub struct FrameElem { - /// The content that shall be laid out. 
- #[positional] - #[required] - pub body: Content, -} From b5c6f7132b5c184b9a9fa6f15d721e25773546de Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Sun, 13 Jul 2025 17:31:48 +0200 Subject: [PATCH 54/76] feat!: remove unfinished manual tagging code for now --- crates/typst-layout/src/rules.rs | 7 +- crates/typst-library/src/pdf/accessibility.rs | 167 +----------------- crates/typst-library/src/pdf/mod.rs | 1 - crates/typst-pdf/src/tags/mod.rs | 12 +- 4 files changed, 4 insertions(+), 183 deletions(-) diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index 8d3e6da4f..ccdae8c68 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -23,9 +23,7 @@ use typst_library::model::{ LinkElem, LinkTarget, ListElem, Outlinable, OutlineElem, OutlineEntry, ParElem, ParbreakElem, QuoteElem, RefElem, StrongElem, TableCell, TableElem, TermsElem, Works, }; -use typst_library::pdf::{ - ArtifactElem, EmbedElem, PdfMarkerTag, PdfMarkerTagKind, PdfTagElem, -}; +use typst_library::pdf::{ArtifactElem, EmbedElem, PdfMarkerTag, PdfMarkerTagKind}; use typst_library::text::{ DecoLine, Decoration, HighlightElem, ItalicToggle, LinebreakElem, LocalName, OverlineElem, RawElem, RawLine, ScriptKind, ShiftSettings, Smallcaps, SmallcapsElem, @@ -106,7 +104,6 @@ pub fn register(rules: &mut NativeRuleMap) { // PDF. 
rules.register(Paged, EMBED_RULE); - rules.register(Paged, PDF_TAG_RULE); rules.register(Paged, PDF_ARTIFACT_RULE); rules.register(Paged, PDF_MARKER_TAG_RULE); } @@ -930,8 +927,6 @@ const EQUATION_RULE: ShowFn = |elem, _, styles| { const EMBED_RULE: ShowFn = |_, _, _| Ok(Content::empty()); -const PDF_TAG_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); - const PDF_ARTIFACT_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); const PDF_MARKER_TAG_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index e4213542b..f987a7645 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -1,177 +1,12 @@ use std::num::NonZeroU32; -use ecow::EcoString; -use typst_macros::{cast, elem, func, Cast}; +use typst_macros::{elem, func, Cast}; use typst_utils::NonZeroExt; use crate::foundations::{Content, NativeElement, Smart}; use crate::introspection::Locatable; use crate::model::TableCell; -// TODO: docs -#[elem(Locatable)] -pub struct PdfTagElem { - #[default(PdfTagKind::NonStruct)] - pub kind: PdfTagKind, - - /// An alternate description. - pub alt: Option, - /// Exact replacement for this structure element and its children. - pub actual_text: Option, - /// The expanded form of an abbreviation/acronym. - pub expansion: Option, - - /// The content to underline. 
- #[required] - pub body: Content, -} - -// TODO: docs -/// PDF structure elements -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub enum PdfTagKind { - // grouping elements - /// (Part) - Part, - /// (Article) - Art, - /// (Section) - Sect, - /// (Division) - Div, - /// (Block quotation) - BlockQuote, - /// (Caption) - Caption, - /// (Table of contents) - TOC, - /// (Table of contents item) - TOCI, - /// (Index) - Index, - /// (Nonstructural element) - NonStruct, - /// (Private element) - Private, - - // paragraph like elements - /// (Heading) - H { title: Option }, - /// (Heading level 1) - H1 { title: Option }, - /// (Heading level 2) - H2 { title: Option }, - /// (Heading level 3) - H4 { title: Option }, - /// (Heading level 4) - H3 { title: Option }, - /// (Heading level 5) - H5 { title: Option }, - /// (Heading level 6) - H6 { title: Option }, - /// (Paragraph) - P, - - // list elements - /// (List) - L { numbering: ListNumbering }, - /// (List item) - LI, - /// (Label) - Lbl, - /// (List body) - LBody, - - // table elements - /// (Table) - Table, - /// (Table row) - TR, - /// (Table header) - TH { scope: TableHeaderScope }, - /// (Table data cell) - TD, - /// (Table header row group) - THead, - /// (Table body row group) - TBody, - /// (Table footer row group) - TFoot, - - // inline elements - /// (Span) - Span, - /// (Quotation) - Quote, - /// (Note) - Note, - /// (Reference) - Reference, - /// (Bibliography Entry) - BibEntry, - /// (Code) - Code, - /// (Link) - Link, - /// (Annotation) - Annot, - - /// (Ruby) - Ruby, - /// (Ruby base text) - RB, - /// (Ruby annotation text) - RT, - /// (Ruby punctuation) - RP, - - /// (Warichu) - Warichu, - /// (Warichu text) - WT, - /// (Warichu punctuation) - WP, - - /// (Figure) - Figure, - /// (Formula) - Formula, - /// (Form) - Form, -} - -cast! 
{ - PdfTagKind, - self => match self { - PdfTagKind::Part => "part".into_value(), - _ => todo!(), - }, - "part" => Self::Part, - // TODO -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum ListNumbering { - /// No numbering. - None, - /// Solid circular bullets. - Disc, - /// Open circular bullets. - Circle, - /// Solid square bullets. - Square, - /// Decimal numbers. - Decimal, - /// Lowercase Roman numerals. - LowerRoman, - /// Uppercase Roman numerals. - UpperRoman, - /// Lowercase letters. - LowerAlpha, - /// Uppercase letters. - UpperAlpha, -} - /// Mark content as a PDF artifact. /// TODO: maybe generalize this and use it to mark html elements with `aria-hidden="true"`? #[elem(Locatable)] diff --git a/crates/typst-library/src/pdf/mod.rs b/crates/typst-library/src/pdf/mod.rs index 8a0d40b9c..869b20496 100644 --- a/crates/typst-library/src/pdf/mod.rs +++ b/crates/typst-library/src/pdf/mod.rs @@ -13,7 +13,6 @@ pub fn module() -> Module { let mut pdf = Scope::deduplicating(); pdf.start_category(crate::Category::Pdf); pdf.define_elem::(); - pdf.define_elem::(); pdf.define_elem::(); pdf.define_func::(); pdf.define_func::(); diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index d7fe24f78..e65f73aa0 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -20,9 +20,7 @@ use typst_library::model::{ Destination, EnumElem, FigureCaption, FigureElem, HeadingElem, ListElem, Outlinable, OutlineEntry, TableCell, TableElem, }; -use typst_library::pdf::{ - ArtifactElem, ArtifactKind, PdfMarkerTag, PdfMarkerTagKind, PdfTagElem, PdfTagKind, -}; +use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfMarkerTag, PdfMarkerTagKind}; use typst_library::visualize::ImageElem; use crate::convert::GlobalContext; @@ -56,13 +54,7 @@ pub(crate) fn handle_start( return Ok(()); } - let tag: Tag = if let Some(pdf_tag) = elem.to_packed::() { - let kind = pdf_tag.kind.get_ref(StyleChain::default()); - match kind { - 
PdfTagKind::Part => TagKind::Part.into(), - _ => todo!(), - } - } else if let Some(tag) = elem.to_packed::() { + let tag: Tag = if let Some(tag) = elem.to_packed::() { match tag.kind { PdfMarkerTagKind::OutlineBody => { push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()))?; From 3c46056599105e6dee05241baa175a01b4da30c8 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Sun, 13 Jul 2025 17:46:55 +0200 Subject: [PATCH 55/76] fix: public outline `entry.inner()` function --- crates/typst-layout/src/rules.rs | 2 +- crates/typst-library/src/model/outline.rs | 71 +++++++++++++---------- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index ccdae8c68..f4392d2b5 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -496,7 +496,7 @@ const OUTLINE_ENTRY_RULE: ShowFn = |elem, engine, styles| { let close = quotes.double_close; eco_format!("{prefix} {open}{body}{close} {page_str} {page_nr}",) }; - let inner = elem.inner(context, span, body, page)?; + let inner = elem.build_inner(context, span, body, page)?; let block = if elem.element.is::() { let body = prefix.unwrap_or_default() + inner; BlockElem::new() diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index 9421c9a4e..0f94b1f58 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -501,6 +501,47 @@ impl OutlineEntry { /// This includes the body, the fill, and page number. #[func(contextual)] pub fn inner( + &self, + engine: &mut Engine, + context: Tracked, + span: Span, + ) -> SourceResult { + let body = self.body().at(span)?; + let page = self.page(engine, context, span)?; + self.build_inner(context, span, body, page) + } + + /// The content which is displayed in place of the referred element at its + /// entry in the outline. 
For a heading, this is its + /// [`body`]($heading.body); for a figure a caption and for equations, it is + /// empty. + #[func] + pub fn body(&self) -> StrResult { + Ok(self.outlinable()?.body()) + } + + /// The page number of this entry's element, formatted with the numbering + /// set for the referenced page. + #[func(contextual)] + pub fn page( + &self, + engine: &mut Engine, + context: Tracked, + span: Span, + ) -> SourceResult { + let loc = self.element_location().at(span)?; + let styles = context.styles().at(span)?; + let numbering = engine + .introspector + .page_numbering(loc) + .cloned() + .unwrap_or_else(|| NumberingPattern::from_str("1").unwrap().into()); + Counter::new(CounterKey::Page).display_at_loc(engine, loc, styles, &numbering) + } +} + +impl OutlineEntry { + pub fn build_inner( &self, context: Tracked, span: Span, @@ -556,36 +597,6 @@ impl OutlineEntry { Ok(Content::sequence(seq)) } - /// The content which is displayed in place of the referred element at its - /// entry in the outline. For a heading, this is its - /// [`body`]($heading.body); for a figure a caption and for equations, it is - /// empty. - #[func] - pub fn body(&self) -> StrResult { - Ok(self.outlinable()?.body()) - } - - /// The page number of this entry's element, formatted with the numbering - /// set for the referenced page. 
- #[func(contextual)] - pub fn page( - &self, - engine: &mut Engine, - context: Tracked, - span: Span, - ) -> SourceResult { - let loc = self.element_location().at(span)?; - let styles = context.styles().at(span)?; - let numbering = engine - .introspector - .page_numbering(loc) - .cloned() - .unwrap_or_else(|| NumberingPattern::from_str("1").unwrap().into()); - Counter::new(CounterKey::Page).display_at_loc(engine, loc, styles, &numbering) - } -} - -impl OutlineEntry { fn outlinable(&self) -> StrResult<&dyn Outlinable> { self.element .with::() From e5e5fba418b028466ba2e02703e29402bdcb4f95 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Sun, 13 Jul 2025 17:53:55 +0200 Subject: [PATCH 56/76] fix: revert making math elements Locatable --- crates/typst-library/src/math/mod.rs | 5 ++--- crates/typst-library/src/math/root.rs | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/typst-library/src/math/mod.rs b/crates/typst-library/src/math/mod.rs index 79e047cdd..3d39e2fd2 100644 --- a/crates/typst-library/src/math/mod.rs +++ b/crates/typst-library/src/math/mod.rs @@ -28,7 +28,6 @@ use typst_utils::singleton; use unicode_math_class::MathClass; use crate::foundations::{elem, Content, Module, NativeElement, Scope}; -use crate::introspection::Locatable; use crate::layout::{Em, HElem}; use crate::text::TextElem; @@ -111,7 +110,7 @@ pub fn module() -> Module { pub trait Mathy {} /// A math alignment point: `&`, `&&`. -#[elem(title = "Alignment Point", Mathy, Locatable)] +#[elem(title = "Alignment Point", Mathy)] pub struct AlignPointElem {} impl AlignPointElem { @@ -138,7 +137,7 @@ impl AlignPointElem { /// /// $x loves y and y loves 5$ /// ``` -#[elem(Mathy, Locatable)] +#[elem(Mathy)] pub struct ClassElem { /// The class to apply to the content. 
#[required] diff --git a/crates/typst-library/src/math/root.rs b/crates/typst-library/src/math/root.rs index 15ef8c55d..ad111700b 100644 --- a/crates/typst-library/src/math/root.rs +++ b/crates/typst-library/src/math/root.rs @@ -1,7 +1,6 @@ use typst_syntax::Span; use crate::foundations::{elem, func, Content, NativeElement}; -use crate::introspection::Locatable; use crate::math::Mathy; /// A square root. @@ -23,7 +22,7 @@ pub fn sqrt( /// ```example /// $ root(3, x) $ /// ``` -#[elem(Mathy, Locatable)] +#[elem(Mathy)] pub struct RootElem { /// Which root of the radicand to take. #[positional] From 2621c6416e5477af1e0599f093df0d520cd74f1b Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Sun, 13 Jul 2025 18:03:55 +0200 Subject: [PATCH 57/76] feat!: revert making some elements Locatable --- crates/typst-library/src/layout/grid/mod.rs | 4 ++-- crates/typst-library/src/model/table.rs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/typst-library/src/layout/grid/mod.rs b/crates/typst-library/src/layout/grid/mod.rs index bf03d9387..b539d6e07 100644 --- a/crates/typst-library/src/layout/grid/mod.rs +++ b/crates/typst-library/src/layout/grid/mod.rs @@ -448,7 +448,7 @@ impl TryFrom for GridItem { /// If `repeat` is set to `true`, the header will be repeated across pages. For /// an example, refer to the [`table.header`]($table.header) element and the /// [`grid.stroke`]($grid.stroke) parameter. -#[elem(name = "header", title = "Grid Header", Locatable)] +#[elem(name = "header", title = "Grid Header")] pub struct GridHeader { /// Whether this header should be repeated across pages. #[default(true)] @@ -476,7 +476,7 @@ pub struct GridHeader { /// itself on every page of the table. /// /// No other grid cells may be placed after the footer. -#[elem(name = "footer", title = "Grid Footer", Locatable)] +#[elem(name = "footer", title = "Grid Footer")] pub struct GridFooter { /// Whether this footer should be repeated across pages. 
#[default(true)] diff --git a/crates/typst-library/src/model/table.rs b/crates/typst-library/src/model/table.rs index 578397b58..d1026a909 100644 --- a/crates/typst-library/src/model/table.rs +++ b/crates/typst-library/src/model/table.rs @@ -415,7 +415,7 @@ impl TryFrom for TableItem { /// [7.34], [57], [2], /// ) /// ``` -#[elem(name = "header", title = "Table Header", Locatable)] +#[elem(name = "header", title = "Table Header")] pub struct TableHeader { /// Whether this header should be repeated across pages. #[default(true)] @@ -445,7 +445,7 @@ pub struct TableHeader { /// totals, or other information that should be visible on every page. /// /// No other table cells may be placed after the footer. -#[elem(name = "footer", title = "Table Footer", Locatable)] +#[elem(name = "footer", title = "Table Footer")] pub struct TableFooter { /// Whether this footer should be repeated across pages. #[default(true)] @@ -488,7 +488,7 @@ pub struct TableFooter { /// [19:00], [Day 1 Attendee Mixer], /// ) /// ``` -#[elem(name = "hline", title = "Table Horizontal Line", Locatable)] +#[elem(name = "hline", title = "Table Horizontal Line")] pub struct TableHLine { /// The row above which the horizontal line is placed (zero-indexed). /// Functions identically to the `y` field in [`grid.hline`]($grid.hline.y). @@ -532,7 +532,7 @@ pub struct TableHLine { /// use the [table's `stroke`]($table.stroke) field or [`table.cell`'s /// `stroke`]($table.cell.stroke) field instead if the line you want to place is /// part of all your tables' designs. -#[elem(name = "vline", title = "Table Vertical Line", Locatable)] +#[elem(name = "vline", title = "Table Vertical Line")] pub struct TableVLine { /// The column before which the horizontal line is placed (zero-indexed). /// Functions identically to the `x` field in [`grid.vline`]($grid.vline). 
From e4021390a37547988d17ce0131ad60b7230a9a2d Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 14 Jul 2025 10:13:21 +0200 Subject: [PATCH 58/76] feat: don't wrap table cell content in paragraph --- crates/typst-pdf/src/tags/table.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index 0bde00c94..b8522569a 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -180,11 +180,7 @@ impl TableCtx { ) .into(), }; - - // Wrap content in a paragraph. - // TODO: maybe avoid nested paragraphs? - let par = TagNode::Group(TagKind::P.into(), cell.nodes); - Some(TagNode::Group(tag, vec![par])) + Some(TagNode::Group(tag, cell.nodes)) }) .collect(); @@ -422,7 +418,7 @@ mod tests { TagNode::Group( TagKind::TH(TableHeaderCell::new(scope).with_headers(TagIdRefs { ids })) .with_id(Some(id)), - vec![TagNode::Group(TagKind::P.into(), Vec::new())], + Vec::new(), ) } @@ -433,7 +429,7 @@ mod tests { .collect(); TagNode::Group( TagKind::TD(TableDataCell::new().with_headers(TagIdRefs { ids })).into(), - vec![TagNode::Group(TagKind::P.into(), Vec::new())], + Vec::new(), ) } From 9bbfe4c14ac49a265dacb6fda66ae582197fa77a Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 14 Jul 2025 10:31:17 +0200 Subject: [PATCH 59/76] fix: make figure captions sibling elements if the caption is contained within the figure screen readers might ignore it --- crates/typst-layout/src/rules.rs | 2 +- crates/typst-library/src/pdf/accessibility.rs | 2 ++ crates/typst-pdf/src/tags/mod.rs | 12 ++++++++---- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index f4392d2b5..32fdbe1f9 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -294,7 +294,7 @@ const HEADING_RULE: ShowFn = |elem, engine, styles| { const FIGURE_RULE: ShowFn = |elem, _, styles| { let 
span = elem.span(); - let mut realized = elem.body.clone(); + let mut realized = PdfMarkerTag::FigureBody(elem.body.clone()); // Build the caption, if any. if let Some(caption) = elem.caption.get_cloned(styles) { diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index f987a7645..a2cea8ddc 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -132,6 +132,8 @@ macro_rules! pdf_marker_tag { pdf_marker_tag! { /// `TOC` OutlineBody, + /// `Figure` + FigureBody, /// `Lbl` (marker) of the list item ListItemLabel, /// `LBody` of the enum item diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index e65f73aa0..d83274a08 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -60,6 +60,7 @@ pub(crate) fn handle_start( push_stack(gc, loc, StackEntryKind::Outline(OutlineCtx::new()))?; return Ok(()); } + PdfMarkerTagKind::FigureBody => TagKind::Figure.into(), PdfMarkerTagKind::ListItemLabel => { push_stack(gc, loc, StackEntryKind::ListItemLabel)?; return Ok(()); @@ -81,8 +82,13 @@ pub(crate) fn handle_start( push_stack(gc, loc, StackEntryKind::List(ListCtx::new(numbering)))?; return Ok(()); } else if let Some(_) = elem.to_packed::() { - let alt = None; // TODO - TagKind::Figure.with_alt_text(alt) + // Wrap the figure tag and the sibling caption in a container, if the + // caption is contained within the figure like recommended for tables + // screen readers might ignore it. + // TODO: maybe this could be a `NonStruct` tag? 
+ TagKind::P.into() + } else if let Some(_) = elem.to_packed::() { + TagKind::Caption.into() } else if let Some(image) = elem.to_packed::() { let alt = image.alt.get_as_ref().map(|s| s.to_string()); @@ -98,8 +104,6 @@ pub(crate) fn handle_start( } else { TagKind::Figure.with_alt_text(alt) } - } else if let Some(_) = elem.to_packed::() { - TagKind::Caption.into() } else if let Some(table) = elem.to_packed::() { let table_id = gc.tags.next_table_id(); let summary = table.summary.get_as_ref().map(|s| s.to_string()); From e43b8bbb7f620d504eb80fe06e83d18149940dd8 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 14 Jul 2025 12:40:18 +0200 Subject: [PATCH 60/76] fix: out of bounds access when tagging table cells --- crates/typst-pdf/src/tags/table.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index b8522569a..f6e1556d6 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -16,11 +16,12 @@ pub(crate) struct TableCtx { pub(crate) id: TableId, pub(crate) summary: Option, rows: Vec>, + min_width: usize, } impl TableCtx { pub(crate) fn new(id: TableId, summary: Option) -> Self { - Self { id, summary, rows: Vec::new() } + Self { id, summary, rows: Vec::new(), min_width: 0 } } fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> { @@ -64,14 +65,15 @@ impl TableCtx { // Extend the table grid to fit this cell. 
let required_height = y + rowspan.get(); - let required_width = x + colspan.get(); + self.min_width = self.min_width.max(x + colspan.get()); if self.rows.len() < required_height { self.rows - .resize(required_height, vec![GridCell::Missing; required_width]); + .resize(required_height, vec![GridCell::Missing; self.min_width]); } - let row = &mut self.rows[y]; - if row.len() < required_width { - row.resize_with(required_width, || GridCell::Missing); + for row in self.rows.iter_mut() { + if row.len() < self.min_width { + row.resize_with(self.min_width, || GridCell::Missing); + } } // Store references to the cell for all spanned cells. From 4b57373653903478bf8e8b3494796053a11fea3e Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 14 Jul 2025 13:18:43 +0200 Subject: [PATCH 61/76] feat: derive Debug for StackEntry --- crates/typst-pdf/src/tags/list.rs | 2 ++ crates/typst-pdf/src/tags/mod.rs | 6 ++++-- crates/typst-pdf/src/tags/outline.rs | 2 ++ crates/typst-pdf/src/tags/table.rs | 5 +++-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/crates/typst-pdf/src/tags/list.rs b/crates/typst-pdf/src/tags/list.rs index 99f1254cc..4046cdcee 100644 --- a/crates/typst-pdf/src/tags/list.rs +++ b/crates/typst-pdf/src/tags/list.rs @@ -2,11 +2,13 @@ use krilla::tagging::{ListNumbering, TagKind}; use crate::tags::TagNode; +#[derive(Debug)] pub(crate) struct ListCtx { numbering: ListNumbering, items: Vec, } +#[derive(Debug)] struct ListItem { label: Vec, body: Option>, diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index d83274a08..8833d0cd1 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -395,18 +395,20 @@ impl Tags { } } -#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub(crate) struct TableId(u32); -#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub(crate) struct LinkId(u32); +#[derive(Debug)] 
pub(crate) struct StackEntry { pub(crate) loc: Location, pub(crate) kind: StackEntryKind, pub(crate) nodes: Vec, } +#[derive(Debug)] pub(crate) enum StackEntryKind { Standard(Tag), Outline(OutlineCtx), diff --git a/crates/typst-pdf/src/tags/outline.rs b/crates/typst-pdf/src/tags/outline.rs index 946ad6168..e809489f3 100644 --- a/crates/typst-pdf/src/tags/outline.rs +++ b/crates/typst-pdf/src/tags/outline.rs @@ -4,6 +4,7 @@ use typst_library::model::OutlineEntry; use crate::tags::TagNode; +#[derive(Debug)] pub(crate) struct OutlineCtx { stack: Vec, } @@ -52,6 +53,7 @@ impl OutlineCtx { } } +#[derive(Debug)] pub(crate) struct OutlineSection { entries: Vec, } diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index f6e1556d6..a4346460a 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -12,6 +12,7 @@ use typst_library::pdf::{TableCellKind, TableHeaderScope}; use crate::tags::{TableId, TagNode}; +#[derive(Debug)] pub(crate) struct TableCtx { pub(crate) id: TableId, pub(crate) summary: Option, @@ -252,7 +253,7 @@ impl TableCtx { } } -#[derive(Clone, Default)] +#[derive(Clone, Debug, Default)] enum GridCell { Cell(TableCtxCell), Spanned(usize, usize), @@ -286,7 +287,7 @@ impl GridCell { } } -#[derive(Clone)] +#[derive(Clone, Debug)] struct TableCtxCell { x: u32, y: u32, From 0df9da7ce6aed421f7865594d7ffee720e26d500 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 14 Jul 2025 16:35:13 +0200 Subject: [PATCH 62/76] feat: generate tags for terms --- crates/typst-layout/src/rules.rs | 4 ++-- crates/typst-pdf/src/tags/mod.rs | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index 32fdbe1f9..594f88496 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -179,9 +179,9 @@ const TERMS_RULE: ShowFn = |elem, _, styles| { for child in elem.children.iter() { let mut seq = vec![]; 
seq.extend(unpad.clone()); - seq.push(child.term.clone().strong()); + seq.push(PdfMarkerTag::ListItemLabel(child.term.clone().strong())); seq.push(separator.clone()); - seq.push(child.description.clone()); + seq.push(PdfMarkerTag::ListItemBody(child.description.clone())); // Text in wide term lists shall always turn into paragraphs. if !tight { diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 8833d0cd1..a2ad47d7d 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -18,7 +18,7 @@ use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ Destination, EnumElem, FigureCaption, FigureElem, HeadingElem, ListElem, Outlinable, - OutlineEntry, TableCell, TableElem, + OutlineEntry, TableCell, TableElem, TermsElem, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfMarkerTag, PdfMarkerTagKind}; use typst_library::visualize::ImageElem; @@ -81,6 +81,10 @@ pub(crate) fn handle_start( let numbering = ListNumbering::Decimal; // TODO: infer numbering from `enum.numbering` push_stack(gc, loc, StackEntryKind::List(ListCtx::new(numbering)))?; return Ok(()); + } else if let Some(_enumeration) = elem.to_packed::() { + let numbering = ListNumbering::None; + push_stack(gc, loc, StackEntryKind::List(ListCtx::new(numbering)))?; + return Ok(()); } else if let Some(_) = elem.to_packed::() { // Wrap the figure tag and the sibling caption in a container, if the // caption is contained within the figure like recommended for tables From 451b0815ff356c4d13b29c528e389e253d61dfe5 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Mon, 14 Jul 2025 17:11:02 +0200 Subject: [PATCH 63/76] feat: mark numbering prefix of heading and outline as Lbl --- crates/typst-layout/src/rules.rs | 3 +-- crates/typst-library/src/model/outline.rs | 3 ++- crates/typst-library/src/pdf/accessibility.rs | 2 ++ crates/typst-pdf/src/tags/mod.rs | 1 + 4 files changed, 6 insertions(+), 3 
deletions(-) diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index 594f88496..77aa8034f 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -275,7 +275,7 @@ const HEADING_RULE: ShowFn = |elem, engine, styles| { let spacing = HElem::new(SPACING_TO_NUMBERING.into()).with_weak(true).pack(); - realized = numbering + spacing + realized; + realized = PdfMarkerTag::Label(numbering) + spacing + realized; } let block = if indent != Abs::zero() { @@ -477,7 +477,6 @@ const OUTLINE_ENTRY_RULE: ShowFn = |elem, engine, styles| { let context = Context::new(None, Some(styles)); let context = context.track(); - // TODO(accessibility): prefix should be wrapped in a `Lbl` structure element let prefix = elem.prefix(engine, context, span)?; let body = elem.body().at(span)?; let page = elem.page(engine, context, span)?; diff --git a/crates/typst-library/src/model/outline.rs b/crates/typst-library/src/model/outline.rs index 0f94b1f58..790dac6d2 100644 --- a/crates/typst-library/src/model/outline.rs +++ b/crates/typst-library/src/model/outline.rs @@ -21,6 +21,7 @@ use crate::layout::{ RepeatElem, Sides, }; use crate::model::{HeadingElem, NumberingPattern, ParElem, Refable}; +use crate::pdf::PdfMarkerTag; use crate::text::{LocalName, SpaceElem, TextElem}; /// A table of contents, figures, or other elements. @@ -493,7 +494,7 @@ impl OutlineEntry { let styles = context.styles().at(span)?; let numbers = outlinable.counter().display_at_loc(engine, loc, styles, numbering)?; - Ok(Some(outlinable.prefix(numbers))) + Ok(Some(PdfMarkerTag::Label(outlinable.prefix(numbers)))) } /// Creates the default inner content of the entry. diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index a2cea8ddc..732bac319 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -138,4 +138,6 @@ pdf_marker_tag! 
{ ListItemLabel, /// `LBody` of the enum item ListItemBody, + /// A generic `Lbl` + Label, } diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index a2ad47d7d..d8eeb6faf 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -69,6 +69,7 @@ pub(crate) fn handle_start( push_stack(gc, loc, StackEntryKind::ListItemBody)?; return Ok(()); } + PdfMarkerTagKind::Label => TagKind::Lbl.into(), } } else if let Some(entry) = elem.to_packed::() { push_stack(gc, loc, StackEntryKind::OutlineEntry(entry.clone()))?; From 728d37efa06265f9d1dde811200f7ef7b773daeb Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Tue, 15 Jul 2025 16:54:15 +0200 Subject: [PATCH 64/76] feat: generate tags for footnotes --- crates/typst-layout/src/rules.rs | 14 ++++++-------- crates/typst-pdf/src/tags/mod.rs | 6 ++++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index 77aa8034f..e0945a708 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -394,11 +394,11 @@ const FOOTNOTE_RULE: ShowFn = |elem, engine, styles| { let numbering = elem.numbering.get_ref(styles); let counter = Counter::of(FootnoteElem::ELEM); let num = counter.display_at_loc(engine, loc, styles, numbering)?; - let sup = SuperElem::new(num).pack().spanned(span); + let alt = num.plain_text(); + let sup = PdfMarkerTag::Label(SuperElem::new(num).pack().spanned(span)); let loc = loc.variant(1); // Add zero-width weak spacing to make the footnote "sticky". 
- // TODO(accessibility): generate alt text - Ok(HElem::hole().pack() + sup.linked(Destination::Location(loc), None)) + Ok(HElem::hole().pack() + sup.linked(Destination::Location(loc), Some(alt))) }; const FOOTNOTE_ENTRY_RULE: ShowFn = |elem, engine, styles| { @@ -415,11 +415,9 @@ const FOOTNOTE_ENTRY_RULE: ShowFn = |elem, engine, styles| { }; let num = counter.display_at_loc(engine, loc, styles, numbering)?; - let sup = SuperElem::new(num) - .pack() - .spanned(span) - // TODO(accessibility): generate alt text - .linked(Destination::Location(loc), None) + let alt = num.plain_text(); + let sup = PdfMarkerTag::Label(SuperElem::new(num).pack().spanned(span)) + .linked(Destination::Location(loc), Some(alt)) .located(loc.variant(1)); Ok(Content::sequence([ diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index d8eeb6faf..299a0261a 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -17,8 +17,8 @@ use typst_library::foundations::{ use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ - Destination, EnumElem, FigureCaption, FigureElem, HeadingElem, ListElem, Outlinable, - OutlineEntry, TableCell, TableElem, TermsElem, + Destination, EnumElem, FigureCaption, FigureElem, FootnoteEntry, HeadingElem, + ListElem, Outlinable, OutlineEntry, TableCell, TableElem, TermsElem, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfMarkerTag, PdfMarkerTagKind}; use typst_library::visualize::ImageElem; @@ -141,6 +141,8 @@ pub(crate) fn handle_start( let link_id = gc.tags.next_link_id(); push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone()))?; return Ok(()); + } else if let Some(_) = elem.to_packed::() { + TagKind::Note.into() } else { return Ok(()); }; From cd5d91a82db902894075b8b0c39228716dc839fc Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Tue, 15 Jul 2025 17:23:11 +0200 Subject: [PATCH 65/76] fix: ensure link annotation object references 
are direct children of link tags --- crates/typst-pdf/src/link.rs | 6 +- crates/typst-pdf/src/tags/mod.rs | 136 +++++++++++++++++++------------ 2 files changed, 87 insertions(+), 55 deletions(-) diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index eef5421cc..d225e6d57 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -49,7 +49,7 @@ pub(crate) fn handle_link( } }; - let Some((link_id, link)) = gc.tags.find_parent_link() else { + let Some((link_id, link, link_nodes)) = gc.tags.stack.find_parent_link() else { unreachable!("expected a link parent") }; let alt = link.alt.as_ref().map(EcoString::to_string); @@ -68,8 +68,8 @@ pub(crate) fn handle_link( annotation.quad_points.extend_from_slice(&quadpoints); } _ => { - let placeholder = gc.tags.reserve_placeholder(); - gc.tags.push(TagNode::Placeholder(placeholder)); + let placeholder = gc.tags.placeholders.reserve(); + link_nodes.push(TagNode::Placeholder(placeholder)); fc.push_link_annotation(LinkAnnotation { id: link_id, placeholder, diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 299a0261a..2ae06c22f 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -1,5 +1,6 @@ use std::cell::OnceCell; use std::num::NonZeroU32; +use std::ops::{Deref, DerefMut}; use ecow::EcoString; use krilla::configure::Validator; @@ -97,7 +98,7 @@ pub(crate) fn handle_start( } else if let Some(image) = elem.to_packed::() { let alt = image.alt.get_as_ref().map(|s| s.to_string()); - let figure_tag = (gc.tags.parent()) + let figure_tag = (gc.tags.stack.parent()) .and_then(StackEntryKind::as_standard_mut) .filter(|tag| tag.kind == TagKind::Figure); if let Some(figure_tag) = figure_tag { @@ -116,7 +117,7 @@ pub(crate) fn handle_start( push_stack(gc, loc, StackEntryKind::Table(ctx))?; return Ok(()); } else if let Some(cell) = elem.to_packed::() { - let table_ctx = gc.tags.parent_table(); + let table_ctx = 
gc.tags.stack.parent_table(); // Only repeated table headers and footer cells are layed out multiple // times. Mark duplicate headers as artifacts, since they have no @@ -168,11 +169,8 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes), StackEntryKind::Outline(ctx) => ctx.build_outline(entry.nodes), StackEntryKind::OutlineEntry(outline_entry) => { - let parent = gc.tags.stack.last_mut().and_then(|parent| { - let ctx = parent.kind.as_outline_mut()?; - Some((&mut parent.nodes, ctx)) - }); - let Some((parent_nodes, outline_ctx)) = parent else { + let Some((outline_ctx, outline_nodes)) = gc.tags.stack.parent_outline() + else { // PDF/UA compliance of the structure hierarchy is checked // elsewhere. While this doesn't make a lot of sense, just // avoid crashing here. @@ -181,12 +179,12 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc return; }; - outline_ctx.insert(parent_nodes, outline_entry, entry.nodes); + outline_ctx.insert(outline_nodes, outline_entry, entry.nodes); return; } StackEntryKind::Table(ctx) => ctx.build_table(entry.nodes), StackEntryKind::TableCell(cell) => { - let Some(table_ctx) = gc.tags.parent_table() else { + let Some(table_ctx) = gc.tags.stack.parent_table() else { // PDF/UA compliance of the structure hierarchy is checked // elsewhere. While this doesn't make a lot of sense, just // avoid crashing here. 
@@ -200,12 +198,12 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc } StackEntryKind::List(list) => list.build_list(entry.nodes), StackEntryKind::ListItemLabel => { - let list_ctx = gc.tags.parent_list().expect("parent list"); + let list_ctx = gc.tags.stack.parent_list().expect("parent list"); list_ctx.push_label(entry.nodes); return; } StackEntryKind::ListItemBody => { - let list_ctx = gc.tags.parent_list().expect("parent list"); + let list_ctx = gc.tags.stack.parent_list().expect("parent list"); list_ctx.push_body(entry.nodes); return; } @@ -287,15 +285,15 @@ pub(crate) fn add_annotations( alt, ); let annot_id = page.add_tagged_annotation(annot); - gc.tags.init_placeholder(placeholder, Node::Leaf(annot_id)); + gc.tags.placeholders.init(placeholder, Node::Leaf(annot_id)); } } pub(crate) struct Tags { /// The intermediary stack of nested tag groups. - pub(crate) stack: Vec, + pub(crate) stack: TagStack, /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. - pub(crate) placeholders: Vec>, + pub(crate) placeholders: Placeholders, pub(crate) in_artifact: Option<(Location, ArtifactKind)>, /// Used to group multiple link annotations using quad points. 
pub(crate) link_id: LinkId, @@ -310,8 +308,8 @@ pub(crate) struct Tags { impl Tags { pub(crate) fn new() -> Self { Self { - stack: Vec::new(), - placeholders: Vec::new(), + stack: TagStack(Vec::new()), + placeholders: Placeholders(Vec::new()), in_artifact: None, tree: Vec::new(), @@ -320,41 +318,6 @@ impl Tags { } } - pub(crate) fn reserve_placeholder(&mut self) -> Placeholder { - let idx = self.placeholders.len(); - self.placeholders.push(OnceCell::new()); - Placeholder(idx) - } - - pub(crate) fn init_placeholder(&mut self, placeholder: Placeholder, node: Node) { - self.placeholders[placeholder.0] - .set(node) - .map_err(|_| ()) - .expect("placeholder to be uninitialized"); - } - - pub(crate) fn take_placeholder(&mut self, placeholder: Placeholder) -> Node { - self.placeholders[placeholder.0] - .take() - .expect("initialized placeholder node") - } - - pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> { - self.stack.last_mut().map(|e| &mut e.kind) - } - - pub(crate) fn parent_table(&mut self) -> Option<&mut TableCtx> { - self.parent()?.as_table_mut() - } - - pub(crate) fn parent_list(&mut self) -> Option<&mut ListCtx> { - self.parent()?.as_list_mut() - } - - pub(crate) fn find_parent_link(&self) -> Option<(LinkId, &Packed)> { - self.stack.iter().rev().find_map(|entry| entry.kind.as_link()) - } - pub(crate) fn push(&mut self, node: TagNode) { if let Some(entry) = self.stack.last_mut() { entry.nodes.push(node); @@ -382,7 +345,7 @@ impl Tags { Node::Group(TagGroup::with_children(tag, children)) } TagNode::Leaf(identifier) => Node::Leaf(identifier), - TagNode::Placeholder(placeholder) => self.take_placeholder(placeholder), + TagNode::Placeholder(placeholder) => self.placeholders.take(placeholder), } } @@ -402,6 +365,75 @@ impl Tags { } } +pub(crate) struct TagStack(Vec); + +impl Deref for TagStack { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for TagStack { + fn deref_mut(&mut self) -> &mut Self::Target { + 
&mut self.0 + } +} + +impl TagStack { + pub(crate) fn parent(&mut self) -> Option<&mut StackEntryKind> { + self.0.last_mut().map(|e| &mut e.kind) + } + + pub(crate) fn parent_table(&mut self) -> Option<&mut TableCtx> { + self.parent()?.as_table_mut() + } + + pub(crate) fn parent_list(&mut self) -> Option<&mut ListCtx> { + self.parent()?.as_list_mut() + } + + pub(crate) fn parent_outline( + &mut self, + ) -> Option<(&mut OutlineCtx, &mut Vec)> { + self.0.last_mut().and_then(|e| { + let ctx = e.kind.as_outline_mut()?; + Some((ctx, &mut e.nodes)) + }) + } + + pub(crate) fn find_parent_link( + &mut self, + ) -> Option<(LinkId, &LinkMarker, &mut Vec)> { + self.0.iter_mut().rev().find_map(|e| { + let (link_id, link) = e.kind.as_link()?; + Some((link_id, link.as_ref(), &mut e.nodes)) + }) + } +} + +pub(crate) struct Placeholders(Vec>); + +impl Placeholders { + pub(crate) fn reserve(&mut self) -> Placeholder { + let idx = self.0.len(); + self.0.push(OnceCell::new()); + Placeholder(idx) + } + + pub(crate) fn init(&mut self, placeholder: Placeholder, node: Node) { + self.0[placeholder.0] + .set(node) + .map_err(|_| ()) + .expect("placeholder to be uninitialized"); + } + + pub(crate) fn take(&mut self, placeholder: Placeholder) -> Node { + self.0[placeholder.0].take().expect("initialized placeholder node") + } +} + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub(crate) struct TableId(u32); From 153c5d3a4aea04c98eb587d012274999271f3122 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 16 Jul 2025 12:37:10 +0200 Subject: [PATCH 66/76] refactor: update krilla --- Cargo.lock | 4 ++-- Cargo.toml | 4 ++-- crates/typst-pdf/src/tags/table.rs | 29 +++++++++++------------------ 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be0c37ff9..7295c6d04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1373,7 +1373,7 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" -source = 
"git+https://github.com/saecki/krilla?branch=tag-attributes#5ae27ecab2f74d7a5e58b962f04c85bc2662602e" +source = "git+https://github.com/LaurenzV/krilla?branch=main#c0a456829bb63212470a6fa29d604dd9e051a9bd" dependencies = [ "base64", "bumpalo", @@ -1402,7 +1402,7 @@ dependencies = [ [[package]] name = "krilla-svg" version = "0.1.0" -source = "git+https://github.com/saecki/krilla?branch=tag-attributes#5ae27ecab2f74d7a5e58b962f04c85bc2662602e" +source = "git+https://github.com/LaurenzV/krilla?branch=main#c0a456829bb63212470a6fa29d604dd9e051a9bd" dependencies = [ "flate2", "fontdb", diff --git a/Cargo.toml b/Cargo.toml index f59b74e97..1e2bd70de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,8 +73,8 @@ image = { version = "0.25.5", default-features = false, features = ["png", "jpeg indexmap = { version = "2", features = ["serde"] } infer = { version = "0.19.0", default-features = false } kamadak-exif = "0.6" -krilla = { git = "https://github.com/saecki/krilla", branch = "tag-attributes", default-features = false, features = ["raster-images", "comemo", "rayon"] } -krilla-svg = { git = "https://github.com/saecki/krilla", branch = "tag-attributes" } +krilla = { git = "https://github.com/LaurenzV/krilla", branch = "main", default-features = false, features = ["raster-images", "comemo", "rayon"] } +krilla-svg = { git = "https://github.com/LaurenzV/krilla", branch = "main" } kurbo = "0.11" libfuzzer-sys = "0.4" lipsum = "0.9" diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index a4346460a..969440f2f 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -3,7 +3,7 @@ use std::num::NonZeroU32; use az::SaturatingAs; use krilla::tagging::{ - TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, TagIdRefs, TagKind, + TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, TagKind, }; use smallvec::SmallVec; use typst_library::foundations::{Packed, Smart, StyleChain}; @@ -90,7 +90,7 @@ 
impl TableCtx { rowspan: rowspan.try_into().unwrap_or(NonZeroU32::MAX), colspan: colspan.try_into().unwrap_or(NonZeroU32::MAX), kind, - headers: TagIdRefs::NONE, + headers: SmallVec::new(), nodes, }); } @@ -244,8 +244,8 @@ impl TableCtx { } if let Some((_, cell_id)) = current_header.last() { - if !cell.headers.ids.contains(&cell_id) { - cell.headers.ids.push(cell_id.clone()); + if !cell.headers.contains(&cell_id) { + cell.headers.push(cell_id.clone()); } } @@ -294,7 +294,7 @@ struct TableCtxCell { rowspan: NonZeroU32, colspan: NonZeroU32, kind: Smart, - headers: TagIdRefs, + headers: SmallVec<[TagId; 1]>, nodes: Vec, } @@ -314,9 +314,9 @@ fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool { } fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { - let mut buf = SmallVec::new(); + let mut buf = SmallVec::<[u8; 32]>::new(); _ = write!(&mut buf, "{}x{x}y{y}", table_id.0); - TagId::from_smallvec(buf) + TagId::from(buf) } fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope { @@ -414,24 +414,17 @@ mod tests { ) -> TagNode { let scope = table_header_scope(scope); let id = table_cell_id(TableId(324), x, y); - let ids = headers - .map(|(x, y)| table_cell_id(TableId(324), x, y)) - .into_iter() - .collect(); + let ids = headers.map(|(x, y)| table_cell_id(TableId(324), x, y)); TagNode::Group( - TagKind::TH(TableHeaderCell::new(scope).with_headers(TagIdRefs { ids })) - .with_id(Some(id)), + TagKind::TH(TableHeaderCell::new(scope).with_headers(ids)).with_id(Some(id)), Vec::new(), ) } fn td(headers: [(u32, u32); SIZE]) -> TagNode { - let ids = headers - .map(|(x, y)| table_cell_id(TableId(324), x, y)) - .into_iter() - .collect(); + let ids = headers.map(|(x, y)| table_cell_id(TableId(324), x, y)); TagNode::Group( - TagKind::TD(TableDataCell::new().with_headers(TagIdRefs { ids })).into(), + TagKind::TD(TableDataCell::new().with_headers(ids)).into(), Vec::new(), ) } From bf75ab858d7cd21a34595401d2a036afa573244f Mon Sep 17 
00:00:00 2001 From: Tobias Schmitz Date: Wed, 16 Jul 2025 13:57:41 +0200 Subject: [PATCH 67/76] feat: better alt text for footnote links --- crates/typst-layout/src/rules.rs | 2 +- crates/typst-library/src/model/footnote.rs | 12 +++++++++++- crates/typst-library/src/pdf/accessibility.rs | 3 ++- crates/typst-library/translations/ar.txt | 1 + crates/typst-library/translations/bg.txt | 1 + crates/typst-library/translations/ca.txt | 1 + crates/typst-library/translations/cs.txt | 1 + crates/typst-library/translations/da.txt | 1 + crates/typst-library/translations/de.txt | 1 + crates/typst-library/translations/el.txt | 1 + crates/typst-library/translations/en.txt | 1 + crates/typst-library/translations/es.txt | 1 + crates/typst-library/translations/et.txt | 1 + crates/typst-library/translations/eu.txt | 1 + crates/typst-library/translations/fi.txt | 1 + crates/typst-library/translations/fr.txt | 1 + crates/typst-library/translations/gl.txt | 1 + crates/typst-library/translations/he.txt | 1 + crates/typst-library/translations/hr.txt | 1 + crates/typst-library/translations/hu.txt | 1 + crates/typst-library/translations/id.txt | 1 + crates/typst-library/translations/is.txt | 1 + crates/typst-library/translations/it.txt | 1 + crates/typst-library/translations/ja.txt | 1 + crates/typst-library/translations/la.txt | 1 + crates/typst-library/translations/lv.txt | 1 + crates/typst-library/translations/nb.txt | 1 + crates/typst-library/translations/nl.txt | 1 + crates/typst-library/translations/nn.txt | 1 + crates/typst-library/translations/pl.txt | 1 + crates/typst-library/translations/pt-PT.txt | 1 + crates/typst-library/translations/pt.txt | 1 + crates/typst-library/translations/ro.txt | 1 + crates/typst-library/translations/ru.txt | 1 + crates/typst-library/translations/sl.txt | 1 + crates/typst-library/translations/sq.txt | 1 + crates/typst-library/translations/sr.txt | 1 + crates/typst-library/translations/sv.txt | 1 + crates/typst-library/translations/tl.txt | 1 + 
crates/typst-library/translations/tr.txt | 1 + crates/typst-library/translations/uk.txt | 1 + crates/typst-library/translations/vi.txt | 1 + crates/typst-library/translations/zh-TW.txt | 1 + crates/typst-library/translations/zh.txt | 1 + 44 files changed, 55 insertions(+), 3 deletions(-) diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index 5fa5e7270..09c247e9a 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -384,7 +384,7 @@ const FOOTNOTE_RULE: ShowFn = |elem, engine, styles| { let numbering = elem.numbering.get_ref(styles); let counter = Counter::of(FootnoteElem::ELEM); let num = counter.display_at_loc(engine, loc, styles, numbering)?; - let alt = num.plain_text(); + let alt = FootnoteElem::alt_text(styles, &num.plain_text()); let sup = PdfMarkerTag::Label(SuperElem::new(num).pack().spanned(span)); let loc = loc.variant(1); // Add zero-width weak spacing to make the footnote "sticky". diff --git a/crates/typst-library/src/model/footnote.rs b/crates/typst-library/src/model/footnote.rs index 147a3f009..949bd8aba 100644 --- a/crates/typst-library/src/model/footnote.rs +++ b/crates/typst-library/src/model/footnote.rs @@ -1,6 +1,7 @@ use std::num::NonZeroUsize; use std::str::FromStr; +use ecow::{eco_format, EcoString}; use typst_utils::NonZeroExt; use crate::diag::{bail, StrResult}; @@ -12,7 +13,7 @@ use crate::foundations::{ use crate::introspection::{Count, CounterUpdate, Locatable, Location}; use crate::layout::{Abs, Em, Length, Ratio}; use crate::model::{Numbering, NumberingPattern, ParElem}; -use crate::text::{TextElem, TextSize}; +use crate::text::{LocalName, TextElem, TextSize}; use crate::visualize::{LineElem, Stroke}; /// A footnote. 
@@ -82,7 +83,16 @@ impl FootnoteElem { type FootnoteEntry; } +impl LocalName for Packed { + const KEY: &'static str = "footnote"; +} + impl FootnoteElem { + pub fn alt_text(styles: StyleChain, num: &str) -> EcoString { + let local_name = Packed::::local_name_in(styles); + eco_format!("{local_name} {num}") + } + /// Creates a new footnote that the passed content as its body. pub fn with_content(content: Content) -> Self { Self::new(FootnoteBody::Content(content)) diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index 732bac319..142e7ff50 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -122,7 +122,8 @@ macro_rules! pdf_marker_tag { #[doc = $doc] #[allow(non_snake_case)] pub fn $variant(body: Content) -> Content { - Self::new(PdfMarkerTagKind::$variant, body).pack() + let span = body.span(); + Self::new(PdfMarkerTagKind::$variant, body).pack().spanned(span) } )+ } diff --git a/crates/typst-library/translations/ar.txt b/crates/typst-library/translations/ar.txt index 7af2aaa91..a39bf71a6 100644 --- a/crates/typst-library/translations/ar.txt +++ b/crates/typst-library/translations/ar.txt @@ -6,3 +6,4 @@ heading = الفصل outline = المحتويات raw = قائمة page = صفحة +# footnote = diff --git a/crates/typst-library/translations/bg.txt b/crates/typst-library/translations/bg.txt index e377af398..29aa03316 100644 --- a/crates/typst-library/translations/bg.txt +++ b/crates/typst-library/translations/bg.txt @@ -6,3 +6,4 @@ heading = Раздел outline = Съдържание raw = Приложение page = стр. 
+# footnote = diff --git a/crates/typst-library/translations/ca.txt b/crates/typst-library/translations/ca.txt index f02473293..880f83411 100644 --- a/crates/typst-library/translations/ca.txt +++ b/crates/typst-library/translations/ca.txt @@ -6,3 +6,4 @@ heading = Secció outline = Índex raw = Llistat page = pàgina +# footnote = diff --git a/crates/typst-library/translations/cs.txt b/crates/typst-library/translations/cs.txt index 417f1ab2e..f0986523f 100644 --- a/crates/typst-library/translations/cs.txt +++ b/crates/typst-library/translations/cs.txt @@ -6,3 +6,4 @@ heading = Kapitola outline = Obsah raw = Výpis page = strana +# footnote = diff --git a/crates/typst-library/translations/da.txt b/crates/typst-library/translations/da.txt index 4ceeda065..0ef36f3c2 100644 --- a/crates/typst-library/translations/da.txt +++ b/crates/typst-library/translations/da.txt @@ -6,3 +6,4 @@ heading = Afsnit outline = Indhold raw = Liste page = side +# footnote = diff --git a/crates/typst-library/translations/de.txt b/crates/typst-library/translations/de.txt index a9da1adb4..8d43f6706 100644 --- a/crates/typst-library/translations/de.txt +++ b/crates/typst-library/translations/de.txt @@ -6,3 +6,4 @@ heading = Abschnitt outline = Inhaltsverzeichnis raw = Listing page = Seite +footnote = Fußnote diff --git a/crates/typst-library/translations/el.txt b/crates/typst-library/translations/el.txt index 3853a45bb..05c8dd615 100644 --- a/crates/typst-library/translations/el.txt +++ b/crates/typst-library/translations/el.txt @@ -5,3 +5,4 @@ bibliography = Βιβλιογραφία heading = Κεφάλαιο outline = Περιεχόμενα raw = Παράθεση +# footnote = diff --git a/crates/typst-library/translations/en.txt b/crates/typst-library/translations/en.txt index fa2d65b91..21ae372a5 100644 --- a/crates/typst-library/translations/en.txt +++ b/crates/typst-library/translations/en.txt @@ -6,3 +6,4 @@ heading = Section outline = Contents raw = Listing page = page +footnote = Footnote diff --git 
a/crates/typst-library/translations/es.txt b/crates/typst-library/translations/es.txt index 8fe9929d8..0d95a3cb4 100644 --- a/crates/typst-library/translations/es.txt +++ b/crates/typst-library/translations/es.txt @@ -6,3 +6,4 @@ heading = Sección outline = Índice raw = Listado page = página +# footnote = diff --git a/crates/typst-library/translations/et.txt b/crates/typst-library/translations/et.txt index 588929052..0f1ea245b 100644 --- a/crates/typst-library/translations/et.txt +++ b/crates/typst-library/translations/et.txt @@ -6,3 +6,4 @@ heading = Peatükk outline = Sisukord raw = List page = lk. +# footnote = diff --git a/crates/typst-library/translations/eu.txt b/crates/typst-library/translations/eu.txt index d89f89b6f..257286873 100644 --- a/crates/typst-library/translations/eu.txt +++ b/crates/typst-library/translations/eu.txt @@ -6,3 +6,4 @@ heading = Atala outline = Aurkibidea raw = Kodea page = orria +# footnote = diff --git a/crates/typst-library/translations/fi.txt b/crates/typst-library/translations/fi.txt index edb88de8d..d0faa5e3d 100644 --- a/crates/typst-library/translations/fi.txt +++ b/crates/typst-library/translations/fi.txt @@ -6,3 +6,4 @@ heading = Osio outline = Sisällys raw = Esimerkki page = sivu +# footnote = diff --git a/crates/typst-library/translations/fr.txt b/crates/typst-library/translations/fr.txt index f8e27c9c0..4d08bf828 100644 --- a/crates/typst-library/translations/fr.txt +++ b/crates/typst-library/translations/fr.txt @@ -6,3 +6,4 @@ heading = Chapitre outline = Table des matières raw = Liste page = page +# footnote = diff --git a/crates/typst-library/translations/gl.txt b/crates/typst-library/translations/gl.txt index 49bf01b74..0f4918bc3 100644 --- a/crates/typst-library/translations/gl.txt +++ b/crates/typst-library/translations/gl.txt @@ -6,3 +6,4 @@ heading = Sección outline = Índice raw = Listado page = páxina +# footnote = diff --git a/crates/typst-library/translations/he.txt b/crates/typst-library/translations/he.txt 
index 5317c9278..c9b069c0c 100644 --- a/crates/typst-library/translations/he.txt +++ b/crates/typst-library/translations/he.txt @@ -6,3 +6,4 @@ heading = חלק outline = תוכן עניינים raw = קטע מקור page = עמוד +# footnote = diff --git a/crates/typst-library/translations/hr.txt b/crates/typst-library/translations/hr.txt index ea0754592..4243aa6d4 100644 --- a/crates/typst-library/translations/hr.txt +++ b/crates/typst-library/translations/hr.txt @@ -6,3 +6,4 @@ heading = Odjeljak outline = Sadržaj raw = Kôd page = str. +# footnote = diff --git a/crates/typst-library/translations/hu.txt b/crates/typst-library/translations/hu.txt index a88da3e54..fd7cb3485 100644 --- a/crates/typst-library/translations/hu.txt +++ b/crates/typst-library/translations/hu.txt @@ -6,3 +6,4 @@ heading = Fejezet outline = Tartalomjegyzék # raw = page = oldal +# footnote = diff --git a/crates/typst-library/translations/id.txt b/crates/typst-library/translations/id.txt index bea5ee18c..d3ce5818a 100644 --- a/crates/typst-library/translations/id.txt +++ b/crates/typst-library/translations/id.txt @@ -6,3 +6,4 @@ heading = Bagian outline = Daftar Isi raw = Kode page = halaman +# footnote = diff --git a/crates/typst-library/translations/is.txt b/crates/typst-library/translations/is.txt index 756c97700..b1bc8922a 100644 --- a/crates/typst-library/translations/is.txt +++ b/crates/typst-library/translations/is.txt @@ -6,3 +6,4 @@ heading = Kafli outline = Efnisyfirlit raw = Sýnishorn page = blaðsíða +# footnote = diff --git a/crates/typst-library/translations/it.txt b/crates/typst-library/translations/it.txt index 9f3c352db..9e282b0ff 100644 --- a/crates/typst-library/translations/it.txt +++ b/crates/typst-library/translations/it.txt @@ -6,3 +6,4 @@ heading = Sezione outline = Indice raw = Codice page = pag. 
+# footnote = diff --git a/crates/typst-library/translations/ja.txt b/crates/typst-library/translations/ja.txt index 484b20a62..8c01fb122 100644 --- a/crates/typst-library/translations/ja.txt +++ b/crates/typst-library/translations/ja.txt @@ -6,3 +6,4 @@ heading = 節 outline = 目次 raw = リスト page = ページ +# footnote = diff --git a/crates/typst-library/translations/la.txt b/crates/typst-library/translations/la.txt index d25517c2f..90912bf1a 100644 --- a/crates/typst-library/translations/la.txt +++ b/crates/typst-library/translations/la.txt @@ -6,3 +6,4 @@ heading = Caput outline = Index capitum raw = Exemplum page = charta +# footnote = diff --git a/crates/typst-library/translations/lv.txt b/crates/typst-library/translations/lv.txt index 4c6b86841..8d436fce1 100644 --- a/crates/typst-library/translations/lv.txt +++ b/crates/typst-library/translations/lv.txt @@ -6,3 +6,4 @@ heading = Sadaļa outline = Saturs raw = Saraksts page = lpp. +# footnote = diff --git a/crates/typst-library/translations/nb.txt b/crates/typst-library/translations/nb.txt index edf66b53f..0d718fd27 100644 --- a/crates/typst-library/translations/nb.txt +++ b/crates/typst-library/translations/nb.txt @@ -6,3 +6,4 @@ heading = Kapittel outline = Innhold raw = Utskrift page = side +# footnote = diff --git a/crates/typst-library/translations/nl.txt b/crates/typst-library/translations/nl.txt index 24b8315f0..d707031b5 100644 --- a/crates/typst-library/translations/nl.txt +++ b/crates/typst-library/translations/nl.txt @@ -6,3 +6,4 @@ heading = Hoofdstuk outline = Inhoudsopgave raw = Listing page = pagina +# footnote = diff --git a/crates/typst-library/translations/nn.txt b/crates/typst-library/translations/nn.txt index 2c2a27a76..7ccaae1cd 100644 --- a/crates/typst-library/translations/nn.txt +++ b/crates/typst-library/translations/nn.txt @@ -6,3 +6,4 @@ heading = Kapittel outline = Innhald raw = Utskrift page = side +# footnote = diff --git a/crates/typst-library/translations/pl.txt 
b/crates/typst-library/translations/pl.txt index cc8f4b36b..31a392b0d 100644 --- a/crates/typst-library/translations/pl.txt +++ b/crates/typst-library/translations/pl.txt @@ -6,3 +6,4 @@ heading = Sekcja outline = Spis treści raw = Program page = strona +# footnote = diff --git a/crates/typst-library/translations/pt-PT.txt b/crates/typst-library/translations/pt-PT.txt index 1d68ab858..56929b488 100644 --- a/crates/typst-library/translations/pt-PT.txt +++ b/crates/typst-library/translations/pt-PT.txt @@ -6,3 +6,4 @@ heading = Secção outline = Índice # raw = page = página +# footnote = diff --git a/crates/typst-library/translations/pt.txt b/crates/typst-library/translations/pt.txt index 398a75f37..3a579c73d 100644 --- a/crates/typst-library/translations/pt.txt +++ b/crates/typst-library/translations/pt.txt @@ -6,3 +6,4 @@ heading = Seção outline = Sumário raw = Listagem page = página +# footnote = diff --git a/crates/typst-library/translations/ro.txt b/crates/typst-library/translations/ro.txt index f5d44f726..89962e76a 100644 --- a/crates/typst-library/translations/ro.txt +++ b/crates/typst-library/translations/ro.txt @@ -7,3 +7,4 @@ outline = Cuprins # may be wrong raw = Listă page = pagina +# footnote = diff --git a/crates/typst-library/translations/ru.txt b/crates/typst-library/translations/ru.txt index 49cb34cb1..a9fab2548 100644 --- a/crates/typst-library/translations/ru.txt +++ b/crates/typst-library/translations/ru.txt @@ -6,3 +6,4 @@ heading = Раздел outline = Содержание raw = Листинг page = с. 
+# footnote = diff --git a/crates/typst-library/translations/sl.txt b/crates/typst-library/translations/sl.txt index 4c8a568ce..743639b7a 100644 --- a/crates/typst-library/translations/sl.txt +++ b/crates/typst-library/translations/sl.txt @@ -6,3 +6,4 @@ heading = Poglavje outline = Kazalo raw = Program page = stran +# footnote = diff --git a/crates/typst-library/translations/sq.txt b/crates/typst-library/translations/sq.txt index 11ba53212..37c0b2e51 100644 --- a/crates/typst-library/translations/sq.txt +++ b/crates/typst-library/translations/sq.txt @@ -6,3 +6,4 @@ heading = Kapitull outline = Përmbajtja raw = List page = faqe +# footnote = diff --git a/crates/typst-library/translations/sr.txt b/crates/typst-library/translations/sr.txt index e4e8f1272..2b6ee4021 100644 --- a/crates/typst-library/translations/sr.txt +++ b/crates/typst-library/translations/sr.txt @@ -6,3 +6,4 @@ heading = Поглавље outline = Садржај raw = Програм page = страна +# footnote = diff --git a/crates/typst-library/translations/sv.txt b/crates/typst-library/translations/sv.txt index 538f466b0..6ae8a582c 100644 --- a/crates/typst-library/translations/sv.txt +++ b/crates/typst-library/translations/sv.txt @@ -6,3 +6,4 @@ heading = Avsnitt outline = Innehåll raw = Kodlistning page = sida +# footnote = diff --git a/crates/typst-library/translations/tl.txt b/crates/typst-library/translations/tl.txt index 39cff5e36..e269d0289 100644 --- a/crates/typst-library/translations/tl.txt +++ b/crates/typst-library/translations/tl.txt @@ -6,3 +6,4 @@ heading = Seksyon outline = Talaan ng mga Nilalaman raw = Listahan # page = +# footnote = diff --git a/crates/typst-library/translations/tr.txt b/crates/typst-library/translations/tr.txt index f6e2cfe29..3e9b48675 100644 --- a/crates/typst-library/translations/tr.txt +++ b/crates/typst-library/translations/tr.txt @@ -6,3 +6,4 @@ heading = Bölüm outline = İçindekiler raw = Liste page = sayfa +# footnote = diff --git a/crates/typst-library/translations/uk.txt 
b/crates/typst-library/translations/uk.txt index 4794c3311..e87214bf5 100644 --- a/crates/typst-library/translations/uk.txt +++ b/crates/typst-library/translations/uk.txt @@ -6,3 +6,4 @@ heading = Розділ outline = Зміст raw = Лістинг page = c. +# footnote = diff --git a/crates/typst-library/translations/vi.txt b/crates/typst-library/translations/vi.txt index 8ccfdf02f..7b4aabfa6 100644 --- a/crates/typst-library/translations/vi.txt +++ b/crates/typst-library/translations/vi.txt @@ -7,3 +7,4 @@ outline = Mục lục # may be wrong raw = Chương trình page = trang +# footnote = diff --git a/crates/typst-library/translations/zh-TW.txt b/crates/typst-library/translations/zh-TW.txt index 4407f323e..e88753718 100644 --- a/crates/typst-library/translations/zh-TW.txt +++ b/crates/typst-library/translations/zh-TW.txt @@ -6,3 +6,4 @@ heading = 小節 outline = 目錄 raw = 程式 # page = +# footnote = diff --git a/crates/typst-library/translations/zh.txt b/crates/typst-library/translations/zh.txt index 32dc40107..a6f523ea0 100644 --- a/crates/typst-library/translations/zh.txt +++ b/crates/typst-library/translations/zh.txt @@ -6,3 +6,4 @@ heading = 小节 outline = 目录 raw = 代码 # page = +# footnote = From bc09df0c8b8eb939e01fa37ea3dae0bc15ab0790 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 16 Jul 2025 14:49:38 +0200 Subject: [PATCH 68/76] feat: insert footnotes after the reference in the reading order --- crates/typst-pdf/src/tags/mod.rs | 54 +++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 2ae06c22f..664c19477 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -1,4 +1,5 @@ use std::cell::OnceCell; +use std::collections::HashMap; use std::num::NonZeroU32; use std::ops::{Deref, DerefMut}; @@ -18,8 +19,8 @@ use typst_library::foundations::{ use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use 
typst_library::model::{ - Destination, EnumElem, FigureCaption, FigureElem, FootnoteEntry, HeadingElem, - ListElem, Outlinable, OutlineEntry, TableCell, TableElem, TermsElem, + Destination, EnumElem, FigureCaption, FigureElem, FootnoteElem, FootnoteEntry, + HeadingElem, ListElem, Outlinable, OutlineEntry, TableCell, TableElem, TermsElem, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfMarkerTag, PdfMarkerTagKind}; use typst_library::visualize::ImageElem; @@ -142,8 +143,13 @@ pub(crate) fn handle_start( let link_id = gc.tags.next_link_id(); push_stack(gc, loc, StackEntryKind::Link(link_id, link.clone()))?; return Ok(()); - } else if let Some(_) = elem.to_packed::() { - TagKind::Note.into() + } else if let Some(_) = elem.to_packed::() { + push_stack(gc, loc, StackEntryKind::FootNoteRef)?; + return Ok(()); + } else if let Some(entry) = elem.to_packed::() { + let footnote_loc = entry.note.location().unwrap(); + push_stack(gc, loc, StackEntryKind::FootNoteEntry(footnote_loc))?; + return Ok(()); } else { return Ok(()); }; @@ -217,6 +223,19 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc } node } + StackEntryKind::FootNoteRef => { + // transparently inset all children. + gc.tags.extend(entry.nodes); + gc.tags.push(TagNode::FootnoteEntry(loc)); + return; + } + StackEntryKind::FootNoteEntry(footnote_loc) => { + // Store footnotes separately so they can be inserted directly after + // the footnote reference in the reading order. + let tag = TagNode::Group(TagKind::Note.into(), entry.nodes); + gc.tags.footnotes.insert(footnote_loc, tag); + return; + } }; gc.tags.push(node); @@ -294,6 +313,11 @@ pub(crate) struct Tags { pub(crate) stack: TagStack, /// A list of placeholders corresponding to a [`TagNode::Placeholder`]. pub(crate) placeholders: Placeholders, + /// Footnotes are inserted directly after the footenote reference in the + /// reading order. 
Because of some layouting bugs, the entry might appear + /// before the reference in the text, so we only resolve them once tags + /// for the whole document are generated. + pub(crate) footnotes: HashMap, pub(crate) in_artifact: Option<(Location, ArtifactKind)>, /// Used to group multiple link annotations using quad points. pub(crate) link_id: LinkId, @@ -310,11 +334,13 @@ impl Tags { Self { stack: TagStack(Vec::new()), placeholders: Placeholders(Vec::new()), + footnotes: HashMap::new(), in_artifact: None, - tree: Vec::new(), link_id: LinkId(0), table_id: TableId(0), + + tree: Vec::new(), } } @@ -326,6 +352,14 @@ impl Tags { } } + pub(crate) fn extend(&mut self, nodes: impl IntoIterator) { + if let Some(entry) = self.stack.last_mut() { + entry.nodes.extend(nodes); + } else { + self.tree.extend(nodes); + } + } + pub(crate) fn build_tree(&mut self) -> TagTree { let children = std::mem::take(&mut self.tree) .into_iter() @@ -346,6 +380,10 @@ impl Tags { } TagNode::Leaf(identifier) => Node::Leaf(identifier), TagNode::Placeholder(placeholder) => self.placeholders.take(placeholder), + TagNode::FootnoteEntry(loc) => { + let node = self.footnotes.remove(&loc).expect("footnote"); + self.resolve_node(node) + } } } @@ -458,6 +496,11 @@ pub(crate) enum StackEntryKind { ListItemLabel, ListItemBody, Link(LinkId, Packed), + /// The footnote reference in the text. + FootNoteRef, + /// The footnote entry at the end of the page. Contains the [`Location`] of + /// the [`FootnoteElem`](typst_library::model::FootnoteElem). + FootNoteEntry(Location), } impl StackEntryKind { @@ -509,6 +552,7 @@ pub(crate) enum TagNode { /// Allows inserting a placeholder into the tag tree. /// Currently used for [`krilla::page::Page::add_tagged_annotation`]. 
Placeholder(Placeholder), + FootnoteEntry(Location), } #[derive(Clone, Copy, Debug, Eq, PartialEq)] From 66ca4dc9a06448a0dabaaad13a566dc639342e08 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 16 Jul 2025 12:33:16 +0200 Subject: [PATCH 69/76] feat: generate tags for quotes --- crates/typst-pdf/src/tags/mod.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 664c19477..c2555abd8 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -20,7 +20,8 @@ use typst_library::introspection::Location; use typst_library::layout::RepeatElem; use typst_library::model::{ Destination, EnumElem, FigureCaption, FigureElem, FootnoteElem, FootnoteEntry, - HeadingElem, ListElem, Outlinable, OutlineEntry, TableCell, TableElem, TermsElem, + HeadingElem, ListElem, Outlinable, OutlineEntry, QuoteElem, TableCell, TableElem, + TermsElem, }; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfMarkerTag, PdfMarkerTagKind}; use typst_library::visualize::ImageElem; @@ -150,6 +151,13 @@ pub(crate) fn handle_start( let footnote_loc = entry.note.location().unwrap(); push_stack(gc, loc, StackEntryKind::FootNoteEntry(footnote_loc))?; return Ok(()); + } else if let Some(quote) = elem.to_packed::() { + // TODO: should the attribution be handled somehow? 
+ if quote.block.get(StyleChain::default()) { + TagKind::BlockQuote.into() + } else { + TagKind::InlineQuote.into() + } } else { return Ok(()); }; From 8d2c8712d5ab7db546e6e6fd940593d55989f134 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Wed, 16 Jul 2025 16:34:42 +0200 Subject: [PATCH 70/76] feat: wrap equations in Formula tags --- crates/typst-pdf/src/tags/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index c2555abd8..702f4b763 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -18,6 +18,7 @@ use typst_library::foundations::{ }; use typst_library::introspection::Location; use typst_library::layout::RepeatElem; +use typst_library::math::EquationElem; use typst_library::model::{ Destination, EnumElem, FigureCaption, FigureElem, FootnoteElem, FootnoteEntry, HeadingElem, ListElem, Outlinable, OutlineEntry, QuoteElem, TableCell, TableElem, @@ -112,6 +113,9 @@ pub(crate) fn handle_start( } else { TagKind::Figure.with_alt_text(alt) } + } else if let Some(_) = elem.to_packed::() { + // TODO: alt text + TagKind::Formula.into() } else if let Some(table) = elem.to_packed::() { let table_id = gc.tags.next_table_id(); let summary = table.summary.get_as_ref().map(|s| s.to_string()); From 0bd0dc6d92647a30a2c84b5530a341bd47d67c83 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 17 Jul 2025 16:10:30 +0200 Subject: [PATCH 71/76] feat: generate tags for bibliographies --- crates/typst-layout/src/rules.rs | 38 ++++++++++--------- crates/typst-library/src/pdf/accessibility.rs | 31 +++++++++++---- crates/typst-pdf/src/tags/list.rs | 14 +++++++ crates/typst-pdf/src/tags/mod.rs | 16 ++++++++ 4 files changed, 74 insertions(+), 25 deletions(-) diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index 09c247e9a..829a402d3 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -23,7 +23,7 @@ use 
typst_library::model::{ LinkElem, ListElem, Outlinable, OutlineElem, OutlineEntry, ParElem, ParbreakElem, QuoteElem, RefElem, StrongElem, TableCell, TableElem, TermsElem, Works, }; -use typst_library::pdf::{ArtifactElem, EmbedElem, PdfMarkerTag, PdfMarkerTagKind}; +use typst_library::pdf::{ArtifactElem, EmbedElem, PdfMarkerTag}; use typst_library::text::{ DecoLine, Decoration, HighlightElem, ItalicToggle, LinebreakElem, LocalName, OverlineElem, RawElem, RawLine, ScriptKind, ShiftSettings, Smallcaps, SmallcapsElem, @@ -452,10 +452,7 @@ const OUTLINE_RULE: ShowFn = |elem, engine, styles| { } // Wrap the entries into a marker for pdf tagging. - seq.push( - PdfMarkerTag::new(PdfMarkerTagKind::OutlineBody, Content::sequence(entries)) - .pack(), - ); + seq.push(PdfMarkerTag::OutlineBody(Content::sequence(entries))); Ok(Content::sequence(seq)) }; @@ -543,25 +540,29 @@ const BIBLIOGRAPHY_RULE: ShowFn = |elem, engine, styles| { let mut cells = vec![]; for (prefix, reference) in references { + let prefix = PdfMarkerTag::ListItemLabel(prefix.clone().unwrap_or_default()); cells.push(GridChild::Item(GridItem::Cell( - Packed::new(GridCell::new(prefix.clone().unwrap_or_default())) - .spanned(span), + Packed::new(GridCell::new(prefix)).spanned(span), ))); + + let reference = PdfMarkerTag::BibEntry(reference.clone()); cells.push(GridChild::Item(GridItem::Cell( - Packed::new(GridCell::new(reference.clone())).spanned(span), + Packed::new(GridCell::new(reference)).spanned(span), ))); } - seq.push( - GridElem::new(cells) - .with_columns(TrackSizings(smallvec![Sizing::Auto; 2])) - .with_column_gutter(TrackSizings(smallvec![COLUMN_GUTTER.into()])) - .with_row_gutter(TrackSizings(smallvec![row_gutter.into()])) - .pack() - .spanned(span), - ); + + let grid = GridElem::new(cells) + .with_columns(TrackSizings(smallvec![Sizing::Auto; 2])) + .with_column_gutter(TrackSizings(smallvec![COLUMN_GUTTER.into()])) + .with_row_gutter(TrackSizings(smallvec![row_gutter.into()])) + .pack() + 
.spanned(span); + // TODO(accessibility): infer list numbering from style? + seq.push(PdfMarkerTag::Bibliography(true, grid)); } else { + let mut body = vec![]; for (_, reference) in references { - let realized = reference.clone(); + let realized = PdfMarkerTag::BibEntry(reference.clone()); let block = if works.hanging_indent { let body = HElem::new((-INDENT).into()).pack() + realized; let inset = Sides::default() @@ -573,8 +574,9 @@ const BIBLIOGRAPHY_RULE: ShowFn = |elem, engine, styles| { BlockElem::new().with_body(Some(BlockBody::Content(realized))) }; - seq.push(block.pack().spanned(span)); + body.push(block.pack().spanned(span)); } + seq.push(PdfMarkerTag::Bibliography(false, Content::sequence(body))); } Ok(Content::sequence(seq)) diff --git a/crates/typst-library/src/pdf/accessibility.rs b/crates/typst-library/src/pdf/accessibility.rs index 142e7ff50..523b626a1 100644 --- a/crates/typst-library/src/pdf/accessibility.rs +++ b/crates/typst-library/src/pdf/accessibility.rs @@ -3,7 +3,10 @@ use std::num::NonZeroU32; use typst_macros::{elem, func, Cast}; use typst_utils::NonZeroExt; -use crate::foundations::{Content, NativeElement, Smart}; +use crate::diag::bail; +use crate::diag::SourceResult; +use crate::engine::Engine; +use crate::foundations::{Args, Construct, Content, NativeElement, Smart}; use crate::introspection::Locatable; use crate::model::TableCell; @@ -99,21 +102,28 @@ impl TableHeaderScope { } // Used to delimit content for tagged PDF. -#[elem(Locatable)] +#[elem(Locatable, Construct)] pub struct PdfMarkerTag { + #[internal] #[required] pub kind: PdfMarkerTagKind, #[required] pub body: Content, } +impl Construct for PdfMarkerTag { + fn construct(_: &mut Engine, args: &mut Args) -> SourceResult { + bail!(args.span, "cannot be constructed manually"); + } +} + macro_rules! 
pdf_marker_tag { - ($(#[doc = $doc:expr] $variant:ident,)+) => { - #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Cast)] + ($(#[doc = $doc:expr] $variant:ident$(($($name:ident: $ty:ident)+))?,)+) => { + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum PdfMarkerTagKind { $( #[doc = $doc] - $variant + $variant $(($($ty),+))? ),+ } @@ -121,9 +131,12 @@ macro_rules! pdf_marker_tag { $( #[doc = $doc] #[allow(non_snake_case)] - pub fn $variant(body: Content) -> Content { + pub fn $variant($($($name: $ty,)+)? body: Content) -> Content { let span = body.span(); - Self::new(PdfMarkerTagKind::$variant, body).pack().spanned(span) + Self { + kind: PdfMarkerTagKind::$variant $(($($name),+))?, + body, + }.pack().spanned(span) } )+ } @@ -135,6 +148,10 @@ pdf_marker_tag! { OutlineBody, /// `Figure` FigureBody, + /// `L` bibliography list + Bibliography(numbered: bool), + /// `LBody` wrapping `BibEntry` + BibEntry, /// `Lbl` (marker) of the list item ListItemLabel, /// `LBody` of the enum item diff --git a/crates/typst-pdf/src/tags/list.rs b/crates/typst-pdf/src/tags/list.rs index 4046cdcee..ce18fcd2f 100644 --- a/crates/typst-pdf/src/tags/list.rs +++ b/crates/typst-pdf/src/tags/list.rs @@ -69,6 +69,20 @@ impl ListCtx { item.body = Some(nodes); } + pub(crate) fn push_bib_entry(&mut self, nodes: Vec) { + let nodes = vec![TagNode::Group(TagKind::BibEntry.into(), nodes)]; + // Bibliography lists cannot be nested, but may be missing labels. 
+ if let Some(item) = self.items.last_mut().filter(|item| item.body.is_none()) { + item.body = Some(nodes); + } else { + self.items.push(ListItem { + label: Vec::new(), + body: Some(nodes), + sub_list: None, + }); + } + } + pub(crate) fn build_list(self, mut nodes: Vec) -> TagNode { for item in self.items.into_iter() { nodes.push(TagNode::Group( diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 702f4b763..18caae9ed 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -65,6 +65,16 @@ pub(crate) fn handle_start( return Ok(()); } PdfMarkerTagKind::FigureBody => TagKind::Figure.into(), + PdfMarkerTagKind::Bibliography(numbered) => { + let numbering = + if numbered { ListNumbering::Decimal } else { ListNumbering::None }; + push_stack(gc, loc, StackEntryKind::List(ListCtx::new(numbering)))?; + return Ok(()); + } + PdfMarkerTagKind::BibEntry => { + push_stack(gc, loc, StackEntryKind::BibEntry)?; + return Ok(()); + } PdfMarkerTagKind::ListItemLabel => { push_stack(gc, loc, StackEntryKind::ListItemLabel)?; return Ok(()); @@ -225,6 +235,11 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc list_ctx.push_body(entry.nodes); return; } + StackEntryKind::BibEntry => { + let list_ctx = gc.tags.stack.parent_list().expect("parent list"); + list_ctx.push_bib_entry(entry.nodes); + return; + } StackEntryKind::Link(_, link) => { let alt = link.alt.as_ref().map(EcoString::to_string); let tag = TagKind::Link.with_alt_text(alt); @@ -507,6 +522,7 @@ pub(crate) enum StackEntryKind { List(ListCtx), ListItemLabel, ListItemBody, + BibEntry, Link(LinkId, Packed), /// The footnote reference in the text. 
FootNoteRef, From f8f900d40b731a88f451075ddd641cec3ab07555 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 17 Jul 2025 16:51:10 +0200 Subject: [PATCH 72/76] feat: update krilla bounding boxes for links are now automatically generated by krilla --- Cargo.lock | 4 +- crates/typst-pdf/src/convert.rs | 16 +++----- crates/typst-pdf/src/link.rs | 70 +++++++------------------------- crates/typst-pdf/src/tags/mod.rs | 5 +-- 4 files changed, 25 insertions(+), 70 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7295c6d04..0301b4324 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1373,7 +1373,7 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" -source = "git+https://github.com/LaurenzV/krilla?branch=main#c0a456829bb63212470a6fa29d604dd9e051a9bd" +source = "git+https://github.com/LaurenzV/krilla?branch=main#d40f81a01ca8f8654510a76effeef12518437800" dependencies = [ "base64", "bumpalo", @@ -1402,7 +1402,7 @@ dependencies = [ [[package]] name = "krilla-svg" version = "0.1.0" -source = "git+https://github.com/LaurenzV/krilla?branch=main#c0a456829bb63212470a6fa29d604dd9e051a9bd" +source = "git+https://github.com/LaurenzV/krilla?branch=main#d40f81a01ca8f8654510a76effeef12518437800" dependencies = [ "flate2", "fontdb", diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 09f3c2c5f..d826ae668 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -373,6 +373,12 @@ fn finish( .collect::>(); Err(errors) } + KrillaError::DuplicateTagId(_, _) => { + unreachable!("duplicate IDs shouldn't be generated") + } + KrillaError::UnknownTagId(_, _) => { + unreachable!("all referenced IDs should be present in the tag tree") + } KrillaError::Image(_, loc) => { let span = to_span(loc); bail!(span, "failed to process image"); @@ -597,16 +603,6 @@ fn convert_error( "{prefix} missing document date"; hint: "set the date of the document" ), - ValidationError::DuplicateTagId(_id, loc) => { - // TODO: display the id 
and better error message - let span = to_span(*loc); - error!(span, "{prefix} duplicate tag id") - } - ValidationError::UnknownTagId(_id, loc) => { - // TODO: display the id and better error message - let span = to_span(*loc); - error!(span, "{prefix} unknown header tag id") - } } } diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index d225e6d57..7ce630516 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -4,7 +4,7 @@ use krilla::annotation::Target; use krilla::configure::Validator; use krilla::destination::XyzDestination; use krilla::geom as kg; -use typst_library::layout::{Abs, Point, Position, Size}; +use typst_library::layout::{Point, Position, Size}; use typst_library::model::Destination; use crate::convert::{FrameContext, GlobalContext}; @@ -15,8 +15,7 @@ pub(crate) struct LinkAnnotation { pub(crate) id: tags::LinkId, pub(crate) placeholder: Placeholder, pub(crate) alt: Option, - pub(crate) rect: kg::Rect, - pub(crate) quad_points: Vec, + pub(crate) quad_points: Vec, pub(crate) target: Target, } @@ -54,27 +53,21 @@ pub(crate) fn handle_link( }; let alt = link.alt.as_ref().map(EcoString::to_string); - let rect = to_rect(fc, size); - let quadpoints = quadpoints(rect); + let quad = to_quadrilateral(fc, size); // Unfortunately quadpoints still aren't well supported by most PDF readers, // even by acrobat. Which is understandable since they were only introduced // in PDF 1.6 (2005) /s let should_use_quadpoints = gc.options.standards.config.validator() == Validator::UA1; match fc.get_link_annotation(link_id) { - Some(annotation) if should_use_quadpoints => { - // Update the bounding box and add the quadpoints to an existing link annotation. 
- annotation.rect = bounding_rect(annotation.rect, rect); - annotation.quad_points.extend_from_slice(&quadpoints); - } + Some(annotation) if should_use_quadpoints => annotation.quad_points.push(quad), _ => { let placeholder = gc.tags.placeholders.reserve(); link_nodes.push(TagNode::Placeholder(placeholder)); fc.push_link_annotation(LinkAnnotation { id: link_id, placeholder, - rect, - quad_points: quadpoints.to_vec(), + quad_points: vec![quad], alt, target, }); @@ -82,53 +75,20 @@ pub(crate) fn handle_link( } } -// Compute the bounding box of the transformed link. -fn to_rect(fc: &FrameContext, size: Size) -> kg::Rect { - let mut min_x = Abs::inf(); - let mut min_y = Abs::inf(); - let mut max_x = -Abs::inf(); - let mut max_y = -Abs::inf(); - +/// Compute the quadrilateral representing the transformed rectangle of this frame. +fn to_quadrilateral(fc: &FrameContext, size: Size) -> kg::Quadrilateral { let pos = Point::zero(); - - for point in [ - pos, - pos + Point::with_x(size.x), + let points = [ pos + Point::with_y(size.y), pos + size.to_point(), - ] { - let t = point.transform(fc.state().transform()); - min_x.set_min(t.x); - min_y.set_min(t.y); - max_x.set_max(t.x); - max_y.set_max(t.y); - } + pos + Point::with_x(size.x), + pos, + ]; - let x1 = min_x.to_f32(); - let x2 = max_x.to_f32(); - let y1 = min_y.to_f32(); - let y2 = max_y.to_f32(); - - kg::Rect::from_ltrb(x1, y1, x2, y2).unwrap() -} - -fn bounding_rect(a: kg::Rect, b: kg::Rect) -> kg::Rect { - kg::Rect::from_ltrb( - a.left().min(b.left()), - a.top().min(b.top()), - a.right().max(b.right()), - a.bottom().max(b.bottom()), - ) - .unwrap() -} - -fn quadpoints(rect: kg::Rect) -> [kg::Point; 4] { - [ - kg::Point::from_xy(rect.left(), rect.bottom()), - kg::Point::from_xy(rect.right(), rect.bottom()), - kg::Point::from_xy(rect.right(), rect.top()), - kg::Point::from_xy(rect.left(), rect.top()), - ] + kg::Quadrilateral(points.map(|point| { + let p = point.transform(fc.state().transform()); + 
kg::Point::from_xy(p.x.to_f32(), p.y.to_f32()) + })) } fn pos_to_target(gc: &mut GlobalContext, pos: Position) -> Option { diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 18caae9ed..858844a57 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -324,10 +324,9 @@ pub(crate) fn add_annotations( annotations: Vec, ) { for annotation in annotations.into_iter() { - let LinkAnnotation { id: _, placeholder, alt, rect, quad_points, target } = - annotation; + let LinkAnnotation { id: _, placeholder, alt, quad_points, target } = annotation; let annot = krilla::annotation::Annotation::new_link( - krilla::annotation::LinkAnnotation::new(rect, Some(quad_points), target), + krilla::annotation::LinkAnnotation::new_with_quad_points(quad_points, target), alt, ); let annot_id = page.add_tagged_annotation(annot); From 79423f3033d276aca7790bd3a5614a8dd9063c33 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 17 Jul 2025 17:31:46 +0200 Subject: [PATCH 73/76] refactor: revert some changes to main --- crates/typst-layout/src/flow/distribute.rs | 2 +- crates/typst-render/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/typst-layout/src/flow/distribute.rs b/crates/typst-layout/src/flow/distribute.rs index 108a8d651..f504d22e7 100644 --- a/crates/typst-layout/src/flow/distribute.rs +++ b/crates/typst-layout/src/flow/distribute.rs @@ -93,7 +93,7 @@ impl Item<'_, '_> { Self::Frame(frame, _) => { frame.size().is_zero() && frame.items().all(|(_, item)| { - matches!(item, FrameItem::Link(..) 
| FrameItem::Tag(_)) + matches!(item, FrameItem::Link(_, _) | FrameItem::Tag(_)) }) } Self::Placed(_, placed) => !placed.float, diff --git a/crates/typst-render/src/lib.rs b/crates/typst-render/src/lib.rs index 744f2d117..2c57fe2db 100644 --- a/crates/typst-render/src/lib.rs +++ b/crates/typst-render/src/lib.rs @@ -167,7 +167,7 @@ fn render_frame(canvas: &mut sk::Pixmap, state: State, frame: &Frame) { FrameItem::Image(image, size, _) => { image::render_image(canvas, state.pre_translate(*pos), image, *size); } - FrameItem::Link(..) => {} + FrameItem::Link(_, _) => {} FrameItem::Tag(_) => {} } } From 99815f449ca712508b46d9c3eeef783c85337b05 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 18 Jul 2025 12:01:54 +0200 Subject: [PATCH 74/76] feat: best effort link alt text generation --- crates/typst-layout/src/rules.rs | 7 ++-- .../typst-library/src/model/bibliography.rs | 8 ++-- crates/typst-library/src/model/link.rs | 37 ++++++++++++++----- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/crates/typst-layout/src/rules.rs b/crates/typst-layout/src/rules.rs index 829a402d3..915751d9f 100644 --- a/crates/typst-layout/src/rules.rs +++ b/crates/typst-layout/src/rules.rs @@ -222,9 +222,10 @@ const LINK_MARKER_RULE: ShowFn = |elem, _, _| Ok(elem.body.clone()); const LINK_RULE: ShowFn = |elem, engine, styles| { let body = elem.body.clone(); let dest = elem.dest.resolve(engine.introspector).at(elem.span())?; - let url = || dest.as_url().map(|url| url.clone().into_inner()); - // TODO(accessibility): remove custom alt field and generate alt text - let alt = elem.alt.get_cloned(styles).or_else(url); + let alt = match elem.alt.get_cloned(styles) { + Some(alt) => Some(alt), + None => dest.alt_text(engine, styles)?, + }; Ok(body.linked(dest, alt)) }; diff --git a/crates/typst-library/src/model/bibliography.rs b/crates/typst-library/src/model/bibliography.rs index 4cc252f5b..529f1db40 100644 --- a/crates/typst-library/src/model/bibliography.rs +++ 
b/crates/typst-library/src/model/bibliography.rs @@ -795,8 +795,8 @@ impl<'a> Generator<'a> { renderer.display_elem_child(elem, &mut None, false)?; if let Some(location) = first_occurrences.get(item.key.as_str()) { let dest = Destination::Location(*location); - // TODO(accessibility): generate alt text - content = content.linked(dest, None); + let alt = content.plain_text(); + content = content.linked(dest, Some(alt)); } StrResult::Ok(content) }) @@ -931,8 +931,8 @@ impl ElemRenderer<'_> { if let Some(hayagriva::ElemMeta::Entry(i)) = elem.meta { if let Some(location) = (self.link)(i) { let dest = Destination::Location(location); - // TODO(accessibility): generate alt text - content = content.linked(dest, None); + let alt = content.plain_text(); + content = content.linked(dest, Some(alt)); } } diff --git a/crates/typst-library/src/model/link.rs b/crates/typst-library/src/model/link.rs index fe92ebd19..5d40a306e 100644 --- a/crates/typst-library/src/model/link.rs +++ b/crates/typst-library/src/model/link.rs @@ -1,15 +1,18 @@ use std::ops::Deref; +use std::str::FromStr; use comemo::Tracked; use ecow::{eco_format, EcoString}; -use crate::diag::{bail, StrResult}; +use crate::diag::{bail, SourceResult, StrResult}; +use crate::engine::Engine; use crate::foundations::{ cast, elem, Content, Label, Packed, Repr, ShowSet, Smart, StyleChain, Styles, }; -use crate::introspection::{Introspector, Locatable, Location}; -use crate::layout::Position; -use crate::text::TextElem; +use crate::introspection::{Counter, CounterKey, Introspector, Locatable, Location}; +use crate::layout::{PageElem, Position}; +use crate::model::NumberingPattern; +use crate::text::{LocalName, TextElem}; /// Links to a URL or a location in the document. 
/// @@ -216,12 +219,26 @@ pub enum Destination { } impl Destination { - pub fn as_url(&self) -> Option<&Url> { - if let Self::Url(v) = self { - Some(v) - } else { - None - } + pub fn alt_text( + &self, + engine: &mut Engine, + styles: StyleChain, + ) -> SourceResult> { + let alt = match self { + Destination::Url(url) => Some(url.clone().into_inner()), + Destination::Position(_) => None, + &Destination::Location(loc) => { + let numbering = loc + .page_numbering(engine) + .unwrap_or_else(|| NumberingPattern::from_str("1").unwrap().into()); + let content = Counter::new(CounterKey::Page) + .display_at_loc(engine, loc, styles, &numbering)?; + let page_nr = content.plain_text(); + let page_str = PageElem::local_name_in(styles); + Some(eco_format!("{page_str} {page_nr}")) + } + }; + Ok(alt) } } From d2105dcc35498e07b533701f5d64d177e1757af3 Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 18 Jul 2025 15:39:19 +0200 Subject: [PATCH 75/76] feat: report spans for missing alt text and unknown/duplicate tag ids --- Cargo.lock | 4 +- .../src/foundations/content/mod.rs | 20 +++++-- crates/typst-pdf/src/convert.rs | 57 ++++++++++++++----- crates/typst-pdf/src/link.rs | 3 + crates/typst-pdf/src/tags/mod.rs | 13 +++-- crates/typst-pdf/src/tags/table.rs | 17 ++++-- 6 files changed, 85 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0301b4324..5d22e6dcc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1373,7 +1373,7 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" -source = "git+https://github.com/LaurenzV/krilla?branch=main#d40f81a01ca8f8654510a76effeef12518437800" +source = "git+https://github.com/LaurenzV/krilla?branch=main#32d070e737cd8ae4c3aa4ff901d15cb22bd052f3" dependencies = [ "base64", "bumpalo", @@ -1402,7 +1402,7 @@ dependencies = [ [[package]] name = "krilla-svg" version = "0.1.0" -source = "git+https://github.com/LaurenzV/krilla?branch=main#d40f81a01ca8f8654510a76effeef12518437800" +source = 
"git+https://github.com/LaurenzV/krilla?branch=main#32d070e737cd8ae4c3aa4ff901d15cb22bd052f3" dependencies = [ "flate2", "fontdb", diff --git a/crates/typst-library/src/foundations/content/mod.rs b/crates/typst-library/src/foundations/content/mod.rs index 84ab0c00a..1fa54b8f3 100644 --- a/crates/typst-library/src/foundations/content/mod.rs +++ b/crates/typst-library/src/foundations/content/mod.rs @@ -23,10 +23,10 @@ use serde::{Serialize, Serializer}; use typst_syntax::Span; use typst_utils::singleton; -use crate::diag::{SourceResult, StrResult}; +use crate::diag::{bail, SourceResult, StrResult}; use crate::engine::Engine; use crate::foundations::{ - func, repr, scope, ty, Context, Dict, IntoValue, Label, Property, Recipe, + func, repr, scope, ty, Args, Context, Dict, IntoValue, Label, Property, Recipe, RecipeIndex, Repr, Selector, Str, Style, StyleChain, Styles, Value, }; use crate::introspection::{Locatable, Location}; @@ -479,7 +479,7 @@ impl Content { /// Link the content somewhere. pub fn linked(self, dest: Destination, alt: Option) -> Self { let span = self.span(); - LinkMarker::new(self, dest.clone(), alt) + LinkMarker::new(self, dest.clone(), alt, span) .pack() .spanned(span) .set(LinkElem::current, Some(dest)) @@ -785,15 +785,27 @@ impl Repr for StyledElem { } /// An element that associates the body of a link with the destination. -#[elem(Locatable)] +#[elem(Locatable, Construct)] pub struct LinkMarker { /// The content. 
+ #[internal] #[required] pub body: Content, + #[internal] #[required] pub dest: Destination, + #[internal] #[required] pub alt: Option, + #[internal] + #[required] + pub span: Span, +} + +impl Construct for LinkMarker { + fn construct(_: &mut Engine, args: &mut Args) -> SourceResult { + bail!(args.span, "cannot be constructed manually"); + } } impl IntoValue for T { diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index d826ae668..b33406816 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -9,6 +9,7 @@ use krilla::error::KrillaError; use krilla::geom::PathBuilder; use krilla::page::{PageLabel, PageSettings}; use krilla::surface::Surface; +use krilla::tagging::TagId; use krilla::{Document, SerializeSettings}; use krilla_svg::render_svg_glyph; use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult}; @@ -373,11 +374,21 @@ fn finish( .collect::>(); Err(errors) } - KrillaError::DuplicateTagId(_, _) => { - unreachable!("duplicate IDs shouldn't be generated") + KrillaError::DuplicateTagId(id, loc) => { + let span = to_span(loc); + let id = display_tag_id(&id); + bail!( + span, "duplicate tag id `{id}`"; + hint: "please report this as a bug" + ) } - KrillaError::UnknownTagId(_, _) => { - unreachable!("all referenced IDs should be present in the tag tree") + KrillaError::UnknownTagId(id, loc) => { + let span = to_span(loc); + let id = display_tag_id(&id); + bail!( + span, "unknown tag id `{id}`"; + hint: "please report this as a bug" + ) } KrillaError::Image(_, loc) => { let span = to_span(loc); @@ -394,6 +405,20 @@ fn finish( } } +fn display_tag_id(id: &TagId) -> impl std::fmt::Display + use<'_> { + typst_utils::display(|f| { + if let Ok(str) = std::str::from_utf8(id.as_bytes()) { + f.write_str(str) + } else { + f.write_str("0x")?; + for b in id.as_bytes() { + write!(f, "{b:x}")?; + } + Ok(()) + } + }) +} + /// Converts a krilla error into a Typst error. 
fn convert_error( gc: &GlobalContext, @@ -562,16 +587,20 @@ fn convert_error( } // The below errors cannot occur yet, only once Typst supports full PDF/A // and PDF/UA. But let's still add a message just to be on the safe side. - ValidationError::MissingAnnotationAltText => error!( - Span::detached(), - "{prefix} missing annotation alt text"; - hint: "please report this as a bug" - ), - ValidationError::MissingAltText => error!( - Span::detached(), - "{prefix} missing alt text"; - hint: "make sure your images and equations have alt text" - ), + ValidationError::MissingAnnotationAltText(loc) => { + let span = to_span(*loc); + error!( + span, "{prefix} missing annotation alt text"; + hint: "please report this as a bug" + ) + } + ValidationError::MissingAltText(loc) => { + let span = to_span(*loc); + error!( + span, "{prefix} missing alt text"; + hint: "make sure your images and equations have alt text" + ) + } ValidationError::NoDocumentLanguage => error!( Span::detached(), "{prefix} missing document language"; diff --git a/crates/typst-pdf/src/link.rs b/crates/typst-pdf/src/link.rs index 7ce630516..df1e926de 100644 --- a/crates/typst-pdf/src/link.rs +++ b/crates/typst-pdf/src/link.rs @@ -6,6 +6,7 @@ use krilla::destination::XyzDestination; use krilla::geom as kg; use typst_library::layout::{Point, Position, Size}; use typst_library::model::Destination; +use typst_syntax::Span; use crate::convert::{FrameContext, GlobalContext}; use crate::tags::{self, Placeholder, TagNode}; @@ -17,6 +18,7 @@ pub(crate) struct LinkAnnotation { pub(crate) alt: Option, pub(crate) quad_points: Vec, pub(crate) target: Target, + pub(crate) span: Span, } pub(crate) fn handle_link( @@ -70,6 +72,7 @@ pub(crate) fn handle_link( quad_points: vec![quad], alt, target, + span: link.span, }); } } diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index 858844a57..ef7b294d6 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -176,6 
+176,7 @@ pub(crate) fn handle_start( return Ok(()); }; + let tag = tag.with_location(Some(elem.span().into_raw().get())); push_stack(gc, loc, StackEntryKind::Standard(tag))?; Ok(()) @@ -202,7 +203,8 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc // PDF/UA compliance of the structure hierarchy is checked // elsewhere. While this doesn't make a lot of sense, just // avoid crashing here. - let tag = TagKind::TOCI.into(); + let tag = TagKind::TOCI + .with_location(Some(outline_entry.span().into_raw().get())); gc.tags.push(TagNode::Group(tag, entry.nodes)); return; }; @@ -216,7 +218,8 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: Loc // PDF/UA compliance of the structure hierarchy is checked // elsewhere. While this doesn't make a lot of sense, just // avoid crashing here. - let tag = TagKind::TD(TableDataCell::new()).into(); + let tag = TagKind::TD(TableDataCell::new()) + .with_location(Some(cell.span().into_raw().get())); gc.tags.push(TagNode::Group(tag, entry.nodes)); return; }; @@ -324,11 +327,13 @@ pub(crate) fn add_annotations( annotations: Vec, ) { for annotation in annotations.into_iter() { - let LinkAnnotation { id: _, placeholder, alt, quad_points, target } = annotation; + let LinkAnnotation { id: _, placeholder, alt, quad_points, target, span } = + annotation; let annot = krilla::annotation::Annotation::new_link( krilla::annotation::LinkAnnotation::new_with_quad_points(quad_points, target), alt, - ); + ) + .with_location(Some(span.into_raw().get())); let annot_id = page.add_tagged_annotation(annot); gc.tags.placeholders.init(placeholder, Node::Leaf(annot_id)); } diff --git a/crates/typst-pdf/src/tags/table.rs b/crates/typst-pdf/src/tags/table.rs index 969440f2f..75128863f 100644 --- a/crates/typst-pdf/src/tags/table.rs +++ b/crates/typst-pdf/src/tags/table.rs @@ -9,6 +9,7 @@ use smallvec::SmallVec; use typst_library::foundations::{Packed, Smart, StyleChain}; use 
typst_library::model::TableCell; use typst_library::pdf::{TableCellKind, TableHeaderScope}; +use typst_syntax::Span; use crate::tags::{TableId, TagNode}; @@ -57,7 +58,7 @@ impl TableCtx { } } - pub(crate) fn insert(&mut self, cell: &TableCell, nodes: Vec) { + pub(crate) fn insert(&mut self, cell: &Packed, nodes: Vec) { let x = cell.x.get(StyleChain::default()).unwrap_or_else(|| unreachable!()); let y = cell.y.get(StyleChain::default()).unwrap_or_else(|| unreachable!()); let rowspan = cell.rowspan.get(StyleChain::default()); @@ -92,6 +93,7 @@ impl TableCtx { kind, headers: SmallVec::new(), nodes, + span: cell.span(), }); } @@ -175,13 +177,14 @@ impl TableCtx { .with_headers(cell.headers), ) .with_id(Some(id)) + .with_location(Some(cell.span.into_raw().get())) } TableCellKind::Footer | TableCellKind::Data => TagKind::TD( TableDataCell::new() .with_span(span) .with_headers(cell.headers), ) - .into(), + .with_location(Some(cell.span.into_raw().get())), }; Some(TagNode::Group(tag, cell.nodes)) }) @@ -296,6 +299,7 @@ struct TableCtxCell { kind: Smart, headers: SmallVec<[TagId; 1]>, nodes: Vec, + span: Span, } impl TableCtxCell { @@ -344,7 +348,7 @@ mod tests { fn table(cells: [TableCell; SIZE]) -> TableCtx { let mut table = TableCtx::new(TableId(324), Some("summary".into())); for cell in cells { - table.insert(&cell, Vec::new()); + table.insert(&Packed::new(cell), Vec::new()); } table } @@ -416,7 +420,9 @@ mod tests { let id = table_cell_id(TableId(324), x, y); let ids = headers.map(|(x, y)| table_cell_id(TableId(324), x, y)); TagNode::Group( - TagKind::TH(TableHeaderCell::new(scope).with_headers(ids)).with_id(Some(id)), + TagKind::TH(TableHeaderCell::new(scope).with_headers(ids)) + .with_id(Some(id)) + .with_location(Some(Span::detached().into_raw().get())), Vec::new(), ) } @@ -424,7 +430,8 @@ mod tests { fn td(headers: [(u32, u32); SIZE]) -> TagNode { let ids = headers.map(|(x, y)| table_cell_id(TableId(324), x, y)); TagNode::Group( - 
TagKind::TD(TableDataCell::new().with_headers(ids)).into(), + TagKind::TD(TableDataCell::new().with_headers(ids)) + .with_location(Some(Span::detached().into_raw().get())), Vec::new(), ) } From 9649def1081720d351def471cceb735fb594ea9e Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Fri, 18 Jul 2025 15:55:56 +0200 Subject: [PATCH 76/76] feat: add `alt` parameter to `math.equation` --- crates/typst-library/src/math/equation.rs | 4 ++++ crates/typst-library/src/model/link.rs | 2 +- crates/typst-library/src/visualize/image/mod.rs | 2 +- crates/typst-pdf/src/tags/mod.rs | 6 +++--- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/crates/typst-library/src/math/equation.rs b/crates/typst-library/src/math/equation.rs index db5253834..74550d210 100644 --- a/crates/typst-library/src/math/equation.rs +++ b/crates/typst-library/src/math/equation.rs @@ -1,6 +1,7 @@ use std::num::NonZeroUsize; use codex::styling::MathVariant; +use ecow::EcoString; use typst_utils::NonZeroExt; use unicode_math_class::MathClass; @@ -47,6 +48,9 @@ use crate::text::{FontFamily, FontList, FontWeight, LocalName, TextElem}; /// [main math page]($category/math). #[elem(Locatable, Synthesize, ShowSet, Count, LocalName, Refable, Outlinable)] pub struct EquationElem { + /// An alternative description of the mathematical equation. + pub alt: Option, + /// Whether the equation is displayed as a separate block. #[default(false)] pub block: bool, diff --git a/crates/typst-library/src/model/link.rs b/crates/typst-library/src/model/link.rs index 5d40a306e..6e7539059 100644 --- a/crates/typst-library/src/model/link.rs +++ b/crates/typst-library/src/model/link.rs @@ -88,7 +88,7 @@ use crate::text::{LocalName, TextElem}; /// generated. #[elem(Locatable)] pub struct LinkElem { - /// A text describing the link. + /// An alternative description of the link. pub alt: Option, /// The destination the link points to. 
diff --git a/crates/typst-library/src/visualize/image/mod.rs b/crates/typst-library/src/visualize/image/mod.rs index 379c25ba7..e7b952f75 100644 --- a/crates/typst-library/src/visualize/image/mod.rs +++ b/crates/typst-library/src/visualize/image/mod.rs @@ -124,7 +124,7 @@ pub struct ImageElem { /// The height of the image. pub height: Sizing, - /// A text describing the image. + /// An alternative description of the image. pub alt: Option, /// How the image should adjust itself to a given area (the area is defined diff --git a/crates/typst-pdf/src/tags/mod.rs b/crates/typst-pdf/src/tags/mod.rs index ef7b294d6..a00c76932 100644 --- a/crates/typst-pdf/src/tags/mod.rs +++ b/crates/typst-pdf/src/tags/mod.rs @@ -123,9 +123,9 @@ pub(crate) fn handle_start( } else { TagKind::Figure.with_alt_text(alt) } - } else if let Some(_) = elem.to_packed::() { - // TODO: alt text - TagKind::Formula.into() + } else if let Some(equation) = elem.to_packed::() { + let alt = equation.alt.get_as_ref().map(|s| s.to_string()); + TagKind::Formula.with_alt_text(alt) } else if let Some(table) = elem.to_packed::() { let table_id = gc.tags.next_table_id(); let summary = table.summary.get_as_ref().map(|s| s.to_string());