feat: [no ci] mark artifacts

This commit is contained in:
Tobias Schmitz 2025-06-02 12:02:31 +02:00
parent 9d788cd73d
commit c9bb9e8981
No known key found for this signature in database
7 changed files with 174 additions and 66 deletions

View File

@ -1,7 +1,10 @@
use typst_library::diag::SourceResult;
use typst_library::engine::Engine;
use typst_library::introspection::{ManualPageCounter, Tag};
use typst_library::layout::{Frame, FrameItem, Page, Point};
use typst_library::foundations::{Content, NativeElement};
use typst_library::introspection::{ManualPageCounter, SplitLocator, Tag};
use typst_library::layout::{
ArtifactKind, ArtifactMarker, Frame, FrameItem, Page, Point,
};
use super::LayoutedPage;
@ -10,6 +13,7 @@ use super::LayoutedPage;
/// physical page number, which is unknown during parallel layout.
pub fn finalize(
engine: &mut Engine,
locator: &mut SplitLocator,
counter: &mut ManualPageCounter,
tags: &mut Vec<Tag>,
LayoutedPage {
@ -45,10 +49,12 @@ pub fn finalize(
// important as it affects the relative ordering of introspectable elements
// and thus how counters resolve.
if let Some(background) = background {
frame.push_frame(Point::zero(), background);
let tag = ArtifactMarker::new(ArtifactKind::Page).pack();
push_tagged(engine, locator, &mut frame, Point::zero(), background, tag);
}
if let Some(header) = header {
frame.push_frame(Point::with_x(margin.left), header);
let tag = ArtifactMarker::new(ArtifactKind::Header).pack();
push_tagged(engine, locator, &mut frame, Point::with_x(margin.left), header, tag);
}
// Add the inner contents.
@ -57,7 +63,8 @@ pub fn finalize(
// Add the "after" marginals.
if let Some(footer) = footer {
let y = frame.height() - footer.height();
frame.push_frame(Point::new(margin.left, y), footer);
let tag = ArtifactMarker::new(ArtifactKind::Footer).pack();
push_tagged(engine, locator, &mut frame, Point::new(margin.left, y), footer, tag);
}
if let Some(foreground) = foreground {
frame.push_frame(Point::zero(), foreground);
@ -72,3 +79,25 @@ pub fn finalize(
Ok(Page { frame, fill, numbering, supplement, number })
}
/// Pushes `inner` into `frame` at `pos`, enclosed by a start/end tag pair
/// carrying `tag`.
///
/// A fresh location is requested from `locator` (keyed by the hash of `tag`)
/// and assigned to `tag` before it is emitted. The start tag and the frame are
/// both placed at `pos`; the end tag is placed at `pos` shifted down by the
/// height of `inner`.
fn push_tagged(
    engine: &mut Engine,
    locator: &mut SplitLocator,
    frame: &mut Frame,
    pos: Point,
    inner: Frame,
    mut tag: Content,
) {
    // TODO: use general PDF Tagged/Artifact element that wraps some content and
    // is also available to the user.
    let key = typst_utils::hash128(&tag);
    let loc = locator.next_location(engine.introspector, key);
    tag.set_location(loc);

    // Determine where the end tag goes before `inner` is moved into `frame`.
    let mut end = pos;
    end.y += inner.height();

    frame.push(pos, FrameItem::Tag(Tag::Start(tag)));
    frame.push_frame(pos, inner);
    frame.push(end, FrameItem::Tag(Tag::End(loc, key)));
}

View File

@ -123,17 +123,19 @@ fn layout_pages<'a>(
Item::Run(..) => {
let layouted = runs.next().unwrap()?;
for layouted in layouted {
let page = finalize(engine, &mut counter, &mut tags, layouted)?;
let page =
finalize(engine, locator, &mut counter, &mut tags, layouted)?;
pages.push(page);
}
}
Item::Parity(parity, initial, locator) => {
Item::Parity(parity, initial, page_locator) => {
if !parity.matches(pages.len()) {
continue;
}
let layouted = layout_blank_page(engine, locator.relayout(), *initial)?;
let page = finalize(engine, &mut counter, &mut tags, layouted)?;
let layouted =
layout_blank_page(engine, page_locator.relayout(), *initial)?;
let page = finalize(engine, locator, &mut counter, &mut tags, layouted)?;
pages.push(page);
}
Item::Tags(items) => {

View File

@ -185,8 +185,6 @@ fn layout_page_run_impl(
)?;
// Layouts a single marginal.
// TODO: add some sort of tag that indicates the marginals and use it to
// mark them as artifacts for PDF/UA.
let mut layout_marginal = |content: &Option<Content>, area, align| {
let Some(content) = content else { return Ok(None) };
let aligned = content.clone().styled(AlignElem::set_alignment(align));

View File

@ -10,7 +10,7 @@ use crate::foundations::{
cast, elem, Args, AutoValue, Cast, Construct, Content, Dict, Fold, NativeElement,
Set, Smart, Value,
};
use crate::introspection::Introspector;
use crate::introspection::{Introspector, Locatable};
use crate::layout::{
Abs, Alignment, FlushElem, Frame, HAlignment, Length, OuterVAlignment, Ratio, Rel,
Sides, SpecificAlignment,
@ -451,6 +451,28 @@ impl PagebreakElem {
}
}
// Marker element identifying a region of a page frame as an artifact.
//
// Page finalization wraps the background, header and footer frames in
// start/end tags that carry this element, and the PDF tag handling checks for
// it (via `to_packed::<ArtifactMarker>()`) to open artifact marked content
// instead of a regular structure tag.
//
// `Copy` is derived because the PDF export stores a copy of the marker while
// an artifact is open.
//
// HACK: this should probably not be an element
#[derive(Copy)]
#[elem(Construct, Locatable)]
pub struct ArtifactMarker {
// Which kind of artifact the wrapped frame is.
#[internal]
#[required]
pub kind: ArtifactKind,
}
/// The kind of artifact an [`ArtifactMarker`] denotes.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ArtifactKind {
/// Used when tagging the page header.
Header,
/// Used when tagging the page footer.
Footer,
/// Used when tagging the page background.
Page,
}
// The marker is only created internally (with `ArtifactMarker::new`), so the
// constructor unconditionally reports an error at the span of the arguments.
impl Construct for ArtifactMarker {
fn construct(_: &mut Engine, args: &mut Args) -> SourceResult<Content> {
bail!(args.span, "cannot be constructed manually");
}
}
/// A finished document with metadata and page frames.
#[derive(Debug, Default, Clone)]
pub struct PagedDocument {

View File

@ -18,7 +18,8 @@ use crate::introspection::{
Counter, CounterKey, Introspector, Locatable, Location, Locator, LocatorLink,
};
use crate::layout::{
Abs, Axes, BlockBody, BlockElem, BoxElem, Dir, Em, Fr, HElem, Length, PageElem, Region, Rel, RepeatElem, Sides
Abs, Axes, BlockBody, BlockElem, BoxElem, Dir, Em, Fr, HElem, Length, PageElem,
Region, Rel, RepeatElem, Sides,
};
use crate::math::EquationElem;
use crate::model::{Destination, HeadingElem, NumberingPattern, ParElem, Refable};
@ -430,9 +431,9 @@ impl Show for Packed<OutlineEntry> {
let body = body.plain_text();
let page_str = PageElem::local_name_in(styles);
let page_nr = page.plain_text();
eco_format!("{prefix} {body} {page_str} {page_nr}")
eco_format!("{prefix} \"{body}\", {page_str} {page_nr}")
};
let inner = self.inner(engine, context, span, body, page)?;
let inner = self.inner(context, span, body, page)?;
let block = if self.element.is::<EquationElem>() {
let body = prefix.unwrap_or_default() + inner;
BlockElem::new()
@ -578,7 +579,6 @@ impl OutlineEntry {
#[func(contextual)]
pub fn inner(
&self,
engine: &mut Engine,
context: Tracked<Context>,
span: Span,
body: Content,

View File

@ -10,7 +10,6 @@ use krilla::error::KrillaError;
use krilla::geom::PathBuilder;
use krilla::page::{PageLabel, PageSettings};
use krilla::surface::Surface;
use krilla::tagging::{ArtifactType, ContentTag, Node};
use krilla::{Document, SerializeSettings};
use krilla_svg::render_svg_glyph;
use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult};
@ -31,7 +30,7 @@ use crate::metadata::build_metadata;
use crate::outline::build_outline;
use crate::page::PageLabelExt;
use crate::shape::handle_shape;
use crate::tags::{handle_close_tag, handle_open_tag, Placeholder, TagNode, Tags};
use crate::tags::{self, Placeholder, Tags};
use crate::text::handle_text;
use crate::util::{convert_path, display_font, AbsExt, TransformExt};
use crate::PdfOptions;
@ -42,17 +41,15 @@ pub fn convert(
options: &PdfOptions,
) -> SourceResult<Vec<u8>> {
// HACK
// let config = Configuration::new();
let config = Configuration::new_with_validator(Validator::UA1);
let settings = SerializeSettings {
compress_content_streams: true,
compress_content_streams: false, // true,
no_device_cs: true,
ascii_compatible: false,
ascii_compatible: true, // false,
xmp_metadata: true,
cmyk_profile: None,
configuration: config,
// TODO: Should we just set this to false? If set to `false` this will
// automatically be enabled if the `UA1` validator is used.
configuration: config, // options.standards.config,
// TODO: allow opting out of tagging PDFs
enable_tagging: true,
render_svg_glyph_fn: render_svg_glyph,
};
@ -114,18 +111,7 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul
let mut surface = page.surface();
let mut fc = FrameContext::new(typst_page.frame.size());
// Marked-content may not cross page boundaries: reopen tag
// that was closed at the end of the last page.
if let Some((_, _, nodes)) = gc.tags.stack.last_mut() {
let tag = if gc.tags.in_artifact {
ContentTag::Artifact(ArtifactType::Other)
} else {
ContentTag::Other
};
// TODO: somehow avoid empty marked-content sequences
let id = surface.start_tagged(tag);
nodes.push(TagNode::Leaf(id));
}
tags::restart(gc, &mut surface);
handle_frame(
&mut fc,
@ -135,17 +121,11 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul
gc,
)?;
// Marked-content may not cross page boundaries: close open tag.
if !gc.tags.stack.is_empty() {
surface.end_tagged();
}
tags::end_open(gc, &mut surface);
surface.finish();
for (placeholder, annotation) in fc.annotations {
let annotation_id = page.add_tagged_annotation(annotation);
gc.tags.init_placeholder(placeholder, Node::Leaf(annotation_id));
}
tags::add_annotations(gc, &mut page, fc.annotations);
}
}
@ -318,10 +298,10 @@ pub(crate) fn handle_frame(
handle_link(fc, gc, alt.as_ref().map(EcoString::to_string), dest, *size)
}
FrameItem::Tag(introspection::Tag::Start(elem)) => {
handle_open_tag(gc, surface, elem)
tags::handle_start(gc, surface, elem)
}
FrameItem::Tag(introspection::Tag::End(loc, _)) => {
handle_close_tag(gc, surface, loc);
tags::handle_end(gc, surface, loc);
}
}

View File

@ -1,9 +1,15 @@
use std::cell::OnceCell;
use std::ops::Deref;
use krilla::annotation::Annotation;
use krilla::page::Page;
use krilla::surface::Surface;
use krilla::tagging::{ContentTag, Identifier, Node, Tag, TagGroup, TagTree};
use krilla::tagging::{
ArtifactType, ContentTag, Identifier, Node, Tag, TagGroup, TagTree,
};
use typst_library::foundations::{Content, StyleChain};
use typst_library::introspection::Location;
use typst_library::layout::{ArtifactKind, ArtifactMarker};
use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry};
use crate::convert::GlobalContext;
@ -12,7 +18,7 @@ pub(crate) struct Tags {
/// The intermediary stack of nested tag groups.
pub(crate) stack: Vec<(Location, Tag, Vec<TagNode>)>,
pub(crate) placeholders: Vec<OnceCell<Node>>,
pub(crate) in_artifact: bool,
pub(crate) in_artifact: Option<(Location, ArtifactMarker)>,
/// The output.
pub(crate) tree: Vec<TagNode>,
@ -34,7 +40,7 @@ impl Tags {
Self {
stack: Vec::new(),
placeholders: Vec::new(),
in_artifact: false,
in_artifact: None,
tree: Vec::new(),
}
@ -93,7 +99,16 @@ impl Tags {
}
}
pub(crate) fn context_supports(&self, tag: &Tag) -> bool {
/// Yields the child list that new nodes should currently be appended to.
///
/// This is the child list of the innermost group on the stack, or the
/// top-level tree when the stack is empty. The boolean is `true` exactly when
/// the returned list is the tree root.
fn parent_nodes(&mut self) -> (bool, &mut Vec<TagNode>) {
    match self.stack.last_mut() {
        Some((_, _, children)) => (false, children),
        None => (true, &mut self.tree),
    }
}
fn context_supports(&self, tag: &Tag) -> bool {
let Some((_, parent, _)) = self.stack.last() else { return true };
use Tag::*;
@ -142,16 +157,57 @@ impl Tags {
}
}
pub(crate) fn handle_open_tag(
/// Reopens marked content at the start of a page, since marked-content
/// sequences may not cross page boundaries.
///
/// If an artifact is still active, artifact content of the same kind is
/// started again. Otherwise, if a tag group is still open, a new
/// `ContentTag::Other` sequence is started and recorded as a leaf of the
/// innermost group. With nothing open, this does nothing.
pub(crate) fn restart(gc: &mut GlobalContext, surface: &mut Surface) {
    // TODO: somehow avoid empty marked-content sequences
    if let Some((_, marker)) = gc.tags.in_artifact {
        start_artifact(gc, surface, marker.kind);
        return;
    }

    let Some((_, _, nodes)) = gc.tags.stack.last_mut() else {
        return;
    };
    let id = surface.start_tagged(ContentTag::Other);
    nodes.push(TagNode::Leaf(id));
}
/// Closes the marked-content sequence that is still open at the end of a page,
/// since marked-content sequences may not cross page boundaries.
///
/// A sequence counts as open when an artifact is active or when at least one
/// tag group is on the stack.
pub(crate) fn end_open(gc: &mut GlobalContext, surface: &mut Surface) {
    let has_open_content =
        gc.tags.in_artifact.is_some() || !gc.tags.stack.is_empty();
    if has_open_content {
        surface.end_tagged();
    }
}
/// Registers the annotations collected for a page frame.
///
/// Each annotation is added to `page` as a tagged annotation, and the
/// resulting identifier is stored as a leaf node in the placeholder that was
/// reserved for it.
pub(crate) fn add_annotations(
    gc: &mut GlobalContext,
    page: &mut Page,
    annotations: Vec<(Placeholder, Annotation)>,
) {
    for (slot, annot) in annotations {
        let leaf = Node::Leaf(page.add_tagged_annotation(annot));
        gc.tags.init_placeholder(slot, leaf);
    }
}
pub(crate) fn handle_start(
gc: &mut GlobalContext,
surface: &mut Surface,
elem: &Content,
) {
if gc.tags.in_artifact {
if gc.tags.in_artifact.is_some() {
// Don't nest artifacts
return;
}
let Some(loc) = elem.location() else { return };
let loc = elem.location().unwrap();
if let Some(marker) = elem.to_packed::<ArtifactMarker>() {
if !gc.tags.stack.is_empty() {
surface.end_tagged();
}
start_artifact(gc, surface, marker.kind);
gc.tags.in_artifact = Some((loc, *marker.deref()));
return;
}
let tag = if let Some(heading) = elem.to_packed::<HeadingElem>() {
let level = heading.resolve_level(StyleChain::default());
@ -181,29 +237,50 @@ pub(crate) fn handle_open_tag(
if !gc.tags.stack.is_empty() {
surface.end_tagged();
}
let content_id = surface.start_tagged(krilla::tagging::ContentTag::Other);
gc.tags.stack.push((loc, tag, vec![TagNode::Leaf(content_id)]));
let id = surface.start_tagged(krilla::tagging::ContentTag::Other);
gc.tags.stack.push((loc, tag, vec![TagNode::Leaf(id)]));
}
pub(crate) fn handle_close_tag(
gc: &mut GlobalContext,
surface: &mut Surface,
loc: &Location,
) {
pub(crate) fn handle_end(gc: &mut GlobalContext, surface: &mut Surface, loc: &Location) {
if let Some((l, _)) = &gc.tags.in_artifact {
if l == loc {
gc.tags.in_artifact = None;
surface.end_tagged();
if let Some((_, _, nodes)) = gc.tags.stack.last_mut() {
let id = surface.start_tagged(ContentTag::Other);
nodes.push(TagNode::Leaf(id));
}
}
return;
}
let Some((_, tag, nodes)) = gc.tags.stack.pop_if(|(l, ..)| l == loc) else {
return;
};
surface.end_tagged();
if let Some((_, _, parent_nodes)) = gc.tags.stack.last_mut() {
parent_nodes.push(TagNode::Group(tag, nodes));
let (is_root, parent_nodes) = gc.tags.parent_nodes();
parent_nodes.push(TagNode::Group(tag, nodes));
if !is_root {
// TODO: somehow avoid empty marked-content sequences
let id = surface.start_tagged(ContentTag::Other);
parent_nodes.push(TagNode::Leaf(id));
} else {
gc.tags.tree.push(TagNode::Group(tag, nodes));
}
}
/// Starts artifact marked content of the given `kind` on `surface`.
///
/// The identifier returned by the surface is appended as a leaf to the current
/// parent node list (the innermost open group, or the tree root if no group is
/// open).
fn start_artifact(gc: &mut GlobalContext, surface: &mut Surface, kind: ArtifactKind) {
    let content_tag = ContentTag::Artifact(artifact_type(kind));
    let leaf = TagNode::Leaf(surface.start_tagged(content_tag));
    gc.tags.parent_nodes().1.push(leaf);
}
/// Translates a layout-level [`ArtifactKind`] into the equally named krilla
/// [`ArtifactType`].
fn artifact_type(kind: ArtifactKind) -> ArtifactType {
    // Deliberately exhaustive so that a new kind forces a decision here.
    match kind {
        ArtifactKind::Page => ArtifactType::Page,
        ArtifactKind::Footer => ArtifactType::Footer,
        ArtifactKind::Header => ArtifactType::Header,
    }
}