mirror of
https://github.com/typst/typst
synced 2025-06-15 16:46:24 +08:00
feat: [WIP] write tags
skip-checks:true
This commit is contained in:
parent
7ac8f48afa
commit
8c861d2d27
@ -185,6 +185,8 @@ fn layout_page_run_impl(
|
|||||||
)?;
|
)?;
|
||||||
|
|
||||||
// Layouts a single marginal.
|
// Layouts a single marginal.
|
||||||
|
// TODO: add some sort of tag that indicates the marginals and use it to
|
||||||
|
// mark them as artifacts for PDF/UA.
|
||||||
let mut layout_marginal = |content: &Option<Content>, area, align| {
|
let mut layout_marginal = |content: &Option<Content>, area, align| {
|
||||||
let Some(content) = content else { return Ok(None) };
|
let Some(content) = content else { return Ok(None) };
|
||||||
let aligned = content.clone().styled(AlignElem::set_alignment(align));
|
let aligned = content.clone().styled(AlignElem::set_alignment(align));
|
||||||
|
@ -10,11 +10,11 @@ use krilla::error::KrillaError;
|
|||||||
use krilla::geom::PathBuilder;
|
use krilla::geom::PathBuilder;
|
||||||
use krilla::page::{PageLabel, PageSettings};
|
use krilla::page::{PageLabel, PageSettings};
|
||||||
use krilla::surface::Surface;
|
use krilla::surface::Surface;
|
||||||
use krilla::tagging::{Node, SpanTag, Tag, TagGroup, TagTree};
|
use krilla::tagging::{ArtifactType, ContentTag, Node};
|
||||||
use krilla::{Document, SerializeSettings};
|
use krilla::{Document, SerializeSettings};
|
||||||
use krilla_svg::render_svg_glyph;
|
use krilla_svg::render_svg_glyph;
|
||||||
use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult};
|
use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult};
|
||||||
use typst_library::foundations::{NativeElement, StyleChain};
|
use typst_library::foundations::NativeElement;
|
||||||
use typst_library::introspection::{self, Location};
|
use typst_library::introspection::{self, Location};
|
||||||
use typst_library::layout::{
|
use typst_library::layout::{
|
||||||
Abs, Frame, FrameItem, GroupItem, PagedDocument, Size, Transform,
|
Abs, Frame, FrameItem, GroupItem, PagedDocument, Size, Transform,
|
||||||
@ -31,6 +31,7 @@ use crate::metadata::build_metadata;
|
|||||||
use crate::outline::build_outline;
|
use crate::outline::build_outline;
|
||||||
use crate::page::PageLabelExt;
|
use crate::page::PageLabelExt;
|
||||||
use crate::shape::handle_shape;
|
use crate::shape::handle_shape;
|
||||||
|
use crate::tags::{handle_close_tag, handle_open_tag, Tags};
|
||||||
use crate::text::handle_text;
|
use crate::text::handle_text;
|
||||||
use crate::util::{convert_path, display_font, AbsExt, TransformExt};
|
use crate::util::{convert_path, display_font, AbsExt, TransformExt};
|
||||||
use crate::PdfOptions;
|
use crate::PdfOptions;
|
||||||
@ -49,6 +50,8 @@ pub fn convert(
|
|||||||
xmp_metadata: true,
|
xmp_metadata: true,
|
||||||
cmyk_profile: None,
|
cmyk_profile: None,
|
||||||
configuration: config,
|
configuration: config,
|
||||||
|
// TODO: Should we just set this to false? If set to `false` this will
|
||||||
|
// automatically be enabled if the `UA1` validator is used.
|
||||||
enable_tagging: true,
|
enable_tagging: true,
|
||||||
render_svg_glyph_fn: render_svg_glyph,
|
render_svg_glyph_fn: render_svg_glyph,
|
||||||
};
|
};
|
||||||
@ -70,12 +73,7 @@ pub fn convert(
|
|||||||
|
|
||||||
document.set_outline(build_outline(&gc));
|
document.set_outline(build_outline(&gc));
|
||||||
document.set_metadata(build_metadata(&gc));
|
document.set_metadata(build_metadata(&gc));
|
||||||
|
document.set_tag_tree(gc.tags.take_tree());
|
||||||
let mut tag_tree = TagTree::new();
|
|
||||||
for tag in gc.tags.drain(..) {
|
|
||||||
tag_tree.push(tag);
|
|
||||||
}
|
|
||||||
document.set_tag_tree(tag_tree);
|
|
||||||
|
|
||||||
finish(document, gc, options.standards.config)
|
finish(document, gc, options.standards.config)
|
||||||
}
|
}
|
||||||
@ -115,6 +113,19 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul
|
|||||||
let mut surface = page.surface();
|
let mut surface = page.surface();
|
||||||
let mut fc = FrameContext::new(typst_page.frame.size());
|
let mut fc = FrameContext::new(typst_page.frame.size());
|
||||||
|
|
||||||
|
// Marked-content may not cross page boundaries: reopen tag
|
||||||
|
// that was closed at the end of the last page.
|
||||||
|
if let Some((_, _, nodes)) = gc.tags.stack.last_mut() {
|
||||||
|
let tag = if gc.tags.in_artifact {
|
||||||
|
ContentTag::Artifact(ArtifactType::Other)
|
||||||
|
} else {
|
||||||
|
ContentTag::Other
|
||||||
|
};
|
||||||
|
// TODO: somehow avoid empty marked-content sequences
|
||||||
|
let id = surface.start_tagged(tag);
|
||||||
|
nodes.push(Node::Leaf(id));
|
||||||
|
}
|
||||||
|
|
||||||
handle_frame(
|
handle_frame(
|
||||||
&mut fc,
|
&mut fc,
|
||||||
&typst_page.frame,
|
&typst_page.frame,
|
||||||
@ -123,6 +134,11 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul
|
|||||||
gc,
|
gc,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
// Marked-content may not cross page boundaries: close open tag.
|
||||||
|
if !gc.tags.stack.is_empty() {
|
||||||
|
surface.end_tagged();
|
||||||
|
}
|
||||||
|
|
||||||
surface.finish();
|
surface.finish();
|
||||||
|
|
||||||
for annotation in fc.annotations {
|
for annotation in fc.annotations {
|
||||||
@ -235,8 +251,8 @@ pub(crate) struct GlobalContext<'a> {
|
|||||||
/// The languages used throughout the document.
|
/// The languages used throughout the document.
|
||||||
pub(crate) languages: BTreeMap<Lang, usize>,
|
pub(crate) languages: BTreeMap<Lang, usize>,
|
||||||
pub(crate) page_index_converter: PageIndexConverter,
|
pub(crate) page_index_converter: PageIndexConverter,
|
||||||
pub(crate) tag_stack: Vec<Location>,
|
/// Tagged PDF context.
|
||||||
pub(crate) tags: Vec<Node>,
|
pub(crate) tags: Tags,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> GlobalContext<'a> {
|
impl<'a> GlobalContext<'a> {
|
||||||
@ -256,8 +272,8 @@ impl<'a> GlobalContext<'a> {
|
|||||||
image_spans: HashSet::new(),
|
image_spans: HashSet::new(),
|
||||||
languages: BTreeMap::new(),
|
languages: BTreeMap::new(),
|
||||||
page_index_converter,
|
page_index_converter,
|
||||||
tag_stack: Vec::new(),
|
|
||||||
tags: Vec::new(),
|
tags: Tags::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -294,33 +310,10 @@ pub(crate) fn handle_frame(
|
|||||||
}
|
}
|
||||||
FrameItem::Link(d, s) => handle_link(fc, gc, d, *s),
|
FrameItem::Link(d, s) => handle_link(fc, gc, d, *s),
|
||||||
FrameItem::Tag(introspection::Tag::Start(elem)) => {
|
FrameItem::Tag(introspection::Tag::Start(elem)) => {
|
||||||
let Some(heading) = elem.to_packed::<HeadingElem>() else { continue };
|
handle_open_tag(gc, surface, elem)
|
||||||
let Some(loc) = heading.location() else { continue };
|
|
||||||
|
|
||||||
let level = heading.resolve_level(StyleChain::default());
|
|
||||||
let name = heading.body.plain_text().to_string();
|
|
||||||
let heading_id = surface
|
|
||||||
.start_tagged(krilla::tagging::ContentTag::Span(SpanTag::empty()));
|
|
||||||
let tag = match level.get() {
|
|
||||||
1 => Tag::H1(Some(name)),
|
|
||||||
2 => Tag::H2(Some(name)),
|
|
||||||
3 => Tag::H3(Some(name)),
|
|
||||||
4 => Tag::H4(Some(name)),
|
|
||||||
5 => Tag::H5(Some(name)),
|
|
||||||
_ => Tag::H6(Some(name)),
|
|
||||||
};
|
|
||||||
let mut tag_group = TagGroup::new(tag);
|
|
||||||
tag_group.push(Node::Leaf(heading_id));
|
|
||||||
gc.tags.push(Node::Group(tag_group));
|
|
||||||
|
|
||||||
gc.tag_stack.push(loc);
|
|
||||||
}
|
}
|
||||||
FrameItem::Tag(introspection::Tag::End(loc, _)) => {
|
FrameItem::Tag(introspection::Tag::End(loc, _)) => {
|
||||||
// FIXME: support or split up content tags that span multiple pages
|
handle_close_tag(gc, surface, loc);
|
||||||
if gc.tag_stack.last() == Some(loc) {
|
|
||||||
surface.end_tagged();
|
|
||||||
gc.tag_stack.pop();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ mod outline;
|
|||||||
mod page;
|
mod page;
|
||||||
mod paint;
|
mod paint;
|
||||||
mod shape;
|
mod shape;
|
||||||
|
mod tags;
|
||||||
mod text;
|
mod text;
|
||||||
mod util;
|
mod util;
|
||||||
|
|
||||||
|
149
crates/typst-pdf/src/tags.rs
Normal file
149
crates/typst-pdf/src/tags.rs
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
use krilla::surface::Surface;
|
||||||
|
use krilla::tagging::{ContentTag, Node, Tag, TagGroup, TagTree};
|
||||||
|
use typst_library::foundations::{Content, StyleChain};
|
||||||
|
use typst_library::introspection::Location;
|
||||||
|
use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry};
|
||||||
|
|
||||||
|
use crate::convert::GlobalContext;
|
||||||
|
|
||||||
|
pub(crate) struct Tags {
|
||||||
|
/// The intermediary stack of nested tag groups.
|
||||||
|
pub(crate) stack: Vec<(Location, Tag, Vec<Node>)>,
|
||||||
|
pub(crate) in_artifact: bool,
|
||||||
|
|
||||||
|
/// The output.
|
||||||
|
pub(crate) tree: TagTree,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Tags {
|
||||||
|
pub(crate) fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
stack: Vec::new(),
|
||||||
|
in_artifact: false,
|
||||||
|
tree: TagTree::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn take_tree(&mut self) -> TagTree {
|
||||||
|
std::mem::take(&mut self.tree)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn context_supports(&self, tag: &Tag) -> bool {
|
||||||
|
let Some((_, parent, _)) = self.stack.last() else { return true };
|
||||||
|
|
||||||
|
use Tag::*;
|
||||||
|
|
||||||
|
match parent {
|
||||||
|
Part => true,
|
||||||
|
Article => !matches!(tag, Article),
|
||||||
|
Section => true,
|
||||||
|
BlockQuote => todo!(),
|
||||||
|
Caption => todo!(),
|
||||||
|
TOC => matches!(tag, TOC | TOCI),
|
||||||
|
// TODO: NonStruct is allowed to but (currently?) not supported by krilla
|
||||||
|
TOCI => matches!(tag, TOC | Lbl | Reference | P),
|
||||||
|
Index => todo!(),
|
||||||
|
P => todo!(),
|
||||||
|
H1(_) => todo!(),
|
||||||
|
H2(_) => todo!(),
|
||||||
|
H3(_) => todo!(),
|
||||||
|
H4(_) => todo!(),
|
||||||
|
H5(_) => todo!(),
|
||||||
|
H6(_) => todo!(),
|
||||||
|
L(_list_numbering) => todo!(),
|
||||||
|
LI => todo!(),
|
||||||
|
Lbl => todo!(),
|
||||||
|
LBody => todo!(),
|
||||||
|
Table => todo!(),
|
||||||
|
TR => todo!(),
|
||||||
|
TH(_table_header_scope) => todo!(),
|
||||||
|
TD => todo!(),
|
||||||
|
THead => todo!(),
|
||||||
|
TBody => todo!(),
|
||||||
|
TFoot => todo!(),
|
||||||
|
InlineQuote => todo!(),
|
||||||
|
Note => todo!(),
|
||||||
|
Reference => todo!(),
|
||||||
|
BibEntry => todo!(),
|
||||||
|
Code => todo!(),
|
||||||
|
Link => todo!(),
|
||||||
|
Annot => todo!(),
|
||||||
|
Figure(_) => todo!(),
|
||||||
|
Formula(_) => todo!(),
|
||||||
|
Datetime => todo!(),
|
||||||
|
Terms => todo!(),
|
||||||
|
Title => todo!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn handle_open_tag(
|
||||||
|
gc: &mut GlobalContext,
|
||||||
|
surface: &mut Surface,
|
||||||
|
elem: &Content,
|
||||||
|
) {
|
||||||
|
if gc.tags.in_artifact {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(loc) = elem.location() else { return };
|
||||||
|
|
||||||
|
let tag = if let Some(heading) = elem.to_packed::<HeadingElem>() {
|
||||||
|
let level = heading.resolve_level(StyleChain::default());
|
||||||
|
let name = heading.body.plain_text().to_string();
|
||||||
|
match level.get() {
|
||||||
|
1 => Tag::H1(Some(name)),
|
||||||
|
2 => Tag::H2(Some(name)),
|
||||||
|
3 => Tag::H3(Some(name)),
|
||||||
|
4 => Tag::H4(Some(name)),
|
||||||
|
5 => Tag::H5(Some(name)),
|
||||||
|
// TODO: when targeting PDF 2.0 headings `> 6` are supported
|
||||||
|
_ => Tag::H6(Some(name)),
|
||||||
|
}
|
||||||
|
} else if let Some(_) = elem.to_packed::<OutlineElem>() {
|
||||||
|
Tag::TOC
|
||||||
|
} else if let Some(_outline_entry) = elem.to_packed::<OutlineEntry>() {
|
||||||
|
Tag::TOCI
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
if !gc.tags.context_supports(&tag) {
|
||||||
|
// TODO: error or warning?
|
||||||
|
}
|
||||||
|
|
||||||
|
// close previous marked-content and open a nested tag.
|
||||||
|
if !gc.tags.stack.is_empty() {
|
||||||
|
surface.end_tagged();
|
||||||
|
}
|
||||||
|
let content_id = surface.start_tagged(krilla::tagging::ContentTag::Other);
|
||||||
|
|
||||||
|
gc.tags.stack.push((loc, tag, vec![Node::Leaf(content_id)]));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn handle_close_tag(
|
||||||
|
gc: &mut GlobalContext,
|
||||||
|
surface: &mut Surface,
|
||||||
|
loc: &Location,
|
||||||
|
) {
|
||||||
|
let Some((_, tag, nodes)) = gc.tags.stack.pop_if(|(l, ..)| l == loc) else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
// TODO: contstruct group directly from nodes
|
||||||
|
let mut tag_group = TagGroup::new(tag);
|
||||||
|
for node in nodes {
|
||||||
|
tag_group.push(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
surface.end_tagged();
|
||||||
|
|
||||||
|
if let Some((_, _, parent_nodes)) = gc.tags.stack.last_mut() {
|
||||||
|
parent_nodes.push(Node::Group(tag_group));
|
||||||
|
|
||||||
|
// TODO: somehow avoid empty marked-content sequences
|
||||||
|
let id = surface.start_tagged(ContentTag::Other);
|
||||||
|
parent_nodes.push(Node::Leaf(id));
|
||||||
|
} else {
|
||||||
|
gc.tags.tree.push(Node::Group(tag_group));
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user