mirror of
https://github.com/typst/typst
synced 2025-06-15 00:26:26 +08:00
feat: [WIP] write tags
skip-checks:true
This commit is contained in:
parent
7ac8f48afa
commit
8c861d2d27
@ -185,6 +185,8 @@ fn layout_page_run_impl(
|
||||
)?;
|
||||
|
||||
// Layouts a single marginal.
|
||||
// TODO: add some sort of tag that indicates the marginals and use it to
|
||||
// mark them as artifacts for PDF/UA.
|
||||
let mut layout_marginal = |content: &Option<Content>, area, align| {
|
||||
let Some(content) = content else { return Ok(None) };
|
||||
let aligned = content.clone().styled(AlignElem::set_alignment(align));
|
||||
|
@ -10,11 +10,11 @@ use krilla::error::KrillaError;
|
||||
use krilla::geom::PathBuilder;
|
||||
use krilla::page::{PageLabel, PageSettings};
|
||||
use krilla::surface::Surface;
|
||||
use krilla::tagging::{Node, SpanTag, Tag, TagGroup, TagTree};
|
||||
use krilla::tagging::{ArtifactType, ContentTag, Node};
|
||||
use krilla::{Document, SerializeSettings};
|
||||
use krilla_svg::render_svg_glyph;
|
||||
use typst_library::diag::{bail, error, SourceDiagnostic, SourceResult};
|
||||
use typst_library::foundations::{NativeElement, StyleChain};
|
||||
use typst_library::foundations::NativeElement;
|
||||
use typst_library::introspection::{self, Location};
|
||||
use typst_library::layout::{
|
||||
Abs, Frame, FrameItem, GroupItem, PagedDocument, Size, Transform,
|
||||
@ -31,6 +31,7 @@ use crate::metadata::build_metadata;
|
||||
use crate::outline::build_outline;
|
||||
use crate::page::PageLabelExt;
|
||||
use crate::shape::handle_shape;
|
||||
use crate::tags::{handle_close_tag, handle_open_tag, Tags};
|
||||
use crate::text::handle_text;
|
||||
use crate::util::{convert_path, display_font, AbsExt, TransformExt};
|
||||
use crate::PdfOptions;
|
||||
@ -49,6 +50,8 @@ pub fn convert(
|
||||
xmp_metadata: true,
|
||||
cmyk_profile: None,
|
||||
configuration: config,
|
||||
// TODO: Should we just set this to false? If set to `false` this will
|
||||
// automatically be enabled if the `UA1` validator is used.
|
||||
enable_tagging: true,
|
||||
render_svg_glyph_fn: render_svg_glyph,
|
||||
};
|
||||
@ -70,12 +73,7 @@ pub fn convert(
|
||||
|
||||
document.set_outline(build_outline(&gc));
|
||||
document.set_metadata(build_metadata(&gc));
|
||||
|
||||
let mut tag_tree = TagTree::new();
|
||||
for tag in gc.tags.drain(..) {
|
||||
tag_tree.push(tag);
|
||||
}
|
||||
document.set_tag_tree(tag_tree);
|
||||
document.set_tag_tree(gc.tags.take_tree());
|
||||
|
||||
finish(document, gc, options.standards.config)
|
||||
}
|
||||
@ -115,6 +113,19 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul
|
||||
let mut surface = page.surface();
|
||||
let mut fc = FrameContext::new(typst_page.frame.size());
|
||||
|
||||
// Marked-content may not cross page boundaries: reopen tag
|
||||
// that was closed at the end of the last page.
|
||||
if let Some((_, _, nodes)) = gc.tags.stack.last_mut() {
|
||||
let tag = if gc.tags.in_artifact {
|
||||
ContentTag::Artifact(ArtifactType::Other)
|
||||
} else {
|
||||
ContentTag::Other
|
||||
};
|
||||
// TODO: somehow avoid empty marked-content sequences
|
||||
let id = surface.start_tagged(tag);
|
||||
nodes.push(Node::Leaf(id));
|
||||
}
|
||||
|
||||
handle_frame(
|
||||
&mut fc,
|
||||
&typst_page.frame,
|
||||
@ -123,6 +134,11 @@ fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResul
|
||||
gc,
|
||||
)?;
|
||||
|
||||
// Marked-content may not cross page boundaries: close open tag.
|
||||
if !gc.tags.stack.is_empty() {
|
||||
surface.end_tagged();
|
||||
}
|
||||
|
||||
surface.finish();
|
||||
|
||||
for annotation in fc.annotations {
|
||||
@ -235,8 +251,8 @@ pub(crate) struct GlobalContext<'a> {
|
||||
/// The languages used throughout the document.
|
||||
pub(crate) languages: BTreeMap<Lang, usize>,
|
||||
pub(crate) page_index_converter: PageIndexConverter,
|
||||
pub(crate) tag_stack: Vec<Location>,
|
||||
pub(crate) tags: Vec<Node>,
|
||||
/// Tagged PDF context.
|
||||
pub(crate) tags: Tags,
|
||||
}
|
||||
|
||||
impl<'a> GlobalContext<'a> {
|
||||
@ -256,8 +272,8 @@ impl<'a> GlobalContext<'a> {
|
||||
image_spans: HashSet::new(),
|
||||
languages: BTreeMap::new(),
|
||||
page_index_converter,
|
||||
tag_stack: Vec::new(),
|
||||
tags: Vec::new(),
|
||||
|
||||
tags: Tags::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -294,33 +310,10 @@ pub(crate) fn handle_frame(
|
||||
}
|
||||
FrameItem::Link(d, s) => handle_link(fc, gc, d, *s),
|
||||
FrameItem::Tag(introspection::Tag::Start(elem)) => {
|
||||
let Some(heading) = elem.to_packed::<HeadingElem>() else { continue };
|
||||
let Some(loc) = heading.location() else { continue };
|
||||
|
||||
let level = heading.resolve_level(StyleChain::default());
|
||||
let name = heading.body.plain_text().to_string();
|
||||
let heading_id = surface
|
||||
.start_tagged(krilla::tagging::ContentTag::Span(SpanTag::empty()));
|
||||
let tag = match level.get() {
|
||||
1 => Tag::H1(Some(name)),
|
||||
2 => Tag::H2(Some(name)),
|
||||
3 => Tag::H3(Some(name)),
|
||||
4 => Tag::H4(Some(name)),
|
||||
5 => Tag::H5(Some(name)),
|
||||
_ => Tag::H6(Some(name)),
|
||||
};
|
||||
let mut tag_group = TagGroup::new(tag);
|
||||
tag_group.push(Node::Leaf(heading_id));
|
||||
gc.tags.push(Node::Group(tag_group));
|
||||
|
||||
gc.tag_stack.push(loc);
|
||||
handle_open_tag(gc, surface, elem)
|
||||
}
|
||||
FrameItem::Tag(introspection::Tag::End(loc, _)) => {
|
||||
// FIXME: support or split up content tags that span multiple pages
|
||||
if gc.tag_stack.last() == Some(loc) {
|
||||
surface.end_tagged();
|
||||
gc.tag_stack.pop();
|
||||
}
|
||||
handle_close_tag(gc, surface, loc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@ mod outline;
|
||||
mod page;
|
||||
mod paint;
|
||||
mod shape;
|
||||
mod tags;
|
||||
mod text;
|
||||
mod util;
|
||||
|
||||
|
149
crates/typst-pdf/src/tags.rs
Normal file
149
crates/typst-pdf/src/tags.rs
Normal file
@ -0,0 +1,149 @@
|
||||
use krilla::surface::Surface;
|
||||
use krilla::tagging::{ContentTag, Node, Tag, TagGroup, TagTree};
|
||||
use typst_library::foundations::{Content, StyleChain};
|
||||
use typst_library::introspection::Location;
|
||||
use typst_library::model::{HeadingElem, OutlineElem, OutlineEntry};
|
||||
|
||||
use crate::convert::GlobalContext;
|
||||
|
||||
pub(crate) struct Tags {
|
||||
/// The intermediary stack of nested tag groups.
|
||||
pub(crate) stack: Vec<(Location, Tag, Vec<Node>)>,
|
||||
pub(crate) in_artifact: bool,
|
||||
|
||||
/// The output.
|
||||
pub(crate) tree: TagTree,
|
||||
}
|
||||
|
||||
impl Tags {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
stack: Vec::new(),
|
||||
in_artifact: false,
|
||||
tree: TagTree::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn take_tree(&mut self) -> TagTree {
|
||||
std::mem::take(&mut self.tree)
|
||||
}
|
||||
|
||||
pub(crate) fn context_supports(&self, tag: &Tag) -> bool {
|
||||
let Some((_, parent, _)) = self.stack.last() else { return true };
|
||||
|
||||
use Tag::*;
|
||||
|
||||
match parent {
|
||||
Part => true,
|
||||
Article => !matches!(tag, Article),
|
||||
Section => true,
|
||||
BlockQuote => todo!(),
|
||||
Caption => todo!(),
|
||||
TOC => matches!(tag, TOC | TOCI),
|
||||
// TODO: NonStruct is allowed to but (currently?) not supported by krilla
|
||||
TOCI => matches!(tag, TOC | Lbl | Reference | P),
|
||||
Index => todo!(),
|
||||
P => todo!(),
|
||||
H1(_) => todo!(),
|
||||
H2(_) => todo!(),
|
||||
H3(_) => todo!(),
|
||||
H4(_) => todo!(),
|
||||
H5(_) => todo!(),
|
||||
H6(_) => todo!(),
|
||||
L(_list_numbering) => todo!(),
|
||||
LI => todo!(),
|
||||
Lbl => todo!(),
|
||||
LBody => todo!(),
|
||||
Table => todo!(),
|
||||
TR => todo!(),
|
||||
TH(_table_header_scope) => todo!(),
|
||||
TD => todo!(),
|
||||
THead => todo!(),
|
||||
TBody => todo!(),
|
||||
TFoot => todo!(),
|
||||
InlineQuote => todo!(),
|
||||
Note => todo!(),
|
||||
Reference => todo!(),
|
||||
BibEntry => todo!(),
|
||||
Code => todo!(),
|
||||
Link => todo!(),
|
||||
Annot => todo!(),
|
||||
Figure(_) => todo!(),
|
||||
Formula(_) => todo!(),
|
||||
Datetime => todo!(),
|
||||
Terms => todo!(),
|
||||
Title => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn handle_open_tag(
|
||||
gc: &mut GlobalContext,
|
||||
surface: &mut Surface,
|
||||
elem: &Content,
|
||||
) {
|
||||
if gc.tags.in_artifact {
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(loc) = elem.location() else { return };
|
||||
|
||||
let tag = if let Some(heading) = elem.to_packed::<HeadingElem>() {
|
||||
let level = heading.resolve_level(StyleChain::default());
|
||||
let name = heading.body.plain_text().to_string();
|
||||
match level.get() {
|
||||
1 => Tag::H1(Some(name)),
|
||||
2 => Tag::H2(Some(name)),
|
||||
3 => Tag::H3(Some(name)),
|
||||
4 => Tag::H4(Some(name)),
|
||||
5 => Tag::H5(Some(name)),
|
||||
// TODO: when targeting PDF 2.0 headings `> 6` are supported
|
||||
_ => Tag::H6(Some(name)),
|
||||
}
|
||||
} else if let Some(_) = elem.to_packed::<OutlineElem>() {
|
||||
Tag::TOC
|
||||
} else if let Some(_outline_entry) = elem.to_packed::<OutlineEntry>() {
|
||||
Tag::TOCI
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
|
||||
if !gc.tags.context_supports(&tag) {
|
||||
// TODO: error or warning?
|
||||
}
|
||||
|
||||
// close previous marked-content and open a nested tag.
|
||||
if !gc.tags.stack.is_empty() {
|
||||
surface.end_tagged();
|
||||
}
|
||||
let content_id = surface.start_tagged(krilla::tagging::ContentTag::Other);
|
||||
|
||||
gc.tags.stack.push((loc, tag, vec![Node::Leaf(content_id)]));
|
||||
}
|
||||
|
||||
pub(crate) fn handle_close_tag(
|
||||
gc: &mut GlobalContext,
|
||||
surface: &mut Surface,
|
||||
loc: &Location,
|
||||
) {
|
||||
let Some((_, tag, nodes)) = gc.tags.stack.pop_if(|(l, ..)| l == loc) else {
|
||||
return;
|
||||
};
|
||||
// TODO: contstruct group directly from nodes
|
||||
let mut tag_group = TagGroup::new(tag);
|
||||
for node in nodes {
|
||||
tag_group.push(node);
|
||||
}
|
||||
|
||||
surface.end_tagged();
|
||||
|
||||
if let Some((_, _, parent_nodes)) = gc.tags.stack.last_mut() {
|
||||
parent_nodes.push(Node::Group(tag_group));
|
||||
|
||||
// TODO: somehow avoid empty marked-content sequences
|
||||
let id = surface.start_tagged(ContentTag::Other);
|
||||
parent_nodes.push(Node::Leaf(id));
|
||||
} else {
|
||||
gc.tags.tree.push(Node::Group(tag_group));
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user