test: snapshot testing of pdf tag trees

This commit is contained in:
Tobias Schmitz 2025-07-29 11:20:46 +02:00
parent e4825f7957
commit 47edb8b46c
No known key found for this signature in database
7 changed files with 110 additions and 20 deletions

5
Cargo.lock generated
View File

@ -1430,7 +1430,7 @@ dependencies = [
[[package]]
name = "krilla"
version = "0.4.0"
source = "git+https://github.com/LaurenzV/krilla?branch=main#9bfd3adc167080017c6e90a5447b83b376c881ff"
source = "git+https://github.com/LaurenzV/krilla?branch=main#576fb4f95d975ce366f18ad552a62286f6fbd0ec"
dependencies = [
"base64",
"bumpalo",
@ -1460,7 +1460,7 @@ dependencies = [
[[package]]
name = "krilla-svg"
version = "0.1.0"
source = "git+https://github.com/LaurenzV/krilla?branch=main#9bfd3adc167080017c6e90a5447b83b376c881ff"
source = "git+https://github.com/LaurenzV/krilla?branch=main#576fb4f95d975ce366f18ad552a62286f6fbd0ec"
dependencies = [
"flate2",
"fontdb",
@ -3306,6 +3306,7 @@ dependencies = [
"clap",
"comemo",
"ecow",
"krilla",
"oxipng",
"parking_lot",
"rayon",

View File

@ -9,7 +9,7 @@ use krilla::geom::PathBuilder;
use krilla::page::{PageLabel, PageSettings};
use krilla::pdf::PdfError;
use krilla::surface::Surface;
use krilla::tagging::TagId;
use krilla::tagging::{TagId, TagTree};
use krilla::{Document, SerializeSettings};
use krilla_svg::render_svg_glyph;
use typst_library::diag::{SourceDiagnostic, SourceResult, bail, error};
@ -40,6 +40,31 @@ pub fn convert(
typst_document: &PagedDocument,
options: &PdfOptions,
) -> SourceResult<Vec<u8>> {
let (mut document, mut gc) = setup(typst_document, options);
convert_pages(&mut gc, &mut document)?;
embed_files(typst_document, &mut document)?;
document.set_outline(build_outline(&gc));
document.set_metadata(build_metadata(&gc));
document.set_tag_tree(gc.tags.build_tree());
finish(document, gc, options.standards.config)
}
/// Builds the PDF tag tree for `typst_document` without producing PDF bytes.
///
/// Performs the document setup and page conversion, then returns the tree
/// assembled from the tags collected along the way. Any error raised during
/// page conversion is propagated to the caller.
pub fn tag_tree(
    typst_document: &PagedDocument,
    options: &PdfOptions,
) -> SourceResult<TagTree> {
    let (mut pdf, mut ctx) = setup(typst_document, options);
    convert_pages(&mut ctx, &mut pdf)?;
    let tree = ctx.tags.build_tree();
    Ok(tree)
}
fn setup<'a>(
typst_document: &'a PagedDocument,
options: &'a PdfOptions,
) -> (Document, GlobalContext<'a>) {
let settings = SerializeSettings {
compress_content_streams: true,
no_device_cs: true,
@ -51,26 +76,19 @@ pub fn convert(
render_svg_glyph_fn: render_svg_glyph,
};
let mut document = Document::new_with(settings);
let document = Document::new_with(settings);
let page_index_converter = PageIndexConverter::new(typst_document, options);
let named_destinations =
collect_named_destinations(typst_document, &page_index_converter);
let mut gc = GlobalContext::new(
let gc = GlobalContext::new(
typst_document,
options,
named_destinations,
page_index_converter,
);
convert_pages(&mut gc, &mut document)?;
embed_files(typst_document, &mut document)?;
document.set_outline(build_outline(&gc));
document.set_metadata(build_metadata(&gc));
document.set_tag_tree(gc.tags.build_tree());
finish(document, gc, options.standards.config)
(document, gc)
}
fn convert_pages(gc: &mut GlobalContext, document: &mut Document) -> SourceResult<()> {

View File

@ -18,6 +18,7 @@ pub use self::metadata::{Timestamp, Timezone};
use std::fmt::{self, Debug, Formatter};
use ecow::eco_format;
use krilla::tagging::TagTree;
use serde::{Deserialize, Serialize};
use typst_library::diag::{SourceResult, StrResult, bail};
use typst_library::foundations::Smart;
@ -31,6 +32,12 @@ pub fn pdf(document: &PagedDocument, options: &PdfOptions) -> SourceResult<Vec<u
convert::convert(document, options)
}
/// Generate the PDF tag tree of the document.
///
/// Returns the [`TagTree`] itself rather than a string; turning it into a
/// human readable form is left to the caller.
#[doc(hidden)]
pub fn pdf_tags(document: &PagedDocument, options: &PdfOptions) -> SourceResult<TagTree> {
convert::tag_tree(document, options)
}
/// Settings for PDF export.
#[derive(Debug, Default)]
pub struct PdfOptions<'a> {

View File

@ -25,6 +25,7 @@ default = [
"typst-render",
"typst-svg",
"typst-svg",
"krilla",
]
[dependencies]
@ -39,6 +40,7 @@ typst-library = { workspace = true, optional = true }
typst-pdf = { workspace = true, optional = true }
typst-render = { workspace = true, optional = true }
typst-svg = { workspace = true, optional = true }
krilla = { workspace = true, optional = true }
clap = { workspace = true }
comemo = { workspace = true }
ecow = { workspace = true }

View File

@ -66,6 +66,7 @@ pub enum Attr {
Html,
Render,
Large,
Pdftags,
}
/// The size of a file.
@ -304,6 +305,7 @@ impl<'a> Parser<'a> {
"large" => Attr::Large,
"html" => Attr::Html,
"render" => Attr::Render,
"pdftags" => Attr::Pdftags,
found => {
self.error(format!(
"expected attribute or closing ---, found `{found}`"

View File

@ -3,13 +3,15 @@ use std::ops::Range;
use std::path::PathBuf;
use ecow::eco_vec;
use krilla::tagging::TagTree;
use krilla::tagging::fmt::Output;
use tiny_skia as sk;
use typst::diag::{SourceDiagnostic, SourceResult, Warned};
use typst::layout::{Abs, Frame, FrameItem, PagedDocument, Transform};
use typst::visualize::Color;
use typst::{Document, WorldExt};
use typst::{World, WorldExt};
use typst_html::HtmlDocument;
use typst_pdf::PdfOptions;
use typst_pdf::{PdfOptions, PdfStandard, PdfStandards};
use typst_syntax::{FileId, Lines};
use crate::collect::{Attr, FileSize, NoteKind, Test};
@ -65,13 +67,17 @@ impl<'a> Runner<'a> {
}
let html = self.test.attrs.contains(&Attr::Html);
let render = !html || self.test.attrs.contains(&Attr::Render);
let pdftags = self.test.attrs.contains(&Attr::Pdftags);
let render = !html && !pdftags || self.test.attrs.contains(&Attr::Render);
if render {
self.run_test::<PagedDocument>();
}
if html {
self.run_test::<HtmlDocument>();
}
if pdftags {
self.run_test::<TagTree>();
}
self.handle_not_emitted();
self.handle_not_annotated();
@ -81,7 +87,7 @@ impl<'a> Runner<'a> {
/// Run test specific to document format.
fn run_test<D: OutputType>(&mut self) {
let Warned { output, warnings } = typst::compile(&self.world);
let Warned { output, warnings } = D::compile(&self.world);
let (doc, mut errors) = match output {
Ok(doc) => (Some(doc), eco_vec![]),
Err(errors) => (None, errors),
@ -89,7 +95,7 @@ impl<'a> Runner<'a> {
D::check_custom(self, doc.as_ref());
let output = doc.and_then(|doc: D| match doc.make_live() {
let output = doc.and_then(|doc| match doc.make_live() {
Ok(live) => Some((doc, live)),
Err(list) => {
errors.extend(list);
@ -357,13 +363,15 @@ impl<'a> Runner<'a> {
}
/// An output type we can test.
trait OutputType: Document {
trait OutputType: Sized {
/// The type that represents live output.
type Live;
/// The path at which the live output is stored.
fn live_path(name: &str) -> PathBuf;
fn compile(world: &dyn World) -> Warned<SourceResult<Self>>;
/// The path at which the reference output is stored.
fn ref_path(name: &str) -> PathBuf;
@ -400,6 +408,10 @@ impl OutputType for PagedDocument {
format!("{}/{}.png", crate::REF_PATH, name).into()
}
/// Compiles the world straight into this output type.
fn compile(world: &dyn World) -> Warned<SourceResult<Self>> {
    typst::compile::<Self>(world)
}
fn is_skippable(&self) -> Result<bool, ()> {
/// Whether rendering of a frame can be skipped.
fn skippable_frame(frame: &Frame) -> bool {
@ -485,6 +497,10 @@ impl OutputType for HtmlDocument {
format!("{}/html/{}.html", crate::REF_PATH, name).into()
}
/// Compiles the world straight into this output type.
fn compile(world: &dyn World) -> Warned<SourceResult<Self>> {
    typst::compile::<Self>(world)
}
fn make_live(&self) -> SourceResult<Self::Live> {
typst_html::html(self)
}
@ -502,6 +518,50 @@ impl OutputType for HtmlDocument {
}
}
impl OutputType for TagTree {
type Live = String;
fn live_path(name: &str) -> PathBuf {
format!("{}/pdftags/{}.yml", crate::STORE_PATH, name).into()
}
fn ref_path(name: &str) -> PathBuf {
format!("{}/pdftags/{}.yml", crate::REF_PATH, name).into()
}
fn compile(world: &dyn World) -> Warned<SourceResult<Self>> {
let Warned { output, warnings } = typst::compile::<PagedDocument>(world);
let doc = match output {
Ok(doc) => doc,
Err(errors) => return Warned { output: Err(errors), warnings },
};
let mut options = PdfOptions::default();
options.standards = PdfStandards::new(&[PdfStandard::Ua_1]).unwrap();
let output = typst_pdf::pdf_tags(&doc, &options);
Warned { warnings, output }
}
fn is_skippable(&self) -> Result<bool, ()> {
Ok(self.children.is_empty())
}
fn make_live(&self) -> SourceResult<Self::Live> {
Ok(self.display().to_string())
}
fn save_live(&self, name: &str, live: &Self::Live) {
std::fs::write(Self::live_path(name), live).unwrap();
}
fn make_ref(live: Self::Live) -> Vec<u8> {
live.into_bytes()
}
fn matches(live: &Self::Live, ref_data: &[u8]) -> bool {
live.as_bytes() == ref_data
}
}
/// Draw all frames into one image with padding in between.
fn render(document: &PagedDocument, pixel_per_pt: f32) -> sk::Pixmap {
for page in &document.pages {

View File

@ -64,7 +64,7 @@ fn setup() {
std::env::set_current_dir(workspace_dir).unwrap();
// Create the storage.
for ext in ["render", "html", "pdf", "svg"] {
for ext in ["render", "html", "pdf", "pdftags", "svg"] {
std::fs::create_dir_all(Path::new(STORE_PATH).join(ext)).unwrap();
}