feat: [no ci] pdf.tag function to manually create pdf tags

This commit is contained in:
Tobias Schmitz 2025-06-20 16:01:04 +02:00
parent e6a222647a
commit 05f7946d82
No known key found for this signature in database
3 changed files with 191 additions and 3 deletions

View File

@ -1,3 +1,4 @@
use ecow::EcoString;
use typst_macros::{cast, elem}; use typst_macros::{cast, elem};
use crate::diag::SourceResult; use crate::diag::SourceResult;
@ -6,9 +7,189 @@ use crate::foundations::{Content, Packed, Show, StyleChain};
use crate::introspection::Locatable; use crate::introspection::Locatable;
// TODO: docs // TODO: docs
/// Manually wraps content in a PDF structure tag.
///
/// The element renders as its body unchanged; the tag kind and the optional
/// text attributes below are carried along for PDF export.
#[elem(Locatable, Show)]
pub struct PdfTagElem {
/// The kind of PDF structure element to tag the body with.
///
/// Defaults to a nonstructural element (`NonStruct`).
#[default(PdfTagKind::NonStruct)]
pub kind: PdfTagKind,
/// An alternate description.
pub alt: Option<EcoString>,
/// Exact replacement for this structure element and its children.
pub actual_text: Option<EcoString>,
/// The expanded form of an abbreviation/acronym.
pub expansion: Option<EcoString>,
/// The content to wrap in the tag.
#[required]
pub body: Content,
}
impl Show for Packed<PdfTagElem> {
    /// Shows the tag element as a copy of its body.
    ///
    /// Neither the engine nor the style chain is consulted; the element adds
    /// nothing to the visible output.
    #[typst_macros::time(name = "pdf.tag", span = self.span())]
    fn show(&self, _engine: &mut Engine, _styles: StyleChain) -> SourceResult<Content> {
        let body = self.body.clone();
        Ok(body)
    }
}
// TODO: docs
/// The kinds of PDF structure elements that content can be tagged with.
///
/// Variants are named after the structure type's abbreviation; the doc
/// comment on each variant gives the expanded name in parentheses. Some
/// kinds carry extra attributes (a heading title, a list numbering, a table
/// header scope).
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum PdfTagKind {
    // grouping elements
    /// (Part)
    Part,
    /// (Article)
    Art,
    /// (Section)
    Sect,
    /// (Division)
    Div,
    /// (Block quotation)
    BlockQuote,
    /// (Caption)
    Caption,
    /// (Table of contents)
    TOC,
    /// (Table of contents item)
    TOCI,
    /// (Index)
    Index,
    /// (Nonstructural element)
    NonStruct,
    /// (Private element)
    Private,

    // paragraph like elements
    /// (Heading)
    H { title: Option<EcoString> },
    /// (Heading level 1)
    H1 { title: Option<EcoString> },
    /// (Heading level 2)
    H2 { title: Option<EcoString> },
    /// (Heading level 3)
    H3 { title: Option<EcoString> },
    /// (Heading level 4)
    H4 { title: Option<EcoString> },
    /// (Heading level 5)
    H5 { title: Option<EcoString> },
    /// (Heading level 6)
    H6 { title: Option<EcoString> },
    /// (Paragraph)
    P,

    // list elements
    /// (List)
    L { numbering: ListNumbering },
    /// (List item)
    LI,
    /// (Label)
    Lbl,
    /// (List body)
    LBody,

    // table elements
    /// (Table)
    Table,
    /// (Table row)
    TR,
    /// (Table header)
    TH { scope: TableHeaderScope },
    /// (Table data cell)
    TD,
    /// (Table header row group)
    THead,
    /// (Table body row group)
    TBody,
    /// (Table footer row group)
    TFoot,

    // inline elements
    /// (Span)
    Span,
    /// (Quotation)
    Quote,
    /// (Note)
    Note,
    /// (Reference)
    Reference,
    /// (Bibliography Entry)
    BibEntry,
    /// (Code)
    Code,
    /// (Link)
    Link,
    /// (Annotation)
    Annot,
    /// (Ruby)
    Ruby,
    /// (Ruby base text)
    RB,
    /// (Ruby annotation text)
    RT,
    /// (Ruby punctuation)
    RP,
    /// (Warichu)
    Warichu,
    /// (Warichu text)
    WT,
    /// (Warichu punctuation)
    WP,
    /// (Figure)
    Figure,
    /// (Formula)
    Formula,
    /// (Form)
    Form,
}
// Conversions between `PdfTagKind` and Typst values.
//
// Work in progress: only `Part` (the string "part") is wired up in either
// direction. Converting any other kind into a value reaches `todo!()` and
// panics -- this includes `PdfTagKind::NonStruct`, the default kind of
// `PdfTagElem`.
cast! {
PdfTagKind,
self => match self {
PdfTagKind::Part => "part".into_value(),
// Not implemented yet: panics for every kind other than `Part`.
_ => todo!(),
},
"part" => Self::Part,
// TODO: accept the remaining kinds.
}
/// The numbering style of a list.
///
/// Carried by the `PdfTagKind::L` (List) tag kind.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ListNumbering {
/// No numbering.
None,
/// Solid circular bullets.
Disc,
/// Open circular bullets.
Circle,
/// Solid square bullets.
Square,
/// Decimal numbers.
Decimal,
/// Lowercase Roman numerals.
LowerRoman,
/// Uppercase Roman numerals.
UpperRoman,
/// Lowercase letters.
LowerAlpha,
/// Uppercase letters.
UpperAlpha,
}
/// The scope of a table header cell.
///
/// Carried by the `PdfTagKind::TH` (Table header) tag kind.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum TableHeaderScope {
/// The header cell refers to the row.
Row,
/// The header cell refers to the column.
Column,
/// The header cell refers to both the row and the column.
Both,
}
/// Mark content as a PDF artifact. /// Mark content as a PDF artifact.
/// TODO: also use to mark html elements with `aria-hidden="true"`? /// TODO: maybe generalize this and use it to mark html elements with `aria-hidden="true"`?
#[elem(Locatable, Show)] #[elem(Locatable, Show)]
pub struct ArtifactElem { pub struct ArtifactElem {
#[default(ArtifactKind::Other)] #[default(ArtifactKind::Other)]

View File

@ -13,6 +13,7 @@ pub fn module() -> Module {
let mut pdf = Scope::deduplicating(); let mut pdf = Scope::deduplicating();
pdf.start_category(crate::Category::Pdf); pdf.start_category(crate::Category::Pdf);
pdf.define_elem::<EmbedElem>(); pdf.define_elem::<EmbedElem>();
pdf.define_elem::<PdfTagElem>();
pdf.define_elem::<ArtifactElem>(); pdf.define_elem::<ArtifactElem>();
Module::new("pdf", pdf) Module::new("pdf", pdf)
} }

View File

@ -12,7 +12,7 @@ use typst_library::model::{
Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineElem, Destination, FigureCaption, FigureElem, HeadingElem, Outlinable, OutlineElem,
OutlineEntry, OutlineEntry,
}; };
use typst_library::pdf::{ArtifactElem, ArtifactKind}; use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfTagElem, PdfTagKind};
use typst_library::visualize::ImageElem; use typst_library::visualize::ImageElem;
use crate::convert::GlobalContext; use crate::convert::GlobalContext;
@ -196,7 +196,13 @@ pub(crate) fn handle_start(
let mut link_id = None; let mut link_id = None;
let mut wrappers = Vec::new(); let mut wrappers = Vec::new();
let tag = if let Some(heading) = elem.to_packed::<HeadingElem>() { let tag = if let Some(pdf_tag) = elem.to_packed::<PdfTagElem>() {
let kind = pdf_tag.kind(StyleChain::default());
match kind {
PdfTagKind::Part => Tag::Part,
_ => todo!(),
}
} else if let Some(heading) = elem.to_packed::<HeadingElem>() {
let level = heading.level(); let level = heading.level();
let name = heading.body.plain_text().to_string(); let name = heading.body.plain_text().to_string();
match level.get() { match level.get() {