Compare commits

...

5 Commits

Author SHA1 Message Date
Tobias Schmitz
7d5b9a716f
feat: wrap table cell content in a paragraph 2025-07-07 12:30:56 +02:00
Tobias Schmitz
b0d3c2dca4
test: table header id generation 2025-07-07 12:28:53 +02:00
Tobias Schmitz
58c6729df4
feat: generate human readable table cell IDs
In almost all real-world cases these IDs require less memory than the binary IDs
used before, and they also require less storage in PDF files, since binary
data is encoded in hex escape sequences, taking up 4 bytes per byte of data.
2025-07-07 10:52:20 +02:00
Tobias Schmitz
157e0fa142
fix: generate cell id with correct indices 2025-07-04 15:56:39 +02:00
Tobias Schmitz
4dceb7f5ef
refactor: update krilla 2025-07-04 10:37:46 +02:00
7 changed files with 240 additions and 49 deletions

29
Cargo.lock generated
View File

@ -592,6 +592,12 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]] [[package]]
name = "dirs" name = "dirs"
version = "6.0.0" version = "6.0.0"
@ -1367,7 +1373,7 @@ dependencies = [
[[package]] [[package]]
name = "krilla" name = "krilla"
version = "0.4.0" version = "0.4.0"
source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2897351d6eeb139675b5e7e2765fe6f082e26efd" source = "git+https://github.com/saecki/krilla?branch=tag-attributes#736d8b7e2c9c43d3fcf8b6bf31fb1a179605cab9"
dependencies = [ dependencies = [
"base64", "base64",
"bumpalo", "bumpalo",
@ -1396,7 +1402,7 @@ dependencies = [
[[package]] [[package]]
name = "krilla-svg" name = "krilla-svg"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2897351d6eeb139675b5e7e2765fe6f082e26efd" source = "git+https://github.com/saecki/krilla?branch=tag-attributes#736d8b7e2c9c43d3fcf8b6bf31fb1a179605cab9"
dependencies = [ dependencies = [
"flate2", "flate2",
"fontdb", "fontdb",
@ -1982,6 +1988,16 @@ dependencies = [
"zerocopy", "zerocopy",
] ]
[[package]]
name = "pretty_assertions"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d"
dependencies = [
"diff",
"yansi",
]
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.93" version = "1.0.93"
@ -3131,6 +3147,7 @@ dependencies = [
name = "typst-pdf" name = "typst-pdf"
version = "0.13.1" version = "0.13.1"
dependencies = [ dependencies = [
"az",
"bytemuck", "bytemuck",
"comemo", "comemo",
"ecow", "ecow",
@ -3138,7 +3155,9 @@ dependencies = [
"infer", "infer",
"krilla", "krilla",
"krilla-svg", "krilla-svg",
"pretty_assertions",
"serde", "serde",
"smallvec",
"typst-assets", "typst-assets",
"typst-library", "typst-library",
"typst-macros", "typst-macros",
@ -3801,6 +3820,12 @@ dependencies = [
"linked-hash-map", "linked-hash-map",
] ]
[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]] [[package]]
name = "yoke" name = "yoke"
version = "0.7.5" version = "0.7.5"

View File

@ -92,6 +92,7 @@ phf = { version = "0.11", features = ["macros"] }
pixglyph = "0.6" pixglyph = "0.6"
png = "0.17" png = "0.17"
portable-atomic = "1.6" portable-atomic = "1.6"
pretty_assertions = "1.4.1"
proc-macro2 = "1" proc-macro2 = "1"
pulldown-cmark = "0.9" pulldown-cmark = "0.9"
qcms = "0.3.0" qcms = "0.3.0"

View File

@ -19,6 +19,7 @@ typst-macros = { workspace = true }
typst-syntax = { workspace = true } typst-syntax = { workspace = true }
typst-timing = { workspace = true } typst-timing = { workspace = true }
typst-utils = { workspace = true } typst-utils = { workspace = true }
az = { workspace = true }
bytemuck = { workspace = true } bytemuck = { workspace = true }
comemo = { workspace = true } comemo = { workspace = true }
ecow = { workspace = true } ecow = { workspace = true }
@ -27,6 +28,10 @@ infer = { workspace = true }
krilla = { workspace = true } krilla = { workspace = true }
krilla-svg = { workspace = true } krilla-svg = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
smallvec = { workspace = true }
[dev-dependencies]
pretty_assertions = { workspace = true }
[lints] [lints]
workspace = true workspace = true

View File

@ -598,7 +598,7 @@ fn convert_error(
let span = to_span(*loc); let span = to_span(*loc);
error!(span, "{prefix} duplicate tag id") error!(span, "{prefix} duplicate tag id")
} }
ValidationError::UnknownHeaderTagId(_id, loc) => { ValidationError::UnknownTagId(_id, loc) => {
// TODO: display the id and better error message // TODO: display the id and better error message
let span = to_span(*loc); let span = to_span(*loc);
error!(span, "{prefix} unknown header tag id") error!(span, "{prefix} unknown header tag id")

View File

@ -83,7 +83,8 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) -> SourceResu
TagKind::Caption.into() TagKind::Caption.into()
} else if let Some(table) = elem.to_packed::<TableElem>() { } else if let Some(table) = elem.to_packed::<TableElem>() {
let table_id = gc.tags.next_table_id(); let table_id = gc.tags.next_table_id();
let ctx = TableCtx::new(table_id, table.clone()); let summary = table.summary(StyleChain::default()).map(EcoString::into);
let ctx = TableCtx::new(table_id, summary);
push_stack(gc, loc, StackEntryKind::Table(ctx))?; push_stack(gc, loc, StackEntryKind::Table(ctx))?;
return Ok(()); return Ok(());
} else if let Some(cell) = elem.to_packed::<TableCell>() { } else if let Some(cell) = elem.to_packed::<TableCell>() {
@ -151,10 +152,7 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) {
let node = match entry.kind { let node = match entry.kind {
StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes), StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes),
StackEntryKind::Outline(ctx) => { StackEntryKind::Outline(ctx) => ctx.build_outline(entry.nodes),
let nodes = ctx.build_outline(entry.nodes);
TagNode::Group(TagKind::TOC.into(), nodes)
}
StackEntryKind::OutlineEntry(outline_entry) => { StackEntryKind::OutlineEntry(outline_entry) => {
let parent = gc.tags.stack.last_mut().and_then(|parent| { let parent = gc.tags.stack.last_mut().and_then(|parent| {
let ctx = parent.kind.as_outline_mut()?; let ctx = parent.kind.as_outline_mut()?;
@ -172,11 +170,7 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) {
outline_ctx.insert(parent_nodes, outline_entry, entry.nodes); outline_ctx.insert(parent_nodes, outline_entry, entry.nodes);
return; return;
} }
StackEntryKind::Table(ctx) => { StackEntryKind::Table(ctx) => ctx.build_table(entry.nodes),
let summary = ctx.table.summary(StyleChain::default()).map(EcoString::into);
let nodes = ctx.build_table(entry.nodes);
TagNode::Group(TagKind::Table(summary).into(), nodes)
}
StackEntryKind::TableCell(cell) => { StackEntryKind::TableCell(cell) => {
let Some(table_ctx) = gc.tags.parent_table() else { let Some(table_ctx) = gc.tags.parent_table() else {
// PDF/UA compliance of the structure hierarchy is checked // PDF/UA compliance of the structure hierarchy is checked
@ -385,7 +379,7 @@ impl StackEntryKind {
} }
} }
#[derive(Clone)] #[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum TagNode { pub(crate) enum TagNode {
Group(Tag, Vec<TagNode>), Group(Tag, Vec<TagNode>),
Leaf(Identifier), Leaf(Identifier),
@ -394,7 +388,7 @@ pub(crate) enum TagNode {
Placeholder(Placeholder), Placeholder(Placeholder),
} }
#[derive(Clone, Copy)] #[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) struct Placeholder(usize); pub(crate) struct Placeholder(usize);
/// Automatically calls [`Surface::end_tagged`] when dropped. /// Automatically calls [`Surface::end_tagged`] when dropped.

View File

@ -44,14 +44,11 @@ impl OutlineCtx {
} }
} }
pub(crate) fn build_outline( pub(crate) fn build_outline(mut self, mut outline_nodes: Vec<TagNode>) -> TagNode {
mut self,
mut outline_nodes: Vec<TagNode>,
) -> Vec<TagNode> {
while !self.stack.is_empty() { while !self.stack.is_empty() {
self.finish_section(&mut outline_nodes); self.finish_section(&mut outline_nodes);
} }
outline_nodes TagNode::Group(TagKind::TOC.into(), outline_nodes)
} }
} }

View File

@ -1,23 +1,25 @@
use std::num::{NonZeroU32, NonZeroUsize}; use std::io::Write as _;
use std::num::NonZeroU32;
use az::SaturatingAs;
use krilla::tagging::{ use krilla::tagging::{
TableCellHeaders, TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, TagIdRefs, TagKind,
TagKind,
}; };
use smallvec::SmallVec;
use typst_library::foundations::{Packed, Smart, StyleChain}; use typst_library::foundations::{Packed, Smart, StyleChain};
use typst_library::model::{TableCell, TableCellKind, TableElem, TableHeaderScope}; use typst_library::model::{TableCell, TableCellKind, TableHeaderScope};
use crate::tags::{TableId, TagNode}; use crate::tags::{TableId, TagNode};
pub(crate) struct TableCtx { pub(crate) struct TableCtx {
pub(crate) id: TableId, pub(crate) id: TableId,
pub(crate) table: Packed<TableElem>, pub(crate) summary: Option<String>,
rows: Vec<Vec<GridCell>>, rows: Vec<Vec<GridCell>>,
} }
impl TableCtx { impl TableCtx {
pub(crate) fn new(id: TableId, table: Packed<TableElem>) -> Self { pub(crate) fn new(id: TableId, summary: Option<String>) -> Self {
Self { id, table: table.clone(), rows: Vec::new() } Self { id, summary, rows: Vec::new() }
} }
fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> { fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> {
@ -79,21 +81,21 @@ impl TableCtx {
} }
self.rows[y][x] = GridCell::Cell(TableCtxCell { self.rows[y][x] = GridCell::Cell(TableCtxCell {
x: x as u32, x: x.saturating_as(),
y: y as u32, y: y.saturating_as(),
rowspan, rowspan: rowspan.try_into().unwrap_or(NonZeroU32::MAX),
colspan, colspan: colspan.try_into().unwrap_or(NonZeroU32::MAX),
kind, kind,
headers: TableCellHeaders::NONE, headers: TagIdRefs::NONE,
nodes, nodes,
}); });
} }
pub(crate) fn build_table(mut self, mut nodes: Vec<TagNode>) -> Vec<TagNode> { pub(crate) fn build_table(mut self, mut nodes: Vec<TagNode>) -> TagNode {
// Table layouting ensures that there are no overlapping cells, and that // Table layouting ensures that there are no overlapping cells, and that
// any gaps left by the user are filled with empty cells. // any gaps left by the user are filled with empty cells.
if self.rows.is_empty() { if self.rows.is_empty() {
return nodes; return TagNode::Group(TagKind::Table(self.summary).into(), nodes);
} }
let height = self.rows.len(); let height = self.rows.len();
let width = self.rows[0].len(); let width = self.rows[0].len();
@ -158,10 +160,7 @@ impl TableCtx {
.into_iter() .into_iter()
.filter_map(|cell| { .filter_map(|cell| {
let cell = cell.into_cell()?; let cell = cell.into_cell()?;
let span = TableCellSpan { let span = TableCellSpan { rows: cell.rowspan, cols: cell.colspan };
rows: cell.rowspan.try_into().unwrap_or(NonZeroU32::MAX),
cols: cell.colspan.try_into().unwrap_or(NonZeroU32::MAX),
};
let tag = match cell.unwrap_kind() { let tag = match cell.unwrap_kind() {
TableCellKind::Header(_, scope) => { TableCellKind::Header(_, scope) => {
let id = table_cell_id(self.id, cell.x, cell.y); let id = table_cell_id(self.id, cell.x, cell.y);
@ -181,7 +180,10 @@ impl TableCtx {
.into(), .into(),
}; };
Some(TagNode::Group(tag, cell.nodes)) // Wrap content in a paragraph.
// TODO: maybe avoid nested paragraphs?
let par = TagNode::Group(TagKind::P.into(), cell.nodes);
Some(TagNode::Group(tag, vec![par]))
}) })
.collect(); .collect();
@ -216,7 +218,7 @@ impl TableCtx {
nodes.push(TagNode::Group(tag.into(), row_chunk)); nodes.push(TagNode::Group(tag.into(), row_chunk));
} }
nodes TagNode::Group(TagKind::Table(self.summary).into(), nodes)
} }
fn resolve_cell_headers<F>( fn resolve_cell_headers<F>(
@ -246,7 +248,7 @@ impl TableCtx {
if let TableCellKind::Header(level, scope) = cell.unwrap_kind() { if let TableCellKind::Header(level, scope) = cell.unwrap_kind() {
if refers_to_dir(&scope) { if refers_to_dir(&scope) {
let tag_id = table_cell_id(table_id, x as u32, y as u32); let tag_id = table_cell_id(table_id, cell.x, cell.y);
*current_header = Some((level, tag_id)); *current_header = Some((level, tag_id));
} }
} }
@ -291,10 +293,10 @@ impl GridCell {
struct TableCtxCell { struct TableCtxCell {
x: u32, x: u32,
y: u32, y: u32,
rowspan: NonZeroUsize, rowspan: NonZeroU32,
colspan: NonZeroUsize, colspan: NonZeroU32,
kind: Smart<TableCellKind>, kind: Smart<TableCellKind>,
headers: TableCellHeaders, headers: TagIdRefs,
nodes: Vec<TagNode>, nodes: Vec<TagNode>,
} }
@ -314,11 +316,9 @@ fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool {
} }
fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId { fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId {
let mut bytes = [0; 12]; let mut buf = SmallVec::new();
bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes()); _ = write!(&mut buf, "{}x{x}y{y}", table_id.0);
bytes[4..8].copy_from_slice(&x.to_ne_bytes()); TagId::from_smallvec(buf)
bytes[8..12].copy_from_slice(&y.to_ne_bytes());
TagId::from_slice(&bytes)
} }
fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope { fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope {
@ -328,3 +328,172 @@ fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderSc
TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row, TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row,
} }
} }
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use typst_library::foundations::Content;

    use super::*;

    /// Table id used by every fixture table and every expected cell id.
    const TABLE_ID: TableId = TableId(324);

    /// Summary attached to the fixture table and to the expected table tag.
    const SUMMARY: &str = "summary";

    /// Builds the tag tree of `table` (with no pre-existing nodes) and
    /// compares it against `exp_tag`.
    #[track_caller]
    fn test(table: TableCtx, exp_tag: TagNode) {
        let built = table.build_table(Vec::new());
        assert_eq!(built, exp_tag);
    }

    /// Creates a table context and inserts `cells` in the given order, each
    /// without any child nodes.
    #[track_caller]
    fn table<const SIZE: usize>(cells: [TableCell; SIZE]) -> TableCtx {
        let mut ctx = TableCtx::new(TABLE_ID, Some(SUMMARY.into()));
        for table_cell in cells {
            ctx.insert(Packed::new(table_cell), Vec::new());
        }
        ctx
    }

    /// An empty cell at an explicit position with an explicit kind.
    fn cell_at(x: usize, y: usize, kind: TableCellKind) -> TableCell {
        TableCell::new(Content::default())
            .with_x(Smart::Custom(x))
            .with_y(Smart::Custom(y))
            .with_kind(Smart::Custom(kind))
    }

    /// An empty header cell at `(x, y)`; panics if `level` is zero.
    #[track_caller]
    fn header_cell(x: usize, y: usize, level: u32, scope: TableHeaderScope) -> TableCell {
        let level = NonZeroU32::new(level).unwrap();
        cell_at(x, y, TableCellKind::Header(level, scope))
    }

    /// An empty data cell at `(x, y)`.
    fn cell(x: usize, y: usize) -> TableCell {
        cell_at(x, y, TableCellKind::Data)
    }

    /// A group node of the given kind holding `nodes` as its children.
    fn group<const SIZE: usize>(kind: TagKind, nodes: [TagNode; SIZE]) -> TagNode {
        TagNode::Group(kind.into(), nodes.into())
    }

    /// The expected root table tag, carrying the fixture summary.
    fn table_tag<const SIZE: usize>(nodes: [TagNode; SIZE]) -> TagNode {
        group(TagKind::Table(Some(SUMMARY.into())), nodes)
    }

    /// The expected table header section.
    fn header<const SIZE: usize>(nodes: [TagNode; SIZE]) -> TagNode {
        group(TagKind::THead, nodes)
    }

    /// The expected table body section.
    fn body<const SIZE: usize>(nodes: [TagNode; SIZE]) -> TagNode {
        group(TagKind::TBody, nodes)
    }

    /// The expected table row.
    fn row<const SIZE: usize>(nodes: [TagNode; SIZE]) -> TagNode {
        group(TagKind::TR, nodes)
    }

    /// References to the header cells at the given `(x, y)` positions.
    fn header_refs<const SIZE: usize>(headers: [(u32, u32); SIZE]) -> TagIdRefs {
        let ids = headers
            .into_iter()
            .map(|(x, y)| table_cell_id(TABLE_ID, x, y))
            .collect();
        TagIdRefs { ids }
    }

    /// The children expected in every cell: a single empty paragraph.
    fn empty_paragraph() -> Vec<TagNode> {
        vec![TagNode::Group(TagKind::P.into(), Vec::new())]
    }

    /// The expected tag of the header cell at `(x, y)` that refers to the
    /// header cells listed in `headers`.
    fn header_cell_tag<const SIZE: usize>(
        x: u32,
        y: u32,
        scope: TableHeaderScope,
        headers: [(u32, u32); SIZE],
    ) -> TagNode {
        let th = TableHeaderCell::new(table_header_scope(scope))
            .with_headers(header_refs(headers));
        let own_id = table_cell_id(TABLE_ID, x, y);
        TagNode::Group(TagKind::TH(th).with_id(Some(own_id)), empty_paragraph())
    }

    /// The expected tag of a data cell that refers to the header cells
    /// listed in `headers`.
    fn cell_tag<const SIZE: usize>(headers: [(u32, u32); SIZE]) -> TagNode {
        let td = TableDataCell::new().with_headers(header_refs(headers));
        TagNode::Group(TagKind::TD(td).into(), empty_paragraph())
    }

    #[test]
    fn simple_table() {
        #[rustfmt::skip]
        let ctx = table([
            header_cell(0, 0, 1, TableHeaderScope::Column),
            header_cell(1, 0, 1, TableHeaderScope::Column),
            header_cell(2, 0, 1, TableHeaderScope::Column),

            cell(0, 1),
            cell(1, 1),
            cell(2, 1),

            cell(0, 2),
            cell(1, 2),
            cell(2, 2),
        ]);

        // Both data rows are expected to look the same: each cell refers to
        // the column header above it.
        let data_row = || {
            row([
                cell_tag([(0, 0)]),
                cell_tag([(1, 0)]),
                cell_tag([(2, 0)]),
            ])
        };

        #[rustfmt::skip]
        let expected = table_tag([
            header([row([
                header_cell_tag(0, 0, TableHeaderScope::Column, []),
                header_cell_tag(1, 0, TableHeaderScope::Column, []),
                header_cell_tag(2, 0, TableHeaderScope::Column, []),
            ])]),
            body([data_row(), data_row()]),
        ]);

        test(ctx, expected);
    }

    #[test]
    fn header_row_and_column() {
        #[rustfmt::skip]
        let ctx = table([
            header_cell(0, 0, 1, TableHeaderScope::Column),
            header_cell(1, 0, 1, TableHeaderScope::Column),
            header_cell(2, 0, 1, TableHeaderScope::Column),

            header_cell(0, 1, 1, TableHeaderScope::Row),
            cell(1, 1),
            cell(2, 1),

            header_cell(0, 2, 1, TableHeaderScope::Row),
            cell(1, 2),
            cell(2, 2),
        ]);

        // The expected rows sit directly under the table tag here, without
        // header/body sections.
        #[rustfmt::skip]
        let expected = table_tag([
            row([
                header_cell_tag(0, 0, TableHeaderScope::Column, []),
                header_cell_tag(1, 0, TableHeaderScope::Column, []),
                header_cell_tag(2, 0, TableHeaderScope::Column, []),
            ]),
            row([
                header_cell_tag(0, 1, TableHeaderScope::Row, [(0, 0)]),
                cell_tag([(1, 0), (0, 1)]),
                cell_tag([(2, 0), (0, 1)]),
            ]),
            row([
                header_cell_tag(0, 2, TableHeaderScope::Row, [(0, 0)]),
                cell_tag([(1, 0), (0, 2)]),
                cell_tag([(2, 0), (0, 2)]),
            ]),
        ]);

        test(ctx, expected);
    }
}