Compare commits

...

5 Commits

Author SHA1 Message Date
Tobias Schmitz
7d5b9a716f
feat: wrap table cell content in a paragraph 2025-07-07 12:30:56 +02:00
Tobias Schmitz
b0d3c2dca4
test: table header id generation 2025-07-07 12:28:53 +02:00
Tobias Schmitz
58c6729df4
feat: generate human readable table cell IDs
In almost all real-world cases these IDs require less memory than the binary IDs
used before, and they also require less storage in PDF files, since binary
data is encoded in hex escape sequences, taking up 4 bytes per byte of data.
2025-07-07 10:52:20 +02:00
Tobias Schmitz
157e0fa142
fix: generate cell id with correct indices 2025-07-04 15:56:39 +02:00
Tobias Schmitz
4dceb7f5ef
refactor: update krilla 2025-07-04 10:37:46 +02:00
7 changed files with 240 additions and 49 deletions

29
Cargo.lock generated
View File

@ -592,6 +592,12 @@ dependencies = [
"syn",
]
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "dirs"
version = "6.0.0"
@ -1367,7 +1373,7 @@ dependencies = [
[[package]]
name = "krilla"
version = "0.4.0"
source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2897351d6eeb139675b5e7e2765fe6f082e26efd"
source = "git+https://github.com/saecki/krilla?branch=tag-attributes#736d8b7e2c9c43d3fcf8b6bf31fb1a179605cab9"
dependencies = [
"base64",
"bumpalo",
@ -1396,7 +1402,7 @@ dependencies = [
[[package]]
name = "krilla-svg"
version = "0.1.0"
source = "git+https://github.com/saecki/krilla?branch=tag-attributes#2897351d6eeb139675b5e7e2765fe6f082e26efd"
source = "git+https://github.com/saecki/krilla?branch=tag-attributes#736d8b7e2c9c43d3fcf8b6bf31fb1a179605cab9"
dependencies = [
"flate2",
"fontdb",
@ -1982,6 +1988,16 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "pretty_assertions"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d"
dependencies = [
"diff",
"yansi",
]
[[package]]
name = "proc-macro2"
version = "1.0.93"
@ -3131,6 +3147,7 @@ dependencies = [
name = "typst-pdf"
version = "0.13.1"
dependencies = [
"az",
"bytemuck",
"comemo",
"ecow",
@ -3138,7 +3155,9 @@ dependencies = [
"infer",
"krilla",
"krilla-svg",
"pretty_assertions",
"serde",
"smallvec",
"typst-assets",
"typst-library",
"typst-macros",
@ -3801,6 +3820,12 @@ dependencies = [
"linked-hash-map",
]
[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "yoke"
version = "0.7.5"

View File

@ -92,6 +92,7 @@ phf = { version = "0.11", features = ["macros"] }
pixglyph = "0.6"
png = "0.17"
portable-atomic = "1.6"
pretty_assertions = "1.4.1"
proc-macro2 = "1"
pulldown-cmark = "0.9"
qcms = "0.3.0"

View File

@ -19,6 +19,7 @@ typst-macros = { workspace = true }
typst-syntax = { workspace = true }
typst-timing = { workspace = true }
typst-utils = { workspace = true }
az = { workspace = true }
bytemuck = { workspace = true }
comemo = { workspace = true }
ecow = { workspace = true }
@ -27,6 +28,10 @@ infer = { workspace = true }
krilla = { workspace = true }
krilla-svg = { workspace = true }
serde = { workspace = true }
smallvec = { workspace = true }
[dev-dependencies]
pretty_assertions = { workspace = true }
[lints]
workspace = true

View File

@ -598,7 +598,7 @@ fn convert_error(
let span = to_span(*loc);
error!(span, "{prefix} duplicate tag id")
}
ValidationError::UnknownHeaderTagId(_id, loc) => {
ValidationError::UnknownTagId(_id, loc) => {
// TODO: display the id and better error message
let span = to_span(*loc);
error!(span, "{prefix} unknown header tag id")

View File

@ -83,7 +83,8 @@ pub(crate) fn handle_start(gc: &mut GlobalContext, elem: &Content) -> SourceResu
TagKind::Caption.into()
} else if let Some(table) = elem.to_packed::<TableElem>() {
let table_id = gc.tags.next_table_id();
let ctx = TableCtx::new(table_id, table.clone());
let summary = table.summary(StyleChain::default()).map(EcoString::into);
let ctx = TableCtx::new(table_id, summary);
push_stack(gc, loc, StackEntryKind::Table(ctx))?;
return Ok(());
} else if let Some(cell) = elem.to_packed::<TableCell>() {
@ -151,10 +152,7 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) {
let node = match entry.kind {
StackEntryKind::Standard(tag) => TagNode::Group(tag, entry.nodes),
StackEntryKind::Outline(ctx) => {
let nodes = ctx.build_outline(entry.nodes);
TagNode::Group(TagKind::TOC.into(), nodes)
}
StackEntryKind::Outline(ctx) => ctx.build_outline(entry.nodes),
StackEntryKind::OutlineEntry(outline_entry) => {
let parent = gc.tags.stack.last_mut().and_then(|parent| {
let ctx = parent.kind.as_outline_mut()?;
@ -172,11 +170,7 @@ pub(crate) fn handle_end(gc: &mut GlobalContext, loc: Location) {
outline_ctx.insert(parent_nodes, outline_entry, entry.nodes);
return;
}
StackEntryKind::Table(ctx) => {
let summary = ctx.table.summary(StyleChain::default()).map(EcoString::into);
let nodes = ctx.build_table(entry.nodes);
TagNode::Group(TagKind::Table(summary).into(), nodes)
}
StackEntryKind::Table(ctx) => ctx.build_table(entry.nodes),
StackEntryKind::TableCell(cell) => {
let Some(table_ctx) = gc.tags.parent_table() else {
// PDF/UA compliance of the structure hierarchy is checked
@ -385,7 +379,7 @@ impl StackEntryKind {
}
}
#[derive(Clone)]
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum TagNode {
Group(Tag, Vec<TagNode>),
Leaf(Identifier),
@ -394,7 +388,7 @@ pub(crate) enum TagNode {
Placeholder(Placeholder),
}
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) struct Placeholder(usize);
/// Automatically calls [`Surface::end_tagged`] when dropped.

View File

@ -44,14 +44,11 @@ impl OutlineCtx {
}
}
pub(crate) fn build_outline(
mut self,
mut outline_nodes: Vec<TagNode>,
) -> Vec<TagNode> {
pub(crate) fn build_outline(mut self, mut outline_nodes: Vec<TagNode>) -> TagNode {
while !self.stack.is_empty() {
self.finish_section(&mut outline_nodes);
}
outline_nodes
TagNode::Group(TagKind::TOC.into(), outline_nodes)
}
}

View File

@ -1,23 +1,25 @@
use std::num::{NonZeroU32, NonZeroUsize};
use std::io::Write as _;
use std::num::NonZeroU32;
use az::SaturatingAs;
use krilla::tagging::{
TableCellHeaders, TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId,
TagKind,
TableCellSpan, TableDataCell, TableHeaderCell, TagBuilder, TagId, TagIdRefs, TagKind,
};
use smallvec::SmallVec;
use typst_library::foundations::{Packed, Smart, StyleChain};
use typst_library::model::{TableCell, TableCellKind, TableElem, TableHeaderScope};
use typst_library::model::{TableCell, TableCellKind, TableHeaderScope};
use crate::tags::{TableId, TagNode};
pub(crate) struct TableCtx {
pub(crate) id: TableId,
pub(crate) table: Packed<TableElem>,
pub(crate) summary: Option<String>,
rows: Vec<Vec<GridCell>>,
}
impl TableCtx {
pub(crate) fn new(id: TableId, table: Packed<TableElem>) -> Self {
Self { id, table: table.clone(), rows: Vec::new() }
pub(crate) fn new(id: TableId, summary: Option<String>) -> Self {
Self { id, summary, rows: Vec::new() }
}
fn get(&self, x: usize, y: usize) -> Option<&TableCtxCell> {
@ -79,21 +81,21 @@ impl TableCtx {
}
self.rows[y][x] = GridCell::Cell(TableCtxCell {
x: x as u32,
y: y as u32,
rowspan,
colspan,
x: x.saturating_as(),
y: y.saturating_as(),
rowspan: rowspan.try_into().unwrap_or(NonZeroU32::MAX),
// The column span must be derived from `colspan`, not `rowspan`; both are
// clamped to `NonZeroU32::MAX` if they do not fit into 32 bits.
colspan: colspan.try_into().unwrap_or(NonZeroU32::MAX),
kind,
headers: TableCellHeaders::NONE,
headers: TagIdRefs::NONE,
nodes,
});
}
pub(crate) fn build_table(mut self, mut nodes: Vec<TagNode>) -> Vec<TagNode> {
pub(crate) fn build_table(mut self, mut nodes: Vec<TagNode>) -> TagNode {
// Table layouting ensures that there are no overlapping cells, and that
// any gaps left by the user are filled with empty cells.
if self.rows.is_empty() {
return nodes;
return TagNode::Group(TagKind::Table(self.summary).into(), nodes);
}
let height = self.rows.len();
let width = self.rows[0].len();
@ -158,10 +160,7 @@ impl TableCtx {
.into_iter()
.filter_map(|cell| {
let cell = cell.into_cell()?;
let span = TableCellSpan {
rows: cell.rowspan.try_into().unwrap_or(NonZeroU32::MAX),
cols: cell.colspan.try_into().unwrap_or(NonZeroU32::MAX),
};
let span = TableCellSpan { rows: cell.rowspan, cols: cell.colspan };
let tag = match cell.unwrap_kind() {
TableCellKind::Header(_, scope) => {
let id = table_cell_id(self.id, cell.x, cell.y);
@ -181,7 +180,10 @@ impl TableCtx {
.into(),
};
Some(TagNode::Group(tag, cell.nodes))
// Wrap content in a paragraph.
// TODO: maybe avoid nested paragraphs?
let par = TagNode::Group(TagKind::P.into(), cell.nodes);
Some(TagNode::Group(tag, vec![par]))
})
.collect();
@ -216,7 +218,7 @@ impl TableCtx {
nodes.push(TagNode::Group(tag.into(), row_chunk));
}
nodes
TagNode::Group(TagKind::Table(self.summary).into(), nodes)
}
fn resolve_cell_headers<F>(
@ -246,7 +248,7 @@ impl TableCtx {
if let TableCellKind::Header(level, scope) = cell.unwrap_kind() {
if refers_to_dir(&scope) {
let tag_id = table_cell_id(table_id, x as u32, y as u32);
let tag_id = table_cell_id(table_id, cell.x, cell.y);
*current_header = Some((level, tag_id));
}
}
@ -291,10 +293,10 @@ impl GridCell {
struct TableCtxCell {
x: u32,
y: u32,
rowspan: NonZeroUsize,
colspan: NonZeroUsize,
rowspan: NonZeroU32,
colspan: NonZeroU32,
kind: Smart<TableCellKind>,
headers: TableCellHeaders,
headers: TagIdRefs,
nodes: Vec<TagNode>,
}
@ -314,11 +316,9 @@ fn should_group_rows(a: TableCellKind, b: TableCellKind) -> bool {
}
fn table_cell_id(table_id: TableId, x: u32, y: u32) -> TagId {
let mut bytes = [0; 12];
bytes[0..4].copy_from_slice(&table_id.0.to_ne_bytes());
bytes[4..8].copy_from_slice(&x.to_ne_bytes());
bytes[8..12].copy_from_slice(&y.to_ne_bytes());
TagId::from_slice(&bytes)
let mut buf = SmallVec::new();
_ = write!(&mut buf, "{}x{x}y{y}", table_id.0);
TagId::from_smallvec(buf)
}
fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderScope {
@ -328,3 +328,172 @@ fn table_header_scope(scope: TableHeaderScope) -> krilla::tagging::TableHeaderSc
TableHeaderScope::Row => krilla::tagging::TableHeaderScope::Row,
}
}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use typst_library::foundations::Content;
use super::*;
/// Builds the tag tree of `table` (with no pre-existing child nodes) and
/// asserts that it equals `exp_tag`.
#[track_caller]
fn test(table: TableCtx, exp_tag: TagNode) {
    let built = table.build_table(Vec::new());
    assert_eq!(built, exp_tag);
}
/// Creates a `TableCtx` with id `324` and the summary `"summary"`, then
/// inserts every given cell with an empty list of child nodes.
#[track_caller]
fn table<const SIZE: usize>(cells: [TableCell; SIZE]) -> TableCtx {
    let summary = Some("summary".into());
    let mut ctx = TableCtx::new(TableId(324), summary);
    for table_cell in cells {
        let packed = Packed::new(table_cell);
        ctx.insert(packed, Vec::new());
    }
    ctx
}
/// Creates an empty header cell at position (`x`, `y`) with the given header
/// `level` and `scope`.
///
/// Panics if `level` is zero.
#[track_caller]
fn header_cell(x: usize, y: usize, level: u32, scope: TableHeaderScope) -> TableCell {
    let level = NonZeroU32::new(level).unwrap();
    let kind = TableCellKind::Header(level, scope);
    TableCell::new(Content::default())
        .with_x(Smart::Custom(x))
        .with_y(Smart::Custom(y))
        .with_kind(Smart::Custom(kind))
}
/// Creates an empty data cell at position (`x`, `y`).
fn cell(x: usize, y: usize) -> TableCell {
    let kind = Smart::Custom(TableCellKind::Data);
    let empty = TableCell::new(Content::default());
    empty
        .with_x(Smart::Custom(x))
        .with_y(Smart::Custom(y))
        .with_kind(kind)
}
/// Wraps `nodes` in a table tag group carrying the summary `"summary"`.
fn table_tag<const SIZE: usize>(nodes: [TagNode; SIZE]) -> TagNode {
    let children = Vec::from(nodes);
    let kind = TagKind::Table(Some("summary".into()));
    TagNode::Group(kind.into(), children)
}
/// Wraps `nodes` in a `THead` tag group.
fn header<const SIZE: usize>(nodes: [TagNode; SIZE]) -> TagNode {
    let children = Vec::from(nodes);
    TagNode::Group(TagKind::THead.into(), children)
}
/// Wraps `nodes` in a `TBody` tag group.
fn body<const SIZE: usize>(nodes: [TagNode; SIZE]) -> TagNode {
    let children = Vec::from(nodes);
    TagNode::Group(TagKind::TBody.into(), children)
}
/// Wraps `nodes` in a `TR` tag group.
fn row<const SIZE: usize>(nodes: [TagNode; SIZE]) -> TagNode {
    let children = Vec::from(nodes);
    TagNode::Group(TagKind::TR.into(), children)
}
/// Builds the expected `TH` tag node for the header cell at (`x`, `y`).
///
/// The tag carries the cell's own id, the given `scope`, and references to the
/// header cells at the `(x, y)` positions listed in `headers`. Its only child
/// is an empty paragraph group.
fn header_cell_tag<const SIZE: usize>(
    x: u32,
    y: u32,
    scope: TableHeaderScope,
    headers: [(u32, u32); SIZE],
) -> TagNode {
    let own_id = table_cell_id(TableId(324), x, y);
    let ids = headers
        .into_iter()
        .map(|(hx, hy)| table_cell_id(TableId(324), hx, hy))
        .collect();
    let header = TableHeaderCell::new(table_header_scope(scope))
        .with_headers(TagIdRefs { ids });
    let tag = TagKind::TH(header).with_id(Some(own_id));
    let paragraph = TagNode::Group(TagKind::P.into(), Vec::new());
    TagNode::Group(tag, vec![paragraph])
}
/// Builds the expected `TD` tag node for a data cell that references the
/// header cells at the `(x, y)` positions listed in `headers`. Its only child
/// is an empty paragraph group.
fn cell_tag<const SIZE: usize>(headers: [(u32, u32); SIZE]) -> TagNode {
    let ids = headers
        .into_iter()
        .map(|(hx, hy)| table_cell_id(TableId(324), hx, hy))
        .collect();
    let data_cell = TableDataCell::new().with_headers(TagIdRefs { ids });
    let paragraph = TagNode::Group(TagKind::P.into(), Vec::new());
    TagNode::Group(TagKind::TD(data_cell).into(), vec![paragraph])
}
/// A 3x3 table whose first row consists of column-scoped header cells and
/// whose remaining two rows are plain data cells.
#[test]
fn simple_table() {
// Input grid: row 0 holds the column headers, rows 1 and 2 hold data.
#[rustfmt::skip]
let table = table([
header_cell(0, 0, 1, TableHeaderScope::Column),
header_cell(1, 0, 1, TableHeaderScope::Column),
header_cell(2, 0, 1, TableHeaderScope::Column),
cell(0, 1),
cell(1, 1),
cell(2, 1),
cell(0, 2),
cell(1, 2),
cell(2, 2),
]);
// Expected tree: the header row is grouped into a `THead`, the data rows
// into a `TBody`, and every data cell references the header cell at the top
// of its column.
#[rustfmt::skip]
let tag = table_tag([
header([row([
header_cell_tag(0, 0, TableHeaderScope::Column, []),
header_cell_tag(1, 0, TableHeaderScope::Column, []),
header_cell_tag(2, 0, TableHeaderScope::Column, []),
])]),
body([
row([
cell_tag([(0, 0)]),
cell_tag([(1, 0)]),
cell_tag([(2, 0)]),
]),
row([
cell_tag([(0, 0)]),
cell_tag([(1, 0)]),
cell_tag([(2, 0)]),
]),
]),
]);
test(table, tag);
}
/// A 3x3 table with column-scoped headers in the first row and row-scoped
/// headers in the first column of the two remaining rows.
#[test]
fn header_row_and_column() {
// Input grid: row 0 holds column headers; rows 1 and 2 each start with a
// row header followed by two data cells.
#[rustfmt::skip]
let table = table([
header_cell(0, 0, 1, TableHeaderScope::Column),
header_cell(1, 0, 1, TableHeaderScope::Column),
header_cell(2, 0, 1, TableHeaderScope::Column),
header_cell(0, 1, 1, TableHeaderScope::Row),
cell(1, 1),
cell(2, 1),
header_cell(0, 2, 1, TableHeaderScope::Row),
cell(1, 2),
cell(2, 2),
]);
// Expected tree: the rows sit directly below the table tag (no `THead` /
// `TBody` groups). Each row header references the column header at (0, 0),
// and each data cell lists its column header first, then its row header.
#[rustfmt::skip]
let tag = table_tag([
row([
header_cell_tag(0, 0, TableHeaderScope::Column, []),
header_cell_tag(1, 0, TableHeaderScope::Column, []),
header_cell_tag(2, 0, TableHeaderScope::Column, []),
]),
row([
header_cell_tag(0, 1, TableHeaderScope::Row, [(0, 0)]),
cell_tag([(1, 0), (0, 1)]),
cell_tag([(2, 0), (0, 1)]),
]),
row([
header_cell_tag(0, 2, TableHeaderScope::Row, [(0, 0)]),
cell_tag([(1, 0), (0, 2)]),
cell_tag([(2, 0), (0, 2)]),
]),
]);
test(table, tag);
}
}