feat: highlight, super- and sub-script text attributes

This commit is contained in:
Tobias Schmitz 2025-08-05 16:34:33 +02:00
parent 3bcfe54002
commit 8c9e548cd7
No known key found for this signature in database
8 changed files with 412 additions and 192 deletions

View File

@ -3,31 +3,27 @@ use std::collections::HashMap;
use std::slice::SliceIndex;
use krilla::geom as kg;
use krilla::tagging::{
BBox, Identifier, LineHeight, NaiveRgbColor, Node, Tag, TagKind, TagTree,
TextDecorationType,
};
use typst_library::diag::{SourceResult, bail};
use typst_library::foundations::{Content, LinkMarker, Packed};
use krilla::tagging::{BBox, Identifier, Node, TagKind, TagTree};
use typst_library::foundations::{LinkMarker, Packed};
use typst_library::introspection::Location;
use typst_library::layout::{Abs, Length, Point, Rect};
use typst_library::layout::{Abs, Point, Rect};
use typst_library::model::{OutlineEntry, TableCell};
use typst_library::pdf::ArtifactKind;
use typst_library::text::Lang;
use typst_syntax::Span;
use crate::PdfOptions;
use crate::convert::FrameContext;
use crate::tags::list::ListCtx;
use crate::tags::outline::OutlineCtx;
use crate::tags::table::TableCtx;
use crate::tags::text::{ResolvedTextAttrs, TextAttrs};
use crate::tags::{Placeholder, TagNode};
use crate::util::AbsExt;
pub struct Tags {
/// The language of the first text item that has been encountered.
pub doc_lang: Option<Lang>,
/// The current set of text attributes.
/// The set of text attributes.
pub text_attrs: TextAttrs,
/// The intermediary stack of nested tag groups.
pub stack: TagStack,
@ -75,6 +71,11 @@ impl Tags {
}
pub fn push_text(&mut self, new_attrs: ResolvedTextAttrs, id: Identifier) {
if new_attrs.is_empty() {
self.push(TagNode::Leaf(id));
return;
}
// FIXME: Artifacts will force a split in the spans, and decorations
// generate artifacts
let last_node = if let Some(entry) = self.stack.last_mut() {
@ -102,11 +103,11 @@ impl Tags {
pub fn build_tree(&mut self) -> TagTree {
assert!(self.stack.items.is_empty(), "tags weren't properly closed");
let mut nodes = Vec::new();
for child in std::mem::take(&mut self.tree) {
self.resolve_node(&mut nodes, child);
}
TagTree::from(nodes)
let children = std::mem::take(&mut self.tree)
.into_iter()
.map(|node| self.resolve_node(node))
.collect::<Vec<_>>();
TagTree::from(children)
}
/// Try to set the language of a parent tag, or the entire document.
@ -128,45 +129,28 @@ impl Tags {
}
/// Resolves a tag node into a krilla node.
fn resolve_node(&mut self, accum: &mut Vec<Node>, node: TagNode) {
fn resolve_node(&mut self, node: TagNode) -> Node {
match node {
TagNode::Group(group) => {
let mut nodes = Vec::new();
for child in group.nodes {
self.resolve_node(&mut nodes, child);
}
let nodes = (group.nodes.into_iter())
.map(|node| self.resolve_node(node))
.collect();
let group = krilla::tagging::TagGroup::with_children(group.tag, nodes);
accum.push(Node::Group(group));
}
TagNode::Leaf(identifier) => {
accum.push(Node::Leaf(identifier));
}
TagNode::Placeholder(placeholder) => {
accum.push(self.placeholders.take(placeholder));
Node::Group(group)
}
TagNode::Leaf(identifier) => Node::Leaf(identifier),
TagNode::Placeholder(placeholder) => self.placeholders.take(placeholder),
TagNode::FootnoteEntry(loc) => {
let node = (self.footnotes.remove(&loc))
.and_then(|ctx| ctx.entry)
.expect("footnote");
self.resolve_node(accum, node)
self.resolve_node(node)
}
TagNode::Text(attrs, ids) => {
let children = ids.into_iter().map(|id| Node::Leaf(id));
if attrs.is_empty() {
accum.extend(children);
} else {
let tag = Tag::Span
.with_line_height(attrs.lineheight)
.with_baseline_shift(attrs.baseline_shift)
.with_text_decoration_type(attrs.deco.map(|d| d.kind.to_krilla()))
.with_text_decoration_color(attrs.deco.and_then(|d| d.color))
.with_text_decoration_thickness(
attrs.deco.and_then(|d| d.thickness),
);
let group =
krilla::tagging::TagGroup::with_children(tag, children.collect());
accum.push(Node::Group(group));
}
let tag = attrs.to_tag();
let children = ids.into_iter().map(|id| Node::Leaf(id)).collect();
let group = krilla::tagging::TagGroup::with_children(tag, children);
Node::Group(group)
}
}
}
@ -194,123 +178,6 @@ pub enum Disable {
Tiling,
}
#[derive(Clone, Debug)]
pub struct TextAttrs {
lineheight: Option<LineHeight>,
baseline_shift: Option<f32>,
/// PDF can only represent one of the following attributes at a time.
/// Keep track of all of them, and depending if PDF/UA-1 is enforced, either
/// throw an error, or just use one of them.
decos: Vec<(Location, TextDeco)>,
}
impl TextAttrs {
pub fn new() -> Self {
Self {
lineheight: None,
baseline_shift: None,
decos: Vec::new(),
}
}
pub fn push_deco(
&mut self,
options: &PdfOptions,
elem: &Content,
kind: TextDecoKind,
stroke: TextDecoStroke,
) -> SourceResult<()> {
let deco = TextDeco { kind, stroke };
// TODO: can overlapping tags break this?
if options.is_pdf_ua() && self.decos.iter().any(|(_, d)| d.kind != deco.kind) {
let validator = options.standards.config.validator();
let validator = validator.as_str();
bail!(
elem.span(),
"{validator} error: cannot combine underline, overline, and or strike"
);
}
let loc = elem.location().unwrap();
self.decos.push((loc, deco));
Ok(())
}
/// Returns true if a decoration was removed.
pub fn pop_deco(&mut self, loc: Location) -> bool {
// TODO: Ideally we would just check the top of the stack, can
// overlapping tags even happen for decorations?
if let Some(i) = self.decos.iter().rposition(|(l, _)| *l == loc) {
self.decos.remove(i);
return true;
}
false
}
pub fn resolve(&self, em: Abs) -> ResolvedTextAttrs {
let deco = self.decos.last().map(|&(_, TextDeco { kind, stroke })| {
let thickness = stroke.thickness.map(|t| t.at(em).to_f32());
ResolvedTextDeco { kind, color: stroke.color, thickness }
});
ResolvedTextAttrs {
lineheight: self.lineheight,
baseline_shift: self.baseline_shift,
deco,
}
}
}
#[derive(Clone, Copy, Debug)]
pub struct TextDeco {
kind: TextDecoKind,
stroke: TextDecoStroke,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TextDecoKind {
Underline,
Overline,
Strike,
}
impl TextDecoKind {
fn to_krilla(self) -> TextDecorationType {
match self {
TextDecoKind::Underline => TextDecorationType::Underline,
TextDecoKind::Overline => TextDecorationType::Overline,
TextDecoKind::Strike => TextDecorationType::LineThrough,
}
}
}
#[derive(Clone, Copy, Debug, Default)]
pub struct TextDecoStroke {
pub color: Option<NaiveRgbColor>,
pub thickness: Option<Length>,
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ResolvedTextAttrs {
lineheight: Option<LineHeight>,
baseline_shift: Option<f32>,
deco: Option<ResolvedTextDeco>,
}
impl ResolvedTextAttrs {
pub fn is_empty(&self) -> bool {
self.lineheight.is_none() && self.baseline_shift.is_none() && self.deco.is_none()
}
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ResolvedTextDeco {
kind: TextDecoKind,
color: Option<NaiveRgbColor>,
thickness: Option<f32>,
}
#[derive(Debug)]
pub struct TagStack {
items: Vec<StackEntry>,

View File

@ -5,11 +5,10 @@ use krilla::configure::Validator;
use krilla::page::Page;
use krilla::surface::Surface;
use krilla::tagging::{
ArtifactType, ContentTag, Identifier, ListNumbering, NaiveRgbColor, Node, SpanTag,
Tag, TagKind,
ArtifactType, ContentTag, Identifier, ListNumbering, Node, SpanTag, Tag, TagKind,
};
use typst_library::diag::{SourceResult, bail};
use typst_library::foundations::{Content, LinkMarker, Smart};
use typst_library::foundations::{Content, LinkMarker};
use typst_library::introspection::Location;
use typst_library::layout::{HideElem, Point, Rect, RepeatElem, Size};
use typst_library::math::EquationElem;
@ -20,9 +19,10 @@ use typst_library::model::{
};
use typst_library::pdf::{ArtifactElem, ArtifactKind, PdfMarkerTag, PdfMarkerTagKind};
use typst_library::text::{
Lang, OverlineElem, RawElem, StrikeElem, TextItem, UnderlineElem,
HighlightElem, Lang, OverlineElem, RawElem, ScriptKind, StrikeElem, SubElem,
SuperElem, TextItem, UnderlineElem,
};
use typst_library::visualize::{Image, ImageElem, Paint, Shape, Stroke};
use typst_library::visualize::{Image, ImageElem, Shape};
use typst_syntax::Span;
use crate::convert::{FrameContext, GlobalContext};
@ -30,6 +30,7 @@ use crate::link::LinkAnnotation;
use crate::tags::list::ListCtx;
use crate::tags::outline::OutlineCtx;
use crate::tags::table::TableCtx;
use crate::tags::text::{ResolvedTextAttrs, TextDecoKind};
use crate::tags::util::{PropertyOptRef, PropertyValCloned, PropertyValCopied};
pub use context::*;
@ -38,6 +39,7 @@ mod context;
mod list;
mod outline;
mod table;
mod text;
mod util;
#[derive(Debug, Clone, PartialEq)]
@ -248,19 +250,35 @@ pub fn handle_start(
});
push_stack(gc, elem, StackEntryKind::Code(desc))?;
return Ok(());
} else if let Some(sub) = elem.to_packed::<SubElem>() {
let baseline_shift = sub.baseline.val();
let lineheight = sub.size.val();
let kind = ScriptKind::Sub;
gc.tags.text_attrs.push_script(elem, kind, baseline_shift, lineheight);
return Ok(());
} else if let Some(sub) = elem.to_packed::<SuperElem>() {
let baseline_shift = sub.baseline.val();
let lineheight = sub.size.val();
let kind = ScriptKind::Super;
gc.tags.text_attrs.push_script(elem, kind, baseline_shift, lineheight);
return Ok(());
} else if let Some(highlight) = elem.to_packed::<HighlightElem>() {
let paint = highlight.fill.opt_ref();
gc.tags.text_attrs.push_highlight(elem, paint);
return Ok(());
} else if let Some(underline) = elem.to_packed::<UnderlineElem>() {
let kind = TextDecoKind::Underline;
let stroke = deco_stroke(underline.stroke.val_cloned());
let stroke = underline.stroke.val_cloned();
gc.tags.text_attrs.push_deco(gc.options, elem, kind, stroke)?;
return Ok(());
} else if let Some(overline) = elem.to_packed::<OverlineElem>() {
let kind = TextDecoKind::Overline;
let stroke = deco_stroke(overline.stroke.val_cloned());
let stroke = overline.stroke.val_cloned();
gc.tags.text_attrs.push_deco(gc.options, elem, kind, stroke)?;
return Ok(());
} else if let Some(strike) = elem.to_packed::<StrikeElem>() {
let kind = TextDecoKind::Strike;
let stroke = deco_stroke(strike.stroke.val_cloned());
let stroke = strike.stroke.val_cloned();
gc.tags.text_attrs.push_deco(gc.options, elem, kind, stroke)?;
return Ok(());
} else {
@ -272,26 +290,6 @@ pub fn handle_start(
Ok(())
}
fn deco_stroke(stroke: Smart<Stroke>) -> TextDecoStroke {
let Smart::Custom(stroke) = stroke else {
return TextDecoStroke::default();
};
let color = stroke.paint.custom().and_then(|paint| match paint {
Paint::Solid(color) => {
let c = color.to_rgb();
Some(NaiveRgbColor::new(c.red, c.green, c.blue))
}
// TODO: Don't fail silently, maybe make a best effort to convert a
// gradient to a single solid color?
Paint::Gradient(_) => None,
// TODO: Don't fail silently, maybe just error in PDF/UA mode?
Paint::Tiling(_) => None,
});
let thickness = stroke.thickness.custom();
TextDecoStroke { color, thickness }
}
fn push_stack(
gc: &mut GlobalContext,
elem: &Content,
@ -350,7 +348,7 @@ pub fn handle_end(
return Ok(());
}
if gc.tags.text_attrs.pop_deco(loc) {
if gc.tags.text_attrs.pop(loc) {
return Ok(());
}
@ -640,7 +638,7 @@ pub fn text<'a, 'b>(
return TagHandle { surface, started: false };
}
let attrs = gc.tags.text_attrs.resolve(text.size);
let attrs = gc.tags.text_attrs.resolve(text);
// Marked content
let lang = gc.tags.try_set_lang(text.lang);

View File

@ -0,0 +1,292 @@
use krilla::tagging::{LineHeight, NaiveRgbColor, Tag, TextDecorationType, kind};
use typst_library::diag::{SourceResult, bail};
use typst_library::foundations::{Content, Smart};
use typst_library::introspection::Location;
use typst_library::layout::{Abs, Length};
use typst_library::text::{Font, ScriptKind, TextItem, TextSize};
use typst_library::visualize::{Paint, Stroke};
use crate::PdfOptions;
use crate::util::AbsExt;
#[derive(Clone, Debug)]
pub struct TextAttrs {
/// Store the last resolved set of text attribute. The resolution isn't that
/// expensive, but for large bodies of text it is resolved quite often.
last_resolved: Option<(TextParams, ResolvedTextAttrs)>,
items: Vec<(Location, TextAttr)>,
}
impl TextAttrs {
pub const fn new() -> Self {
Self { last_resolved: None, items: Vec::new() }
}
pub fn push_script(
&mut self,
elem: &Content,
kind: ScriptKind,
baseline_shift: Smart<Length>,
lineheight: Smart<TextSize>,
) {
let val = Script { kind, baseline_shift, lineheight };
let loc = elem.location().unwrap();
self.push(loc, TextAttr::Script(val));
}
pub fn push_highlight(&mut self, elem: &Content, paint: Option<&Paint>) {
let color = match paint {
// TODO: don't fail silently
Some(paint) => color_from_paint(paint),
None => None,
};
let loc = elem.location().unwrap();
self.push(loc, TextAttr::Highlight(color));
}
pub fn push_deco(
&mut self,
options: &PdfOptions,
elem: &Content,
kind: TextDecoKind,
stroke: Smart<Stroke>,
) -> SourceResult<()> {
let stroke = TextDecoStroke::from(stroke);
let deco = TextDeco { kind, stroke };
// TODO: can overlapping tags break this?
// PDF can only represent one text decoration style at a time.
// If PDF/UA-1 is enforced throw an error.
if options.is_pdf_ua()
&& self
.items
.iter()
.filter_map(|(_, a)| a.as_deco())
.any(|d| d.kind != deco.kind)
{
let validator = options.standards.config.validator();
let validator = validator.as_str();
bail!(
elem.span(),
"{validator} error: cannot combine underline, overline, and or strike"
);
}
let loc = elem.location().unwrap();
self.push(loc, TextAttr::Deco(deco));
Ok(())
}
fn push(&mut self, loc: Location, attr: TextAttr) {
self.last_resolved = None;
self.items.push((loc, attr));
}
/// Returns true if a decoration was removed.
pub fn pop(&mut self, loc: Location) -> bool {
self.last_resolved = None;
// TODO: Ideally we would just check the top of the stack, can
// overlapping tags even happen for decorations?
if let Some(i) = self.items.iter().rposition(|(l, _)| *l == loc) {
self.items.remove(i);
return true;
}
false
}
pub fn resolve(&mut self, text: &TextItem) -> ResolvedTextAttrs {
let params = TextParams::new(text);
if let Some((prev_params, attrs)) = &self.last_resolved
&& prev_params == &params
{
return *attrs;
}
let attrs = resolve_attrs(&self.items, &text.font, text.size);
self.last_resolved = Some((params, attrs));
attrs
}
}
/// A single, not yet resolved text attribute.
#[derive(Clone, Copy, Debug, PartialEq)]
enum TextAttr {
    /// A sub- or superscript.
    Script(Script),
    /// A highlight, with its color if one could be derived from the fill.
    Highlight(Option<NaiveRgbColor>),
    /// An underline, overline, or strike.
    Deco(TextDeco),
}

impl TextAttr {
    /// Returns the contained decoration, if this attribute is one.
    fn as_deco(&self) -> Option<&TextDeco> {
        match self {
            Self::Deco(deco) => Some(deco),
            Self::Script(_) | Self::Highlight(_) => None,
        }
    }
}
/// Sub- or super-script.
///
/// Holds the settings as given by the element; `resolve_attrs` turns them
/// into concrete values for a specific font and text size.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Script {
/// Whether this is a sub- or a superscript.
kind: ScriptKind,
/// The baseline shift. If no custom value is set, `resolve_attrs` falls back
/// to the vertical offset from the font's script metrics.
baseline_shift: Smart<Length>,
/// The script text size. If no custom value is set, `resolve_attrs` falls
/// back to the height from the font's script metrics.
lineheight: Smart<TextSize>,
}
/// A text decoration as pushed by an element, before its stroke thickness is
/// resolved against a text size.
#[derive(Clone, Copy, Debug, PartialEq)]
struct TextDeco {
/// The kind of line that decorates the text.
kind: TextDecoKind,
/// The color and thickness of the line, as far as they were specified.
stroke: TextDecoStroke,
}
/// The kind of a text decoration.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TextDecoKind {
    Underline,
    Overline,
    Strike,
}

impl TextDecoKind {
    /// Maps this kind to the corresponding krilla text decoration type.
    fn to_krilla(self) -> TextDecorationType {
        match self {
            Self::Underline => TextDecorationType::Underline,
            Self::Overline => TextDecorationType::Overline,
            // Typst's strike is called "line-through" in krilla.
            Self::Strike => TextDecorationType::LineThrough,
        }
    }
}
/// The parts of a decoration stroke that are written to the tag: an optional
/// color and an optional, not yet resolved thickness.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
struct TextDecoStroke {
    /// The stroke color, if it was specified and is a solid color.
    color: Option<NaiveRgbColor>,
    /// The stroke thickness, if it was specified.
    thickness: Option<Length>,
}

impl From<Smart<Stroke>> for TextDecoStroke {
    /// Extracts the color and thickness from a stroke setting.
    ///
    /// A stroke without a custom value yields the default (nothing
    /// specified). A paint that can't be converted to a plain color is
    /// dropped.
    fn from(stroke: Smart<Stroke>) -> Self {
        let Smart::Custom(stroke) = stroke else {
            return Self::default();
        };
        // TODO: don't fail silently
        let color = stroke.paint.custom().and_then(|paint| color_from_paint(&paint));
        let thickness = stroke.thickness.custom();
        Self { color, thickness }
    }
}
/// Converts a paint into a plain RGB color.
///
/// Only solid paints can be converted; gradients and tilings yield `None`.
fn color_from_paint(paint: &Paint) -> Option<NaiveRgbColor> {
    let solid = match paint {
        Paint::Solid(color) => color,
        // TODO: Don't fail silently, maybe make a best effort to convert a
        // gradient to a single solid color?
        // TODO: Don't fail silently, maybe just error in PDF/UA mode?
        Paint::Gradient(_) | Paint::Tiling(_) => return None,
    };
    let rgb = solid.to_rgb();
    Some(NaiveRgbColor::new(rgb.red, rgb.green, rgb.blue))
}
/// The text attributes that apply to one text item, resolved to concrete
/// values.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ResolvedTextAttrs {
    /// The resolved sub- or superscript, if one is active.
    script: Option<ResolvedScript>,
    /// The outer option tells whether a highlight is active, the inner one
    /// whether a color is known for it.
    background: Option<Option<NaiveRgbColor>>,
    /// The resolved text decoration, if one is active.
    deco: Option<ResolvedTextDeco>,
}

impl ResolvedTextAttrs {
    /// The attribute set in which nothing is active.
    pub const EMPTY: Self = Self { script: None, background: None, deco: None };

    /// Whether no attribute is active at all.
    pub fn is_empty(&self) -> bool {
        let Self { script, background, deco } = self;
        script.is_none() && background.is_none() && deco.is_none()
    }

    /// Whether every kind of attribute has been resolved, so that no further
    /// attribute could contribute anything.
    pub fn all_resolved(&self) -> bool {
        let Self { script, background, deco } = self;
        matches!((script, background, deco), (Some(_), Some(_), Some(_)))
    }

    /// Builds the span tag that carries these attributes.
    pub fn to_tag(self) -> Tag<kind::Span> {
        let Self { script, background, deco } = self;

        let line_height = script.map(|s| s.lineheight);
        let baseline_shift = script.map(|s| s.baseline_shift);
        let background_color = background.flatten();
        let deco_type = deco.map(|d| d.kind.to_krilla());
        let deco_color = deco.and_then(|d| d.color);
        let deco_thickness = deco.and_then(|d| d.thickness);

        Tag::Span
            .with_line_height(line_height)
            .with_baseline_shift(baseline_shift)
            .with_background_color(background_color)
            .with_text_decoration_type(deco_type)
            .with_text_decoration_color(deco_color)
            .with_text_decoration_thickness(deco_thickness)
    }
}
/// A sub- or superscript resolved for a specific font and text size.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ResolvedScript {
/// The value written as the tag's baseline shift. A user-provided shift is
/// stored with its sign inverted (see `resolve_attrs`).
baseline_shift: f32,
/// The value written as the tag's line height.
lineheight: LineHeight,
}
/// A text decoration whose thickness has been resolved against a text size.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ResolvedTextDeco {
/// The kind of line that decorates the text.
kind: TextDecoKind,
/// The color of the line, if one was specified and could be converted.
color: Option<NaiveRgbColor>,
/// The thickness of the line, if one was specified.
thickness: Option<f32>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct TextParams {
font_index: u32,
size: Abs,
}
impl TextParams {
fn new(text: &TextItem) -> TextParams {
TextParams {
// Comparing font indices is enough.
font_index: text.font.index(),
size: text.size,
}
}
}
/// Resolves a stack of text attributes for text set in `font` at `size`.
///
/// The stack is walked from the most recently pushed attribute backwards and
/// the first attribute found of each kind (script, highlight, decoration)
/// wins. Earlier attributes of the same kind are ignored.
fn resolve_attrs(
    items: &[(Location, TextAttr)],
    font: &Font,
    size: Abs,
) -> ResolvedTextAttrs {
    let mut attrs = ResolvedTextAttrs::EMPTY;
    for (_, attr) in items.iter().rev() {
        match *attr {
            TextAttr::Script(script) => {
                // Only do the metric lookup if no script was resolved yet.
                attrs.script.get_or_insert_with(|| {
                    // TODO: The `typographic` setting is ignored for now.
                    // Is it better to be accurate regarding the layouting, and
                    // thus don't write any baseline shift and lineheight when
                    // a typographic sub/super script glyph is used? Or should
                    // we always write the shift so the sub/super script can be
                    // picked up by AT?
                    let script_metrics = script.kind.read_metrics(font.metrics());
                    // NOTE: The user provided baseline_shift needs to be inverted.
                    let baseline_shift = (script.baseline_shift.map(|s| -s.at(size)))
                        .unwrap_or_else(|| script_metrics.vertical_offset.at(size));
                    let lineheight = (script.lineheight.map(|s| s.0.at(size)))
                        .unwrap_or_else(|| script_metrics.height.at(size));

                    ResolvedScript {
                        baseline_shift: baseline_shift.to_f32(),
                        lineheight: LineHeight::Custom(lineheight.to_f32()),
                    }
                });
            }
            TextAttr::Highlight(color) => {
                attrs.background.get_or_insert(color);
            }
            TextAttr::Deco(TextDeco { kind, stroke }) => {
                attrs.deco.get_or_insert_with(|| {
                    let thickness = stroke.thickness.map(|t| t.at(size).to_f32());
                    ResolvedTextDeco { kind, color: stroke.color, thickness }
                });
            }
        }

        // Nothing further down the stack can change the result anymore.
        if attrs.all_resolved() {
            break;
        }
    }
    attrs
}

View File

@ -0,0 +1,9 @@
- Tag: P
/K:
- Content: page=0 mcid=0
- Tag: Span
/BackgroundColor: #fffd11
/K:
- Content: page=0 mcid=1
- Content: page=0 mcid=2
- Content: page=0 mcid=3

View File

@ -0,0 +1,13 @@
- Tag: P
/K:
- Tag: Span
/BaselineShift: -2.500
/LineHeight: 6.000
/K:
- Content: page=0 mcid=0
- Content: page=0 mcid=1
- Tag: Span
/BaselineShift: 9.500
/LineHeight: 6.000
/K:
- Content: page=0 mcid=2

View File

@ -0,0 +1,14 @@
- Tag: P
/K:
- Content: page=0 mcid=0
- Tag: Span
/BaselineShift: -0.750
/LineHeight: 6.000
/K:
- Content: page=0 mcid=1
- Content: page=0 mcid=2
- Tag: Span
/BaselineShift: -0.750
/LineHeight: 6.000
/K:
- Content: page=0 mcid=3

View File

@ -0,0 +1,10 @@
- Tag: P
/K:
- Content: page=0 mcid=0
- Tag: Span
/BaselineShift: 3.500
/LineHeight: 6.000
/K:
- Content: page=0 mcid=1
- Content: page=0 mcid=2
- Content: page=0 mcid=3

View File

@ -16,9 +16,9 @@ blue underlined text
--- deco-tags-different-stroke-thickness pdftags ---
#show: underline.with(stroke: 2pt)
red underlined text
thick underlined
#show: underline.with(stroke: 1pt)
blue underlined text
thin underlined
--- deco-tags-different-type pdftags ---
#underline[underlined]\
@ -30,3 +30,20 @@ blue underlined text
// Error: 2-16 PDF/UA1 error: cannot combine underline, overline, and or strike
#show: overline
text with a bunch of lines
--- deco-tags-highlight-basic pdftags ---
A #highlight[highlighted] alksjdflk asdjlkfj alskdj word.
--- deco-tags-subscript-basic pdftags ---
CO#sub[2] emissions.
A2#sub[hex]
--- deco-tags-superscript-basic pdftags ---
CI#super[-] has a negative charge.
--- deco-tags-script-custom-baseline pdftags ---
// NOTE: the value of the baseline shift attribute is inverted.
#set sub(baseline: 2.5pt)
#set super(baseline: -9.5pt)
#sub[sub]
#super[super]