From c58766440cf8daafb9596ae623eaba83ee583640 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 16 Jul 2025 10:17:42 +0200 Subject: [PATCH] Support intra-doc links in HTML (#6602) --- crates/typst-html/src/document.rs | 51 ++- crates/typst-html/src/dom.rs | 25 +- crates/typst-html/src/encode.rs | 24 +- crates/typst-html/src/lib.rs | 14 + crates/typst-html/src/link.rs | 290 ++++++++++++++++++ crates/typst-html/src/rules.rs | 43 ++- crates/typst-layout/src/rules.rs | 14 +- .../src/introspection/introspector.rs | 25 +- crates/typst-library/src/model/link.rs | 71 ++++- crates/typst-library/src/model/reference.rs | 14 +- crates/typst-svg/src/image.rs | 2 +- crates/typst-svg/src/lib.rs | 95 +++++- crates/typst-svg/src/paint.rs | 2 +- crates/typst-svg/src/shape.rs | 2 +- crates/typst-svg/src/text.rs | 2 +- crates/typst-utils/src/pico.rs | 17 + tests/ref/html/link-html-frame-ref.html | 11 + tests/ref/html/link-html-frame.html | 15 + tests/ref/html/link-html-here.html | 10 + tests/ref/html/link-html-id-attach.html | 29 ++ tests/ref/html/link-html-id-existing.html | 11 + .../html/link-html-label-disambiguation.html | 31 ++ tests/ref/html/link-html-nested-empty.html | 12 + tests/ref/html/ref-basic.html | 13 + tests/suite/model/link.typ | 111 +++++++ tests/suite/model/ref.typ | 2 +- 26 files changed, 861 insertions(+), 75 deletions(-) create mode 100644 crates/typst-html/src/link.rs create mode 100644 tests/ref/html/link-html-frame-ref.html create mode 100644 tests/ref/html/link-html-frame.html create mode 100644 tests/ref/html/link-html-here.html create mode 100644 tests/ref/html/link-html-id-attach.html create mode 100644 tests/ref/html/link-html-id-existing.html create mode 100644 tests/ref/html/link-html-label-disambiguation.html create mode 100644 tests/ref/html/link-html-nested-empty.html create mode 100644 tests/ref/html/ref-basic.html diff --git a/crates/typst-html/src/document.rs b/crates/typst-html/src/document.rs index 9f0124e57..c581df05f 100644 --- a/crates/typst-html/src/document.rs +++ b/crates/typst-html/src/document.rs @@ -1,10 +1,13 @@ +use std::collections::HashSet; use std::num::NonZeroUsize; use comemo::{Tracked, TrackedMut}; use typst_library::diag::{bail, SourceResult}; use typst_library::engine::{Engine, Route, Sink, Traced}; use typst_library::foundations::{Content, StyleChain}; -use typst_library::introspection::{Introspector, IntrospectorBuilder, Locator}; +use typst_library::introspection::{ + Introspector, IntrospectorBuilder, Location, Locator, +}; use typst_library::layout::{Point, Position, Transform}; use typst_library::model::DocumentInfo; use typst_library::routines::{Arenas, RealizationKind, Routines}; @@ -83,42 +86,56 @@ fn html_document_impl( &mut locator, children.iter().copied(), )?; - let introspector = introspect_html(&output); - let root = root_element(output, &info)?; + + let mut link_targets = HashSet::new(); + let mut introspector = introspect_html(&output, &mut link_targets); + let mut root = root_element(output, &info)?; + crate::link::identify_link_targets(&mut root, &mut introspector, link_targets); Ok(HtmlDocument { info, root, introspector }) } /// Introspects HTML nodes. #[typst_macros::time(name = "introspect html")] -fn introspect_html(output: &[HtmlNode]) -> Introspector { +fn introspect_html( + output: &[HtmlNode], + link_targets: &mut HashSet, +) -> Introspector { fn discover( builder: &mut IntrospectorBuilder, sink: &mut Vec<(Content, Position)>, + link_targets: &mut HashSet, nodes: &[HtmlNode], ) { for node in nodes { match node { - HtmlNode::Tag(tag) => builder.discover_in_tag( - sink, - tag, - Position { page: NonZeroUsize::ONE, point: Point::zero() }, - ), + HtmlNode::Tag(tag) => { + builder.discover_in_tag( + sink, + tag, + Position { page: NonZeroUsize::ONE, point: Point::zero() }, + ); + } HtmlNode::Text(_, _) => {} - HtmlNode::Element(elem) => discover(builder, sink, &elem.children), - HtmlNode::Frame(frame) => builder.discover_in_frame( - sink, - &frame.inner, - NonZeroUsize::ONE, - Transform::identity(), - ), + HtmlNode::Element(elem) => { + discover(builder, sink, link_targets, &elem.children) + } + HtmlNode::Frame(frame) => { + builder.discover_in_frame( + sink, + &frame.inner, + NonZeroUsize::ONE, + Transform::identity(), + ); + crate::link::introspect_frame_links(&frame.inner, link_targets); + } } } } let mut elems = Vec::new(); let mut builder = IntrospectorBuilder::new(); - discover(&mut builder, &mut elems, output); + discover(&mut builder, &mut elems, link_targets, output); builder.finalize(elems) } diff --git a/crates/typst-html/src/dom.rs b/crates/typst-html/src/dom.rs index d7287d42d..e7f5fcbcd 100644 --- a/crates/typst-html/src/dom.rs +++ b/crates/typst-html/src/dom.rs @@ -4,7 +4,7 @@ use ecow::{EcoString, EcoVec}; use typst_library::diag::{bail, HintedStrResult, StrResult}; use typst_library::foundations::{cast, Dict, Repr, Str, StyleChain}; use typst_library::introspection::{Introspector, Tag}; -use typst_library::layout::{Abs, Frame}; +use typst_library::layout::{Abs, Frame, Point}; use typst_library::model::DocumentInfo; use typst_library::text::TextElem; use typst_syntax::Span; @@ -172,10 +172,20 @@ impl HtmlAttrs { Self::default() } - /// Add an attribute. + /// Adds an attribute. pub fn push(&mut self, attr: HtmlAttr, value: impl Into) { self.0.push((attr, value.into())); } + + /// Adds an attribute to the start of the list. + pub fn push_front(&mut self, attr: HtmlAttr, value: impl Into) { + self.0.insert(0, (attr, value.into())); + } + + /// Finds an attribute value. + pub fn get(&self, attr: HtmlAttr) -> Option<&EcoString> { + self.0.iter().find(|&&(k, _)| k == attr).map(|(_, v)| v) + } } cast! { @@ -279,11 +289,20 @@ pub struct HtmlFrame { /// frame with em units to make text in and outside of the frame sized /// consistently. pub text_size: Abs, + /// An ID to assign to the SVG itself. + pub id: Option, + /// IDs to assign to destination jump points within the SVG. + pub link_points: Vec<(Point, EcoString)>, } impl HtmlFrame { /// Wraps a laid-out frame. pub fn new(inner: Frame, styles: StyleChain) -> Self { - Self { inner, text_size: styles.resolve(TextElem::size) } + Self { + inner, + text_size: styles.resolve(TextElem::size), + id: None, + link_points: vec![], + } } } diff --git a/crates/typst-html/src/encode.rs b/crates/typst-html/src/encode.rs index 4447186b8..02c3f16de 100644 --- a/crates/typst-html/src/encode.rs +++ b/crates/typst-html/src/encode.rs @@ -2,6 +2,7 @@ use std::fmt::Write; use typst_library::diag::{bail, At, SourceResult, StrResult}; use typst_library::foundations::Repr; +use typst_library::introspection::Introspector; use typst_syntax::Span; use crate::{ @@ -10,7 +11,7 @@ use crate::{ /// Encodes an HTML document into a string. pub fn html(document: &HtmlDocument) -> SourceResult { - let mut w = Writer { pretty: true, ..Writer::default() }; + let mut w = Writer::new(&document.introspector, true); w.buf.push_str(""); write_indent(&mut w); write_element(&mut w, &document.root)?; @@ -20,16 +21,25 @@ pub fn html(document: &HtmlDocument) -> SourceResult { Ok(w.buf) } -#[derive(Default)] -struct Writer { +/// Encodes HTML. +struct Writer<'a> { /// The output buffer. buf: String, /// The current indentation level level: usize, + /// The document's introspector. + introspector: &'a Introspector, /// Whether pretty printing is enabled. pretty: bool, } +impl<'a> Writer<'a> { + /// Creates a new writer. + fn new(introspector: &'a Introspector, pretty: bool) -> Self { + Self { buf: String::new(), level: 0, introspector, pretty } + } +} + /// Writes a newline and indent, if pretty printing is enabled. fn write_indent(w: &mut Writer) { if w.pretty { @@ -306,6 +316,12 @@ fn write_escape(w: &mut Writer, c: char) -> StrResult<()> { /// Encode a laid out frame into the writer. fn write_frame(w: &mut Writer, frame: &HtmlFrame) { - let svg = typst_svg::svg_html_frame(&frame.inner, frame.text_size); + let svg = typst_svg::svg_html_frame( + &frame.inner, + frame.text_size, + frame.id.as_deref(), + &frame.link_points, + w.introspector, + ); w.buf.push_str(&svg); } diff --git a/crates/typst-html/src/lib.rs b/crates/typst-html/src/lib.rs index d7b29dbbc..42b3c5d6f 100644 --- a/crates/typst-html/src/lib.rs +++ b/crates/typst-html/src/lib.rs @@ -8,6 +8,7 @@ mod document; mod dom; mod encode; mod fragment; +mod link; mod rules; mod tag; mod typed; @@ -79,6 +80,19 @@ impl HtmlElem { self } + /// Adds the attribute to the element if value is not `None`. + pub fn with_optional_attr( + self, + attr: HtmlAttr, + value: Option>, + ) -> Self { + if let Some(value) = value { + self.with_attr(attr, value) + } else { + self + } + } + /// Adds CSS styles to an element. fn with_styles(self, properties: css::Properties) -> Self { if let Some(value) = properties.into_inline_styles() { diff --git a/crates/typst-html/src/link.rs b/crates/typst-html/src/link.rs new file mode 100644 index 000000000..0fcbe906a --- /dev/null +++ b/crates/typst-html/src/link.rs @@ -0,0 +1,290 @@ +use std::collections::{HashMap, HashSet, VecDeque}; + +use comemo::Track; +use ecow::{eco_format, EcoString}; +use typst_library::foundations::{Label, NativeElement}; +use typst_library::introspection::{Introspector, Location, Tag}; +use typst_library::layout::{Frame, FrameItem, Point}; +use typst_library::model::{Destination, LinkElem}; +use typst_utils::PicoStr; + +use crate::{attr, tag, HtmlElement, HtmlNode}; + +/// Searches for links within a frame. +/// +/// If all links are created via `LinkElem` in the future, this can be removed +/// in favor of the query in `identify_link_targets`. For the time being, some +/// links are created without existence of a `LinkElem`, so this is +/// unfortunately necessary. +pub fn introspect_frame_links(frame: &Frame, targets: &mut HashSet) { + for (_, item) in frame.items() { + match item { + FrameItem::Link(Destination::Location(loc), _) => { + targets.insert(*loc); + } + FrameItem::Group(group) => introspect_frame_links(&group.frame, targets), + _ => {} + } + } +} + +/// Attaches IDs to nodes produced by link targets to make them linkable. +/// +/// May produce ``s for link targets that turned into text nodes or no +/// nodes at all. See the [`LinkElem`] documentation for more details. +pub fn identify_link_targets( + root: &mut HtmlElement, + introspector: &mut Introspector, + mut targets: HashSet, +) { + // Query for all links with an intra-doc (i.e. `Location`) destination to + // know what needs IDs. + targets.extend( + introspector + .query(&LinkElem::ELEM.select()) + .iter() + .map(|elem| elem.to_packed::().unwrap()) + .filter_map(|elem| match elem.dest.resolve(introspector.track()) { + Ok(Destination::Location(loc)) => Some(loc), + _ => None, + }), + ); + + if targets.is_empty() { + // Nothing to do. + return; + } + + // Assign IDs to all link targets. + let mut work = Work::new(); + traverse( + &mut work, + &targets, + &mut Identificator::new(introspector), + &mut root.children, + ); + + // Add the mapping from locations to IDs to the introspector to make it + // available to links in the next iteration. + introspector.set_html_ids(work.ids); +} + +/// Traverses a list of nodes. +fn traverse( + work: &mut Work, + targets: &HashSet, + identificator: &mut Identificator<'_>, + nodes: &mut Vec, +) { + let mut i = 0; + while i < nodes.len() { + let node = &mut nodes[i]; + match node { + // When visiting a start tag, we check whether the element needs an + // ID and if so, add it to the queue, so that its first child node + // receives an ID. + HtmlNode::Tag(Tag::Start(elem)) => { + let loc = elem.location().unwrap(); + if targets.contains(&loc) { + work.enqueue(loc, elem.label()); + } + } + + // When we reach an end tag, we check whether it closes an element + // that is still in our queue. If so, that means the element + // produced no nodes and we need to insert an empty span. + HtmlNode::Tag(Tag::End(loc, _)) => { + work.remove(*loc, |label| { + let mut element = HtmlElement::new(tag::span); + let id = identificator.assign(&mut element, label); + nodes.insert(i + 1, HtmlNode::Element(element)); + id + }); + } + + // When visiting an element and the queue is non-empty, we assign an + // ID. Then, we traverse its children. + HtmlNode::Element(element) => { + work.drain(|label| identificator.assign(element, label)); + traverse(work, targets, identificator, &mut element.children); + } + + // When visiting text and the queue is non-empty, we generate a span + // and assign an ID. + HtmlNode::Text(..) => { + work.drain(|label| { + let mut element = + HtmlElement::new(tag::span).with_children(vec![node.clone()]); + let id = identificator.assign(&mut element, label); + *node = HtmlNode::Element(element); + id + }); + } + + // When visiting a frame and the queue is non-empty, we assign an + // ID to it (will be added to the resulting SVG element). + HtmlNode::Frame(frame) => { + work.drain(|label| { + frame.id.get_or_insert_with(|| identificator.identify(label)).clone() + }); + traverse_frame( + work, + targets, + identificator, + &frame.inner, + &mut frame.link_points, + ); + } + } + + i += 1; + } +} + +/// Traverses a frame embedded in HTML. +fn traverse_frame( + work: &mut Work, + targets: &HashSet, + identificator: &mut Identificator<'_>, + frame: &Frame, + link_points: &mut Vec<(Point, EcoString)>, +) { + for (_, item) in frame.items() { + match item { + FrameItem::Tag(Tag::Start(elem)) => { + let loc = elem.location().unwrap(); + if targets.contains(&loc) { + let pos = identificator.introspector.position(loc).point; + let id = identificator.identify(elem.label()); + work.ids.insert(loc, id.clone()); + link_points.push((pos, id)); + } + } + FrameItem::Group(group) => { + traverse_frame(work, targets, identificator, &group.frame, link_points); + } + _ => {} + } + } +} + +/// Keeps track of the work to be done during ID generation. +struct Work { + /// The locations and labels of elements we need to assign an ID to right + /// now. + queue: VecDeque<(Location, Option