Support intra-doc links in HTML

This commit is contained in:
Laurenz 2025-07-15 12:19:21 +02:00
parent 733a2cda15
commit 83ee0c0e73
9 changed files with 303 additions and 22 deletions

View File

@ -83,8 +83,10 @@ fn html_document_impl(
&mut locator,
children.iter().copied(),
)?;
let introspector = introspect_html(&output);
let root = root_element(output, &info)?;
let mut introspector = introspect_html(&output);
let mut root = root_element(output, &info)?;
crate::link::identify_link_targets(&mut root, &mut introspector);
Ok(HtmlDocument { info, root, introspector })
}

View File

@ -289,11 +289,17 @@ pub struct HtmlFrame {
/// frame with em units to make text in and outside of the frame sized
/// consistently.
pub text_size: Abs,
/// An ID to assign to the SVG.
pub id: Option<EcoString>,
}
impl HtmlFrame {
/// Wraps a laid-out frame.
pub fn new(inner: Frame, styles: StyleChain) -> Self {
Self { inner, text_size: styles.resolve(TextElem::size) }
Self {
inner,
text_size: styles.resolve(TextElem::size),
id: None,
}
}
}

View File

@ -306,6 +306,7 @@ fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
/// Encode a laid out frame into the writer.
fn write_frame(w: &mut Writer, frame: &HtmlFrame) {
let svg = typst_svg::svg_html_frame(&frame.inner, frame.text_size);
let svg =
typst_svg::svg_html_frame(&frame.inner, frame.text_size, frame.id.as_deref());
w.buf.push_str(&svg);
}

View File

@ -8,6 +8,7 @@ mod document;
mod dom;
mod encode;
mod fragment;
mod link;
mod rules;
mod tag;
mod typed;

View File

@ -0,0 +1,232 @@
use std::collections::{HashMap, HashSet, VecDeque};
use comemo::Track;
use ecow::{eco_format, EcoString};
use typst_library::foundations::{Label, NativeElement};
use typst_library::introspection::{Introspector, Location, Tag};
use typst_library::model::{Destination, LinkElem};
use typst_utils::PicoStr;
use crate::{attr, tag, HtmlElement, HtmlNode};
/// Attaches IDs to nodes produced by link targets to make them linkable.
///
/// May produce `<span>`s for link targets that turned into text nodes or no
/// nodes at all. See the [`LinkElem`] documentation for more details.
pub fn identify_link_targets(root: &mut HtmlElement, introspector: &mut Introspector) {
// Query for all links with an intra-doc (i.e. `Location`) destination to
// know what needs IDs.
let targets = introspector
.query(&LinkElem::ELEM.select())
.iter()
.map(|elem| elem.to_packed::<LinkElem>().unwrap())
.filter_map(|elem| match elem.dest.resolve(introspector.track()) {
Ok(Destination::Location(loc)) => Some(loc),
_ => None,
})
.collect::<HashSet<_>>();
if targets.is_empty() {
// Nothing to do.
return;
}
// Assign IDs to all link targets.
let mut work = Work::new();
traverse(
&mut work,
&targets,
&mut Identificator::new(introspector),
&mut root.children,
);
// Add the mapping from locations to IDs to the introspector to make it
// available to links in the next iteration.
introspector.set_html_ids(work.ids);
}
/// Traverses a list of nodes.
fn traverse(
work: &mut Work,
targets: &HashSet<Location>,
identificator: &mut Identificator<'_>,
nodes: &mut Vec<HtmlNode>,
) {
let mut i = 0;
while i < nodes.len() {
let node = &mut nodes[i];
match node {
// When visiting a start tag, we check whether the element needs an
// ID and if so, add it to the queue, so that its first child node
// receives an ID.
HtmlNode::Tag(Tag::Start(elem)) => {
let loc = elem.location().unwrap();
if targets.contains(&loc) {
work.enqueue(loc, elem.label());
}
}
// When we reach an end tag, we check whether it closes an element
// that is still in our queue. If so, that means the element
// produced no nodes and we need to insert an empty span.
HtmlNode::Tag(Tag::End(loc, _)) => {
work.remove(*loc, |label| {
let mut element = HtmlElement::new(tag::span);
let id = identificator.assign(&mut element, label);
nodes.insert(i + 1, HtmlNode::Element(element));
id
});
}
// When visiting an element and the queue is non-empty, we assign an
// ID. Then, we traverse its children.
HtmlNode::Element(element) => {
work.drain(|label| identificator.assign(element, label));
traverse(work, targets, identificator, &mut element.children);
}
// When visiting text and the queue is non-empty, we generate a span
// and assign an ID.
HtmlNode::Text(..) => {
work.drain(|label| {
let mut element =
HtmlElement::new(tag::span).with_children(vec![node.clone()]);
let id = identificator.assign(&mut element, label);
*node = HtmlNode::Element(element);
id
});
}
// When visiting a frame and the queue is non-empty, we assign an
// ID to it (will be added to the resulting SVG element).
HtmlNode::Frame(frame) => {
work.drain(|label| {
frame.id.get_or_insert_with(|| identificator.identify(label)).clone()
});
}
}
i += 1;
}
}
/// Keeps track of the work to be done during ID generation.
struct Work {
/// The locations and labels of elements we need to assign an ID to right
/// now.
queue: VecDeque<(Location, Option<Label>)>,
/// The resulting mapping from element location's to HTML IDs.
ids: HashMap<Location, EcoString>,
}
impl Work {
/// Sets up.
fn new() -> Self {
Self { queue: VecDeque::new(), ids: HashMap::new() }
}
/// Marks the element with the given location and label as in need of an
/// ID. A subsequent call to `drain` will call `f`.
fn enqueue(&mut self, loc: Location, label: Option<Label>) {
self.queue.push_back((loc, label))
}
/// If one or multiple elements are in need of an ID, calls `f` to generate
/// an ID and apply it to the current node with `f`, and then establishes a
/// mapping from the elements' locations to that ID.
fn drain(&mut self, f: impl FnOnce(Option<Label>) -> EcoString) {
if let Some(&(_, label)) = self.queue.front() {
let id = f(label);
for (loc, _) in self.queue.drain(..) {
self.ids.insert(loc, id.clone());
}
}
}
/// Similar to `drain`, but only for a specific given location.
fn remove(&mut self, loc: Location, f: impl FnOnce(Option<Label>) -> EcoString) {
if let Some(i) = self.queue.iter().position(|&(l, _)| l == loc) {
let (_, label) = self.queue.remove(i).unwrap();
let id = f(label);
self.ids.insert(loc, id.clone());
}
}
}
/// Creates unique IDs for elements.
struct Identificator<'a> {
introspector: &'a Introspector,
loc_counter: usize,
label_counter: HashMap<Label, usize>,
}
impl<'a> Identificator<'a> {
/// Creates a new identificator.
fn new(introspector: &'a Introspector) -> Self {
Self {
introspector,
loc_counter: 0,
label_counter: HashMap::new(),
}
}
/// Assigns an ID to an element or reuses an existing ID.
fn assign(&mut self, element: &mut HtmlElement, label: Option<Label>) -> EcoString {
element.attrs.get(attr::id).cloned().unwrap_or_else(|| {
let id = self.identify(label);
element.attrs.push_front(attr::id, id.clone());
id
})
}
/// Generates an ID, potentially based on a label.
fn identify(&mut self, label: Option<Label>) -> EcoString {
if let Some(label) = label {
let resolved = label.resolve();
let text = resolved.as_str();
if can_use_label_as_id(text) {
if self.introspector.label_count(label) == 1 {
return text.into();
}
let counter = self.label_counter.entry(label).or_insert(0);
*counter += 1;
return disambiguate(self.introspector, text, counter);
}
}
self.loc_counter += 1;
disambiguate(self.introspector, "loc", &mut self.loc_counter)
}
}
/// Whether the label is both a valid CSS identifier and a valid URL fragment
/// for linking.
///
/// This is slightly more restrictive than HTML and CSS, but easier to
/// understand and explain.
fn can_use_label_as_id(label: &str) -> bool {
!label.is_empty()
&& label.chars().all(|c| c.is_alphanumeric() || matches!(c, '-' | '_'))
&& !label.starts_with(|c: char| c.is_numeric() || c == '-')
}
/// Disambiguates `text` with the suffix `-{counter}`, while ensuring that this
/// does not result in a collision with an existing label.
fn disambiguate(
introspector: &Introspector,
text: &str,
counter: &mut usize,
) -> EcoString {
loop {
let disambiguated = eco_format!("{text}-{counter}");
if PicoStr::get(&disambiguated)
.and_then(Label::new)
.is_some_and(|label| introspector.label_count(label) > 0)
{
*counter += 1;
} else {
break disambiguated;
}
}
}

View File

@ -1,7 +1,7 @@
use std::num::NonZeroUsize;
use ecow::{eco_format, EcoVec};
use typst_library::diag::warning;
use typst_library::diag::{warning, At};
use typst_library::foundations::{
Content, NativeElement, NativeRuleMap, ShowFn, Smart, StyleChain, Target,
};
@ -141,20 +141,33 @@ const TERMS_RULE: ShowFn<TermsElem> = |elem, _, styles| {
};
const LINK_RULE: ShowFn<LinkElem> = |elem, engine, _| {
let body = elem.body.clone();
Ok(if let LinkTarget::Dest(Destination::Url(url)) = &elem.dest {
HtmlElem::new(tag::a)
.with_attr(attr::href, url.clone().into_inner())
.with_body(Some(body))
.pack()
.spanned(elem.span())
} else {
engine.sink.warn(warning!(
elem.span(),
"non-URL links are not yet supported by HTML export"
));
body
})
let dest = elem.dest.resolve(engine.introspector).at(elem.span())?;
let href = match dest {
Destination::Url(url) => Some(url.clone().into_inner()),
Destination::Location(location) => {
let id = engine
.introspector
.html_id(location)
.cloned()
.ok_or("failed to determine link anchor")
.at(elem.span())?;
Some(eco_format!("#{id}"))
}
Destination::Position(_) => {
engine.sink.warn(warning!(
elem.span(),
"positional link was ignored during HTML export"
));
None
}
};
Ok(HtmlElem::new(tag::a)
.with_optional_attr(attr::href, href)
.with_body(Some(elem.body.clone()))
.pack()
.spanned(elem.span()))
};
const HEADING_RULE: ShowFn<HeadingElem> = |elem, engine, styles| {

View File

@ -4,7 +4,7 @@ use std::hash::Hash;
use std::num::NonZeroUsize;
use std::sync::RwLock;
use ecow::EcoVec;
use ecow::{EcoString, EcoVec};
use smallvec::SmallVec;
use typst_utils::NonZeroExt;
@ -35,6 +35,11 @@ pub struct Introspector {
/// Accelerates lookup of elements by label.
labels: MultiMap<Label, usize>,
/// Maps from element locations to assigned HTML IDs. This used to support
/// intra-doc links in HTML export. In paged export, is is simply left
/// empty and [`Self::html_id`] is not used.
html_ids: HashMap<Location, EcoString>,
/// Caches queries done on the introspector. This is important because
/// even if all top-level queries are distinct, they often have shared
/// subqueries. Example: Individual counter queries with `before` that
@ -51,6 +56,17 @@ impl Introspector {
self.elems.iter().map(|(c, _)| c)
}
/// Checks how many times a label exists.
pub fn label_count(&self, label: Label) -> usize {
self.labels.get(&label).len()
}
/// Enriches an existing introspector with HTML IDs, which were assigned
/// to the DOM in a post-processing step.
pub fn set_html_ids(&mut self, html_ids: HashMap<Location, EcoString>) {
self.html_ids = html_ids;
}
/// Retrieves the element with the given index.
#[track_caller]
fn get_by_idx(&self, idx: usize) -> &Content {
@ -268,6 +284,11 @@ impl Introspector {
self.page_supplements.get(page.get() - 1).cloned().unwrap_or_default()
}
/// Retrieves the ID to link to for this location in HTML export.
pub fn html_id(&self, location: Location) -> Option<&EcoString> {
self.html_ids.get(&location)
}
/// Try to find a location for an element with the given `key` hash
/// that is closest after the `anchor`.
///
@ -343,6 +364,7 @@ pub struct IntrospectorBuilder {
pub pages: usize,
pub page_numberings: Vec<Option<Numbering>>,
pub page_supplements: Vec<Content>,
pub html_ids: HashMap<Location, EcoString>,
seen: HashSet<Location>,
insertions: MultiMap<Location, Vec<Pair>>,
keys: MultiMap<u128, Location>,
@ -426,6 +448,7 @@ impl IntrospectorBuilder {
pages: self.pages,
page_numberings: self.page_numberings,
page_supplements: self.page_supplements,
html_ids: self.html_ids,
elems,
keys: self.keys,
locations: self.locations,

View File

@ -36,7 +36,7 @@ use crate::text::TextElem;
/// # Syntax
/// This function also has dedicated syntax: Text that starts with `http://` or
/// `https://` is automatically turned into a link.
#[elem]
#[elem(Locatable)]
pub struct LinkElem {
/// The destination the link points to.
///

View File

@ -47,12 +47,15 @@ pub fn svg_frame(frame: &Frame) -> String {
/// Export a frame into an SVG suitable for embedding into HTML.
#[typst_macros::time(name = "svg html frame")]
pub fn svg_html_frame(frame: &Frame, text_size: Abs) -> String {
pub fn svg_html_frame(frame: &Frame, text_size: Abs, id: Option<&str>) -> String {
let mut renderer = SVGRenderer::with_options(xmlwriter::Options {
indent: xmlwriter::Indent::None,
..Default::default()
});
renderer.write_header_with_custom_attrs(frame.size(), |xml| {
if let Some(id) = id {
xml.write_attribute("id", id);
}
xml.write_attribute("class", "typst-frame");
xml.write_attribute_fmt(
"style",