Support smartquotes in HTML export (#6710)

Co-authored-by: Malo <57839069+MDLC01@users.noreply.github.com>
This commit is contained in:
Laurenz 2025-08-06 14:32:39 +02:00 committed by GitHub
parent cfce90ec29
commit 298c293181
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 166 additions and 27 deletions

View File

@ -5,20 +5,24 @@ use typst_library::foundations::{Content, StyleChain, Target, TargetElem};
use typst_library::introspection::{SplitLocator, TagElem};
use typst_library::layout::{Abs, Axes, Region, Size};
use typst_library::routines::Pair;
use typst_library::text::{LinebreakElem, SmartQuoteElem, SpaceElem, TextElem};
use typst_library::text::{
LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem,
is_default_ignorable,
};
use crate::fragment::html_fragment;
use crate::fragment::{html_block_fragment, html_inline_fragment};
use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, tag};
/// Converts realized content into HTML nodes.
pub fn convert_to_nodes<'a>(
engine: &mut Engine,
locator: &mut SplitLocator,
quoter: &mut SmartQuoter,
children: impl IntoIterator<Item = Pair<'a>>,
) -> SourceResult<EcoVec<HtmlNode>> {
let mut output = EcoVec::new();
for (child, styles) in children {
handle(engine, child, locator, styles, &mut output)?;
handle(engine, child, locator, styles, quoter, &mut output)?;
}
Ok(output)
}
@ -29,6 +33,7 @@ fn handle(
child: &Content,
locator: &mut SplitLocator,
styles: StyleChain,
quoter: &mut SmartQuoter,
output: &mut EcoVec<HtmlNode>,
) -> SourceResult<()> {
if let Some(elem) = child.to_packed::<TagElem>() {
@ -36,7 +41,22 @@ fn handle(
} else if let Some(elem) = child.to_packed::<HtmlElem>() {
let mut children = EcoVec::new();
if let Some(body) = elem.body.get_ref(styles) {
children = html_fragment(engine, body, locator.next(&elem.span()), styles)?;
if tag::is_block_by_default(elem.tag) {
children = html_block_fragment(
engine,
body,
locator.next(&elem.span()),
styles,
)?;
// Block-level elements reset the smart quoting state. This part
// is unfortunately untested as it's currently not possible to
// create inline-level content next to block-level content
// without a paragraph automatically appearing.
*quoter = SmartQuoter::new();
} else {
children = html_inline_fragment(engine, body, locator, quoter, styles)?;
}
}
let element = HtmlElement {
tag: elem.tag,
@ -57,10 +77,20 @@ fn handle(
} else if let Some(elem) = child.to_packed::<LinebreakElem>() {
output.push(HtmlElement::new(tag::br).spanned(elem.span()).into());
} else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
output.push(HtmlNode::text(
if elem.double.get(styles) { '"' } else { '\'' },
child.span(),
));
let double = elem.double.get(styles);
if elem.enabled.get(styles) {
let before = last_char(output);
let quotes = SmartQuotes::get(
elem.quotes.get_ref(styles),
styles.get(TextElem::lang),
styles.get(TextElem::region),
elem.alternative.get(styles),
);
let quote = quoter.quote(before, &quotes, double);
output.push(HtmlNode::text(quote, child.span()));
} else {
output.push(HtmlNode::text(if double { '"' } else { '\'' }, child.span()));
}
} else if let Some(elem) = child.to_packed::<FrameElem>() {
let locator = locator.next(&elem.span());
let style = TargetElem::target.set(Target::Paged).wrap();
@ -82,6 +112,20 @@ fn handle(
Ok(())
}
/// Finds the last character in `nodes` that is not default-ignorable.
///
/// The nodes are scanned back to front. Text nodes are searched from their
/// end, element nodes are searched recursively through their children, and
/// every other kind of node contributes nothing. Returns `None` when no
/// such character exists.
fn last_char(nodes: &[HtmlNode]) -> Option<char> {
    nodes.iter().rev().find_map(|node| match node {
        HtmlNode::Text(text, _) => {
            text.chars().rev().find(|&c| !is_default_ignorable(c))
        }
        HtmlNode::Element(element) => last_char(&element.children),
        _ => None,
    })
}
/// Checks whether the given element is an inline-level HTML element.
pub fn is_inline(elem: &Content) -> bool {
elem.to_packed::<HtmlElem>()

View File

@ -13,6 +13,7 @@ use typst_library::introspection::{
use typst_library::layout::{Point, Position, Transform};
use typst_library::model::DocumentInfo;
use typst_library::routines::{Arenas, RealizationKind, Routines};
use typst_library::text::SmartQuoter;
use typst_syntax::Span;
use typst_utils::NonZeroExt;
@ -85,6 +86,7 @@ fn html_document_impl(
let output = crate::convert::convert_to_nodes(
&mut engine,
&mut locator,
&mut SmartQuoter::new(),
children.iter().copied(),
)?;

View File

@ -3,22 +3,24 @@ use ecow::EcoVec;
use typst_library::diag::{At, SourceResult};
use typst_library::engine::{Engine, Route, Sink, Traced};
use typst_library::foundations::{Content, StyleChain};
use typst_library::introspection::{Introspector, Locator, LocatorLink};
use typst_library::introspection::{Introspector, Locator, LocatorLink, SplitLocator};
use typst_library::World;
use typst_library::routines::{Arenas, FragmentKind, RealizationKind, Routines};
use typst_library::routines::{Arenas, FragmentKind, Pair, RealizationKind, Routines};
use typst_library::text::SmartQuoter;
use crate::HtmlNode;
/// Produce HTML nodes from content.
#[typst_macros::time(name = "html fragment")]
pub fn html_fragment(
/// Produces HTML nodes from content contained in an HTML element that is
/// block-level by default.
#[typst_macros::time(name = "html block fragment")]
pub fn html_block_fragment(
engine: &mut Engine,
content: &Content,
locator: Locator,
styles: StyleChain,
) -> SourceResult<EcoVec<HtmlNode>> {
html_fragment_impl(
html_block_fragment_impl(
engine.routines,
engine.world,
engine.introspector,
@ -34,7 +36,7 @@ pub fn html_fragment(
/// The cached, internal implementation of [`html_block_fragment`].
#[comemo::memoize]
#[allow(clippy::too_many_arguments)]
fn html_fragment_impl(
fn html_block_fragment_impl(
routines: &Routines,
world: Tracked<dyn World + '_>,
introspector: Tracked<Introspector>,
@ -59,19 +61,65 @@ fn html_fragment_impl(
engine.route.check_html_depth().at(content.span())?;
let arenas = Arenas::default();
let children = (engine.routines.realize)(
// No need to know about the `FragmentKind` because we handle both
// uniformly.
let children = realize_fragment(&mut engine, &mut locator, &arenas, content, styles)?;
crate::convert::convert_to_nodes(
&mut engine,
&mut locator,
&mut SmartQuoter::new(),
children.iter().copied(),
)
}
/// Produces HTML nodes from content contained in an HTML element that is
/// inline-level by default.
///
/// The difference from block-level content is that inline-level content
/// shares its smartquoting state with the surrounding inline-level content,
/// which is why the caller's `locator` and `quoter` are passed in by mutable
/// reference. This requires mutable state, which is at odds with memoization.
/// However, the caching granularity would be unnecessarily high anyway if
/// every single fragment was cached, so this works out pretty well together.
///
/// Returns an error if the HTML depth check fails or if realizing or
/// converting the content fails. The route depth increase performed here is
/// undone on every path, including the error paths.
#[typst_macros::time(name = "html inline fragment")]
pub fn html_inline_fragment(
    engine: &mut Engine,
    content: &Content,
    locator: &mut SplitLocator,
    quoter: &mut SmartQuoter,
    styles: StyleChain,
) -> SourceResult<EcoVec<HtmlNode>> {
    engine.route.increase();

    // Chain all fallible steps into a single `Result` instead of returning
    // early with `?`, so that the `decrease` below always balances the
    // `increase` above, even when one of the steps fails.
    let result = engine
        .route
        .check_html_depth()
        .at(content.span())
        .and_then(|()| {
            let arenas = Arenas::default();
            let children =
                realize_fragment(engine, locator, &arenas, content, styles)?;
            crate::convert::convert_to_nodes(
                engine,
                locator,
                quoter,
                children.iter().copied(),
            )
        });

    engine.route.decrease();
    result
}
/// Realizes the body of an HTML fragment.
fn realize_fragment<'a>(
engine: &mut Engine,
locator: &mut SplitLocator,
arenas: &'a Arenas,
content: &'a Content,
styles: StyleChain<'a>,
) -> SourceResult<Vec<Pair<'a>>> {
(engine.routines.realize)(
RealizationKind::HtmlFragment {
// We ignore the `FragmentKind` because we handle both uniformly.
kind: &mut FragmentKind::Block,
is_inline: crate::convert::is_inline,
},
&mut engine,
&mut locator,
&arenas,
engine,
locator,
arenas,
content,
styles,
)?;
crate::convert::convert_to_nodes(&mut engine, &mut locator, children.iter().copied())
)
}

View File

@ -6,8 +6,8 @@
</head>
<body>
<h2>Heading is no paragraph</h2>
<p>I'm a paragraph.</p>
<div>I'm not.</div>
<p>I’m a paragraph.</p>
<div>I’m not.</div>
<div>
<p>We are two.</p>
<p>So we are paragraphs.</p>

View File

@ -5,6 +5,6 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<p>When you said that “he surely meant that she intended to say “I'm sorry””, I was quite confused.</p>
<p>When you said that “he surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.</p>
</body>
</html>

View File

@ -0,0 +1,13 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<p>Applies across <span>“inline-level</span> elements”.</p>
<p>Does not apply across</p>
<div>“block-level</div>
<p>elements“.</p>
</body>
</html>

View File

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<p>When you said <em>that “he</em> surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.</p>
<p>‘<span style="display: inline-block">box</span>’</p>
</body>
</html>

View File

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<p>When you said that “he surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.</p>
<p>‘<span style="display: inline-block;">box</span>’</p>
</body>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

View File

@ -113,6 +113,16 @@ Some people's thought on this would be #[#set smartquote(enabled: false); "stran
"'test' statement" \
"statement 'test'"
--- smartquote-nesting-twice render html ---
When you said _that "he_ surely meant that 'she intended to say "I'm sorry"'", I was quite confused.
'#box[box]'
--- smartquote-inline-block html ---
Applies across #html.span["inline-level] elements".
Does not apply across #html.div["block-level] elements".
--- smartquote-with-embedding-chars ---
#set text(lang: "fr")
"#"\u{202A}"bonjour#"\u{202C}"" \