From 298c293181a24c6ba7325820dccefb3611bfeda3 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 6 Aug 2025 14:32:39 +0200 Subject: [PATCH] Support smartquotes in HTML export (#6710) Co-authored-by: Malo <57839069+MDLC01@users.noreply.github.com> --- crates/typst-html/src/convert.rs | 60 ++++++++++++-- crates/typst-html/src/document.rs | 2 + crates/typst-html/src/fragment.rs | 80 +++++++++++++++---- tests/ref/html/par-semantic-html.html | 4 +- tests/ref/html/quote-nesting-html.html | 2 +- tests/ref/html/smartquote-inline-block.html | 13 +++ tests/ref/html/smartquote-nesting-twice.html | 11 +++ tests/ref/html/smartquotes-html.html | 11 +++ tests/ref/smartquote-nesting-twice.png | Bin 0 -> 2228 bytes tests/suite/text/smartquote.typ | 10 +++ 10 files changed, 166 insertions(+), 27 deletions(-) create mode 100644 tests/ref/html/smartquote-inline-block.html create mode 100644 tests/ref/html/smartquote-nesting-twice.html create mode 100644 tests/ref/html/smartquotes-html.html create mode 100644 tests/ref/smartquote-nesting-twice.png diff --git a/crates/typst-html/src/convert.rs b/crates/typst-html/src/convert.rs index 65a4dabab..def5b0431 100644 --- a/crates/typst-html/src/convert.rs +++ b/crates/typst-html/src/convert.rs @@ -5,20 +5,24 @@ use typst_library::foundations::{Content, StyleChain, Target, TargetElem}; use typst_library::introspection::{SplitLocator, TagElem}; use typst_library::layout::{Abs, Axes, Region, Size}; use typst_library::routines::Pair; -use typst_library::text::{LinebreakElem, SmartQuoteElem, SpaceElem, TextElem}; +use typst_library::text::{ + LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem, + is_default_ignorable, +}; -use crate::fragment::html_fragment; +use crate::fragment::{html_block_fragment, html_inline_fragment}; use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, tag}; /// Converts realized content into HTML nodes. 
pub fn convert_to_nodes<'a>( engine: &mut Engine, locator: &mut SplitLocator, + quoter: &mut SmartQuoter, children: impl IntoIterator>, ) -> SourceResult> { let mut output = EcoVec::new(); for (child, styles) in children { - handle(engine, child, locator, styles, &mut output)?; + handle(engine, child, locator, styles, quoter, &mut output)?; } Ok(output) } @@ -29,6 +33,7 @@ fn handle( child: &Content, locator: &mut SplitLocator, styles: StyleChain, + quoter: &mut SmartQuoter, output: &mut EcoVec, ) -> SourceResult<()> { if let Some(elem) = child.to_packed::() { @@ -36,7 +41,22 @@ fn handle( } else if let Some(elem) = child.to_packed::() { let mut children = EcoVec::new(); if let Some(body) = elem.body.get_ref(styles) { - children = html_fragment(engine, body, locator.next(&elem.span()), styles)?; + if tag::is_block_by_default(elem.tag) { + children = html_block_fragment( + engine, + body, + locator.next(&elem.span()), + styles, + )?; + + // Block-level elements reset the smart quoting state. This part + // is unfortunately untested as it's currently not possible to + // create inline-level content next to block-level content + // without a paragraph automatically appearing. 
+ *quoter = SmartQuoter::new(); + } else { + children = html_inline_fragment(engine, body, locator, quoter, styles)?; + } } let element = HtmlElement { tag: elem.tag, @@ -57,10 +77,20 @@ fn handle( } else if let Some(elem) = child.to_packed::() { output.push(HtmlElement::new(tag::br).spanned(elem.span()).into()); } else if let Some(elem) = child.to_packed::() { - output.push(HtmlNode::text( - if elem.double.get(styles) { '"' } else { '\'' }, - child.span(), - )); + let double = elem.double.get(styles); + if elem.enabled.get(styles) { + let before = last_char(output); + let quotes = SmartQuotes::get( + elem.quotes.get_ref(styles), + styles.get(TextElem::lang), + styles.get(TextElem::region), + elem.alternative.get(styles), + ); + let quote = quoter.quote(before, &quotes, double); + output.push(HtmlNode::text(quote, child.span())); + } else { + output.push(HtmlNode::text(if double { '"' } else { '\'' }, child.span())); + } } else if let Some(elem) = child.to_packed::() { let locator = locator.next(&elem.span()); let style = TargetElem::target.set(Target::Paged).wrap(); @@ -82,6 +112,20 @@ fn handle( Ok(()) } +/// Returns the last non-default ignorable character from the passed nodes. +fn last_char(nodes: &[HtmlNode]) -> Option { + for node in nodes.iter().rev() { + if let Some(c) = match node { + HtmlNode::Text(s, _) => s.chars().rev().find(|&c| !is_default_ignorable(c)), + HtmlNode::Element(e) => last_char(&e.children), + _ => None, + } { + return Some(c); + } + } + None +} + /// Checks whether the given element is an inline-level HTML element. 
pub fn is_inline(elem: &Content) -> bool { elem.to_packed::() diff --git a/crates/typst-html/src/document.rs b/crates/typst-html/src/document.rs index d5ba536ca..1dc85572c 100644 --- a/crates/typst-html/src/document.rs +++ b/crates/typst-html/src/document.rs @@ -13,6 +13,7 @@ use typst_library::introspection::{ use typst_library::layout::{Point, Position, Transform}; use typst_library::model::DocumentInfo; use typst_library::routines::{Arenas, RealizationKind, Routines}; +use typst_library::text::SmartQuoter; use typst_syntax::Span; use typst_utils::NonZeroExt; @@ -85,6 +86,7 @@ fn html_document_impl( let output = crate::convert::convert_to_nodes( &mut engine, &mut locator, + &mut SmartQuoter::new(), children.iter().copied(), )?; diff --git a/crates/typst-html/src/fragment.rs b/crates/typst-html/src/fragment.rs index 73a5ed878..773c66faf 100644 --- a/crates/typst-html/src/fragment.rs +++ b/crates/typst-html/src/fragment.rs @@ -3,22 +3,24 @@ use ecow::EcoVec; use typst_library::diag::{At, SourceResult}; use typst_library::engine::{Engine, Route, Sink, Traced}; use typst_library::foundations::{Content, StyleChain}; -use typst_library::introspection::{Introspector, Locator, LocatorLink}; +use typst_library::introspection::{Introspector, Locator, LocatorLink, SplitLocator}; use typst_library::World; -use typst_library::routines::{Arenas, FragmentKind, RealizationKind, Routines}; +use typst_library::routines::{Arenas, FragmentKind, Pair, RealizationKind, Routines}; +use typst_library::text::SmartQuoter; use crate::HtmlNode; -/// Produce HTML nodes from content. -#[typst_macros::time(name = "html fragment")] -pub fn html_fragment( +/// Produces HTML nodes from content contained in an HTML element that is +/// block-level by default. 
+#[typst_macros::time(name = "html block fragment")] +pub fn html_block_fragment( engine: &mut Engine, content: &Content, locator: Locator, styles: StyleChain, ) -> SourceResult> { - html_fragment_impl( + html_block_fragment_impl( engine.routines, engine.world, engine.introspector, @@ -34,7 +36,7 @@ pub fn html_fragment( /// The cached, internal implementation of [`html_fragment`]. #[comemo::memoize] #[allow(clippy::too_many_arguments)] -fn html_fragment_impl( +fn html_block_fragment_impl( routines: &Routines, world: Tracked, introspector: Tracked, @@ -59,19 +61,65 @@ fn html_fragment_impl( engine.route.check_html_depth().at(content.span())?; let arenas = Arenas::default(); - let children = (engine.routines.realize)( - // No need to know about the `FragmentKind` because we handle both - // uniformly. + let children = realize_fragment(&mut engine, &mut locator, &arenas, content, styles)?; + crate::convert::convert_to_nodes( + &mut engine, + &mut locator, + &mut SmartQuoter::new(), + children.iter().copied(), + ) +} + +/// Produces HTML nodes from content contained in an HTML element that is +/// inline-level by default. +/// +/// The difference to block-level content is that inline-level content has +/// shared smartquoting state with surrounding inline-level content. This +/// requires mutable state, which is at odds with memoization. However, the +/// caching granularity would be unnecessarily high anyway if every single +/// fragment was cached, so this works out pretty well together. 
+#[typst_macros::time(name = "html inline fragment")] +pub fn html_inline_fragment( + engine: &mut Engine, + content: &Content, + locator: &mut SplitLocator, + quoter: &mut SmartQuoter, + styles: StyleChain, +) -> SourceResult> { + engine.route.increase(); + engine.route.check_html_depth().at(content.span())?; + + let arenas = Arenas::default(); + let children = realize_fragment(engine, locator, &arenas, content, styles)?; + let result = crate::convert::convert_to_nodes( + engine, + locator, + quoter, + children.iter().copied(), + ); + + engine.route.decrease(); + result +} + +/// Realizes the body of an HTML fragment. +fn realize_fragment<'a>( + engine: &mut Engine, + locator: &mut SplitLocator, + arenas: &'a Arenas, + content: &'a Content, + styles: StyleChain<'a>, +) -> SourceResult>> { + (engine.routines.realize)( RealizationKind::HtmlFragment { + // We ignore the `FragmentKind` because we handle both uniformly. kind: &mut FragmentKind::Block, is_inline: crate::convert::is_inline, }, - &mut engine, - &mut locator, - &arenas, + engine, + locator, + arenas, content, styles, - )?; - - crate::convert::convert_to_nodes(&mut engine, &mut locator, children.iter().copied()) + ) } diff --git a/tests/ref/html/par-semantic-html.html b/tests/ref/html/par-semantic-html.html index 09c7d2fd0..2ae1f6779 100644 --- a/tests/ref/html/par-semantic-html.html +++ b/tests/ref/html/par-semantic-html.html @@ -6,8 +6,8 @@

Heading is no paragraph

-

I'm a paragraph.

-
I'm not.
+

I’m a paragraph.

+
I’m not.

We are two.

So we are paragraphs.

diff --git a/tests/ref/html/quote-nesting-html.html b/tests/ref/html/quote-nesting-html.html index 6b05a94a0..7c2f3d33a 100644 --- a/tests/ref/html/quote-nesting-html.html +++ b/tests/ref/html/quote-nesting-html.html @@ -5,6 +5,6 @@ -

When you said that “he surely meant that ‘she intended to say “I'm sorry”’”, I was quite confused.

+

When you said that “he surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.

diff --git a/tests/ref/html/smartquote-inline-block.html b/tests/ref/html/smartquote-inline-block.html new file mode 100644 index 000000000..15afefc0a --- /dev/null +++ b/tests/ref/html/smartquote-inline-block.html @@ -0,0 +1,13 @@ + + + + + + + +

Applies across “inline-level elements”.

+

Does not apply across

+
“block-level
+

elements“.

+ + diff --git a/tests/ref/html/smartquote-nesting-twice.html b/tests/ref/html/smartquote-nesting-twice.html new file mode 100644 index 000000000..232e6a52d --- /dev/null +++ b/tests/ref/html/smartquote-nesting-twice.html @@ -0,0 +1,11 @@ + + + + + + + +

When you said that “he surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.

+

box

+ + diff --git a/tests/ref/html/smartquotes-html.html b/tests/ref/html/smartquotes-html.html new file mode 100644 index 000000000..7a9d083c6 --- /dev/null +++ b/tests/ref/html/smartquotes-html.html @@ -0,0 +1,11 @@ + + + + + + + +

When you said that “he surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.

+

box

+ + diff --git a/tests/ref/smartquote-nesting-twice.png b/tests/ref/smartquote-nesting-twice.png new file mode 100644 index 0000000000000000000000000000000000000000..69f366807379904672fa40d2127f260325f4b28a GIT binary patch literal 2228 zcmV;l2ut^gP)|IG_X{#s}L7^%dK>q$Q>by zR1QHvR0t@F90H<1G$5c{CM1MF^1RnF>f-K-=qT=V^UiPi;dxD-$@lZjJn!(9ddCPG zVI%yzgw<#JdAaH;0RUy5nu%$ycgbb-FMvHQhe|(`QoXolVI5Ph{tS@%;}iHVV@%Aa z-;K=~FM#_#gbjssCgIe;)DwWlp8!VIqqE>i4gjYc!-pLeFMzSCWy1!ojMTJM z4J_s4fngi2jl%&YgsoP8!Rf@YZ%9vgawFfn`b+hVO*yX5hlgfIsecYD0jCBm2Uiz# z_OP-OQ-C6uRNZwOc%LOoOabENR%#-$J|*Wsz4hZ;Z9T;59>q^s>T-N=Z3X~xgRmOi zFS^O|VQl@lG*bYFY8G~di|D|rm6-s{k3lo*!M$&=yV3UEFkF(?ZH-M*&PC#_dRKGW zN)LQp%hyQ!2j{n&iw$Pi0RXQm!;%)?gx=eez~xSR zUTr{x__woMhVSkgxB7PpYh|NMty^t8Q^P%gd-bmfcf3Ey) zm2NMmBPmurlI7p7@hIKpOY)~DK)2HMiqtP)lbX}w^Ef-42SA7mm9^Q&IJZ-m9&eZ& zF7)Ycx#-5eQ_PX|$3)3yJ%YPFCGQS!(2F@uT`O5p?c*oAQR9ZUmwt)5Z7slo9HeX1 zUZveK2+YP5mgm$qKZ5x|77q}b$--f^ znfk-J>pjYtIBWLjVkiF>987qIAt7VVQ&{h@NYS>I3!b&+b=L0bU)0cU+NM+{0ajyko|U+tI<(! 
z-0J@WxbHpyAhZZfTA5r9j68TrSU3y-2u#di9|Zv1{DTejUN6-QLEic!cWin zA}029jNtPzV5yep)E$}LpqxoZ(M!Um`v@rs-=nX3FLA6B9$2jq9ce zV~R8w-vUf2TTxupSGb4J%%lL4#kp&Wiv|}AJTk{&jR}0x3&VDH{py71wcJ_UT=5J% zevxkLDZ+-?WE{lnSLV?@ThDN=h`_X>RYZlfekKUoRYm+w1$?39gF)sAP-2J?@2tGF z25j7Toa(Q@Zx@+sP%N7weA?oQ$Hs^2M63_hhyEO|*|qMWvj;mx2V6A%ZV4l|9l|$C zfRLTNb?#9WKu$>TK%OZ5+ji2Dc$|1i5j_0Kr^%`$ZROoJJ|L;Ox3gS+65b9h7#|X^ z16DDF3WG*9-SGC5uka{r`gVH(z!}FRUXWDtdf>D5C&=%-mwQ$Lz#nzIAO#N{UlKkU z+fM$mI*Kz>tB+m`ZU{Ix7G9y(gMR!2z)YY$;G*QvNK!n-QN74B;sGwirF5LmB(myW z@Piza6q-*|O&Kmebn|Lf1h$vuGs?STmsJ^DP7=bijl;O)cP>D3x8nR&cfM7p0_XN& zBfDkI(FiQs`<61ePH)~eRd`^r`|$Tw96g{kgUk?UvrrWMSGd_;wxA^r!^uQTvQ2Y= zO=Wn}-O|ijoYJC!5mD6FGF<&d-`6h=!~6)&?g_)M{;!ryJiNL@Ce1_k+Rkx!JQLCj zOqR+l=(WKbULwuyR^vnBmElqTy|HP51A{|(wzD5i2YhEM|Iz$61J@@v(wkgLcif=3 zfo<018CQnlIw((Wra4oAkefK~jlgv~^sXZ|s-)f8k0a~N$lcEgFE$l-x;)5|Zx`l{ zdbD=Vek1U%JSf|Ub|#jCnEnPA?qOHyn5DZi$UQkqX9A{g1m5qDuIcYQScG{$^T_d~ zbQ`}EHtM3i5=FoamB9qz4Z!`&cSuXqho{3c-o)%bNisuC;R&{?`V8B!4 zeT=)_v57ic-wuavm^QeC&y=g~vE;^(`H_C65x&5R&RI&fQ2yJT)*7w=0000