mirror of
https://github.com/typst/typst
synced 2025-08-12 14:17:55 +08:00
Support smartquotes in HTML export (#6710)
Co-authored-by: Malo <57839069+MDLC01@users.noreply.github.com>
This commit is contained in:
parent
cfce90ec29
commit
298c293181
@ -5,20 +5,24 @@ use typst_library::foundations::{Content, StyleChain, Target, TargetElem};
|
||||
use typst_library::introspection::{SplitLocator, TagElem};
|
||||
use typst_library::layout::{Abs, Axes, Region, Size};
|
||||
use typst_library::routines::Pair;
|
||||
use typst_library::text::{LinebreakElem, SmartQuoteElem, SpaceElem, TextElem};
|
||||
use typst_library::text::{
|
||||
LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem,
|
||||
is_default_ignorable,
|
||||
};
|
||||
|
||||
use crate::fragment::html_fragment;
|
||||
use crate::fragment::{html_block_fragment, html_inline_fragment};
|
||||
use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, tag};
|
||||
|
||||
/// Converts realized content into HTML nodes.
|
||||
pub fn convert_to_nodes<'a>(
|
||||
engine: &mut Engine,
|
||||
locator: &mut SplitLocator,
|
||||
quoter: &mut SmartQuoter,
|
||||
children: impl IntoIterator<Item = Pair<'a>>,
|
||||
) -> SourceResult<EcoVec<HtmlNode>> {
|
||||
let mut output = EcoVec::new();
|
||||
for (child, styles) in children {
|
||||
handle(engine, child, locator, styles, &mut output)?;
|
||||
handle(engine, child, locator, styles, quoter, &mut output)?;
|
||||
}
|
||||
Ok(output)
|
||||
}
|
||||
@ -29,6 +33,7 @@ fn handle(
|
||||
child: &Content,
|
||||
locator: &mut SplitLocator,
|
||||
styles: StyleChain,
|
||||
quoter: &mut SmartQuoter,
|
||||
output: &mut EcoVec<HtmlNode>,
|
||||
) -> SourceResult<()> {
|
||||
if let Some(elem) = child.to_packed::<TagElem>() {
|
||||
@ -36,7 +41,22 @@ fn handle(
|
||||
} else if let Some(elem) = child.to_packed::<HtmlElem>() {
|
||||
let mut children = EcoVec::new();
|
||||
if let Some(body) = elem.body.get_ref(styles) {
|
||||
children = html_fragment(engine, body, locator.next(&elem.span()), styles)?;
|
||||
if tag::is_block_by_default(elem.tag) {
|
||||
children = html_block_fragment(
|
||||
engine,
|
||||
body,
|
||||
locator.next(&elem.span()),
|
||||
styles,
|
||||
)?;
|
||||
|
||||
// Block-level elements reset the smart quoting state. This part
|
||||
// is unfortunately untested as it's currently not possible to
|
||||
// create inline-level content next to block-level content
|
||||
// without a paragraph automatically appearing.
|
||||
*quoter = SmartQuoter::new();
|
||||
} else {
|
||||
children = html_inline_fragment(engine, body, locator, quoter, styles)?;
|
||||
}
|
||||
}
|
||||
let element = HtmlElement {
|
||||
tag: elem.tag,
|
||||
@ -57,10 +77,20 @@ fn handle(
|
||||
} else if let Some(elem) = child.to_packed::<LinebreakElem>() {
|
||||
output.push(HtmlElement::new(tag::br).spanned(elem.span()).into());
|
||||
} else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
|
||||
output.push(HtmlNode::text(
|
||||
if elem.double.get(styles) { '"' } else { '\'' },
|
||||
child.span(),
|
||||
));
|
||||
let double = elem.double.get(styles);
|
||||
if elem.enabled.get(styles) {
|
||||
let before = last_char(output);
|
||||
let quotes = SmartQuotes::get(
|
||||
elem.quotes.get_ref(styles),
|
||||
styles.get(TextElem::lang),
|
||||
styles.get(TextElem::region),
|
||||
elem.alternative.get(styles),
|
||||
);
|
||||
let quote = quoter.quote(before, &quotes, double);
|
||||
output.push(HtmlNode::text(quote, child.span()));
|
||||
} else {
|
||||
output.push(HtmlNode::text(if double { '"' } else { '\'' }, child.span()));
|
||||
}
|
||||
} else if let Some(elem) = child.to_packed::<FrameElem>() {
|
||||
let locator = locator.next(&elem.span());
|
||||
let style = TargetElem::target.set(Target::Paged).wrap();
|
||||
@ -82,6 +112,20 @@ fn handle(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the last character in the passed nodes that is not default-ignorable.
|
||||
fn last_char(nodes: &[HtmlNode]) -> Option<char> {
|
||||
for node in nodes.iter().rev() {
|
||||
if let Some(c) = match node {
|
||||
HtmlNode::Text(s, _) => s.chars().rev().find(|&c| !is_default_ignorable(c)),
|
||||
HtmlNode::Element(e) => last_char(&e.children),
|
||||
_ => None,
|
||||
} {
|
||||
return Some(c);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Checks whether the given element is an inline-level HTML element.
|
||||
pub fn is_inline(elem: &Content) -> bool {
|
||||
elem.to_packed::<HtmlElem>()
|
||||
|
@ -13,6 +13,7 @@ use typst_library::introspection::{
|
||||
use typst_library::layout::{Point, Position, Transform};
|
||||
use typst_library::model::DocumentInfo;
|
||||
use typst_library::routines::{Arenas, RealizationKind, Routines};
|
||||
use typst_library::text::SmartQuoter;
|
||||
use typst_syntax::Span;
|
||||
use typst_utils::NonZeroExt;
|
||||
|
||||
@ -85,6 +86,7 @@ fn html_document_impl(
|
||||
let output = crate::convert::convert_to_nodes(
|
||||
&mut engine,
|
||||
&mut locator,
|
||||
&mut SmartQuoter::new(),
|
||||
children.iter().copied(),
|
||||
)?;
|
||||
|
||||
|
@ -3,22 +3,24 @@ use ecow::EcoVec;
|
||||
use typst_library::diag::{At, SourceResult};
|
||||
use typst_library::engine::{Engine, Route, Sink, Traced};
|
||||
use typst_library::foundations::{Content, StyleChain};
|
||||
use typst_library::introspection::{Introspector, Locator, LocatorLink};
|
||||
use typst_library::introspection::{Introspector, Locator, LocatorLink, SplitLocator};
|
||||
|
||||
use typst_library::World;
|
||||
use typst_library::routines::{Arenas, FragmentKind, RealizationKind, Routines};
|
||||
use typst_library::routines::{Arenas, FragmentKind, Pair, RealizationKind, Routines};
|
||||
use typst_library::text::SmartQuoter;
|
||||
|
||||
use crate::HtmlNode;
|
||||
|
||||
/// Produce HTML nodes from content.
|
||||
#[typst_macros::time(name = "html fragment")]
|
||||
pub fn html_fragment(
|
||||
/// Produces HTML nodes from content contained in an HTML element that is
|
||||
/// block-level by default.
|
||||
#[typst_macros::time(name = "html block fragment")]
|
||||
pub fn html_block_fragment(
|
||||
engine: &mut Engine,
|
||||
content: &Content,
|
||||
locator: Locator,
|
||||
styles: StyleChain,
|
||||
) -> SourceResult<EcoVec<HtmlNode>> {
|
||||
html_fragment_impl(
|
||||
html_block_fragment_impl(
|
||||
engine.routines,
|
||||
engine.world,
|
||||
engine.introspector,
|
||||
@ -34,7 +36,7 @@ pub fn html_fragment(
|
||||
/// The cached, internal implementation of [`html_block_fragment`].
|
||||
#[comemo::memoize]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn html_fragment_impl(
|
||||
fn html_block_fragment_impl(
|
||||
routines: &Routines,
|
||||
world: Tracked<dyn World + '_>,
|
||||
introspector: Tracked<Introspector>,
|
||||
@ -59,19 +61,65 @@ fn html_fragment_impl(
|
||||
engine.route.check_html_depth().at(content.span())?;
|
||||
|
||||
let arenas = Arenas::default();
|
||||
let children = (engine.routines.realize)(
|
||||
// No need to know about the `FragmentKind` because we handle both
|
||||
// uniformly.
|
||||
let children = realize_fragment(&mut engine, &mut locator, &arenas, content, styles)?;
|
||||
crate::convert::convert_to_nodes(
|
||||
&mut engine,
|
||||
&mut locator,
|
||||
&mut SmartQuoter::new(),
|
||||
children.iter().copied(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Produces HTML nodes from content contained in an HTML element that is
|
||||
/// inline-level by default.
|
||||
///
|
||||
/// The difference to block-level content is that inline-level content has
|
||||
/// shared smartquoting state with surrounding inline-level content. This
|
||||
/// requires mutable state, which is at odds with memoization. However, the
|
||||
/// caching granularity would be unnecessarily high anyway if every single
|
||||
/// fragment was cached, so this works out pretty well together.
|
||||
#[typst_macros::time(name = "html inline fragment")]
|
||||
pub fn html_inline_fragment(
|
||||
engine: &mut Engine,
|
||||
content: &Content,
|
||||
locator: &mut SplitLocator,
|
||||
quoter: &mut SmartQuoter,
|
||||
styles: StyleChain,
|
||||
) -> SourceResult<EcoVec<HtmlNode>> {
|
||||
engine.route.increase();
|
||||
engine.route.check_html_depth().at(content.span())?;
|
||||
|
||||
let arenas = Arenas::default();
|
||||
let children = realize_fragment(engine, locator, &arenas, content, styles)?;
|
||||
let result = crate::convert::convert_to_nodes(
|
||||
engine,
|
||||
locator,
|
||||
quoter,
|
||||
children.iter().copied(),
|
||||
);
|
||||
|
||||
engine.route.decrease();
|
||||
result
|
||||
}
|
||||
|
||||
/// Realizes the body of an HTML fragment.
|
||||
fn realize_fragment<'a>(
|
||||
engine: &mut Engine,
|
||||
locator: &mut SplitLocator,
|
||||
arenas: &'a Arenas,
|
||||
content: &'a Content,
|
||||
styles: StyleChain<'a>,
|
||||
) -> SourceResult<Vec<Pair<'a>>> {
|
||||
(engine.routines.realize)(
|
||||
RealizationKind::HtmlFragment {
|
||||
// We ignore the `FragmentKind` because we handle both uniformly.
|
||||
kind: &mut FragmentKind::Block,
|
||||
is_inline: crate::convert::is_inline,
|
||||
},
|
||||
&mut engine,
|
||||
&mut locator,
|
||||
&arenas,
|
||||
engine,
|
||||
locator,
|
||||
arenas,
|
||||
content,
|
||||
styles,
|
||||
)?;
|
||||
|
||||
crate::convert::convert_to_nodes(&mut engine, &mut locator, children.iter().copied())
|
||||
)
|
||||
}
|
||||
|
@ -6,8 +6,8 @@
|
||||
</head>
|
||||
<body>
|
||||
<h2>Heading is no paragraph</h2>
|
||||
<p>I'm a paragraph.</p>
|
||||
<div>I'm not.</div>
|
||||
<p>I’m a paragraph.</p>
|
||||
<div>I’m not.</div>
|
||||
<div>
|
||||
<p>We are two.</p>
|
||||
<p>So we are paragraphs.</p>
|
||||
|
@ -5,6 +5,6 @@
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body>
|
||||
<p>When you said that “he surely meant that ‘she intended to say “I'm sorry”’”, I was quite confused.</p>
|
||||
<p>When you said that “he surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.</p>
|
||||
</body>
|
||||
</html>
|
||||
|
13
tests/ref/html/smartquote-inline-block.html
Normal file
13
tests/ref/html/smartquote-inline-block.html
Normal file
@ -0,0 +1,13 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body>
|
||||
<p>Applies across <span>“inline-level</span> elements”.</p>
|
||||
<p>Does not apply across</p>
|
||||
<div>“block-level</div>
|
||||
<p>elements“.</p>
|
||||
</body>
|
||||
</html>
|
11
tests/ref/html/smartquote-nesting-twice.html
Normal file
11
tests/ref/html/smartquote-nesting-twice.html
Normal file
@ -0,0 +1,11 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body>
|
||||
<p>When you said <em>that “he</em> surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.</p>
|
||||
<p>‘<span style="display: inline-block">box</span>’</p>
|
||||
</body>
|
||||
</html>
|
11
tests/ref/html/smartquotes-html.html
Normal file
11
tests/ref/html/smartquotes-html.html
Normal file
@ -0,0 +1,11 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body>
|
||||
<p>When you said that “he surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.</p>
|
||||
<p>‘<span style="display: inline-block;">box</span>’</p>
|
||||
</body>
|
||||
</html>
|
BIN
tests/ref/smartquote-nesting-twice.png
Normal file
BIN
tests/ref/smartquote-nesting-twice.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.2 KiB |
@ -113,6 +113,16 @@ Some people's thought on this would be #[#set smartquote(enabled: false); "stran
|
||||
"'test' statement" \
|
||||
"statement 'test'"
|
||||
|
||||
--- smartquote-nesting-twice render html ---
|
||||
When you said _that "he_ surely meant that 'she intended to say "I'm sorry"'", I was quite confused.
|
||||
|
||||
'#box[box]'
|
||||
|
||||
--- smartquote-inline-block html ---
|
||||
Applies across #html.span["inline-level] elements".
|
||||
|
||||
Does not apply across #html.div["block-level] elements".
|
||||
|
||||
--- smartquote-with-embedding-chars ---
|
||||
#set text(lang: "fr")
|
||||
"#"\u{202A}"bonjour#"\u{202C}"" \
|
||||
|
Loading…
x
Reference in New Issue
Block a user