mirror of
https://github.com/typst/typst
synced 2025-08-12 14:17:55 +08:00
Support smartquotes in HTML export (#6710)
Co-authored-by: Malo <57839069+MDLC01@users.noreply.github.com>
This commit is contained in:
parent
cfce90ec29
commit
298c293181
@ -5,20 +5,24 @@ use typst_library::foundations::{Content, StyleChain, Target, TargetElem};
|
|||||||
use typst_library::introspection::{SplitLocator, TagElem};
|
use typst_library::introspection::{SplitLocator, TagElem};
|
||||||
use typst_library::layout::{Abs, Axes, Region, Size};
|
use typst_library::layout::{Abs, Axes, Region, Size};
|
||||||
use typst_library::routines::Pair;
|
use typst_library::routines::Pair;
|
||||||
use typst_library::text::{LinebreakElem, SmartQuoteElem, SpaceElem, TextElem};
|
use typst_library::text::{
|
||||||
|
LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem,
|
||||||
|
is_default_ignorable,
|
||||||
|
};
|
||||||
|
|
||||||
use crate::fragment::html_fragment;
|
use crate::fragment::{html_block_fragment, html_inline_fragment};
|
||||||
use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, tag};
|
use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, tag};
|
||||||
|
|
||||||
/// Converts realized content into HTML nodes.
|
/// Converts realized content into HTML nodes.
|
||||||
pub fn convert_to_nodes<'a>(
|
pub fn convert_to_nodes<'a>(
|
||||||
engine: &mut Engine,
|
engine: &mut Engine,
|
||||||
locator: &mut SplitLocator,
|
locator: &mut SplitLocator,
|
||||||
|
quoter: &mut SmartQuoter,
|
||||||
children: impl IntoIterator<Item = Pair<'a>>,
|
children: impl IntoIterator<Item = Pair<'a>>,
|
||||||
) -> SourceResult<EcoVec<HtmlNode>> {
|
) -> SourceResult<EcoVec<HtmlNode>> {
|
||||||
let mut output = EcoVec::new();
|
let mut output = EcoVec::new();
|
||||||
for (child, styles) in children {
|
for (child, styles) in children {
|
||||||
handle(engine, child, locator, styles, &mut output)?;
|
handle(engine, child, locator, styles, quoter, &mut output)?;
|
||||||
}
|
}
|
||||||
Ok(output)
|
Ok(output)
|
||||||
}
|
}
|
||||||
@ -29,6 +33,7 @@ fn handle(
|
|||||||
child: &Content,
|
child: &Content,
|
||||||
locator: &mut SplitLocator,
|
locator: &mut SplitLocator,
|
||||||
styles: StyleChain,
|
styles: StyleChain,
|
||||||
|
quoter: &mut SmartQuoter,
|
||||||
output: &mut EcoVec<HtmlNode>,
|
output: &mut EcoVec<HtmlNode>,
|
||||||
) -> SourceResult<()> {
|
) -> SourceResult<()> {
|
||||||
if let Some(elem) = child.to_packed::<TagElem>() {
|
if let Some(elem) = child.to_packed::<TagElem>() {
|
||||||
@ -36,7 +41,22 @@ fn handle(
|
|||||||
} else if let Some(elem) = child.to_packed::<HtmlElem>() {
|
} else if let Some(elem) = child.to_packed::<HtmlElem>() {
|
||||||
let mut children = EcoVec::new();
|
let mut children = EcoVec::new();
|
||||||
if let Some(body) = elem.body.get_ref(styles) {
|
if let Some(body) = elem.body.get_ref(styles) {
|
||||||
children = html_fragment(engine, body, locator.next(&elem.span()), styles)?;
|
if tag::is_block_by_default(elem.tag) {
|
||||||
|
children = html_block_fragment(
|
||||||
|
engine,
|
||||||
|
body,
|
||||||
|
locator.next(&elem.span()),
|
||||||
|
styles,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// Block-level elements reset the smart quoting state. This part
|
||||||
|
// is unfortunately untested as it's currently not possible to
|
||||||
|
// create inline-level content next to block-level content
|
||||||
|
// without a paragraph automatically appearing.
|
||||||
|
*quoter = SmartQuoter::new();
|
||||||
|
} else {
|
||||||
|
children = html_inline_fragment(engine, body, locator, quoter, styles)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
let element = HtmlElement {
|
let element = HtmlElement {
|
||||||
tag: elem.tag,
|
tag: elem.tag,
|
||||||
@ -57,10 +77,20 @@ fn handle(
|
|||||||
} else if let Some(elem) = child.to_packed::<LinebreakElem>() {
|
} else if let Some(elem) = child.to_packed::<LinebreakElem>() {
|
||||||
output.push(HtmlElement::new(tag::br).spanned(elem.span()).into());
|
output.push(HtmlElement::new(tag::br).spanned(elem.span()).into());
|
||||||
} else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
|
} else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
|
||||||
output.push(HtmlNode::text(
|
let double = elem.double.get(styles);
|
||||||
if elem.double.get(styles) { '"' } else { '\'' },
|
if elem.enabled.get(styles) {
|
||||||
child.span(),
|
let before = last_char(output);
|
||||||
));
|
let quotes = SmartQuotes::get(
|
||||||
|
elem.quotes.get_ref(styles),
|
||||||
|
styles.get(TextElem::lang),
|
||||||
|
styles.get(TextElem::region),
|
||||||
|
elem.alternative.get(styles),
|
||||||
|
);
|
||||||
|
let quote = quoter.quote(before, &quotes, double);
|
||||||
|
output.push(HtmlNode::text(quote, child.span()));
|
||||||
|
} else {
|
||||||
|
output.push(HtmlNode::text(if double { '"' } else { '\'' }, child.span()));
|
||||||
|
}
|
||||||
} else if let Some(elem) = child.to_packed::<FrameElem>() {
|
} else if let Some(elem) = child.to_packed::<FrameElem>() {
|
||||||
let locator = locator.next(&elem.span());
|
let locator = locator.next(&elem.span());
|
||||||
let style = TargetElem::target.set(Target::Paged).wrap();
|
let style = TargetElem::target.set(Target::Paged).wrap();
|
||||||
@ -82,6 +112,20 @@ fn handle(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the last character in the passed nodes that is not default-ignorable.
|
||||||
|
fn last_char(nodes: &[HtmlNode]) -> Option<char> {
|
||||||
|
for node in nodes.iter().rev() {
|
||||||
|
if let Some(c) = match node {
|
||||||
|
HtmlNode::Text(s, _) => s.chars().rev().find(|&c| !is_default_ignorable(c)),
|
||||||
|
HtmlNode::Element(e) => last_char(&e.children),
|
||||||
|
_ => None,
|
||||||
|
} {
|
||||||
|
return Some(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
/// Checks whether the given element is an inline-level HTML element.
|
/// Checks whether the given element is an inline-level HTML element.
|
||||||
pub fn is_inline(elem: &Content) -> bool {
|
pub fn is_inline(elem: &Content) -> bool {
|
||||||
elem.to_packed::<HtmlElem>()
|
elem.to_packed::<HtmlElem>()
|
||||||
|
@ -13,6 +13,7 @@ use typst_library::introspection::{
|
|||||||
use typst_library::layout::{Point, Position, Transform};
|
use typst_library::layout::{Point, Position, Transform};
|
||||||
use typst_library::model::DocumentInfo;
|
use typst_library::model::DocumentInfo;
|
||||||
use typst_library::routines::{Arenas, RealizationKind, Routines};
|
use typst_library::routines::{Arenas, RealizationKind, Routines};
|
||||||
|
use typst_library::text::SmartQuoter;
|
||||||
use typst_syntax::Span;
|
use typst_syntax::Span;
|
||||||
use typst_utils::NonZeroExt;
|
use typst_utils::NonZeroExt;
|
||||||
|
|
||||||
@ -85,6 +86,7 @@ fn html_document_impl(
|
|||||||
let output = crate::convert::convert_to_nodes(
|
let output = crate::convert::convert_to_nodes(
|
||||||
&mut engine,
|
&mut engine,
|
||||||
&mut locator,
|
&mut locator,
|
||||||
|
&mut SmartQuoter::new(),
|
||||||
children.iter().copied(),
|
children.iter().copied(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
@ -3,22 +3,24 @@ use ecow::EcoVec;
|
|||||||
use typst_library::diag::{At, SourceResult};
|
use typst_library::diag::{At, SourceResult};
|
||||||
use typst_library::engine::{Engine, Route, Sink, Traced};
|
use typst_library::engine::{Engine, Route, Sink, Traced};
|
||||||
use typst_library::foundations::{Content, StyleChain};
|
use typst_library::foundations::{Content, StyleChain};
|
||||||
use typst_library::introspection::{Introspector, Locator, LocatorLink};
|
use typst_library::introspection::{Introspector, Locator, LocatorLink, SplitLocator};
|
||||||
|
|
||||||
use typst_library::World;
|
use typst_library::World;
|
||||||
use typst_library::routines::{Arenas, FragmentKind, RealizationKind, Routines};
|
use typst_library::routines::{Arenas, FragmentKind, Pair, RealizationKind, Routines};
|
||||||
|
use typst_library::text::SmartQuoter;
|
||||||
|
|
||||||
use crate::HtmlNode;
|
use crate::HtmlNode;
|
||||||
|
|
||||||
/// Produce HTML nodes from content.
|
/// Produces HTML nodes from content contained in an HTML element that is
|
||||||
#[typst_macros::time(name = "html fragment")]
|
/// block-level by default.
|
||||||
pub fn html_fragment(
|
#[typst_macros::time(name = "html block fragment")]
|
||||||
|
pub fn html_block_fragment(
|
||||||
engine: &mut Engine,
|
engine: &mut Engine,
|
||||||
content: &Content,
|
content: &Content,
|
||||||
locator: Locator,
|
locator: Locator,
|
||||||
styles: StyleChain,
|
styles: StyleChain,
|
||||||
) -> SourceResult<EcoVec<HtmlNode>> {
|
) -> SourceResult<EcoVec<HtmlNode>> {
|
||||||
html_fragment_impl(
|
html_block_fragment_impl(
|
||||||
engine.routines,
|
engine.routines,
|
||||||
engine.world,
|
engine.world,
|
||||||
engine.introspector,
|
engine.introspector,
|
||||||
@ -34,7 +36,7 @@ pub fn html_fragment(
|
|||||||
/// The cached, internal implementation of [`html_fragment`].
|
/// The cached, internal implementation of [`html_block_fragment`].
|
||||||
#[comemo::memoize]
|
#[comemo::memoize]
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn html_fragment_impl(
|
fn html_block_fragment_impl(
|
||||||
routines: &Routines,
|
routines: &Routines,
|
||||||
world: Tracked<dyn World + '_>,
|
world: Tracked<dyn World + '_>,
|
||||||
introspector: Tracked<Introspector>,
|
introspector: Tracked<Introspector>,
|
||||||
@ -59,19 +61,65 @@ fn html_fragment_impl(
|
|||||||
engine.route.check_html_depth().at(content.span())?;
|
engine.route.check_html_depth().at(content.span())?;
|
||||||
|
|
||||||
let arenas = Arenas::default();
|
let arenas = Arenas::default();
|
||||||
let children = (engine.routines.realize)(
|
let children = realize_fragment(&mut engine, &mut locator, &arenas, content, styles)?;
|
||||||
// No need to know about the `FragmentKind` because we handle both
|
crate::convert::convert_to_nodes(
|
||||||
// uniformly.
|
&mut engine,
|
||||||
|
&mut locator,
|
||||||
|
&mut SmartQuoter::new(),
|
||||||
|
children.iter().copied(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Produces HTML nodes from content contained in an HTML element that is
|
||||||
|
/// inline-level by default.
|
||||||
|
///
|
||||||
|
/// The difference to block-level content is that inline-level content has
|
||||||
|
/// shared smartquoting state with surrounding inline-level content. This
|
||||||
|
/// requires mutable state, which is at odds with memoization. However, the
|
||||||
|
/// caching granularity would be unnecessarily high anyway if every single
|
||||||
|
/// fragment was cached, so this works out pretty well together.
|
||||||
|
#[typst_macros::time(name = "html inline fragment")]
|
||||||
|
pub fn html_inline_fragment(
|
||||||
|
engine: &mut Engine,
|
||||||
|
content: &Content,
|
||||||
|
locator: &mut SplitLocator,
|
||||||
|
quoter: &mut SmartQuoter,
|
||||||
|
styles: StyleChain,
|
||||||
|
) -> SourceResult<EcoVec<HtmlNode>> {
|
||||||
|
engine.route.increase();
|
||||||
|
engine.route.check_html_depth().at(content.span())?;
|
||||||
|
|
||||||
|
let arenas = Arenas::default();
|
||||||
|
let children = realize_fragment(engine, locator, &arenas, content, styles)?;
|
||||||
|
let result = crate::convert::convert_to_nodes(
|
||||||
|
engine,
|
||||||
|
locator,
|
||||||
|
quoter,
|
||||||
|
children.iter().copied(),
|
||||||
|
);
|
||||||
|
|
||||||
|
engine.route.decrease();
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Realizes the body of an HTML fragment.
|
||||||
|
fn realize_fragment<'a>(
|
||||||
|
engine: &mut Engine,
|
||||||
|
locator: &mut SplitLocator,
|
||||||
|
arenas: &'a Arenas,
|
||||||
|
content: &'a Content,
|
||||||
|
styles: StyleChain<'a>,
|
||||||
|
) -> SourceResult<Vec<Pair<'a>>> {
|
||||||
|
(engine.routines.realize)(
|
||||||
RealizationKind::HtmlFragment {
|
RealizationKind::HtmlFragment {
|
||||||
|
// We ignore the `FragmentKind` because we handle both uniformly.
|
||||||
kind: &mut FragmentKind::Block,
|
kind: &mut FragmentKind::Block,
|
||||||
is_inline: crate::convert::is_inline,
|
is_inline: crate::convert::is_inline,
|
||||||
},
|
},
|
||||||
&mut engine,
|
engine,
|
||||||
&mut locator,
|
locator,
|
||||||
&arenas,
|
arenas,
|
||||||
content,
|
content,
|
||||||
styles,
|
styles,
|
||||||
)?;
|
)
|
||||||
|
|
||||||
crate::convert::convert_to_nodes(&mut engine, &mut locator, children.iter().copied())
|
|
||||||
}
|
}
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<h2>Heading is no paragraph</h2>
|
<h2>Heading is no paragraph</h2>
|
||||||
<p>I'm a paragraph.</p>
|
<p>I’m a paragraph.</p>
|
||||||
<div>I'm not.</div>
|
<div>I’m not.</div>
|
||||||
<div>
|
<div>
|
||||||
<p>We are two.</p>
|
<p>We are two.</p>
|
||||||
<p>So we are paragraphs.</p>
|
<p>So we are paragraphs.</p>
|
||||||
|
@ -5,6 +5,6 @@
|
|||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<p>When you said that “he surely meant that ‘she intended to say “I'm sorry”’”, I was quite confused.</p>
|
<p>When you said that “he surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.</p>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
13
tests/ref/html/smartquote-inline-block.html
Normal file
13
tests/ref/html/smartquote-inline-block.html
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>Applies across <span>“inline-level</span> elements”.</p>
|
||||||
|
<p>Does not apply across</p>
|
||||||
|
<div>“block-level</div>
|
||||||
|
<p>elements“.</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
11
tests/ref/html/smartquote-nesting-twice.html
Normal file
11
tests/ref/html/smartquote-nesting-twice.html
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>When you said <em>that “he</em> surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.</p>
|
||||||
|
<p>‘<span style="display: inline-block">box</span>’</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
11
tests/ref/html/smartquotes-html.html
Normal file
11
tests/ref/html/smartquotes-html.html
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>When you said that “he surely meant that ‘she intended to say “I’m sorry”’”, I was quite confused.</p>
|
||||||
|
<p>‘<span style="display: inline-block;">box</span>’</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
BIN
tests/ref/smartquote-nesting-twice.png
Normal file
BIN
tests/ref/smartquote-nesting-twice.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.2 KiB |
@ -113,6 +113,16 @@ Some people's thought on this would be #[#set smartquote(enabled: false); "stran
|
|||||||
"'test' statement" \
|
"'test' statement" \
|
||||||
"statement 'test'"
|
"statement 'test'"
|
||||||
|
|
||||||
|
--- smartquote-nesting-twice render html ---
|
||||||
|
When you said _that "he_ surely meant that 'she intended to say "I'm sorry"'", I was quite confused.
|
||||||
|
|
||||||
|
'#box[box]'
|
||||||
|
|
||||||
|
--- smartquote-inline-block html ---
|
||||||
|
Applies across #html.span["inline-level] elements".
|
||||||
|
|
||||||
|
Does not apply across #html.div["block-level] elements".
|
||||||
|
|
||||||
--- smartquote-with-embedding-chars ---
|
--- smartquote-with-embedding-chars ---
|
||||||
#set text(lang: "fr")
|
#set text(lang: "fr")
|
||||||
"#"\u{202A}"bonjour#"\u{202C}"" \
|
"#"\u{202A}"bonjour#"\u{202C}"" \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user