HTML whitespace protection (#6750)

This commit is contained in:
Laurenz 2025-08-13 14:12:24 +02:00 committed by GitHub
parent 343a57b50d
commit 805fb24ca4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 656 additions and 82 deletions

View File

@ -1,7 +1,7 @@
use ecow::EcoVec;
use ecow::{EcoString, EcoVec, eco_vec};
use typst_library::diag::{SourceResult, warning};
use typst_library::engine::Engine;
use typst_library::foundations::{Content, StyleChain, Target, TargetElem};
use typst_library::foundations::{Content, Packed, StyleChain, Target, TargetElem};
use typst_library::introspection::{SplitLocator, TagElem};
use typst_library::layout::{Abs, Axes, Region, Size};
use typst_library::routines::Pair;
@ -9,101 +9,130 @@ use typst_library::text::{
LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem,
is_default_ignorable,
};
use typst_syntax::Span;
use crate::fragment::{html_block_fragment, html_inline_fragment};
use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, tag};
use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, css, tag};
/// What and how to convert.
///
/// Selects whether a conversion starts a fresh context or continues the
/// context of an enclosing conversion.
pub enum ConversionLevel<'a> {
/// Converts the top-level nodes or children of a block-level element. The
/// conversion has its own local smart quoting state and space protection.
Block,
/// Converts the children of an inline-level HTML element as part of a
/// larger context with shared smart quoting state and shared space
/// protection.
///
/// Carries the smart quoter of the enclosing context, which is used for
/// the conversion instead of a fresh one.
Inline(&'a mut SmartQuoter),
}
/// How to emit whitespace.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum Whitespace {
/// Ensures that whitespace that would otherwise be collapsed by HTML
/// rendering engines[^1] is protected by spans with `white-space:
/// pre-wrap`. The affected whitespace characters are ASCII spaces and
/// ASCII tabs.
///
/// Tries to emit spans only when necessary.
/// - ASCII tabs and consecutive sequences of spaces and/or tabs are always
/// wrapped in spans in this mode. This happens directly during
/// conversion.
/// - Single ASCII spaces are only wrapped if they aren't supported by
/// normal elements on both sides. This happens in a separate pass that
/// runs for the whole block-level context as doing this properly needs
/// lookahead and lookbehind across different levels of the element
/// hierarchy.
///
/// [^1]: https://www.w3.org/TR/css-text-3/#white-space-rules
Normal,
/// The whitespace is emitted as-is. This happens in
/// - `<pre>` elements as they already have `white-space: pre`,
/// - raw and escapable raw text elements as normal white space rules do not
/// apply to them.
Pre,
}
/// Converts realized content into HTML nodes.
pub fn convert_to_nodes<'a>(
engine: &mut Engine,
locator: &mut SplitLocator,
quoter: &mut SmartQuoter,
children: impl IntoIterator<Item = Pair<'a>>,
level: ConversionLevel,
whitespace: Whitespace,
) -> SourceResult<EcoVec<HtmlNode>> {
let mut output = EcoVec::new();
let block = matches!(level, ConversionLevel::Block);
let mut converter = Converter {
engine,
locator,
quoter: match level {
ConversionLevel::Inline(quoter) => quoter,
ConversionLevel::Block => &mut SmartQuoter::new(),
},
whitespace,
output: EcoVec::new(),
trailing: None,
};
for (child, styles) in children {
handle(engine, child, locator, styles, quoter, &mut output)?;
handle(&mut converter, child, styles)?;
}
Ok(output)
let mut nodes = converter.finish();
if block && whitespace == Whitespace::Normal {
protect_spaces(&mut nodes);
}
Ok(nodes)
}
/// Convert one element into HTML node(s).
/// Converts one element into HTML node(s).
fn handle(
engine: &mut Engine,
converter: &mut Converter,
child: &Content,
locator: &mut SplitLocator,
styles: StyleChain,
quoter: &mut SmartQuoter,
output: &mut EcoVec<HtmlNode>,
) -> SourceResult<()> {
if let Some(elem) = child.to_packed::<TagElem>() {
output.push(HtmlNode::Tag(elem.tag.clone()));
converter.push(elem.tag.clone());
} else if let Some(elem) = child.to_packed::<HtmlElem>() {
let mut children = EcoVec::new();
if let Some(body) = elem.body.get_ref(styles) {
if tag::is_block_by_default(elem.tag) {
children = html_block_fragment(
engine,
body,
locator.next(&elem.span()),
styles,
)?;
// Block-level elements reset the smart quoting state. This part
// is unfortunately untested as it's currently not possible to
// create inline-level content next to block-level content
// without a paragraph automatically appearing.
*quoter = SmartQuoter::new();
} else {
children = html_inline_fragment(engine, body, locator, quoter, styles)?;
}
}
let element = HtmlElement {
tag: elem.tag,
attrs: elem.attrs.get_cloned(styles),
children,
span: elem.span(),
};
output.push(element.into());
handle_html_elem(converter, elem, styles)?;
} else if child.is::<SpaceElem>() {
output.push(HtmlNode::text(' ', child.span()));
converter.push(HtmlNode::text(' ', child.span()));
} else if let Some(elem) = child.to_packed::<TextElem>() {
let text = if let Some(case) = styles.get(TextElem::case) {
case.apply(&elem.text).into()
} else {
elem.text.clone()
};
output.push(HtmlNode::text(text, elem.span()));
handle_text(converter, text, elem.span());
} else if let Some(elem) = child.to_packed::<LinebreakElem>() {
output.push(HtmlElement::new(tag::br).spanned(elem.span()).into());
converter.push(HtmlElement::new(tag::br).spanned(elem.span()));
} else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
let double = elem.double.get(styles);
if elem.enabled.get(styles) {
let before = last_char(output);
let quote = if elem.enabled.get(styles) {
let before = last_char(&converter.output);
let quotes = SmartQuotes::get(
elem.quotes.get_ref(styles),
styles.get(TextElem::lang),
styles.get(TextElem::region),
elem.alternative.get(styles),
);
let quote = quoter.quote(before, &quotes, double);
output.push(HtmlNode::text(quote, child.span()));
converter.quoter.quote(before, &quotes, double)
} else {
output.push(HtmlNode::text(SmartQuotes::fallback(double), child.span()));
}
SmartQuotes::fallback(double)
};
handle_text(converter, quote.into(), child.span());
} else if let Some(elem) = child.to_packed::<FrameElem>() {
let locator = locator.next(&elem.span());
let locator = converter.locator.next(&elem.span());
let style = TargetElem::target.set(Target::Paged).wrap();
let frame = (engine.routines.layout_frame)(
engine,
let frame = (converter.engine.routines.layout_frame)(
converter.engine,
&elem.body,
locator,
styles.chain(&style),
Region::new(Size::splat(Abs::inf()), Axes::splat(false)),
)?;
output.push(HtmlNode::Frame(HtmlFrame::new(frame, styles, elem.span())));
converter.push(HtmlFrame::new(frame, styles, elem.span()));
} else {
engine.sink.warn(warning!(
converter.engine.sink.warn(warning!(
child.span(),
"{} was ignored during HTML export",
child.elem().name()
@ -112,6 +141,311 @@ fn handle(
Ok(())
}
/// Converts one HTML element, including its body, into an element node and
/// pushes it onto the converter's output.
///
/// The body of a tag that is block-level by default is converted as a
/// separate block-level fragment, after which the smart quoting state of the
/// surrounding context is reset. Any other body is converted as an inline
/// fragment that shares the surrounding smart quoter.
///
/// Whitespace is emitted as-is as soon as the surrounding context already
/// does so or the tag is `<pre>`, a raw text element, or an escapable raw
/// text element. An element without a body gets no children.
fn handle_html_elem(
    converter: &mut Converter,
    elem: &Packed<HtmlElem>,
    styles: StyleChain,
) -> SourceResult<()> {
    let children = match elem.body.get_ref(styles) {
        None => EcoVec::new(),
        Some(body) => {
            let verbatim = converter.whitespace == Whitespace::Pre
                || elem.tag == tag::pre
                || tag::is_raw(elem.tag)
                || tag::is_escapable_raw(elem.tag);
            let whitespace =
                if verbatim { Whitespace::Pre } else { Whitespace::Normal };

            if tag::is_block_by_default(elem.tag) {
                let nodes = html_block_fragment(
                    converter.engine,
                    body,
                    converter.locator.next(&elem.span()),
                    styles,
                    whitespace,
                )?;

                // Block-level elements reset the inline state. This part is
                // unfortunately untested as it's currently not possible to
                // create inline-level content next to block-level content
                // without a paragraph automatically appearing.
                *converter.quoter = SmartQuoter::new();
                nodes
            } else {
                html_inline_fragment(
                    converter.engine,
                    body,
                    converter.locator,
                    converter.quoter,
                    styles,
                    whitespace,
                )?
            }
        }
    };

    let element = HtmlElement {
        tag: elem.tag,
        attrs: elem.attrs.get_cloned(styles),
        children,
        span: elem.span(),
        pre_span: false,
    };
    converter.push(element);

    Ok(())
}
/// Handles arbitrary text while taking care that no whitespace within will be
/// collapsed by browsers.
///
/// In `Whitespace::Pre` mode, the text is pushed unchanged as a single text
/// node. Otherwise, the text is split at special characters: spaces, tabs, and
/// default-ignorables are pushed as separate single-character text nodes,
/// newlines are turned into `<br>` elements, and the runs in between are
/// pushed as ordinary text nodes. All pushed nodes carry the given `span`.
fn handle_text(converter: &mut Converter, text: EcoString, span: Span) {
/// Special kinds of characters.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum Kind {
/// ASCII space.
Space,
/// ASCII tab.
Tab,
/// CR, LF, or CR + LF.
Newline,
/// A Unicode default-ignorable. Does not protect spaces from
/// collapsing.
Ignorable,
}
impl Kind {
/// Classifies a character, returning `None` for normal (unspecial)
/// characters.
fn of(c: char) -> Option<Kind> {
match c {
' ' => Some(Kind::Space),
'\t' => Some(Kind::Tab),
'\r' | '\n' => Some(Kind::Newline),
c if is_default_ignorable(c) => Some(Kind::Ignorable),
_ => None,
}
}
}
// Nothing needs protection when whitespace is emitted as-is.
if converter.whitespace == Whitespace::Pre {
converter.push(HtmlNode::Text(text, span));
return;
}
// Byte offset into `text` up to which everything has been pushed (or
// deliberately skipped) already.
let mut emitted = 0;
// The kind of the previous character: `None` at the start of the text,
// `Some(None)` after a normal character, `Some(Some(_))` after a special
// one.
let mut prev_kind = None;
for (i, c) in text.char_indices() {
let kind = Kind::of(c);
// Remember the current kind for the next iteration and retrieve the
// previous one (shadowing the outer variable for this iteration).
let prev_kind = prev_kind.replace(kind);
// Normal characters are emitted lazily as part of a longer run.
let Some(kind) = kind else { continue };
// A space that is surrounded by normal (i.e. not special) characters is
// already protected and doesn't need further treatment.
if kind == Kind::Space
&& let Some(None) = prev_kind
&& let Some(after) = text[i + 1..].chars().next()
&& Kind::of(after).is_none()
{
continue;
}
// Emit the unspecial text up to the special character.
if emitted < i {
converter.push_text(&text[emitted..i], span);
emitted = i;
}
// Process the special character.
match kind {
Kind::Space => converter.push_text(' ', span),
Kind::Tab => converter.push_text('\t', span),
Kind::Newline => {
if c == '\r' && text[i + 1..].starts_with('\n') {
// Skip the CR because the LF will already turn into
// a `<br>`.
emitted += 1;
continue;
}
converter.push(HtmlElement::new(tag::br).spanned(span));
}
// Pushed as its own text node. `protect_spaces` leaves its state
// unchanged for text that consists only of default-ignorables.
Kind::Ignorable => converter.push_text(c, span),
}
// The special character has been handled; advance past it.
emitted += c.len_utf8();
}
// Push the remaining unspecial text.
if emitted < text.len() {
converter.push_text(
// Try to reuse the `EcoString` if possible.
if emitted == 0 { text } else { text[emitted..].into() },
span,
);
}
}
/// State during conversion.
struct Converter<'a, 'y, 'z> {
/// The engine, used for nested fragment conversion, frame layout, and for
/// emitting warnings.
engine: &'a mut Engine<'y>,
/// Hands out locators for nested block-level fragments and frames.
locator: &'a mut SplitLocator<'z>,
/// The smart quoting state of the current context.
quoter: &'a mut SmartQuoter,
/// How whitespace is emitted in this context.
whitespace: Whitespace,
/// The nodes produced so far.
output: EcoVec<HtmlNode>,
/// The run of space/tab text nodes at the end of `output`, if any, that
/// has not been flushed yet.
trailing: Option<TrailingWhitespace>,
}
/// Keeps track of a trailing whitespace in the output.
///
/// Created when a lone space or tab text node is pushed and consumed once the
/// whitespace is flushed.
struct TrailingWhitespace {
/// If `true`, the trailing whitespace consists of exactly one ASCII space.
/// Set to `false` for a tab and as soon as a further space or tab follows.
single: bool,
/// The trailing whitespace starts at `output[from..]`.
from: usize,
}
impl Converter<'_, '_, '_> {
    /// Flushes any pending whitespace and hands back the produced nodes.
    fn finish(mut self) -> EcoVec<HtmlNode> {
        self.flush_whitespace();
        let Self { output, .. } = self;
        output
    }

    /// Appends a node to the output while keeping track of runs of
    /// whitespace, so that consecutive whitespace can be protected.
    ///
    /// A text node that is exactly one space or one tab starts or extends the
    /// trailing whitespace run. Tags leave the run untouched. Every other
    /// node ends the run and triggers a flush.
    fn push(&mut self, node: impl Into<HtmlNode>) {
        let node = node.into();

        // `Some(true)` for a lone space, `Some(false)` for a lone tab, and
        // `None` for anything else.
        let lone_space = match &node {
            HtmlNode::Text(text, _) if text == " " => Some(true),
            HtmlNode::Text(text, _) if text == "\t" => Some(false),
            _ => None,
        };

        match lone_space {
            Some(single) => {
                if let Some(ws) = &mut self.trailing {
                    // More than one whitespace node: no longer single.
                    ws.single = false;
                } else {
                    let from = self.output.len();
                    self.trailing = Some(TrailingWhitespace { single, from });
                }
            }
            None => {
                if !matches!(node, HtmlNode::Tag(_)) {
                    self.flush_whitespace();
                }
            }
        }

        self.output.push(node);
    }

    /// Convenience wrapper that pushes `text` as a text node.
    fn push_text(&mut self, text: impl Into<EcoString>, span: Span) {
        let node = HtmlNode::text(text.into(), span);
        self.push(node);
    }

    /// Wraps the trailing whitespace run in a protecting span if it needs
    /// one.
    ///
    /// Single ASCII spaces are not wrapped here. They are dealt with in a
    /// separate pass because deciding on them is more complex and requires
    /// lookahead. See the documentation of [`Whitespace`] for details.
    fn flush_whitespace(&mut self) {
        if self.whitespace != Whitespace::Normal {
            return;
        }

        let Some(TrailingWhitespace { single: false, from }) = self.trailing.take()
        else {
            return;
        };

        // Move the run out of the output and re-insert it wrapped.
        let run = self.output[from..].iter().cloned().collect::<EcoVec<_>>();
        self.output.truncate(from);
        self.output.push(HtmlNode::Element(pre_wrap(run)));
    }
}
/// Guards every single space in the block-level `nodes` that would otherwise
/// be collapsed.
///
/// Block-level elements among the nodes are not descended into because they
/// form separate contexts with their own space protection.
fn protect_spaces(nodes: &mut EcoVec<HtmlNode>) {
    let mut protector = Protector::new();
    protector.visit_nodes(nodes);
    // The end of the context collapses a space that is still pending.
    protector.collapsing();
}
/// A state machine for whitespace protection.
///
/// The state describes what precedes the node that is visited next.
enum Protector<'a> {
/// A single space encountered now would be collapsed and is therefore
/// protected right away. This is the initial state.
Collapsing,
/// A single space encountered now is supported from the left. Whether it
/// needs protection depends on what follows.
Supportive,
/// A single space was encountered in a supportive state and is still
/// undecided: it is protected if another single space or something
/// collapsing follows and left alone if something supportive follows.
Space(&'a mut HtmlNode),
}
impl<'a> Protector<'a> {
/// Creates a new protector.
///
/// It starts out in the collapsing state, so a leading single space is
/// protected.
fn new() -> Self {
Self::Collapsing
}
/// Visits the given nodes and protects single spaces that need to be saved
/// from collapsing.
fn visit_nodes(&mut self, nodes: &'a mut EcoVec<HtmlNode>) {
for node in nodes.make_mut().iter_mut() {
match node {
// Tags do not affect the state.
HtmlNode::Tag(_) => {}
HtmlNode::Text(text, _) => {
if text == " " {
match self {
// Nothing supports the space from the left.
Self::Collapsing => {
protect_space(node);
*self = Self::Supportive;
}
// Supported from the left; decide once we know what
// follows.
Self::Supportive => {
*self = Self::Space(node);
}
// Two single spaces in a row: protect the earlier
// one and keep the current one pending.
Self::Space(prev) => {
protect_space(prev);
*self = Self::Space(node);
}
}
} else if text.chars().any(|c| !is_default_ignorable(c)) {
// Text consisting only of default-ignorables leaves the
// state unchanged.
self.supportive();
}
}
HtmlNode::Element(element) => {
if tag::is_block_by_default(element.tag) || element.tag == tag::br {
self.collapsing();
} else if !element.pre_span {
// Recursively visit the children of inline-level
// elements while making sure to not revisit pre-wrapped
// spans that we've generated ourselves.
self.visit_nodes(&mut element.children);
}
}
HtmlNode::Frame(_) => self.supportive(),
}
}
}
/// Called when visiting an element that would collapse adjacent single
/// spaces. A preceding, if any, and succeeding, if any, single space will
/// then be protected.
fn collapsing(&mut self) {
// Protect a pending space and switch to the collapsing state so that a
// directly following single space is protected as well.
if let Self::Space(node) = std::mem::replace(self, Self::Collapsing) {
protect_space(node);
}
}
/// Called when visiting an element that supports adjacent single spaces.
///
/// A pending single space is left unprotected.
fn supportive(&mut self) {
*self = Self::Supportive;
}
}
/// Protects a single space against collapsing by replacing the node in place
/// with a pre-wrapping span that contains it.
fn protect_space(node: &mut HtmlNode) {
    let inner = node.clone();
    let wrapped = pre_wrap(eco_vec![inner]);
    *node = wrapped.into();
}
/// Builds a `<span style="white-space: pre-wrap">..</span>` around the given
/// whitespace nodes so that HTML rendering engines do not collapse them.
///
/// The resulting element takes its span from the wrapped nodes and is marked
/// as a compiler-generated `pre_span`.
fn pre_wrap(nodes: EcoVec<HtmlNode>) -> HtmlElement {
    let origin = Span::find(nodes.iter().map(|node| node.span()));
    let properties = css::Properties::new().with("white-space", "pre-wrap");
    let mut wrapper = HtmlElement::new(tag::span)
        .with_styles(properties)
        .with_children(nodes)
        .spanned(origin);
    wrapper.pre_span = true;
    wrapper
}
/// Returns the last non-default ignorable character from the passed nodes.
fn last_char(nodes: &[HtmlNode]) -> Option<char> {
for node in nodes.iter().rev() {

View File

@ -13,10 +13,10 @@ use typst_library::introspection::{
use typst_library::layout::{Point, Position, Transform};
use typst_library::model::DocumentInfo;
use typst_library::routines::{Arenas, RealizationKind, Routines};
use typst_library::text::SmartQuoter;
use typst_syntax::Span;
use typst_utils::NonZeroExt;
use crate::convert::{ConversionLevel, Whitespace};
use crate::{HtmlDocument, HtmlElem, HtmlElement, HtmlNode, attr, tag};
/// Produce an HTML document from content.
@ -83,8 +83,9 @@ fn html_document_impl(
let output = crate::convert::convert_to_nodes(
&mut engine,
&mut locator,
&mut SmartQuoter::new(),
children.iter().copied(),
ConversionLevel::Block,
Whitespace::Normal,
)?;
let mut link_targets = FxHashSet::default();

View File

@ -10,7 +10,7 @@ use typst_library::text::TextElem;
use typst_syntax::Span;
use typst_utils::{PicoStr, ResolvedPicoStr};
use crate::charsets;
use crate::{attr, charsets, css};
/// An HTML document.
#[derive(Debug, Clone)]
@ -41,6 +41,22 @@ impl HtmlNode {
pub fn text(text: impl Into<EcoString>, span: Span) -> Self {
Self::Text(text.into(), span)
}
/// Returns the span the node originated from.
///
/// Tag nodes carry no span; a detached span is returned for them.
pub fn span(&self) -> Span {
    match self {
        Self::Text(_, origin) => *origin,
        Self::Element(elem) => elem.span,
        Self::Frame(frame) => frame.span,
        Self::Tag(_) => Span::detached(),
    }
}
}
impl From<Tag> for HtmlNode {
fn from(tag: Tag) -> Self {
Self::Tag(tag)
}
}
impl From<HtmlElement> for HtmlNode {
@ -49,6 +65,12 @@ impl From<HtmlElement> for HtmlNode {
}
}
impl From<HtmlFrame> for HtmlNode {
fn from(frame: HtmlFrame) -> Self {
Self::Frame(frame)
}
}
/// An HTML element.
#[derive(Debug, Clone, Hash)]
pub struct HtmlElement {
@ -60,6 +82,14 @@ pub struct HtmlElement {
pub children: EcoVec<HtmlNode>,
/// The span from which the element originated, if any.
pub span: Span,
/// Whether this is a span with `white-space: pre-wrap` generated by the
/// compiler to prevent whitespace from being collapsed.
///
/// For such spans, spaces and tabs in the element are emitted as escape
/// sequences. While this does not matter for browser engine rendering (as
/// the `white-space` CSS property is enough), it ensures that formatters
/// won't mess up the output.
pub pre_span: bool,
}
impl HtmlElement {
@ -70,6 +100,7 @@ impl HtmlElement {
attrs: HtmlAttrs::default(),
children: EcoVec::new(),
span: Span::detached(),
pre_span: false,
}
}
@ -87,6 +118,15 @@ impl HtmlElement {
self
}
/// Attaches the given CSS properties to the element as an inline `style`
/// attribute.
///
/// The element is returned unchanged when the properties yield no inline
/// styles.
pub(crate) fn with_styles(self, properties: css::Properties) -> Self {
    match properties.into_inline_styles() {
        Some(value) => self.with_attr(attr::style, value),
        None => self,
    }
}
/// Attach a span to the element.
pub fn spanned(mut self, span: Span) -> Self {
self.span = span;

View File

@ -52,10 +52,10 @@ fn write_indent(w: &mut Writer) {
}
/// Encodes an HTML node into the writer.
fn write_node(w: &mut Writer, node: &HtmlNode) -> SourceResult<()> {
fn write_node(w: &mut Writer, node: &HtmlNode, escape_text: bool) -> SourceResult<()> {
match node {
HtmlNode::Tag(_) => {}
HtmlNode::Text(text, span) => write_text(w, text, *span)?,
HtmlNode::Text(text, span) => write_text(w, text, *span, escape_text)?,
HtmlNode::Element(element) => write_element(w, element)?,
HtmlNode::Frame(frame) => write_frame(w, frame),
}
@ -63,12 +63,12 @@ fn write_node(w: &mut Writer, node: &HtmlNode) -> SourceResult<()> {
}
/// Encodes plain text into the writer.
fn write_text(w: &mut Writer, text: &str, span: Span) -> SourceResult<()> {
fn write_text(w: &mut Writer, text: &str, span: Span, escape: bool) -> SourceResult<()> {
for c in text.chars() {
if charsets::is_valid_in_normal_element_text(c) {
w.buf.push(c);
} else {
if escape || !charsets::is_valid_in_normal_element_text(c) {
write_escape(w, c).at(span)?;
} else {
w.buf.push(c);
}
}
Ok(())
@ -152,7 +152,7 @@ fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
if core::mem::take(&mut indent) || pretty_around {
write_indent(w);
}
write_node(w, c)?;
write_node(w, c, element.pre_span)?;
indent = pretty_around;
}
w.level -= 1;
@ -213,7 +213,7 @@ fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
/// Encodes the contents of an escapable raw text element.
fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
walk_raw_text(element, |piece, span| write_text(w, piece, span))
walk_raw_text(element, |piece, span| write_text(w, piece, span, false))
}
/// Collects the textual contents of a raw text element.

View File

@ -1,14 +1,14 @@
use comemo::{Track, Tracked, TrackedMut};
use ecow::EcoVec;
use typst_library::World;
use typst_library::diag::{At, SourceResult};
use typst_library::engine::{Engine, Route, Sink, Traced};
use typst_library::foundations::{Content, StyleChain};
use typst_library::introspection::{Introspector, Locator, LocatorLink, SplitLocator};
use typst_library::World;
use typst_library::routines::{Arenas, FragmentKind, Pair, RealizationKind, Routines};
use typst_library::text::SmartQuoter;
use crate::convert::{ConversionLevel, Whitespace};
use crate::{HtmlElem, HtmlNode};
/// Produces HTML nodes from content contained in an HTML element that is
@ -19,6 +19,7 @@ pub fn html_block_fragment(
content: &Content,
locator: Locator,
styles: StyleChain,
whitespace: Whitespace,
) -> SourceResult<EcoVec<HtmlNode>> {
html_block_fragment_impl(
engine.routines,
@ -30,6 +31,7 @@ pub fn html_block_fragment(
content,
locator.track(),
styles,
whitespace,
)
}
@ -46,6 +48,7 @@ fn html_block_fragment_impl(
content: &Content,
locator: Tracked<Locator>,
styles: StyleChain,
whitespace: Whitespace,
) -> SourceResult<EcoVec<HtmlNode>> {
let link = LocatorLink::new(locator);
let mut locator = Locator::link(&link).split();
@ -65,8 +68,9 @@ fn html_block_fragment_impl(
crate::convert::convert_to_nodes(
&mut engine,
&mut locator,
&mut SmartQuoter::new(),
children.iter().copied(),
ConversionLevel::Block,
whitespace,
)
}
@ -85,6 +89,7 @@ pub fn html_inline_fragment(
locator: &mut SplitLocator,
quoter: &mut SmartQuoter,
styles: StyleChain,
whitespace: Whitespace,
) -> SourceResult<EcoVec<HtmlNode>> {
engine.route.increase();
engine.route.check_html_depth().at(content.span())?;
@ -94,8 +99,9 @@ pub fn html_inline_fragment(
let result = crate::convert::convert_to_nodes(
engine,
locator,
quoter,
children.iter().copied(),
ConversionLevel::Inline(quoter),
whitespace,
);
engine.route.decrease();

View File

@ -428,20 +428,16 @@ const RAW_RULE: ShowFn<RawElem> = |elem, _, styles| {
seq.push(line.clone().pack());
}
let mut inline = css::Properties::new();
let block = elem.block.get(styles);
if !block {
// Without the `<pre>` tag, whitespace would be collapsed by default.
inline.push("white-space", "pre-wrap");
}
let code = HtmlElem::new(tag::code)
.with_styles(inline)
.with_body(Some(Content::sequence(seq)))
.pack()
.spanned(elem.span());
Ok(if block { HtmlElem::new(tag::pre).with_body(Some(code)).pack() } else { code })
Ok(if elem.block.get(styles) {
HtmlElem::new(tag::pre).with_body(Some(code)).pack()
} else {
code
})
};
/// This is used by `RawElem::synthesize` through a routine.

View File

@ -0,0 +1,51 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<h2>Single spaces</h2>
<p>A B</p>
<p>A B</p>
<p><span>A</span> B</p>
<p>A<span style="white-space: pre-wrap">&#x20;</span><span></span> B</p>
<p>A<span style="white-space: pre-wrap">&#x20;&#x20;</span>B</p>
<p><span>A </span>B</p>
<h2>Consecutive whitespace</h2>
<p>A<span style="white-space: pre-wrap">&#x20;&#x20;</span>B<span style="white-space: pre-wrap">&#x20;&#x20;&#x20;</span>C</p>
<p>A<span style="white-space: pre-wrap">&#x20;&#x20;</span>B<span style="white-space: pre-wrap">&#x20;&#x20;&#x20;</span>C</p>
<p><span>A<span style="white-space: pre-wrap">&#x20;</span></span> B</p>
<p><span>A </span><span style="white-space: pre-wrap">&#x20;&#x20;</span>B</p>
<p><span>A<span style="white-space: pre-wrap">&#x20;&#x20;</span></span> B</p>
<p><span>A<span style="white-space: pre-wrap">&#x20;&#x20;</span></span><span style="white-space: pre-wrap">&#x20;&#x20;</span>B</p>
<p>A<span style="white-space: pre-wrap">&#x20;&#x20;</span><span></span><span style="white-space: pre-wrap">&#x20;&#x20;</span>B</p>
<p>A<span style="white-space: pre-wrap">&#x20;&#x20;&#x20;</span>B</p>
<h2>Leading whitespace</h2>
<p><span style="white-space: pre-wrap">&#x20;</span>A</p>
<p><span><span style="white-space: pre-wrap">&#x20;</span></span>A</p>
<p><span></span><span style="white-space: pre-wrap">&#x20;</span>A</p>
<h2>Trailing whitespace</h2>
<p>A<span style="white-space: pre-wrap">&#x20;</span></p>
<p><span>A<span style="white-space: pre-wrap">&#x20;</span></span></p>
<p><span>A<span style="white-space: pre-wrap">&#x20;</span></span><span></span></p>
<h2>Tabs</h2>
<p>A<span style="white-space: pre-wrap">&#x9;</span>B</p>
<p>A<span style="white-space: pre-wrap">&#x9;</span>B</p>
<p>A<span style="white-space: pre-wrap">&#x20;&#x9;&#x20;</span>B</p>
<h2>Newlines</h2>
<p>A<br>B</p>
<p>A<br>B</p>
<p>A<span style="white-space: pre-wrap">&#x20;</span><br><span style="white-space: pre-wrap">&#x20;</span>B</p>
<p>A<span style="white-space: pre-wrap">&#x20;</span><br><span style="white-space: pre-wrap">&#x20;</span>B</p>
<p>A<span style="white-space: pre-wrap">&#x20;</span><span><br></span><span style="white-space: pre-wrap">&#x20;</span>B</p>
<h2>With default ignorables</h2>
<p>A<span style="white-space: pre-wrap">&#x20;</span> B</p>
<p>A<span style="white-space: pre-wrap">&#x20;&#x20;</span><span style="white-space: pre-wrap">&#x20;&#x20;</span>B</p>
<h2>Everything</h2>
<p><span><span style="white-space: pre-wrap">&#x20;&#x20;</span>A<span style="white-space: pre-wrap">&#x20;</span></span><br><span style="white-space: pre-wrap">&#x9;</span>B<span style="white-space: pre-wrap">&#x20;</span><span></span></p>
<h2>Special</h2>
<textarea>A B</textarea>
<pre>A B</pre>
</body>
</html>

View File

@ -5,6 +5,6 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<p>This has <code style="white-space: pre-wrap">double spaces inside</code>, which should be kept.</p>
<p>This has <code>double<span style="white-space: pre-wrap">&#x20;&#x20;</span>spaces<span style="white-space: pre-wrap">&#x20;&#x20;</span>inside</code>, which should be kept.</p>
</body>
</html>

View File

@ -5,7 +5,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<p>This is <code style="white-space: pre-wrap"><strong>*</strong><strong>inline</strong><strong>*</strong></code>.</p>
<pre><code><span style="color: #d73a49">#</span><span style="color: #d73a49">set</span> <span style="color: #4b69c6">text</span>(blue)<br><strong>*</strong><strong>Hello</strong><strong>*</strong> <em>_</em><em>world</em><em>_</em>!</code></pre>
<p>This is <code><strong>*</strong><strong>inline</strong><strong>*</strong></code>.</p>
<pre><code>#[<br> <span style="color: #d73a49">#</span><span style="color: #d73a49">set</span> <span style="color: #4b69c6">text</span>(blue)<br> <strong>*</strong><strong>Hello</strong><strong>*</strong> <em>_</em><em>world</em><em>_</em>!<br>]</code></pre>
</body>
</html>

View File

@ -2,6 +2,150 @@
// Error: 2-27 HTML void elements must not have children
#html.elem("img", [Hello])
--- html-space-collapsing html ---
// Note: <s>..</s> = <span style="white-space: pre-wrap">..</span>
#import html: span
= Single spaces
// No collapsing.
#"A B"
// -> A B
// No collapsing, multiple text elements.
#"A"#" "#"B"
// -> A B
// Across span boundaries: 0-1.
#span[A] B
// -> <span>A</span> B
// With span in between.
#"A "#span()#" B"
// -> A<s> </s><span></span> B
// With metadata in between.
#"A "#metadata(none)#" B"
// -> A<s> </s>B
// Within span.
#span("A ")B
// -> <span>A </span>B
= Consecutive whitespace
// Single text element.
#"A B C"
// -> A<s> </s>B<s> </s>C
// Multiple text elements.
A#" "B#" C"
// -> A<s> </s>B<s> </s>C
// Across span boundaries: 1-1.
#span("A ") B
// -> <span>A<s> </s></span> B
// Across span boundaries: 1-2.
#span("A ")#" B"
// -> <span>A </span><s> </s>B
// Across span boundaries: 2-1.
#span("A ") B
// -> <span>A<s> </s></span> B
// Across span boundaries: 2-2.
#span("A ")#" B"
// -> <span>A<s> </s></span><s> </s>B
// With span in between.
#"A "#span()#" B"
// -> A<s> </s><span></span><s> </s>B
// With metadata in between.
#"A "#metadata(none)#" B"
// -> A<s> </s>B
= Leading whitespace
// Leading space.
#" A"
// -> <s> </s>A
// Leading space in span.
#span(" ")A
// -> <span><s> </s></span>A
// Leading space with preceding empty element.
#span()#" "A
// -> <span></span><s> </s>A
= Trailing whitespace
// Trailing space.
#"A "
// -> A<s> </s>
// Trailing space in element.
#span("A ")
// -> <span>A<s> </s></span>
// Trailing space in element with following empty element.
#span("A ")#span()
// -> <span>A<s> </s></span><span></span>
= Tabs
// Single text element.
#"A\tB"
// -> A<s>&#9;</s>B
// Multiple text elements.
#"A"#"\t"#"B"
// -> A<s>&#9;</s>B
// Spaces + Tab.
#"A \t B"
// -> A<s> &#9; </s>B
= Newlines
// Normal line feed.
#"A\nB"
// -> A<br>B
// CRLF.
#"A\r\nB"
// -> A<br>B
// Spaces + newline.
#"A \n B"
// -> A<s> </s><br><s> </s>B
// Explicit `<br>` element.
#"A "#html.br()#" B"
// -> A<s> </s><br><s> </s>B
// Newline in span.
#"A "#span("\n")#" B"
// -> A<s> </s><span><br></span><s> </s>B
= With default ignorables
// With default ignorable in between.
#"A \u{200D} B"
// -> A<s> </s>&#x200D; B
#"A \u{200D} B"
// -> A<s> </s>&#x200D;<s> </s>B
= Everything
// Everything at once.
#span(" A ")#"\r\n\t"B#" "#span()
// -> <span><s> </s>A<s> </s></span><br><s>&#9;</s>B<s> </s><span></span>
= Special
// Escapable raw.
#html.textarea("A B")
// -> <textarea>A B</textarea>
// Preformatted.
#html.pre("A B")
// -> <pre>A B</pre>
--- html-pre-starting-with-newline html ---
#html.pre("hello")
#html.pre("\nhello")

View File

@ -490,8 +490,10 @@ test
--- raw-html html ---
This is ```typ *inline*```.
```typ
#set text(blue)
*Hello* _world_!
#[
#set text(blue)
*Hello* _world_!
]
```
--- raw-html-inline-spaces html ---