From 805fb24ca4827490fef5e14dd5bf73c984b2fac1 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 13 Aug 2025 14:12:24 +0200 Subject: [PATCH] HTML whitespace protection (#6750) --- crates/typst-html/src/convert.rs | 440 ++++++++++++++++++--- crates/typst-html/src/document.rs | 5 +- crates/typst-html/src/dom.rs | 42 +- crates/typst-html/src/encode.rs | 16 +- crates/typst-html/src/fragment.rs | 14 +- crates/typst-html/src/rules.rs | 14 +- tests/ref/html/html-space-collapsing.html | 51 +++ tests/ref/html/raw-html-inline-spaces.html | 2 +- tests/ref/html/raw-html.html | 4 +- tests/suite/html/syntax.typ | 144 +++++++ tests/suite/text/raw.typ | 6 +- 11 files changed, 656 insertions(+), 82 deletions(-) create mode 100644 tests/ref/html/html-space-collapsing.html diff --git a/crates/typst-html/src/convert.rs b/crates/typst-html/src/convert.rs index 40e1b4364..0475881fa 100644 --- a/crates/typst-html/src/convert.rs +++ b/crates/typst-html/src/convert.rs @@ -1,7 +1,7 @@ -use ecow::EcoVec; +use ecow::{EcoString, EcoVec, eco_vec}; use typst_library::diag::{SourceResult, warning}; use typst_library::engine::Engine; -use typst_library::foundations::{Content, StyleChain, Target, TargetElem}; +use typst_library::foundations::{Content, Packed, StyleChain, Target, TargetElem}; use typst_library::introspection::{SplitLocator, TagElem}; use typst_library::layout::{Abs, Axes, Region, Size}; use typst_library::routines::Pair; @@ -9,101 +9,130 @@ use typst_library::text::{ LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem, is_default_ignorable, }; +use typst_syntax::Span; use crate::fragment::{html_block_fragment, html_inline_fragment}; -use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, tag}; +use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, css, tag}; + +/// What and how to convert. +pub enum ConversionLevel<'a> { + /// Converts the top-level nodes or children of a block-level element. 
The + /// conversion has its own local smart quoting state and space protection. + Block, + /// Converts the children of an inline-level HTML element as part of a + /// larger context with shared smart quoting state and shared space + /// protection. + Inline(&'a mut SmartQuoter), +} + +/// How to emit whitespace. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum Whitespace { + /// Ensures that whitespace that would otherwise be collapsed by HTML + /// rendering engines[^1] is protected by spans with `white-space: + /// pre-wrap`. The affected whitespace characters are ASCII spaces and ASCII tabs. + /// + /// Tries to emit spans only when necessary. + /// - ASCII tabs and consecutive sequences of spaces and/or tabs are always + /// wrapped in spans in this mode. This happens directly during + /// conversion. + /// - Single ASCII spaces are only wrapped if they aren't supported by + /// normal elements on both sides. This happens in a separate pass that + /// runs for the whole block-level context as doing this properly needs + /// lookahead and lookbehind across different levels of the element + /// hierarchy. + /// + /// [^1]: https://www.w3.org/TR/css-text-3/#white-space-rules + Normal, + /// The whitespace is emitted as-is. This happens in + /// - `
` elements as they already have `white-space: pre`,
+    /// - raw and escapable raw text elements as normal white space rules do not
+    ///   apply to them.
+    Pre,
+}
 
 /// Converts realized content into HTML nodes.
 pub fn convert_to_nodes<'a>(
     engine: &mut Engine,
     locator: &mut SplitLocator,
-    quoter: &mut SmartQuoter,
     children: impl IntoIterator>,
+    level: ConversionLevel, // `Block` gets a fresh quoter, `Inline` borrows the caller quoter
+    whitespace: Whitespace, // `Normal` protects collapsible whitespace, `Pre` emits it as-is
 ) -> SourceResult> {
-    let mut output = EcoVec::new();
+    let block = matches!(level, ConversionLevel::Block); // recorded before `level` is moved below
+    let mut converter = Converter {
+        engine,
+        locator,
+        quoter: match level {
+            ConversionLevel::Inline(quoter) => quoter,
+            ConversionLevel::Block => &mut SmartQuoter::new(),
+        },
+        whitespace,
+        output: EcoVec::new(),
+        trailing: None, // no trailing whitespace is being tracked yet
+    };
+
     for (child, styles) in children {
-        handle(engine, child, locator, styles, quoter, &mut output)?;
+        handle(&mut converter, child, styles)?;
     }
-    Ok(output)
+
+    let mut nodes = converter.finish(); // `finish` flushes pending trailing whitespace first
+    if block && whitespace == Whitespace::Normal {
+        protect_spaces(&mut nodes); // single-space pass, run once per block-level context
+    }
+
+    Ok(nodes)
 }
 
-/// Convert one element into HTML node(s).
+/// Converts one element into HTML node(s).
 fn handle(
-    engine: &mut Engine,
+    converter: &mut Converter,
     child: &Content,
-    locator: &mut SplitLocator,
     styles: StyleChain,
-    quoter: &mut SmartQuoter,
-    output: &mut EcoVec,
 ) -> SourceResult<()> {
     if let Some(elem) = child.to_packed::() {
-        output.push(HtmlNode::Tag(elem.tag.clone()));
+        converter.push(elem.tag.clone());
     } else if let Some(elem) = child.to_packed::() {
-        let mut children = EcoVec::new();
-        if let Some(body) = elem.body.get_ref(styles) {
-            if tag::is_block_by_default(elem.tag) {
-                children = html_block_fragment(
-                    engine,
-                    body,
-                    locator.next(&elem.span()),
-                    styles,
-                )?;
-
-                // Block-level elements reset the smart quoting state. This part
-                // is unfortunately untested as it's currently not possible to
-                // create inline-level content next to block-level content
-                // without a paragraph automatically appearing.
-                *quoter = SmartQuoter::new();
-            } else {
-                children = html_inline_fragment(engine, body, locator, quoter, styles)?;
-            }
-        }
-        let element = HtmlElement {
-            tag: elem.tag,
-            attrs: elem.attrs.get_cloned(styles),
-            children,
-            span: elem.span(),
-        };
-        output.push(element.into());
+        handle_html_elem(converter, elem, styles)?;
     } else if child.is::() {
-        output.push(HtmlNode::text(' ', child.span()));
+        converter.push(HtmlNode::text(' ', child.span()));
     } else if let Some(elem) = child.to_packed::() {
         let text = if let Some(case) = styles.get(TextElem::case) {
             case.apply(&elem.text).into()
         } else {
             elem.text.clone()
         };
-        output.push(HtmlNode::text(text, elem.span()));
+        handle_text(converter, text, elem.span());
     } else if let Some(elem) = child.to_packed::() {
-        output.push(HtmlElement::new(tag::br).spanned(elem.span()).into());
+        converter.push(HtmlElement::new(tag::br).spanned(elem.span()));
     } else if let Some(elem) = child.to_packed::() {
         let double = elem.double.get(styles);
-        if elem.enabled.get(styles) {
-            let before = last_char(output);
+        let quote = if elem.enabled.get(styles) {
+            let before = last_char(&converter.output);
             let quotes = SmartQuotes::get(
                 elem.quotes.get_ref(styles),
                 styles.get(TextElem::lang),
                 styles.get(TextElem::region),
                 elem.alternative.get(styles),
             );
-            let quote = quoter.quote(before, "es, double);
-            output.push(HtmlNode::text(quote, child.span()));
+            converter.quoter.quote(before, "es, double)
         } else {
-            output.push(HtmlNode::text(SmartQuotes::fallback(double), child.span()));
-        }
+            SmartQuotes::fallback(double)
+        };
+        handle_text(converter, quote.into(), child.span());
     } else if let Some(elem) = child.to_packed::() {
-        let locator = locator.next(&elem.span());
+        let locator = converter.locator.next(&elem.span());
         let style = TargetElem::target.set(Target::Paged).wrap();
-        let frame = (engine.routines.layout_frame)(
-            engine,
+        let frame = (converter.engine.routines.layout_frame)(
+            converter.engine,
             &elem.body,
             locator,
             styles.chain(&style),
             Region::new(Size::splat(Abs::inf()), Axes::splat(false)),
         )?;
-        output.push(HtmlNode::Frame(HtmlFrame::new(frame, styles, elem.span())));
+        converter.push(HtmlFrame::new(frame, styles, elem.span()));
     } else {
-        engine.sink.warn(warning!(
+        converter.engine.sink.warn(warning!(
             child.span(),
             "{} was ignored during HTML export",
             child.elem().name()
@@ -112,6 +141,311 @@ fn handle(
     Ok(())
 }
 
+/// Handles an HTML element by converting its body, if any, and pushing the resulting element.
+fn handle_html_elem(
+    converter: &mut Converter,
+    elem: &Packed,
+    styles: StyleChain,
+) -> SourceResult<()> {
+    let mut children = EcoVec::new(); // stays empty when the element has no body
+    if let Some(body) = elem.body.get_ref(styles) {
+        let whitespace = if converter.whitespace == Whitespace::Pre // `Pre` carries over to nested elements
+            || elem.tag == tag::pre
+            || tag::is_raw(elem.tag)
+            || tag::is_escapable_raw(elem.tag)
+        {
+            Whitespace::Pre
+        } else {
+            Whitespace::Normal
+        };
+
+        if tag::is_block_by_default(elem.tag) { // block-level: body is converted via `html_block_fragment`
+            children = html_block_fragment(
+                converter.engine,
+                body,
+                converter.locator.next(&elem.span()),
+                styles,
+                whitespace,
+            )?;
+
+            // Block-level elements reset the inline state. This part is
+            // unfortunately untested as it's currently not possible to
+            // create inline-level content next to block-level content
+            // without a paragraph automatically appearing.
+            *converter.quoter = SmartQuoter::new();
+        } else { // inline-level: reuse the locator and smart quoter of this converter
+            children = html_inline_fragment(
+                converter.engine,
+                body,
+                converter.locator,
+                converter.quoter,
+                styles,
+                whitespace,
+            )?;
+        }
+    }
+
+    converter.push(HtmlElement {
+        tag: elem.tag,
+        attrs: elem.attrs.get_cloned(styles),
+        children,
+        span: elem.span(),
+        pre_span: false, // user-authored element, not a compiler-generated pre-wrap span
+    });
+
+    Ok(())
+}
+
+/// Handles arbitrary text while taking care that no whitespace within will be
+/// collapsed by browsers.
+fn handle_text(converter: &mut Converter, text: EcoString, span: Span) {
+    /// Special kinds of characters.
+    #[derive(Debug, Copy, Clone, Eq, PartialEq)]
+    enum Kind {
+        /// ASCII space.
+        Space,
+        /// ASCII tab.
+        Tab,
+        /// CR, LF, or CR + LF.
+        Newline,
+        /// A Unicode default-ignorable. Does not protect spaces from
+        /// collapsing.
+        Ignorable,
+    }
+
+    impl Kind {
+        fn of(c: char) -> Option {
+            match c {
+                ' ' => Some(Kind::Space),
+                '\t' => Some(Kind::Tab),
+                '\r' | '\n' => Some(Kind::Newline),
+                c if is_default_ignorable(c) => Some(Kind::Ignorable),
+                _ => None,
+            }
+        }
+    }
+
+    if converter.whitespace == Whitespace::Pre {
+        converter.push(HtmlNode::Text(text, span));
+        return;
+    }
+
+    let mut emitted = 0;
+    let mut prev_kind = None;
+
+    for (i, c) in text.char_indices() {
+        let kind = Kind::of(c);
+        let prev_kind = prev_kind.replace(kind);
+        let Some(kind) = kind else { continue };
+
+        // A space that is surrounded by normal (i.e. not special) characters is
+        // already protected and doesn't need further treatment.
+        if kind == Kind::Space
+            && let Some(None) = prev_kind
+            && let Some(after) = text[i + 1..].chars().next()
+            && Kind::of(after).is_none()
+        {
+            continue;
+        }
+
+        // Emit the unspecial text up to the special character.
+        if emitted < i {
+            converter.push_text(&text[emitted..i], span);
+            emitted = i;
+        }
+
+        // Process the special character.
+        match kind {
+            Kind::Space => converter.push_text(' ', span),
+            Kind::Tab => converter.push_text('\t', span),
+            Kind::Newline => {
+                if c == '\r' && text[i + 1..].starts_with('\n') {
+                    // Skip the CR because the LF will already turn into
+                    // a `
`. + emitted += 1; + continue; + } + converter.push(HtmlElement::new(tag::br).spanned(span)); + } + Kind::Ignorable => converter.push_text(c, span), + } + emitted += c.len_utf8(); + } + + // Push the remaining unspecial text. + if emitted < text.len() { + converter.push_text( + // Try to reuse the `EcoString` if possible. + if emitted == 0 { text } else { text[emitted..].into() }, + span, + ); + } +} + +/// State during conversion. +struct Converter<'a, 'y, 'z> { + engine: &'a mut Engine<'y>, + locator: &'a mut SplitLocator<'z>, + quoter: &'a mut SmartQuoter, + whitespace: Whitespace, + output: EcoVec, + trailing: Option, +} + +/// Keeps track of a trailing whitespace in the output. +struct TrailingWhitespace { + /// If `true`, the trailing whitespace consists of exactly one ASCII space. + single: bool, + /// The trailing whitespace starts at `output[from..]`. + from: usize, +} + +impl Converter<'_, '_, '_> { + /// Returns the converted nodes. + fn finish(mut self) -> EcoVec { + self.flush_whitespace(); + self.output + } + + /// Pushes a node, taking care to protect consecutive whitespace. + fn push(&mut self, node: impl Into) { + let node = node.into(); + + if let HtmlNode::Text(text, _) = &node + && (text == " " || text == "\t") + { + if let Some(ws) = &mut self.trailing { + ws.single = false; + } else { + self.trailing = Some(TrailingWhitespace { + single: text == " ", + from: self.output.len(), + }); + } + } else if !matches!(node, HtmlNode::Tag(_)) { + self.flush_whitespace(); + } + + self.output.push(node); + } + + /// Shorthand for pushing a text node. + fn push_text(&mut self, text: impl Into, span: Span) { + self.push(HtmlNode::text(text.into(), span)); + } + + /// If there is trailing whitespace in need of protection, protects it. + /// + /// Does not protect single ASCII spaces. Those are handled in a separate + /// pass as they are more complex and require lookahead. See the + /// documentation of [`Whitespace`] for more information. 
+ fn flush_whitespace(&mut self) { + if self.whitespace == Whitespace::Normal + && let Some(TrailingWhitespace { single: false, from }) = self.trailing.take() + { + let nodes: EcoVec<_> = self.output[from..].iter().cloned().collect(); + self.output.truncate(from); + self.output.push(HtmlNode::Element(pre_wrap(nodes))); + } + } +} + +/// Protects all spaces in the given block-level `nodes` against collapsing. +/// +/// Does not recurse into block-level elements as those are separate contexts +/// with their own space protection. +fn protect_spaces(nodes: &mut EcoVec) { + let mut p = Protector::new(); + p.visit_nodes(nodes); + p.collapsing(); +} + +/// A state machine for whitespace protection. +enum Protector<'a> { + Collapsing, + Supportive, + Space(&'a mut HtmlNode), +} + +impl<'a> Protector<'a> { + /// Creates a new protector. + fn new() -> Self { + Self::Collapsing + } + + /// Visits the given nodes and protects single spaces that need to be saved + /// from collapsing. + fn visit_nodes(&mut self, nodes: &'a mut EcoVec) { + for node in nodes.make_mut().iter_mut() { + match node { + HtmlNode::Tag(_) => {} + HtmlNode::Text(text, _) => { + if text == " " { + match self { + Self::Collapsing => { + protect_space(node); + *self = Self::Supportive; + } + Self::Supportive => { + *self = Self::Space(node); + } + Self::Space(prev) => { + protect_space(prev); + *self = Self::Space(node); + } + } + } else if text.chars().any(|c| !is_default_ignorable(c)) { + self.supportive(); + } + } + HtmlNode::Element(element) => { + if tag::is_block_by_default(element.tag) || element.tag == tag::br { + self.collapsing(); + } else if !element.pre_span { + // Recursively visit the children of inline-level + // elements while making sure to not revisit pre-wrapped + // spans that we've generated ourselves. 
+ self.visit_nodes(&mut element.children); + } + } + HtmlNode::Frame(_) => self.supportive(), + } + } + } + + /// Called when visiting an element that would collapse adjacent single + /// spaces. A preceding, if any, and succeeding, if any, single space will + /// then be protected. + fn collapsing(&mut self) { + if let Self::Space(node) = std::mem::replace(self, Self::Collapsing) { + protect_space(node); + } + } + + /// Called when visiting an element that supports adjacent single spaces. + fn supportive(&mut self) { + *self = Self::Supportive; + } +} + +/// Protects a single space against collapsing. +fn protect_space(node: &mut HtmlNode) { + *node = pre_wrap(eco_vec![node.clone()]).into(); +} + +/// Wraps a collection of whitespace nodes in a +/// `<span style="white-space: pre-wrap">..</span>` to avoid them being +/// collapsed by HTML rendering engines. +fn pre_wrap(nodes: EcoVec) -> HtmlElement { + let span = Span::find(nodes.iter().map(|c| c.span())); + let mut elem = HtmlElement::new(tag::span) + .with_styles(css::Properties::new().with("white-space", "pre-wrap")) + .with_children(nodes) + .spanned(span); + elem.pre_span = true; + elem +} + /// Returns the last non-default ignorable character from the passed nodes. fn last_char(nodes: &[HtmlNode]) -> Option { for node in nodes.iter().rev() { diff --git a/crates/typst-html/src/document.rs b/crates/typst-html/src/document.rs index ec9d9a50e..43c06e70f 100644 --- a/crates/typst-html/src/document.rs +++ b/crates/typst-html/src/document.rs @@ -13,10 +13,10 @@ use typst_library::introspection::{ use typst_library::layout::{Point, Position, Transform}; use typst_library::model::DocumentInfo; use typst_library::routines::{Arenas, RealizationKind, Routines}; -use typst_library::text::SmartQuoter; use typst_syntax::Span; use typst_utils::NonZeroExt; +use crate::convert::{ConversionLevel, Whitespace}; use crate::{HtmlDocument, HtmlElem, HtmlElement, HtmlNode, attr, tag}; /// Produce an HTML document from content. 
@@ -83,8 +83,9 @@ fn html_document_impl( let output = crate::convert::convert_to_nodes( &mut engine, &mut locator, - &mut SmartQuoter::new(), children.iter().copied(), + ConversionLevel::Block, + Whitespace::Normal, )?; let mut link_targets = FxHashSet::default(); diff --git a/crates/typst-html/src/dom.rs b/crates/typst-html/src/dom.rs index 44b4f9156..d3f9c403f 100644 --- a/crates/typst-html/src/dom.rs +++ b/crates/typst-html/src/dom.rs @@ -10,7 +10,7 @@ use typst_library::text::TextElem; use typst_syntax::Span; use typst_utils::{PicoStr, ResolvedPicoStr}; -use crate::charsets; +use crate::{attr, charsets, css}; /// An HTML document. #[derive(Debug, Clone)] @@ -41,6 +41,22 @@ impl HtmlNode { pub fn text(text: impl Into, span: Span) -> Self { Self::Text(text.into(), span) } + + /// Returns the span, if any. + pub fn span(&self) -> Span { + match self { + Self::Tag(_) => Span::detached(), + Self::Text(_, span) => *span, + Self::Element(element) => element.span, + Self::Frame(frame) => frame.span, + } + } +} + +impl From for HtmlNode { + fn from(tag: Tag) -> Self { + Self::Tag(tag) + } } impl From for HtmlNode { @@ -49,6 +65,12 @@ impl From for HtmlNode { } } +impl From for HtmlNode { + fn from(frame: HtmlFrame) -> Self { + Self::Frame(frame) + } +} + /// An HTML element. #[derive(Debug, Clone, Hash)] pub struct HtmlElement { @@ -60,6 +82,14 @@ pub struct HtmlElement { pub children: EcoVec, /// The span from which the element originated, if any. pub span: Span, + /// Whether this is a span with `white-space: pre-wrap` generated by the + /// compiler to prevent whitespace from being collapsed. + /// + /// For such spans, spaces and tabs in the element are emitted as escape + /// sequences. While this does not matter for browser engine rendering (as + /// the `white-space` CSS property is enough), it ensures that formatters + /// won't mess up the output. 
+ pub pre_span: bool, } impl HtmlElement { @@ -70,6 +100,7 @@ impl HtmlElement { attrs: HtmlAttrs::default(), children: EcoVec::new(), span: Span::detached(), + pre_span: false, } } @@ -87,6 +118,15 @@ impl HtmlElement { self } + /// Adds CSS styles to an element. + pub(crate) fn with_styles(self, properties: css::Properties) -> Self { + if let Some(value) = properties.into_inline_styles() { + self.with_attr(attr::style, value) + } else { + self + } + } + /// Attach a span to the element. pub fn spanned(mut self, span: Span) -> Self { self.span = span; diff --git a/crates/typst-html/src/encode.rs b/crates/typst-html/src/encode.rs index 71fcefcd2..06e4c4632 100644 --- a/crates/typst-html/src/encode.rs +++ b/crates/typst-html/src/encode.rs @@ -52,10 +52,10 @@ fn write_indent(w: &mut Writer) { } /// Encodes an HTML node into the writer. -fn write_node(w: &mut Writer, node: &HtmlNode) -> SourceResult<()> { +fn write_node(w: &mut Writer, node: &HtmlNode, escape_text: bool) -> SourceResult<()> { match node { HtmlNode::Tag(_) => {} - HtmlNode::Text(text, span) => write_text(w, text, *span)?, + HtmlNode::Text(text, span) => write_text(w, text, *span, escape_text)?, HtmlNode::Element(element) => write_element(w, element)?, HtmlNode::Frame(frame) => write_frame(w, frame), } @@ -63,12 +63,12 @@ fn write_node(w: &mut Writer, node: &HtmlNode) -> SourceResult<()> { } /// Encodes plain text into the writer. 
-fn write_text(w: &mut Writer, text: &str, span: Span) -> SourceResult<()> { +fn write_text(w: &mut Writer, text: &str, span: Span, escape: bool) -> SourceResult<()> { for c in text.chars() { - if charsets::is_valid_in_normal_element_text(c) { - w.buf.push(c); - } else { + if escape || !charsets::is_valid_in_normal_element_text(c) { write_escape(w, c).at(span)?; + } else { + w.buf.push(c); } } Ok(()) @@ -152,7 +152,7 @@ fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { if core::mem::take(&mut indent) || pretty_around { write_indent(w); } - write_node(w, c)?; + write_node(w, c, element.pre_span)?; indent = pretty_around; } w.level -= 1; @@ -213,7 +213,7 @@ fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { /// Encodes the contents of an escapable raw text element. fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { - walk_raw_text(element, |piece, span| write_text(w, piece, span)) + walk_raw_text(element, |piece, span| write_text(w, piece, span, false)) } /// Collects the textual contents of a raw text element. 
diff --git a/crates/typst-html/src/fragment.rs b/crates/typst-html/src/fragment.rs index efce44548..1a601fabe 100644 --- a/crates/typst-html/src/fragment.rs +++ b/crates/typst-html/src/fragment.rs @@ -1,14 +1,14 @@ use comemo::{Track, Tracked, TrackedMut}; use ecow::EcoVec; +use typst_library::World; use typst_library::diag::{At, SourceResult}; use typst_library::engine::{Engine, Route, Sink, Traced}; use typst_library::foundations::{Content, StyleChain}; use typst_library::introspection::{Introspector, Locator, LocatorLink, SplitLocator}; - -use typst_library::World; use typst_library::routines::{Arenas, FragmentKind, Pair, RealizationKind, Routines}; use typst_library::text::SmartQuoter; +use crate::convert::{ConversionLevel, Whitespace}; use crate::{HtmlElem, HtmlNode}; /// Produces HTML nodes from content contained in an HTML element that is @@ -19,6 +19,7 @@ pub fn html_block_fragment( content: &Content, locator: Locator, styles: StyleChain, + whitespace: Whitespace, ) -> SourceResult> { html_block_fragment_impl( engine.routines, @@ -30,6 +31,7 @@ pub fn html_block_fragment( content, locator.track(), styles, + whitespace, ) } @@ -46,6 +48,7 @@ fn html_block_fragment_impl( content: &Content, locator: Tracked, styles: StyleChain, + whitespace: Whitespace, ) -> SourceResult> { let link = LocatorLink::new(locator); let mut locator = Locator::link(&link).split(); @@ -65,8 +68,9 @@ fn html_block_fragment_impl( crate::convert::convert_to_nodes( &mut engine, &mut locator, - &mut SmartQuoter::new(), children.iter().copied(), + ConversionLevel::Block, + whitespace, ) } @@ -85,6 +89,7 @@ pub fn html_inline_fragment( locator: &mut SplitLocator, quoter: &mut SmartQuoter, styles: StyleChain, + whitespace: Whitespace, ) -> SourceResult> { engine.route.increase(); engine.route.check_html_depth().at(content.span())?; @@ -94,8 +99,9 @@ pub fn html_inline_fragment( let result = crate::convert::convert_to_nodes( engine, locator, - quoter, children.iter().copied(), + 
ConversionLevel::Inline(quoter), + whitespace, ); engine.route.decrease(); diff --git a/crates/typst-html/src/rules.rs b/crates/typst-html/src/rules.rs index a024d384e..6faea1340 100644 --- a/crates/typst-html/src/rules.rs +++ b/crates/typst-html/src/rules.rs @@ -428,20 +428,16 @@ const RAW_RULE: ShowFn = |elem, _, styles| { seq.push(line.clone().pack()); } - let mut inline = css::Properties::new(); - let block = elem.block.get(styles); - if !block { - // Without the `
` tag, whitespace would be collapsed by default.
-        inline.push("white-space", "pre-wrap");
-    }
-
     let code = HtmlElem::new(tag::code)
-        .with_styles(inline)
         .with_body(Some(Content::sequence(seq)))
         .pack()
         .spanned(elem.span());
 
-    Ok(if block { HtmlElem::new(tag::pre).with_body(Some(code)).pack() } else { code })
+    Ok(if elem.block.get(styles) {
+        HtmlElem::new(tag::pre).with_body(Some(code)).pack()
+    } else {
+        code
+    })
 };
 
 /// This is used by `RawElem::synthesize` through a routine.
diff --git a/tests/ref/html/html-space-collapsing.html b/tests/ref/html/html-space-collapsing.html
new file mode 100644
index 000000000..7e5acb2f9
--- /dev/null
+++ b/tests/ref/html/html-space-collapsing.html
@@ -0,0 +1,51 @@
+
+
+  
+    
+    
+  
+  
+    

Single spaces

+

A B

+

A B

+

A B

+

A B

+

A B

+

A B

+

Consecutive whitespace

+

A B C

+

A B C

+

A B

+

A B

+

A B

+

A B

+

A B

+

A B

+

Leading whitespace

+

A

+

A

+

A

+

Trailing whitespace

+

A

+

A

+

A

+

Tabs

+

A B

+

A B

+

A B

+

Newlines

+

A
B

+

A
B

+

A
B

+

A
B

+

A
B

+

With default ignorables

+

A ‍ B

+

A B

+

Everything

+

A
B

+

Special

+ +
A  B
+ + diff --git a/tests/ref/html/raw-html-inline-spaces.html b/tests/ref/html/raw-html-inline-spaces.html index 193d0425c..e14fa76e9 100644 --- a/tests/ref/html/raw-html-inline-spaces.html +++ b/tests/ref/html/raw-html-inline-spaces.html @@ -5,6 +5,6 @@ -

This has double spaces inside, which should be kept.

+

This has double spaces inside, which should be kept.

diff --git a/tests/ref/html/raw-html.html b/tests/ref/html/raw-html.html index cd476c8b4..ba1db63a0 100644 --- a/tests/ref/html/raw-html.html +++ b/tests/ref/html/raw-html.html @@ -5,7 +5,7 @@ -

This is *inline*.

-
#set text(blue)
*Hello* _world_!
+

This is *inline*.

+
#[
#set text(blue)
*Hello* _world_!
]
diff --git a/tests/suite/html/syntax.typ b/tests/suite/html/syntax.typ index 0c56fe76c..d28ec81e2 100644 --- a/tests/suite/html/syntax.typ +++ b/tests/suite/html/syntax.typ @@ -2,6 +2,150 @@ // Error: 2-27 HTML void elements must not have children #html.elem("img", [Hello]) +--- html-space-collapsing html --- +// Note: .. = .. +#import html: span + += Single spaces +// No collapsing. +#"A B" +// -> A B + +// No collapsing, multiple text elements. +#"A"#" "#"B" +// -> A B + +// Across span boundaries: 0-1. +#span[A] B +// -> A B + +// With span in between. +#"A "#span()#" B" +// -> A B + +// With metadata in between. +#"A "#metadata(none)#" B" +// -> A B + +// Within span. +#span("A ")B +// -> A B + += Consecutive whitespace +// Single text element. +#"A B C" +// -> A B C + +// Multiple text elements. +A#" "B#" C" +// -> A B C + +// Across span boundaries: 1-1. +#span("A ") B +// -> A B + +// Across span boundaries: 1-2. +#span("A ")#" B" +// -> A B + +// Across span boundaries: 2-1. +#span("A ") B +// -> A B + +// Across span boundaries: 2-2. +#span("A ")#" B" +// -> A B + +// With span in between. +#"A "#span()#" B" +// -> A B + +// With metadata in between. +#"A "#metadata(none)#" B" +// -> A B + += Leading whitespace +// Leading space. +#" A" +// -> A + +// Leading space in span. +#span(" ")A +// -> A + +// Leading space with preceding empty element. +#span()#" "A +// -> A + += Trailing whitespace +// Trailing space. +#"A " +// -> A + +// Trailing space in element. +#span("A ") +// -> A + +// Trailing space in element with following empty element. +#span("A ")#span() +// -> A + += Tabs +// Single text element. +#"A\tB" +// -> A B + +// Multiple text elements. +#"A"#"\t"#"B" +// -> A B + +// Spaces + Tab. +#"A \t B" +// -> A B + += Newlines +// Normal line feed. +#"A\nB" +// -> A
B + +// CRLF. +#"A\r\nB" +// -> A
B + +// Spaces + newline. +#"A \n B" +// -> A
B + +// Explicit `
` element. +#"A "#html.br()#" B" +// -> A
B + +// Newline in span. +#"A "#span("\n")#" B" +// -> A
B + += With default ignorables +// With default ignorable in between. +#"A \u{200D} B" +// -> A ‍ B + +#"A \u{200D} B" +// -> A B + += Everything +// Everything at once. +#span(" A ")#"\r\n\t"B#" "#span() +// -> A
B + += Special +// Escapable raw. +#html.textarea("A B") +// -> + +// Preformatted. +#html.pre("A B") +// ->
A  B
+ --- html-pre-starting-with-newline html --- #html.pre("hello") #html.pre("\nhello") diff --git a/tests/suite/text/raw.typ b/tests/suite/text/raw.typ index 7add8cdc1..bd61cc1f0 100644 --- a/tests/suite/text/raw.typ +++ b/tests/suite/text/raw.typ @@ -490,8 +490,10 @@ test --- raw-html html --- This is ```typ *inline*```. ```typ -#set text(blue) -*Hello* _world_! +#[ + #set text(blue) + *Hello* _world_! +] ``` --- raw-html-inline-spaces html ---