mirror of
https://github.com/typst/typst
synced 2025-08-19 17:38:32 +08:00
HTML whitespace protection (#6750)
This commit is contained in:
parent
343a57b50d
commit
805fb24ca4
@ -1,7 +1,7 @@
|
||||
use ecow::EcoVec;
|
||||
use ecow::{EcoString, EcoVec, eco_vec};
|
||||
use typst_library::diag::{SourceResult, warning};
|
||||
use typst_library::engine::Engine;
|
||||
use typst_library::foundations::{Content, StyleChain, Target, TargetElem};
|
||||
use typst_library::foundations::{Content, Packed, StyleChain, Target, TargetElem};
|
||||
use typst_library::introspection::{SplitLocator, TagElem};
|
||||
use typst_library::layout::{Abs, Axes, Region, Size};
|
||||
use typst_library::routines::Pair;
|
||||
@ -9,101 +9,130 @@ use typst_library::text::{
|
||||
LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem,
|
||||
is_default_ignorable,
|
||||
};
|
||||
use typst_syntax::Span;
|
||||
|
||||
use crate::fragment::{html_block_fragment, html_inline_fragment};
|
||||
use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, tag};
|
||||
use crate::{FrameElem, HtmlElem, HtmlElement, HtmlFrame, HtmlNode, css, tag};
|
||||
|
||||
/// What and how to convert.
|
||||
pub enum ConversionLevel<'a> {
|
||||
/// Converts the top-level nodes or children of a block-level element. The
|
||||
/// conversion has its own local smart quoting state and space protection.
|
||||
Block,
|
||||
/// Converts the children of an inline-level HTML element as part of a
|
||||
/// larger context with shared smart quoting state and shared space
|
||||
/// protection.
|
||||
Inline(&'a mut SmartQuoter),
|
||||
}
|
||||
|
||||
/// How to emit whitespace.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
|
||||
pub enum Whitespace {
|
||||
/// Ensures that whitespace that would otherwise be collapsed by HTML
|
||||
/// rendering engines[^1] is protected by spans with `white-space:
|
||||
/// pre-wrap`. The affected whitespace characters are ASCII spaces and ASCII tabs.
|
||||
///
|
||||
/// Tries to emit spans only when necessary.
|
||||
/// - ASCII tabs and consecutive sequences of spaces and/or tabs are always
|
||||
/// wrapped in spans in this mode. This happens directly during
|
||||
/// conversion.
|
||||
/// - Single ASCII spaces are only wrapped if they aren't supported by
|
||||
/// normal elements on both sides. This happens in a separate pass that
|
||||
/// runs for the whole block-level context as doing this properly needs
|
||||
/// lookahead and lookbehind across different levels of the element
|
||||
/// hierarchy.
|
||||
///
|
||||
/// [^1]: https://www.w3.org/TR/css-text-3/#white-space-rules
|
||||
Normal,
|
||||
/// The whitespace is emitted as-is. This happens in
|
||||
/// - `<pre>` elements as they already have `white-space: pre`,
|
||||
/// - raw and escapable raw text elements as normal white space rules do not
|
||||
/// apply to them.
|
||||
Pre,
|
||||
}
|
||||
|
||||
/// Converts realized content into HTML nodes.
|
||||
pub fn convert_to_nodes<'a>(
|
||||
engine: &mut Engine,
|
||||
locator: &mut SplitLocator,
|
||||
quoter: &mut SmartQuoter,
|
||||
children: impl IntoIterator<Item = Pair<'a>>,
|
||||
level: ConversionLevel,
|
||||
whitespace: Whitespace,
|
||||
) -> SourceResult<EcoVec<HtmlNode>> {
|
||||
let mut output = EcoVec::new();
|
||||
let block = matches!(level, ConversionLevel::Block);
|
||||
let mut converter = Converter {
|
||||
engine,
|
||||
locator,
|
||||
quoter: match level {
|
||||
ConversionLevel::Inline(quoter) => quoter,
|
||||
ConversionLevel::Block => &mut SmartQuoter::new(),
|
||||
},
|
||||
whitespace,
|
||||
output: EcoVec::new(),
|
||||
trailing: None,
|
||||
};
|
||||
|
||||
for (child, styles) in children {
|
||||
handle(engine, child, locator, styles, quoter, &mut output)?;
|
||||
handle(&mut converter, child, styles)?;
|
||||
}
|
||||
Ok(output)
|
||||
|
||||
let mut nodes = converter.finish();
|
||||
if block && whitespace == Whitespace::Normal {
|
||||
protect_spaces(&mut nodes);
|
||||
}
|
||||
|
||||
Ok(nodes)
|
||||
}
|
||||
|
||||
/// Convert one element into HTML node(s).
|
||||
/// Converts one element into HTML node(s).
|
||||
fn handle(
|
||||
engine: &mut Engine,
|
||||
converter: &mut Converter,
|
||||
child: &Content,
|
||||
locator: &mut SplitLocator,
|
||||
styles: StyleChain,
|
||||
quoter: &mut SmartQuoter,
|
||||
output: &mut EcoVec<HtmlNode>,
|
||||
) -> SourceResult<()> {
|
||||
if let Some(elem) = child.to_packed::<TagElem>() {
|
||||
output.push(HtmlNode::Tag(elem.tag.clone()));
|
||||
converter.push(elem.tag.clone());
|
||||
} else if let Some(elem) = child.to_packed::<HtmlElem>() {
|
||||
let mut children = EcoVec::new();
|
||||
if let Some(body) = elem.body.get_ref(styles) {
|
||||
if tag::is_block_by_default(elem.tag) {
|
||||
children = html_block_fragment(
|
||||
engine,
|
||||
body,
|
||||
locator.next(&elem.span()),
|
||||
styles,
|
||||
)?;
|
||||
|
||||
// Block-level elements reset the smart quoting state. This part
|
||||
// is unfortunately untested as it's currently not possible to
|
||||
// create inline-level content next to block-level content
|
||||
// without a paragraph automatically appearing.
|
||||
*quoter = SmartQuoter::new();
|
||||
} else {
|
||||
children = html_inline_fragment(engine, body, locator, quoter, styles)?;
|
||||
}
|
||||
}
|
||||
let element = HtmlElement {
|
||||
tag: elem.tag,
|
||||
attrs: elem.attrs.get_cloned(styles),
|
||||
children,
|
||||
span: elem.span(),
|
||||
};
|
||||
output.push(element.into());
|
||||
handle_html_elem(converter, elem, styles)?;
|
||||
} else if child.is::<SpaceElem>() {
|
||||
output.push(HtmlNode::text(' ', child.span()));
|
||||
converter.push(HtmlNode::text(' ', child.span()));
|
||||
} else if let Some(elem) = child.to_packed::<TextElem>() {
|
||||
let text = if let Some(case) = styles.get(TextElem::case) {
|
||||
case.apply(&elem.text).into()
|
||||
} else {
|
||||
elem.text.clone()
|
||||
};
|
||||
output.push(HtmlNode::text(text, elem.span()));
|
||||
handle_text(converter, text, elem.span());
|
||||
} else if let Some(elem) = child.to_packed::<LinebreakElem>() {
|
||||
output.push(HtmlElement::new(tag::br).spanned(elem.span()).into());
|
||||
converter.push(HtmlElement::new(tag::br).spanned(elem.span()));
|
||||
} else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
|
||||
let double = elem.double.get(styles);
|
||||
if elem.enabled.get(styles) {
|
||||
let before = last_char(output);
|
||||
let quote = if elem.enabled.get(styles) {
|
||||
let before = last_char(&converter.output);
|
||||
let quotes = SmartQuotes::get(
|
||||
elem.quotes.get_ref(styles),
|
||||
styles.get(TextElem::lang),
|
||||
styles.get(TextElem::region),
|
||||
elem.alternative.get(styles),
|
||||
);
|
||||
let quote = quoter.quote(before, &quotes, double);
|
||||
output.push(HtmlNode::text(quote, child.span()));
|
||||
converter.quoter.quote(before, &quotes, double)
|
||||
} else {
|
||||
output.push(HtmlNode::text(SmartQuotes::fallback(double), child.span()));
|
||||
}
|
||||
SmartQuotes::fallback(double)
|
||||
};
|
||||
handle_text(converter, quote.into(), child.span());
|
||||
} else if let Some(elem) = child.to_packed::<FrameElem>() {
|
||||
let locator = locator.next(&elem.span());
|
||||
let locator = converter.locator.next(&elem.span());
|
||||
let style = TargetElem::target.set(Target::Paged).wrap();
|
||||
let frame = (engine.routines.layout_frame)(
|
||||
engine,
|
||||
let frame = (converter.engine.routines.layout_frame)(
|
||||
converter.engine,
|
||||
&elem.body,
|
||||
locator,
|
||||
styles.chain(&style),
|
||||
Region::new(Size::splat(Abs::inf()), Axes::splat(false)),
|
||||
)?;
|
||||
output.push(HtmlNode::Frame(HtmlFrame::new(frame, styles, elem.span())));
|
||||
converter.push(HtmlFrame::new(frame, styles, elem.span()));
|
||||
} else {
|
||||
engine.sink.warn(warning!(
|
||||
converter.engine.sink.warn(warning!(
|
||||
child.span(),
|
||||
"{} was ignored during HTML export",
|
||||
child.elem().name()
|
||||
@ -112,6 +141,311 @@ fn handle(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handles an HTML element.
|
||||
fn handle_html_elem(
|
||||
converter: &mut Converter,
|
||||
elem: &Packed<HtmlElem>,
|
||||
styles: StyleChain,
|
||||
) -> SourceResult<()> {
|
||||
let mut children = EcoVec::new();
|
||||
if let Some(body) = elem.body.get_ref(styles) {
|
||||
let whitespace = if converter.whitespace == Whitespace::Pre
|
||||
|| elem.tag == tag::pre
|
||||
|| tag::is_raw(elem.tag)
|
||||
|| tag::is_escapable_raw(elem.tag)
|
||||
{
|
||||
Whitespace::Pre
|
||||
} else {
|
||||
Whitespace::Normal
|
||||
};
|
||||
|
||||
if tag::is_block_by_default(elem.tag) {
|
||||
children = html_block_fragment(
|
||||
converter.engine,
|
||||
body,
|
||||
converter.locator.next(&elem.span()),
|
||||
styles,
|
||||
whitespace,
|
||||
)?;
|
||||
|
||||
// Block-level elements reset the inline state. This part is
|
||||
// unfortunately untested as it's currently not possible to
|
||||
// create inline-level content next to block-level content
|
||||
// without a paragraph automatically appearing.
|
||||
*converter.quoter = SmartQuoter::new();
|
||||
} else {
|
||||
children = html_inline_fragment(
|
||||
converter.engine,
|
||||
body,
|
||||
converter.locator,
|
||||
converter.quoter,
|
||||
styles,
|
||||
whitespace,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
converter.push(HtmlElement {
|
||||
tag: elem.tag,
|
||||
attrs: elem.attrs.get_cloned(styles),
|
||||
children,
|
||||
span: elem.span(),
|
||||
pre_span: false,
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handles arbitrary text while taking care that no whitespace within will be
|
||||
/// collapsed by browsers.
|
||||
fn handle_text(converter: &mut Converter, text: EcoString, span: Span) {
|
||||
/// Special kinds of characters.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
enum Kind {
|
||||
/// ASCII space.
|
||||
Space,
|
||||
/// ASCII tab.
|
||||
Tab,
|
||||
/// CR, LF, or CR + LF.
|
||||
Newline,
|
||||
/// A Unicode default-ignorable. Does not protect spaces from
|
||||
/// collapsing.
|
||||
Ignorable,
|
||||
}
|
||||
|
||||
impl Kind {
|
||||
fn of(c: char) -> Option<Kind> {
|
||||
match c {
|
||||
' ' => Some(Kind::Space),
|
||||
'\t' => Some(Kind::Tab),
|
||||
'\r' | '\n' => Some(Kind::Newline),
|
||||
c if is_default_ignorable(c) => Some(Kind::Ignorable),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if converter.whitespace == Whitespace::Pre {
|
||||
converter.push(HtmlNode::Text(text, span));
|
||||
return;
|
||||
}
|
||||
|
||||
let mut emitted = 0;
|
||||
let mut prev_kind = None;
|
||||
|
||||
for (i, c) in text.char_indices() {
|
||||
let kind = Kind::of(c);
|
||||
let prev_kind = prev_kind.replace(kind);
|
||||
let Some(kind) = kind else { continue };
|
||||
|
||||
// A space that is surrounded by normal (i.e. not special) characters is
|
||||
// already protected and doesn't need further treatment.
|
||||
if kind == Kind::Space
|
||||
&& let Some(None) = prev_kind
|
||||
&& let Some(after) = text[i + 1..].chars().next()
|
||||
&& Kind::of(after).is_none()
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Emit the unspecial text up to the special character.
|
||||
if emitted < i {
|
||||
converter.push_text(&text[emitted..i], span);
|
||||
emitted = i;
|
||||
}
|
||||
|
||||
// Process the special character.
|
||||
match kind {
|
||||
Kind::Space => converter.push_text(' ', span),
|
||||
Kind::Tab => converter.push_text('\t', span),
|
||||
Kind::Newline => {
|
||||
if c == '\r' && text[i + 1..].starts_with('\n') {
|
||||
// Skip the CR because the LF will already turn into
|
||||
// a `<br>`.
|
||||
emitted += 1;
|
||||
continue;
|
||||
}
|
||||
converter.push(HtmlElement::new(tag::br).spanned(span));
|
||||
}
|
||||
Kind::Ignorable => converter.push_text(c, span),
|
||||
}
|
||||
emitted += c.len_utf8();
|
||||
}
|
||||
|
||||
// Push the remaining unspecial text.
|
||||
if emitted < text.len() {
|
||||
converter.push_text(
|
||||
// Try to reuse the `EcoString` if possible.
|
||||
if emitted == 0 { text } else { text[emitted..].into() },
|
||||
span,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// State during conversion.
|
||||
struct Converter<'a, 'y, 'z> {
|
||||
engine: &'a mut Engine<'y>,
|
||||
locator: &'a mut SplitLocator<'z>,
|
||||
quoter: &'a mut SmartQuoter,
|
||||
whitespace: Whitespace,
|
||||
output: EcoVec<HtmlNode>,
|
||||
trailing: Option<TrailingWhitespace>,
|
||||
}
|
||||
|
||||
/// Keeps track of a trailing whitespace in the output.
|
||||
struct TrailingWhitespace {
|
||||
/// If `true`, the trailing whitespace consists of exactly one ASCII space.
|
||||
single: bool,
|
||||
/// The trailing whitespace starts at `output[from..]`.
|
||||
from: usize,
|
||||
}
|
||||
|
||||
impl Converter<'_, '_, '_> {
|
||||
/// Returns the converted nodes.
|
||||
fn finish(mut self) -> EcoVec<HtmlNode> {
|
||||
self.flush_whitespace();
|
||||
self.output
|
||||
}
|
||||
|
||||
/// Pushes a node, taking care to protect consecutive whitespace.
|
||||
fn push(&mut self, node: impl Into<HtmlNode>) {
|
||||
let node = node.into();
|
||||
|
||||
if let HtmlNode::Text(text, _) = &node
|
||||
&& (text == " " || text == "\t")
|
||||
{
|
||||
if let Some(ws) = &mut self.trailing {
|
||||
ws.single = false;
|
||||
} else {
|
||||
self.trailing = Some(TrailingWhitespace {
|
||||
single: text == " ",
|
||||
from: self.output.len(),
|
||||
});
|
||||
}
|
||||
} else if !matches!(node, HtmlNode::Tag(_)) {
|
||||
self.flush_whitespace();
|
||||
}
|
||||
|
||||
self.output.push(node);
|
||||
}
|
||||
|
||||
/// Shorthand for pushing a text node.
|
||||
fn push_text(&mut self, text: impl Into<EcoString>, span: Span) {
|
||||
self.push(HtmlNode::text(text.into(), span));
|
||||
}
|
||||
|
||||
/// If there is trailing whitespace in need of protection, protects it.
|
||||
///
|
||||
/// Does not protect single ASCII spaces. Those are handled in a separate
|
||||
/// pass as they are more complex and require lookahead. See the
|
||||
/// documentation of [`Whitespace`] for more information.
|
||||
fn flush_whitespace(&mut self) {
|
||||
if self.whitespace == Whitespace::Normal
|
||||
&& let Some(TrailingWhitespace { single: false, from }) = self.trailing.take()
|
||||
{
|
||||
let nodes: EcoVec<_> = self.output[from..].iter().cloned().collect();
|
||||
self.output.truncate(from);
|
||||
self.output.push(HtmlNode::Element(pre_wrap(nodes)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Protects all spaces in the given block-level `nodes` against collapsing.
|
||||
///
|
||||
/// Does not recurse into block-level elements as those are separate contexts
|
||||
/// with their own space protection.
|
||||
fn protect_spaces(nodes: &mut EcoVec<HtmlNode>) {
|
||||
let mut p = Protector::new();
|
||||
p.visit_nodes(nodes);
|
||||
p.collapsing();
|
||||
}
|
||||
|
||||
/// A state machine for whitespace protection.
|
||||
enum Protector<'a> {
|
||||
Collapsing,
|
||||
Supportive,
|
||||
Space(&'a mut HtmlNode),
|
||||
}
|
||||
|
||||
impl<'a> Protector<'a> {
|
||||
/// Creates a new protector.
|
||||
fn new() -> Self {
|
||||
Self::Collapsing
|
||||
}
|
||||
|
||||
/// Visits the given nodes and protects single spaces that need to be saved
|
||||
/// from collapsing.
|
||||
fn visit_nodes(&mut self, nodes: &'a mut EcoVec<HtmlNode>) {
|
||||
for node in nodes.make_mut().iter_mut() {
|
||||
match node {
|
||||
HtmlNode::Tag(_) => {}
|
||||
HtmlNode::Text(text, _) => {
|
||||
if text == " " {
|
||||
match self {
|
||||
Self::Collapsing => {
|
||||
protect_space(node);
|
||||
*self = Self::Supportive;
|
||||
}
|
||||
Self::Supportive => {
|
||||
*self = Self::Space(node);
|
||||
}
|
||||
Self::Space(prev) => {
|
||||
protect_space(prev);
|
||||
*self = Self::Space(node);
|
||||
}
|
||||
}
|
||||
} else if text.chars().any(|c| !is_default_ignorable(c)) {
|
||||
self.supportive();
|
||||
}
|
||||
}
|
||||
HtmlNode::Element(element) => {
|
||||
if tag::is_block_by_default(element.tag) || element.tag == tag::br {
|
||||
self.collapsing();
|
||||
} else if !element.pre_span {
|
||||
// Recursively visit the children of inline-level
|
||||
// elements while making sure to not revisit pre-wrapped
|
||||
// spans that we've generated ourselves.
|
||||
self.visit_nodes(&mut element.children);
|
||||
}
|
||||
}
|
||||
HtmlNode::Frame(_) => self.supportive(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Called when visiting an element that would collapse adjacent single
|
||||
/// spaces. A preceding, if any, and succeeding, if any, single space will
|
||||
/// then be protected.
|
||||
fn collapsing(&mut self) {
|
||||
if let Self::Space(node) = std::mem::replace(self, Self::Collapsing) {
|
||||
protect_space(node);
|
||||
}
|
||||
}
|
||||
|
||||
/// Called when visiting an element that supports adjacent single spaces.
|
||||
fn supportive(&mut self) {
|
||||
*self = Self::Supportive;
|
||||
}
|
||||
}
|
||||
|
||||
/// Protects a single space against collapsing.
|
||||
fn protect_space(node: &mut HtmlNode) {
|
||||
*node = pre_wrap(eco_vec![node.clone()]).into();
|
||||
}
|
||||
|
||||
/// Wraps a collection of whitespace nodes in a
|
||||
/// `<span style="white-space: pre-wrap">..</span>` to avoid them being
|
||||
/// collapsed by HTML rendering engines.
|
||||
fn pre_wrap(nodes: EcoVec<HtmlNode>) -> HtmlElement {
|
||||
let span = Span::find(nodes.iter().map(|c| c.span()));
|
||||
let mut elem = HtmlElement::new(tag::span)
|
||||
.with_styles(css::Properties::new().with("white-space", "pre-wrap"))
|
||||
.with_children(nodes)
|
||||
.spanned(span);
|
||||
elem.pre_span = true;
|
||||
elem
|
||||
}
|
||||
|
||||
/// Returns the last non-default ignorable character from the passed nodes.
|
||||
fn last_char(nodes: &[HtmlNode]) -> Option<char> {
|
||||
for node in nodes.iter().rev() {
|
||||
|
@ -13,10 +13,10 @@ use typst_library::introspection::{
|
||||
use typst_library::layout::{Point, Position, Transform};
|
||||
use typst_library::model::DocumentInfo;
|
||||
use typst_library::routines::{Arenas, RealizationKind, Routines};
|
||||
use typst_library::text::SmartQuoter;
|
||||
use typst_syntax::Span;
|
||||
use typst_utils::NonZeroExt;
|
||||
|
||||
use crate::convert::{ConversionLevel, Whitespace};
|
||||
use crate::{HtmlDocument, HtmlElem, HtmlElement, HtmlNode, attr, tag};
|
||||
|
||||
/// Produce an HTML document from content.
|
||||
@ -83,8 +83,9 @@ fn html_document_impl(
|
||||
let output = crate::convert::convert_to_nodes(
|
||||
&mut engine,
|
||||
&mut locator,
|
||||
&mut SmartQuoter::new(),
|
||||
children.iter().copied(),
|
||||
ConversionLevel::Block,
|
||||
Whitespace::Normal,
|
||||
)?;
|
||||
|
||||
let mut link_targets = FxHashSet::default();
|
||||
|
@ -10,7 +10,7 @@ use typst_library::text::TextElem;
|
||||
use typst_syntax::Span;
|
||||
use typst_utils::{PicoStr, ResolvedPicoStr};
|
||||
|
||||
use crate::charsets;
|
||||
use crate::{attr, charsets, css};
|
||||
|
||||
/// An HTML document.
|
||||
#[derive(Debug, Clone)]
|
||||
@ -41,6 +41,22 @@ impl HtmlNode {
|
||||
pub fn text(text: impl Into<EcoString>, span: Span) -> Self {
|
||||
Self::Text(text.into(), span)
|
||||
}
|
||||
|
||||
/// Returns the span, if any.
|
||||
pub fn span(&self) -> Span {
|
||||
match self {
|
||||
Self::Tag(_) => Span::detached(),
|
||||
Self::Text(_, span) => *span,
|
||||
Self::Element(element) => element.span,
|
||||
Self::Frame(frame) => frame.span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Tag> for HtmlNode {
|
||||
fn from(tag: Tag) -> Self {
|
||||
Self::Tag(tag)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<HtmlElement> for HtmlNode {
|
||||
@ -49,6 +65,12 @@ impl From<HtmlElement> for HtmlNode {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<HtmlFrame> for HtmlNode {
|
||||
fn from(frame: HtmlFrame) -> Self {
|
||||
Self::Frame(frame)
|
||||
}
|
||||
}
|
||||
|
||||
/// An HTML element.
|
||||
#[derive(Debug, Clone, Hash)]
|
||||
pub struct HtmlElement {
|
||||
@ -60,6 +82,14 @@ pub struct HtmlElement {
|
||||
pub children: EcoVec<HtmlNode>,
|
||||
/// The span from which the element originated, if any.
|
||||
pub span: Span,
|
||||
/// Whether this is a span with `white-space: pre-wrap` generated by the
|
||||
/// compiler to prevent whitespace from being collapsed.
|
||||
///
|
||||
/// For such spans, spaces and tabs in the element are emitted as escape
|
||||
/// sequences. While this does not matter for browser engine rendering (as
|
||||
/// the `white-space` CSS property is enough), it ensures that formatters
|
||||
/// won't mess up the output.
|
||||
pub pre_span: bool,
|
||||
}
|
||||
|
||||
impl HtmlElement {
|
||||
@ -70,6 +100,7 @@ impl HtmlElement {
|
||||
attrs: HtmlAttrs::default(),
|
||||
children: EcoVec::new(),
|
||||
span: Span::detached(),
|
||||
pre_span: false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -87,6 +118,15 @@ impl HtmlElement {
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds CSS styles to an element.
|
||||
pub(crate) fn with_styles(self, properties: css::Properties) -> Self {
|
||||
if let Some(value) = properties.into_inline_styles() {
|
||||
self.with_attr(attr::style, value)
|
||||
} else {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Attach a span to the element.
|
||||
pub fn spanned(mut self, span: Span) -> Self {
|
||||
self.span = span;
|
||||
|
@ -52,10 +52,10 @@ fn write_indent(w: &mut Writer) {
|
||||
}
|
||||
|
||||
/// Encodes an HTML node into the writer.
|
||||
fn write_node(w: &mut Writer, node: &HtmlNode) -> SourceResult<()> {
|
||||
fn write_node(w: &mut Writer, node: &HtmlNode, escape_text: bool) -> SourceResult<()> {
|
||||
match node {
|
||||
HtmlNode::Tag(_) => {}
|
||||
HtmlNode::Text(text, span) => write_text(w, text, *span)?,
|
||||
HtmlNode::Text(text, span) => write_text(w, text, *span, escape_text)?,
|
||||
HtmlNode::Element(element) => write_element(w, element)?,
|
||||
HtmlNode::Frame(frame) => write_frame(w, frame),
|
||||
}
|
||||
@ -63,12 +63,12 @@ fn write_node(w: &mut Writer, node: &HtmlNode) -> SourceResult<()> {
|
||||
}
|
||||
|
||||
/// Encodes plain text into the writer.
|
||||
fn write_text(w: &mut Writer, text: &str, span: Span) -> SourceResult<()> {
|
||||
fn write_text(w: &mut Writer, text: &str, span: Span, escape: bool) -> SourceResult<()> {
|
||||
for c in text.chars() {
|
||||
if charsets::is_valid_in_normal_element_text(c) {
|
||||
w.buf.push(c);
|
||||
} else {
|
||||
if escape || !charsets::is_valid_in_normal_element_text(c) {
|
||||
write_escape(w, c).at(span)?;
|
||||
} else {
|
||||
w.buf.push(c);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@ -152,7 +152,7 @@ fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||
if core::mem::take(&mut indent) || pretty_around {
|
||||
write_indent(w);
|
||||
}
|
||||
write_node(w, c)?;
|
||||
write_node(w, c, element.pre_span)?;
|
||||
indent = pretty_around;
|
||||
}
|
||||
w.level -= 1;
|
||||
@ -213,7 +213,7 @@ fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||
|
||||
/// Encodes the contents of an escapable raw text element.
|
||||
fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||
walk_raw_text(element, |piece, span| write_text(w, piece, span))
|
||||
walk_raw_text(element, |piece, span| write_text(w, piece, span, false))
|
||||
}
|
||||
|
||||
/// Collects the textual contents of a raw text element.
|
||||
|
@ -1,14 +1,14 @@
|
||||
use comemo::{Track, Tracked, TrackedMut};
|
||||
use ecow::EcoVec;
|
||||
use typst_library::World;
|
||||
use typst_library::diag::{At, SourceResult};
|
||||
use typst_library::engine::{Engine, Route, Sink, Traced};
|
||||
use typst_library::foundations::{Content, StyleChain};
|
||||
use typst_library::introspection::{Introspector, Locator, LocatorLink, SplitLocator};
|
||||
|
||||
use typst_library::World;
|
||||
use typst_library::routines::{Arenas, FragmentKind, Pair, RealizationKind, Routines};
|
||||
use typst_library::text::SmartQuoter;
|
||||
|
||||
use crate::convert::{ConversionLevel, Whitespace};
|
||||
use crate::{HtmlElem, HtmlNode};
|
||||
|
||||
/// Produces HTML nodes from content contained in an HTML element that is
|
||||
@ -19,6 +19,7 @@ pub fn html_block_fragment(
|
||||
content: &Content,
|
||||
locator: Locator,
|
||||
styles: StyleChain,
|
||||
whitespace: Whitespace,
|
||||
) -> SourceResult<EcoVec<HtmlNode>> {
|
||||
html_block_fragment_impl(
|
||||
engine.routines,
|
||||
@ -30,6 +31,7 @@ pub fn html_block_fragment(
|
||||
content,
|
||||
locator.track(),
|
||||
styles,
|
||||
whitespace,
|
||||
)
|
||||
}
|
||||
|
||||
@ -46,6 +48,7 @@ fn html_block_fragment_impl(
|
||||
content: &Content,
|
||||
locator: Tracked<Locator>,
|
||||
styles: StyleChain,
|
||||
whitespace: Whitespace,
|
||||
) -> SourceResult<EcoVec<HtmlNode>> {
|
||||
let link = LocatorLink::new(locator);
|
||||
let mut locator = Locator::link(&link).split();
|
||||
@ -65,8 +68,9 @@ fn html_block_fragment_impl(
|
||||
crate::convert::convert_to_nodes(
|
||||
&mut engine,
|
||||
&mut locator,
|
||||
&mut SmartQuoter::new(),
|
||||
children.iter().copied(),
|
||||
ConversionLevel::Block,
|
||||
whitespace,
|
||||
)
|
||||
}
|
||||
|
||||
@ -85,6 +89,7 @@ pub fn html_inline_fragment(
|
||||
locator: &mut SplitLocator,
|
||||
quoter: &mut SmartQuoter,
|
||||
styles: StyleChain,
|
||||
whitespace: Whitespace,
|
||||
) -> SourceResult<EcoVec<HtmlNode>> {
|
||||
engine.route.increase();
|
||||
engine.route.check_html_depth().at(content.span())?;
|
||||
@ -94,8 +99,9 @@ pub fn html_inline_fragment(
|
||||
let result = crate::convert::convert_to_nodes(
|
||||
engine,
|
||||
locator,
|
||||
quoter,
|
||||
children.iter().copied(),
|
||||
ConversionLevel::Inline(quoter),
|
||||
whitespace,
|
||||
);
|
||||
|
||||
engine.route.decrease();
|
||||
|
@ -428,20 +428,16 @@ const RAW_RULE: ShowFn<RawElem> = |elem, _, styles| {
|
||||
seq.push(line.clone().pack());
|
||||
}
|
||||
|
||||
let mut inline = css::Properties::new();
|
||||
let block = elem.block.get(styles);
|
||||
if !block {
|
||||
// Without the `<pre>` tag, whitespace would be collapsed by default.
|
||||
inline.push("white-space", "pre-wrap");
|
||||
}
|
||||
|
||||
let code = HtmlElem::new(tag::code)
|
||||
.with_styles(inline)
|
||||
.with_body(Some(Content::sequence(seq)))
|
||||
.pack()
|
||||
.spanned(elem.span());
|
||||
|
||||
Ok(if block { HtmlElem::new(tag::pre).with_body(Some(code)).pack() } else { code })
|
||||
Ok(if elem.block.get(styles) {
|
||||
HtmlElem::new(tag::pre).with_body(Some(code)).pack()
|
||||
} else {
|
||||
code
|
||||
})
|
||||
};
|
||||
|
||||
/// This is used by `RawElem::synthesize` through a routine.
|
||||
|
51
tests/ref/html/html-space-collapsing.html
Normal file
51
tests/ref/html/html-space-collapsing.html
Normal file
@ -0,0 +1,51 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body>
|
||||
<h2>Single spaces</h2>
|
||||
<p>A B</p>
|
||||
<p>A B</p>
|
||||
<p><span>A</span> B</p>
|
||||
<p>A<span style="white-space: pre-wrap"> </span><span></span> B</p>
|
||||
<p>A<span style="white-space: pre-wrap">  </span>B</p>
|
||||
<p><span>A </span>B</p>
|
||||
<h2>Consecutive whitespace</h2>
|
||||
<p>A<span style="white-space: pre-wrap">  </span>B<span style="white-space: pre-wrap">   </span>C</p>
|
||||
<p>A<span style="white-space: pre-wrap">  </span>B<span style="white-space: pre-wrap">   </span>C</p>
|
||||
<p><span>A<span style="white-space: pre-wrap"> </span></span> B</p>
|
||||
<p><span>A </span><span style="white-space: pre-wrap">  </span>B</p>
|
||||
<p><span>A<span style="white-space: pre-wrap">  </span></span> B</p>
|
||||
<p><span>A<span style="white-space: pre-wrap">  </span></span><span style="white-space: pre-wrap">  </span>B</p>
|
||||
<p>A<span style="white-space: pre-wrap">  </span><span></span><span style="white-space: pre-wrap">  </span>B</p>
|
||||
<p>A<span style="white-space: pre-wrap">   </span>B</p>
|
||||
<h2>Leading whitespace</h2>
|
||||
<p><span style="white-space: pre-wrap"> </span>A</p>
|
||||
<p><span><span style="white-space: pre-wrap"> </span></span>A</p>
|
||||
<p><span></span><span style="white-space: pre-wrap"> </span>A</p>
|
||||
<h2>Trailing whitespace</h2>
|
||||
<p>A<span style="white-space: pre-wrap"> </span></p>
|
||||
<p><span>A<span style="white-space: pre-wrap"> </span></span></p>
|
||||
<p><span>A<span style="white-space: pre-wrap"> </span></span><span></span></p>
|
||||
<h2>Tabs</h2>
|
||||
<p>A<span style="white-space: pre-wrap">	</span>B</p>
|
||||
<p>A<span style="white-space: pre-wrap">	</span>B</p>
|
||||
<p>A<span style="white-space: pre-wrap"> 	 </span>B</p>
|
||||
<h2>Newlines</h2>
|
||||
<p>A<br>B</p>
|
||||
<p>A<br>B</p>
|
||||
<p>A<span style="white-space: pre-wrap"> </span><br><span style="white-space: pre-wrap"> </span>B</p>
|
||||
<p>A<span style="white-space: pre-wrap"> </span><br><span style="white-space: pre-wrap"> </span>B</p>
|
||||
<p>A<span style="white-space: pre-wrap"> </span><span><br></span><span style="white-space: pre-wrap"> </span>B</p>
|
||||
<h2>With default ignorables</h2>
|
||||
<p>A<span style="white-space: pre-wrap"> </span> B</p>
|
||||
<p>A<span style="white-space: pre-wrap">  </span><span style="white-space: pre-wrap">  </span>B</p>
|
||||
<h2>Everything</h2>
|
||||
<p><span><span style="white-space: pre-wrap">  </span>A<span style="white-space: pre-wrap"> </span></span><br><span style="white-space: pre-wrap">	</span>B<span style="white-space: pre-wrap"> </span><span></span></p>
|
||||
<h2>Special</h2>
|
||||
<textarea>A B</textarea>
|
||||
<pre>A B</pre>
|
||||
</body>
|
||||
</html>
|
@ -5,6 +5,6 @@
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body>
|
||||
<p>This has <code style="white-space: pre-wrap">double spaces inside</code>, which should be kept.</p>
|
||||
<p>This has <code>double<span style="white-space: pre-wrap">  </span>spaces<span style="white-space: pre-wrap">  </span>inside</code>, which should be kept.</p>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -5,7 +5,7 @@
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body>
|
||||
<p>This is <code style="white-space: pre-wrap"><strong>*</strong><strong>inline</strong><strong>*</strong></code>.</p>
|
||||
<pre><code><span style="color: #d73a49">#</span><span style="color: #d73a49">set</span> <span style="color: #4b69c6">text</span>(blue)<br><strong>*</strong><strong>Hello</strong><strong>*</strong> <em>_</em><em>world</em><em>_</em>!</code></pre>
|
||||
<p>This is <code><strong>*</strong><strong>inline</strong><strong>*</strong></code>.</p>
|
||||
<pre><code>#[<br> <span style="color: #d73a49">#</span><span style="color: #d73a49">set</span> <span style="color: #4b69c6">text</span>(blue)<br> <strong>*</strong><strong>Hello</strong><strong>*</strong> <em>_</em><em>world</em><em>_</em>!<br>]</code></pre>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -2,6 +2,150 @@
|
||||
// Error: 2-27 HTML void elements must not have children
|
||||
#html.elem("img", [Hello])
|
||||
|
||||
--- html-space-collapsing html ---
|
||||
// Note: <s>..</s> = <span style="white-space: pre-wrap">..</span>
|
||||
#import html: span
|
||||
|
||||
= Single spaces
|
||||
// No collapsing.
|
||||
#"A B"
|
||||
// -> A B
|
||||
|
||||
// No collapsing, multiple text elements.
|
||||
#"A"#" "#"B"
|
||||
// -> A B
|
||||
|
||||
// Across span boundaries: 0-1.
|
||||
#span[A] B
|
||||
// -> <span>A</span> B
|
||||
|
||||
// With span in between.
|
||||
#"A "#span()#" B"
|
||||
// -> A<s> </s><span></span> B
|
||||
|
||||
// With metadata in between.
|
||||
#"A "#metadata(none)#" B"
|
||||
// -> A<s> </s>B
|
||||
|
||||
// Within span.
|
||||
#span("A ")B
|
||||
// -> <span>A </span>B
|
||||
|
||||
= Consecutive whitespace
|
||||
// Single text element.
|
||||
#"A B C"
|
||||
// -> A<s> </s>B<s> </s>C
|
||||
|
||||
// Multiple text elements.
|
||||
A#" "B#" C"
|
||||
// -> A<s> </s>B<s> </s>C
|
||||
|
||||
// Across span boundaries: 1-1.
|
||||
#span("A ") B
|
||||
// -> <span>A<s> </s></span> B
|
||||
|
||||
// Across span boundaries: 1-2.
|
||||
#span("A ")#" B"
|
||||
// -> <span>A </span><s> </s>B
|
||||
|
||||
// Across span boundaries: 2-1.
|
||||
#span("A ") B
|
||||
// -> <span>A<s> </s></span> B
|
||||
|
||||
// Across span boundaries: 2-2.
|
||||
#span("A ")#" B"
|
||||
// -> <span>A<s> </s></span><s> </s>B
|
||||
|
||||
// With span in between.
|
||||
#"A "#span()#" B"
|
||||
// -> A<s> </s><span></span><s> </s>B
|
||||
|
||||
// With metadata in between.
|
||||
#"A "#metadata(none)#" B"
|
||||
// -> A<s> </s>B
|
||||
|
||||
= Leading whitespace
|
||||
// Leading space.
|
||||
#" A"
|
||||
// -> <s> </s>A
|
||||
|
||||
// Leading space in span.
|
||||
#span(" ")A
|
||||
// -> <span><s> </s></span>A
|
||||
|
||||
// Leading space with preceding empty element.
|
||||
#span()#" "A
|
||||
// -> <span></span><s> </s>A
|
||||
|
||||
= Trailing whitespace
|
||||
// Trailing space.
|
||||
#"A "
|
||||
// -> A<s> </s>
|
||||
|
||||
// Trailing space in element.
|
||||
#span("A ")
|
||||
// -> <span>A<s> </s></span>
|
||||
|
||||
// Trailing space in element with following empty element.
|
||||
#span("A ")#span()
|
||||
// -> <span>A<s> </s></span><span></span>
|
||||
|
||||
= Tabs
|
||||
// Single text element.
|
||||
#"A\tB"
|
||||
// -> A<s>	</s>B
|
||||
|
||||
// Multiple text elements.
|
||||
#"A"#"\t"#"B"
|
||||
// -> A<s>	</s>B
|
||||
|
||||
// Spaces + Tab.
|
||||
#"A \t B"
|
||||
// -> A<s> 	 </s>B
|
||||
|
||||
= Newlines
|
||||
// Normal line feed.
|
||||
#"A\nB"
|
||||
// -> A<br>B
|
||||
|
||||
// CRLF.
|
||||
#"A\r\nB"
|
||||
// -> A<br>B
|
||||
|
||||
// Spaces + newline.
|
||||
#"A \n B"
|
||||
// -> A<s> </s><br><s> </s>B
|
||||
|
||||
// Explicit `<br>` element.
|
||||
#"A "#html.br()#" B"
|
||||
// -> A<s> </s><br><s> </s>B
|
||||
|
||||
// Newline in span.
|
||||
#"A "#span("\n")#" B"
|
||||
// -> A<s> </s><span><br></span><s> </s>B
|
||||
|
||||
= With default ignorables
|
||||
// With default ignorable in between.
|
||||
#"A \u{200D} B"
|
||||
// -> A<s> </s>‍ B
|
||||
|
||||
#"A \u{200D} B"
|
||||
// -> A<s> </s>‍<s> </s>B
|
||||
|
||||
= Everything
|
||||
// Everything at once.
|
||||
#span(" A ")#"\r\n\t"B#" "#span()
|
||||
// -> <span><s> </s>A<s> </s></span><br><s>	</s>B<s> </s><span></span>
|
||||
|
||||
= Special
|
||||
// Escapable raw.
|
||||
#html.textarea("A B")
|
||||
// -> <textarea>A B</textarea>
|
||||
|
||||
// Preformatted.
|
||||
#html.pre("A B")
|
||||
// -> <pre>A B</pre>
|
||||
|
||||
--- html-pre-starting-with-newline html ---
|
||||
#html.pre("hello")
|
||||
#html.pre("\nhello")
|
||||
|
@ -490,8 +490,10 @@ test
|
||||
--- raw-html html ---
|
||||
This is ```typ *inline*```.
|
||||
```typ
|
||||
#set text(blue)
|
||||
*Hello* _world_!
|
||||
#[
|
||||
#set text(blue)
|
||||
*Hello* _world_!
|
||||
]
|
||||
```
|
||||
|
||||
--- raw-html-inline-spaces html ---
|
||||
|
Loading…
x
Reference in New Issue
Block a user