From 17f20c6944d569d5f0bb57caee37d9f208d87d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20F=C3=A4rber?= <01mf02@gmail.com> Date: Tue, 10 Dec 2024 10:57:22 +0100 Subject: [PATCH] Basic HTML pretty-printing (#5533) Co-authored-by: Laurenz --- crates/typst-html/src/encode.rs | 48 ++++++++++++++++++++++-- crates/typst-library/src/html/dom.rs | 55 +++++++++++++++++++++++++++- crates/typst-realize/src/lib.rs | 2 +- 3 files changed, 100 insertions(+), 5 deletions(-) diff --git a/crates/typst-html/src/encode.rs b/crates/typst-html/src/encode.rs index d4ff83d67..b87b0e1d6 100644 --- a/crates/typst-html/src/encode.rs +++ b/crates/typst-html/src/encode.rs @@ -8,14 +8,30 @@ use typst_syntax::Span; /// Encodes an HTML document into a string. pub fn html(document: &HtmlDocument) -> SourceResult<String> { - let mut w = Writer { buf: String::new() }; + let mut w = Writer { pretty: true, ..Writer::default() }; w.buf.push_str("<!DOCTYPE html>"); + write_indent(&mut w); write_element(&mut w, &document.root)?; Ok(w.buf) } +#[derive(Default)] struct Writer { buf: String, + /// current indentation level + level: usize, + /// pretty printing enabled? + pretty: bool, +} + +/// Write a newline and indent, if pretty printing is enabled. +fn write_indent(w: &mut Writer) { + if w.pretty { + w.buf.push('\n'); + for _ in 0..w.level { + w.buf.push_str(" "); + } + } } /// Encode an HTML node into the writer. @@ -67,9 +83,30 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { return Ok(()); } - for node in &element.children { - write_node(w, node)?; + let pretty = w.pretty; + if !element.children.is_empty() { + w.pretty &= is_pretty(element); + let mut indent = w.pretty; + + w.level += 1; + for c in &element.children { + let pretty_child = match c { + HtmlNode::Tag(_) => continue, + HtmlNode::Element(element) => is_pretty(element), + HtmlNode::Text(..) 
| HtmlNode::Frame(_) => false, + }; + + if core::mem::take(&mut indent) || pretty_child { + write_indent(w); + } + write_node(w, c)?; + indent = pretty_child; + } + w.level -= 1; + + write_indent(w) } + w.pretty = pretty; w.buf.push_str(" SourceResult<()> { Ok(()) } +/// Whether the element should be pretty-printed. +fn is_pretty(element: &HtmlElement) -> bool { + tag::is_block_by_default(element.tag) || matches!(element.tag, tag::meta) +} + /// Escape a character. fn write_escape(w: &mut Writer, c: char) -> StrResult<()> { // See <https://html.spec.whatwg.org/multipage/syntax.html#syntax-charref> diff --git a/crates/typst-library/src/html/dom.rs b/crates/typst-library/src/html/dom.rs index ee94279f2..3d558fb0f 100644 --- a/crates/typst-library/src/html/dom.rs +++ b/crates/typst-library/src/html/dom.rs @@ -470,6 +470,59 @@ pub mod tag { wbr } + /// Whether nodes with the tag have the CSS property `display: block` by + /// default. + /// + /// If this is true, then pretty-printing can insert spaces around such + /// nodes and around the contents of such nodes. + /// + /// However, when users change the properties of such tags via CSS, the + /// insertion of whitespace may actually impact the visual output; for + /// example, <https://www.w3.org/TR/css-text-3/#example-af2745cd> shows how + /// adding CSS rules to `<p>` can make it sensitive to whitespace. In such + /// cases, users should disable pretty-printing. + pub fn is_block_by_default(tag: HtmlTag) -> bool { + matches!( + tag, + self::html + | self::head + | self::body + | self::article + | self::aside + | self::h1 + | self::h2 + | self::h3 + | self::h4 + | self::h5 + | self::h6 + | self::hgroup + | self::nav + | self::section + | self::dd + | self::dl + | self::dt + | self::menu + | self::ol + | self::ul + | self::address + | self::blockquote + | self::dialog + | self::div + | self::fieldset + | self::figure + | self::figcaption + | self::footer + | self::form + | self::header + | self::hr + | self::legend + | self::main + | self::p + | self::pre + | self::search + ) + } + /// Whether the element is inline-level as opposed to being block-level. /// /// Not sure whether this distinction really makes sense. But we somehow @@ -480,7 +533,7 @@ pub mod tag { /// /// /// - pub fn is_inline(tag: HtmlTag) -> bool { + pub fn is_inline_by_default(tag: HtmlTag) -> bool { matches!( tag, self::abbr diff --git a/crates/typst-realize/src/lib.rs b/crates/typst-realize/src/lib.rs index fd43e8304..6ab6d81c5 100644 --- a/crates/typst-realize/src/lib.rs +++ b/crates/typst-realize/src/lib.rs @@ -823,7 +823,7 @@ static PAR: GroupingRule = GroupingRule { RealizationKind::HtmlDocument(_) | RealizationKind::HtmlFragment ) && content .to_packed::<HtmlElem>() - .is_some_and(|elem| tag::is_inline(elem.tag))) + .is_some_and(|elem| tag::is_inline_by_default(elem.tag))) }, inner: |content| content.elem() == SpaceElem::elem(), interrupt: |elem| elem == ParElem::elem() || elem == AlignElem::elem(),