Basic HTML pretty-printing (#5533)

Co-authored-by: Laurenz <laurmaedje@gmail.com>
This commit is contained in:
Michael Färber 2024-12-10 10:57:22 +01:00 committed by GitHub
parent bb0c814095
commit 17f20c6944
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 100 additions and 5 deletions

View File

@ -8,14 +8,30 @@ use typst_syntax::Span;
/// Encodes an HTML document into a string.
///
/// The output starts with the `<!DOCTYPE html>` preamble, followed by the
/// document's root element as written by `write_element`. The writer created
/// here has pretty printing switched on.
///
/// Returns the finished buffer, or propagates the first error reported by
/// `write_element`.
pub fn html(document: &HtmlDocument) -> SourceResult<String> {
// NOTE(review): this binding is shadowed by the next `let` before it is ever
// used, and it does not initialize the `level` and `pretty` fields. It looks
// like the pre-change line of the diff; confirm and drop it.
let mut w = Writer { buf: String::new() };
// Enable pretty printing; the remaining fields take their `Default` values.
let mut w = Writer { pretty: true, ..Writer::default() };
w.buf.push_str("<!DOCTYPE html>");
// Start a new line after the doctype so the root element sits on its own line.
write_indent(&mut w);
write_element(&mut w, &document.root)?;
Ok(w.buf)
}
/// Output state threaded through the HTML encoding functions.
#[derive(Default)]
struct Writer {
/// The HTML text produced so far.
buf: String,
/// Current indentation level: the number of indentation units that
/// `write_indent` emits after a newline.
level: usize,
/// Whether pretty printing is enabled. When `false`, `write_indent` writes
/// nothing.
pretty: bool,
}
/// Starts a fresh, indented line when pretty printing is enabled.
///
/// Appends a line feed to the buffer, followed by one indentation unit per
/// nesting level recorded in `w.level`. When `w.pretty` is `false` the writer
/// is left untouched.
fn write_indent(w: &mut Writer) {
    // Compact output: no line breaks and no indentation at all.
    if !w.pretty {
        return;
    }

    w.buf.push('\n');
    w.buf.extend(std::iter::repeat(" ").take(w.level));
}
/// Encode an HTML node into the writer.
@ -67,9 +83,30 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
return Ok(());
}
for node in &element.children {
write_node(w, node)?;
let pretty = w.pretty;
if !element.children.is_empty() {
w.pretty &= is_pretty(element);
let mut indent = w.pretty;
w.level += 1;
for c in &element.children {
let pretty_child = match c {
HtmlNode::Tag(_) => continue,
HtmlNode::Element(element) => is_pretty(element),
HtmlNode::Text(..) | HtmlNode::Frame(_) => false,
};
if core::mem::take(&mut indent) || pretty_child {
write_indent(w);
}
write_node(w, c)?;
indent = pretty_child;
}
w.level -= 1;
write_indent(w)
}
w.pretty = pretty;
w.buf.push_str("</");
w.buf.push_str(&element.tag.resolve());
@ -78,6 +115,11 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
Ok(())
}
/// Whether the element should be pretty-printed.
///
/// `write_element` consults this both for an element's own contents (may a
/// newline and indentation be inserted between its children and before its
/// closing tag?) and for each child element (should the child be placed on a
/// line of its own?).
///
/// Elements that are block-level by default qualify, as does `<meta>`. The one
/// exception is `<pre>`: it is block-level, but whitespace inside it is
/// rendered verbatim, so inserted newlines and indentation would change the
/// visible output.
fn is_pretty(element: &HtmlElement) -> bool {
    // Never reformat preformatted text, even though `pre` counts as a block.
    if matches!(element.tag, tag::pre) {
        return false;
    }

    tag::is_block_by_default(element.tag) || matches!(element.tag, tag::meta)
}
/// Escape a character.
fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
// See <https://html.spec.whatwg.org/multipage/syntax.html#syntax-charref>

View File

@ -470,6 +470,59 @@ pub mod tag {
wbr
}
/// Reports whether elements with this tag are laid out with the CSS property
/// `display: block` unless a stylesheet says otherwise.
///
/// Pretty-printing uses a `true` result as permission to insert whitespace
/// around such an element and around its contents.
///
/// That permission only holds for the default styling. Once users restyle a
/// tag via CSS, the inserted whitespace can become visible; for instance,
/// <https://www.w3.org/TR/css-text-3/#example-af2745cd> shows CSS rules that
/// make `<p>` sensitive to whitespace. Users in that situation should turn
/// pretty-printing off.
pub fn is_block_by_default(tag: HtmlTag) -> bool {
    // Alternatives are listed alphabetically; the order has no effect on the
    // result.
    matches!(
        tag,
        self::address
            | self::article
            | self::aside
            | self::blockquote
            | self::body
            | self::dd
            | self::dialog
            | self::div
            | self::dl
            | self::dt
            | self::fieldset
            | self::figcaption
            | self::figure
            | self::footer
            | self::form
            | self::h1
            | self::h2
            | self::h3
            | self::h4
            | self::h5
            | self::h6
            | self::head
            | self::header
            | self::hgroup
            | self::hr
            | self::html
            | self::legend
            | self::main
            | self::menu
            | self::nav
            | self::ol
            | self::p
            | self::pre
            | self::search
            | self::section
            | self::ul
    )
}
/// Whether the element is inline-level as opposed to being block-level.
///
/// Not sure whether this distinction really makes sense. But we somehow
@ -480,7 +533,7 @@ pub mod tag {
/// <https://www.w3.org/TR/html401/struct/global.html#block-inline>
/// <https://developer.mozilla.org/en-US/docs/Glossary/Inline-level_content>
/// <https://github.com/orgs/mdn/discussions/353>
pub fn is_inline(tag: HtmlTag) -> bool {
pub fn is_inline_by_default(tag: HtmlTag) -> bool {
matches!(
tag,
self::abbr

View File

@ -823,7 +823,7 @@ static PAR: GroupingRule = GroupingRule {
RealizationKind::HtmlDocument(_) | RealizationKind::HtmlFragment
) && content
.to_packed::<HtmlElem>()
.is_some_and(|elem| tag::is_inline(elem.tag)))
.is_some_and(|elem| tag::is_inline_by_default(elem.tag)))
},
inner: |content| content.elem() == SpaceElem::elem(),
interrupt: |elem| elem == ParElem::elem() || elem == AlignElem::elem(),