` can make it sensitive to whitespace. For this reason, we
+/// should also respect the `style` tag in the future.
+fn allows_pretty_inside(tag: HtmlTag) -> bool {
+ (tag::is_block_by_default(tag) && tag != tag::pre)
+ || tag::is_tabular_by_default(tag)
+ || tag == tag::li
+}
+
+/// Whether newlines should be added before and after the element if the parent
+/// allows it.
+///
+/// In contrast to `allows_pretty_inside`, which is purely spec-driven, this is
+/// more subjective and depends on preference.
+fn wants_pretty_around(tag: HtmlTag) -> bool {
+ allows_pretty_inside(tag) || tag::is_metadata(tag) || tag == tag::pre
}
/// Escape a character.
diff --git a/crates/typst-library/src/html/dom.rs b/crates/typst-library/src/html/dom.rs
index 2acd839dd..1b725d543 100644
--- a/crates/typst-library/src/html/dom.rs
+++ b/crates/typst-library/src/html/dom.rs
@@ -475,17 +475,55 @@ pub mod tag {
wbr
}
+ /// Whether this is a void tag whose associated element may not have
+ /// children.
+ pub fn is_void(tag: HtmlTag) -> bool {
+ matches!(
+ tag,
+ self::area
+ | self::base
+ | self::br
+ | self::col
+ | self::embed
+ | self::hr
+ | self::img
+ | self::input
+ | self::link
+ | self::meta
+ | self::param
+ | self::source
+ | self::track
+ | self::wbr
+ )
+ }
+
+ /// Whether this is a tag containing raw text.
+ pub fn is_raw(tag: HtmlTag) -> bool {
+ matches!(tag, self::script | self::style)
+ }
+
+ /// Whether this is a tag containing escapable raw text.
+ pub fn is_escapable_raw(tag: HtmlTag) -> bool {
+ matches!(tag, self::textarea | self::title)
+ }
+
+ /// Whether an element is considered metadata.
+ pub fn is_metadata(tag: HtmlTag) -> bool {
+ matches!(
+ tag,
+ self::base
+ | self::link
+ | self::meta
+ | self::noscript
+ | self::script
+ | self::style
+ | self::template
+ | self::title
+ )
+ }
+
/// Whether nodes with the tag have the CSS property `display: block` by
/// default.
- ///
- /// If this is true, then pretty-printing can insert spaces around such
- /// nodes and around the contents of such nodes.
- ///
- /// However, when users change the properties of such tags via CSS, the
- /// insertion of whitespace may actually impact the visual output; for
- /// example, <https://www.w3.org/TR/css-text-3/#example-af2745cd> shows how
- /// adding CSS rules to `<p>` can make it sensitive to whitespace. In such
- /// cases, users should disable pretty-printing.
pub fn is_block_by_default(tag: HtmlTag) -> bool {
matches!(
tag,
@@ -572,37 +610,23 @@ pub mod tag {
)
}
- /// Whether this is a void tag whose associated element may not have a
- /// children.
- pub fn is_void(tag: HtmlTag) -> bool {
+ /// Whether nodes with the tag have the CSS property `display: table(-.*)?`
+ /// by default.
+ pub fn is_tabular_by_default(tag: HtmlTag) -> bool {
matches!(
tag,
- self::area
- | self::base
- | self::br
+ self::table
+ | self::thead
+ | self::tbody
+ | self::tfoot
+ | self::tr
+ | self::th
+ | self::td
+ | self::caption
| self::col
- | self::embed
- | self::hr
- | self::img
- | self::input
- | self::link
- | self::meta
- | self::param
- | self::source
- | self::track
- | self::wbr
+ | self::colgroup
)
}
-
- /// Whether this is a tag containing raw text.
- pub fn is_raw(tag: HtmlTag) -> bool {
- matches!(tag, self::script | self::style)
- }
-
- /// Whether this is a tag containing escapable raw text.
- pub fn is_escapable_raw(tag: HtmlTag) -> bool {
- matches!(tag, self::textarea | self::title)
- }
}
/// Predefined constants for HTML attributes.
diff --git a/tests/ref/html/basic-table.html b/tests/ref/html/basic-table.html
index 6ba1864ef..189a5b314 100644
--- a/tests/ref/html/basic-table.html
+++ b/tests/ref/html/basic-table.html
@@ -8,26 +8,36 @@
- Paragraph
- Paragraph
diff --git a/tests/ref/html/block-html.html b/tests/ref/html/block-html.html
index 98d971b88..d1716c6d7 100644
--- a/tests/ref/html/block-html.html
+++ b/tests/ref/html/block-html.html
@@ -5,11 +5,7 @@
-
-
The first and
+ The
+ first
+ and
-
the second row
+ the
+ second
+ row
-
Foo Baz Bar
+ Foo
+ Baz
+ Bar
-
1 2
+ 1
+ 2
-
3 4
+ 3
+ 4
-
The last row
+ The
+ last
+ row