diff --git a/crates/typst-html/src/encode.rs b/crates/typst-html/src/encode.rs index 35301c107..71fcefcd2 100644 --- a/crates/typst-html/src/encode.rs +++ b/crates/typst-html/src/encode.rs @@ -1,5 +1,6 @@ use std::fmt::Write; +use ecow::{EcoString, eco_format}; use typst_library::diag::{At, SourceResult, StrResult, bail}; use typst_library::foundations::Repr; use typst_library::introspection::Introspector; @@ -107,8 +108,15 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { return Ok(()); } + // See HTML spec § 13.1.2.5. + if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) { + w.buf.push('\n'); + } + if tag::is_raw(element.tag) { write_raw(w, element)?; + } else if tag::is_escapable_raw(element.tag) { + write_escapable_raw(w, element)?; } else if !element.children.is_empty() { write_children(w, element)?; } @@ -122,11 +130,6 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { /// Encodes the children of an element. fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { - // See HTML spec § 13.1.2.5. - if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) { - w.buf.push('\n'); - } - let pretty = w.pretty; let pretty_inside = allows_pretty_inside(element.tag) && element.children.iter().any(|node| match node { @@ -208,20 +211,40 @@ fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { Ok(()) } +/// Encodes the contents of an escapable raw text element. +fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> { + walk_raw_text(element, |piece, span| write_text(w, piece, span)) +} + /// Collects the textual contents of a raw text element. 
fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> { - let mut output = String::new(); + let mut text = String::new(); + walk_raw_text(element, |piece, span| { + if let Some(c) = piece.chars().find(|&c| !charsets::is_w3c_text_char(c)) { + return Err(unencodable(c)).at(span); + } + text.push_str(piece); + Ok(()) + })?; + Ok(text) +} + +/// Iterates over the textual contents of a raw text element. +fn walk_raw_text( + element: &HtmlElement, + mut f: impl FnMut(&str, Span) -> SourceResult<()>, +) -> SourceResult<()> { for c in &element.children { match c { HtmlNode::Tag(_) => continue, - HtmlNode::Text(text, _) => output.push_str(text), + HtmlNode::Text(text, span) => f(text, *span)?, HtmlNode::Element(HtmlElement { span, .. }) | HtmlNode::Frame(HtmlFrame { span, .. }) => { bail!(*span, "HTML raw text element cannot have non-text children") } - }; + } } - Ok(output) + Ok(()) } /// Finds a closing sequence for the given tag in the text, if it exists. @@ -302,11 +325,17 @@ fn write_escape(w: &mut Writer, c: char) -> StrResult<()> { c if charsets::is_w3c_text_char(c) && c != '\r' => { write!(w.buf, "&#x{:x};", c as u32).unwrap() } - _ => bail!("the character `{}` cannot be encoded in HTML", c.repr()), + _ => return Err(unencodable(c)), } Ok(()) } +/// The error message for a character that cannot be encoded. +#[cold] +fn unencodable(c: char) -> EcoString { + eco_format!("the character `{}` cannot be encoded in HTML", c.repr()) +} + /// Encode a laid out frame into the writer. 
fn write_frame(w: &mut Writer, frame: &HtmlFrame) { let svg = typst_svg::svg_html_frame( diff --git a/tests/suite/html/syntax.typ b/tests/suite/html/syntax.typ index 85be266e1..0c56fe76c 100644 --- a/tests/suite/html/syntax.typ +++ b/tests/suite/html/syntax.typ @@ -1,7 +1,3 @@ ---- html-non-char html --- -// Error: 1-9 the character `"\u{fdd0}"` cannot be encoded in HTML -\u{fdd0} - --- html-void-element-with-children html --- // Error: 2-27 HTML void elements must not have children #html.elem("img", [Hello]) @@ -61,6 +57,22 @@ // Hint: 2-32 the sequence `") + +--- html-non-char html --- +// Error: 1-9 the character `"\u{fdd0}"` cannot be encoded in HTML +\u{fdd0} + +--- html-raw-text-non-char html --- +// Error: 24-32 the character `"\u{fdd0}"` cannot be encoded in HTML +#html.script[const x = \u{fdd0}] + +--- html-escapable-raw-text-non-char html --- +// Error: 23-31 the character `"\u{fdd0}"` cannot be encoded in HTML +#html.textarea[Typing \u{fdd0}]