diff --git a/crates/typst-html/src/encode.rs b/crates/typst-html/src/encode.rs
index 35301c107..71fcefcd2 100644
--- a/crates/typst-html/src/encode.rs
+++ b/crates/typst-html/src/encode.rs
@@ -1,5 +1,6 @@
use std::fmt::Write;
+use ecow::{EcoString, eco_format};
use typst_library::diag::{At, SourceResult, StrResult, bail};
use typst_library::foundations::Repr;
use typst_library::introspection::Introspector;
@@ -107,8 +108,15 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
return Ok(());
}
+ // See HTML spec § 13.1.2.5.
+ if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) {
+ w.buf.push('\n');
+ }
+
if tag::is_raw(element.tag) {
write_raw(w, element)?;
+ } else if tag::is_escapable_raw(element.tag) {
+ write_escapable_raw(w, element)?;
} else if !element.children.is_empty() {
write_children(w, element)?;
}
@@ -122,11 +130,6 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
/// Encodes the children of an element.
fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
- // See HTML spec § 13.1.2.5.
- if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) {
- w.buf.push('\n');
- }
-
let pretty = w.pretty;
let pretty_inside = allows_pretty_inside(element.tag)
&& element.children.iter().any(|node| match node {
@@ -208,20 +211,40 @@ fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
Ok(())
}
+/// Writes an escapable raw text element by passing each text piece to `write_text`.
+fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
+ walk_raw_text(element, |chunk, at| write_text(w, chunk, at))
+}
+
/// Collects the textual contents of a raw text element.
fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> {
- let mut output = String::new();
+ let mut text = String::new();
+ walk_raw_text(element, |piece, span| {
+ if let Some(c) = piece.chars().find(|&c| !charsets::is_w3c_text_char(c)) {
+ return Err(unencodable(c)).at(span);
+ }
+ text.push_str(piece);
+ Ok(())
+ })?;
+ Ok(text)
+}
+
+/// Calls `f` with each text child and its span; errors on element or frame children.
+fn walk_raw_text(
+ element: &HtmlElement,
+ mut f: impl FnMut(&str, Span) -> SourceResult<()>,
+) -> SourceResult<()> {
for c in &element.children {
match c {
HtmlNode::Tag(_) => continue,
- HtmlNode::Text(text, _) => output.push_str(text),
+ HtmlNode::Text(text, span) => f(text, *span)?,
HtmlNode::Element(HtmlElement { span, .. })
| HtmlNode::Frame(HtmlFrame { span, .. }) => {
bail!(*span, "HTML raw text element cannot have non-text children")
}
- };
+ }
}
- Ok(output)
+ Ok(())
}
/// Finds a closing sequence for the given tag in the text, if it exists.
@@ -302,11 +325,17 @@ fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
c if charsets::is_w3c_text_char(c) && c != '\r' => {
write!(w.buf, "{:x};", c as u32).unwrap()
}
- _ => bail!("the character `{}` cannot be encoded in HTML", c.repr()),
+ _ => return Err(unencodable(c)),
}
Ok(())
}
+/// Formats the error message reported when `c` cannot be encoded in HTML.
+#[cold]
+fn unencodable(c: char) -> EcoString {
+ eco_format!("the character `{}` cannot be encoded in HTML", c.repr())
+}
+
/// Encode a laid out frame into the writer.
fn write_frame(w: &mut Writer, frame: &HtmlFrame) {
let svg = typst_svg::svg_html_frame(
diff --git a/tests/suite/html/syntax.typ b/tests/suite/html/syntax.typ
index 85be266e1..0c56fe76c 100644
--- a/tests/suite/html/syntax.typ
+++ b/tests/suite/html/syntax.typ
@@ -1,7 +1,3 @@
---- html-non-char html ---
-// Error: 1-9 the character `"\u{fdd0}"` cannot be encoded in HTML
-\u{fdd0}
-
--- html-void-element-with-children html ---
// Error: 2-27 HTML void elements must not have children
#html.elem("img", [Hello])
@@ -61,6 +57,22 @@
// Hint: 2-32 the sequence `</SCRiPT` appears in the raw text
#html.script("hello </SCRiPT ")
+
+--- html-non-char html ---
+// Error: 1-9 the character `"\u{fdd0}"` cannot be encoded in HTML
+\u{fdd0}
+
+--- html-raw-text-non-char html ---
+// Error: 24-32 the character `"\u{fdd0}"` cannot be encoded in HTML
+#html.script[const x = \u{fdd0}]
+
+--- html-escapable-raw-text-non-char html ---
+// Error: 23-31 the character `"\u{fdd0}"` cannot be encoded in HTML
+#html.textarea[Typing \u{fdd0}]