mirror of
https://github.com/typst/typst
synced 2025-08-11 21:57:55 +08:00
Encoding fixes for HTML raw text elements (#6720)
This commit is contained in:
parent
df9a9caee0
commit
bcc71ddb9b
@ -1,5 +1,6 @@
|
||||
use std::fmt::Write;
|
||||
|
||||
use ecow::{EcoString, eco_format};
|
||||
use typst_library::diag::{At, SourceResult, StrResult, bail};
|
||||
use typst_library::foundations::Repr;
|
||||
use typst_library::introspection::Introspector;
|
||||
@ -107,8 +108,15 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// See HTML spec § 13.1.2.5.
|
||||
if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) {
|
||||
w.buf.push('\n');
|
||||
}
|
||||
|
||||
if tag::is_raw(element.tag) {
|
||||
write_raw(w, element)?;
|
||||
} else if tag::is_escapable_raw(element.tag) {
|
||||
write_escapable_raw(w, element)?;
|
||||
} else if !element.children.is_empty() {
|
||||
write_children(w, element)?;
|
||||
}
|
||||
@ -122,11 +130,6 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||
|
||||
/// Encodes the children of an element.
|
||||
fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||
// See HTML spec § 13.1.2.5.
|
||||
if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) {
|
||||
w.buf.push('\n');
|
||||
}
|
||||
|
||||
let pretty = w.pretty;
|
||||
let pretty_inside = allows_pretty_inside(element.tag)
|
||||
&& element.children.iter().any(|node| match node {
|
||||
@ -208,20 +211,40 @@ fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Encodes the contents of an escapable raw text element.
|
||||
fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||
walk_raw_text(element, |piece, span| write_text(w, piece, span))
|
||||
}
|
||||
|
||||
/// Collects the textual contents of a raw text element.
|
||||
fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> {
|
||||
let mut output = String::new();
|
||||
let mut text = String::new();
|
||||
walk_raw_text(element, |piece, span| {
|
||||
if let Some(c) = piece.chars().find(|&c| !charsets::is_w3c_text_char(c)) {
|
||||
return Err(unencodable(c)).at(span);
|
||||
}
|
||||
text.push_str(piece);
|
||||
Ok(())
|
||||
})?;
|
||||
Ok(text)
|
||||
}
|
||||
|
||||
/// Iterates over the textual contents of a raw text element.
|
||||
fn walk_raw_text(
|
||||
element: &HtmlElement,
|
||||
mut f: impl FnMut(&str, Span) -> SourceResult<()>,
|
||||
) -> SourceResult<()> {
|
||||
for c in &element.children {
|
||||
match c {
|
||||
HtmlNode::Tag(_) => continue,
|
||||
HtmlNode::Text(text, _) => output.push_str(text),
|
||||
HtmlNode::Text(text, span) => f(text, *span)?,
|
||||
HtmlNode::Element(HtmlElement { span, .. })
|
||||
| HtmlNode::Frame(HtmlFrame { span, .. }) => {
|
||||
bail!(*span, "HTML raw text element cannot have non-text children")
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
Ok(output)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Finds a closing sequence for the given tag in the text, if it exists.
|
||||
@ -302,11 +325,17 @@ fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
|
||||
c if charsets::is_w3c_text_char(c) && c != '\r' => {
|
||||
write!(w.buf, "&#x{:x};", c as u32).unwrap()
|
||||
}
|
||||
_ => bail!("the character `{}` cannot be encoded in HTML", c.repr()),
|
||||
_ => return Err(unencodable(c)),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// The error message for a character that cannot be encoded.
|
||||
#[cold]
|
||||
fn unencodable(c: char) -> EcoString {
|
||||
eco_format!("the character `{}` cannot be encoded in HTML", c.repr())
|
||||
}
|
||||
|
||||
/// Encode a laid out frame into the writer.
|
||||
fn write_frame(w: &mut Writer, frame: &HtmlFrame) {
|
||||
let svg = typst_svg::svg_html_frame(
|
||||
|
@ -1,7 +1,3 @@
|
||||
--- html-non-char html ---
|
||||
// Error: 1-9 the character `"\u{fdd0}"` cannot be encoded in HTML
|
||||
\u{fdd0}
|
||||
|
||||
--- html-void-element-with-children html ---
|
||||
// Error: 2-27 HTML void elements must not have children
|
||||
#html.elem("img", [Hello])
|
||||
@ -61,6 +57,22 @@
|
||||
// Hint: 2-32 the sequence `</SCRiPT` appears in the raw text
|
||||
#html.script("hello </SCRiPT ")
|
||||
|
||||
--- html-escapable-raw-text-contains-elem html ---
|
||||
// Error: 16-34 HTML raw text element cannot have non-text children
|
||||
#html.textarea(html.strong[Hello])
|
||||
|
||||
--- html-escapable-raw-text-contains-closing-tag html ---
|
||||
// This is okay because we escape it.
|
||||
#html.textarea("hello </textarea>")
|
||||
|
||||
--- html-non-char html ---
|
||||
// Error: 1-9 the character `"\u{fdd0}"` cannot be encoded in HTML
|
||||
\u{fdd0}
|
||||
|
||||
--- html-raw-text-non-char html ---
|
||||
// Error: 24-32 the character `"\u{fdd0}"` cannot be encoded in HTML
|
||||
#html.script[const x = \u{fdd0}]
|
||||
|
||||
--- html-escapable-raw-text-non-char html ---
|
||||
// Error: 23-31 the character `"\u{fdd0}"` cannot be encoded in HTML
|
||||
#html.textarea[Typing \u{fdd0}]
|
||||
|
Loading…
x
Reference in New Issue
Block a user