mirror of
https://github.com/typst/typst
synced 2025-08-12 06:07:54 +08:00
Encoding fixes for HTML raw text elements (#6720)
This commit is contained in:
parent
df9a9caee0
commit
bcc71ddb9b
@ -1,5 +1,6 @@
|
|||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
|
|
||||||
|
use ecow::{EcoString, eco_format};
|
||||||
use typst_library::diag::{At, SourceResult, StrResult, bail};
|
use typst_library::diag::{At, SourceResult, StrResult, bail};
|
||||||
use typst_library::foundations::Repr;
|
use typst_library::foundations::Repr;
|
||||||
use typst_library::introspection::Introspector;
|
use typst_library::introspection::Introspector;
|
||||||
@ -107,8 +108,15 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See HTML spec § 13.1.2.5.
|
||||||
|
if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) {
|
||||||
|
w.buf.push('\n');
|
||||||
|
}
|
||||||
|
|
||||||
if tag::is_raw(element.tag) {
|
if tag::is_raw(element.tag) {
|
||||||
write_raw(w, element)?;
|
write_raw(w, element)?;
|
||||||
|
} else if tag::is_escapable_raw(element.tag) {
|
||||||
|
write_escapable_raw(w, element)?;
|
||||||
} else if !element.children.is_empty() {
|
} else if !element.children.is_empty() {
|
||||||
write_children(w, element)?;
|
write_children(w, element)?;
|
||||||
}
|
}
|
||||||
@ -122,11 +130,6 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
|||||||
|
|
||||||
/// Encodes the children of an element.
|
/// Encodes the children of an element.
|
||||||
fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
fn write_children(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||||
// See HTML spec § 13.1.2.5.
|
|
||||||
if matches!(element.tag, tag::pre | tag::textarea) && starts_with_newline(element) {
|
|
||||||
w.buf.push('\n');
|
|
||||||
}
|
|
||||||
|
|
||||||
let pretty = w.pretty;
|
let pretty = w.pretty;
|
||||||
let pretty_inside = allows_pretty_inside(element.tag)
|
let pretty_inside = allows_pretty_inside(element.tag)
|
||||||
&& element.children.iter().any(|node| match node {
|
&& element.children.iter().any(|node| match node {
|
||||||
@ -208,20 +211,40 @@ fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Encodes the contents of an escapable raw text element.
|
||||||
|
fn write_escapable_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||||
|
walk_raw_text(element, |piece, span| write_text(w, piece, span))
|
||||||
|
}
|
||||||
|
|
||||||
/// Collects the textual contents of a raw text element.
|
/// Collects the textual contents of a raw text element.
|
||||||
fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> {
|
fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> {
|
||||||
let mut output = String::new();
|
let mut text = String::new();
|
||||||
|
walk_raw_text(element, |piece, span| {
|
||||||
|
if let Some(c) = piece.chars().find(|&c| !charsets::is_w3c_text_char(c)) {
|
||||||
|
return Err(unencodable(c)).at(span);
|
||||||
|
}
|
||||||
|
text.push_str(piece);
|
||||||
|
Ok(())
|
||||||
|
})?;
|
||||||
|
Ok(text)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterates over the textual contents of a raw text element.
|
||||||
|
fn walk_raw_text(
|
||||||
|
element: &HtmlElement,
|
||||||
|
mut f: impl FnMut(&str, Span) -> SourceResult<()>,
|
||||||
|
) -> SourceResult<()> {
|
||||||
for c in &element.children {
|
for c in &element.children {
|
||||||
match c {
|
match c {
|
||||||
HtmlNode::Tag(_) => continue,
|
HtmlNode::Tag(_) => continue,
|
||||||
HtmlNode::Text(text, _) => output.push_str(text),
|
HtmlNode::Text(text, span) => f(text, *span)?,
|
||||||
HtmlNode::Element(HtmlElement { span, .. })
|
HtmlNode::Element(HtmlElement { span, .. })
|
||||||
| HtmlNode::Frame(HtmlFrame { span, .. }) => {
|
| HtmlNode::Frame(HtmlFrame { span, .. }) => {
|
||||||
bail!(*span, "HTML raw text element cannot have non-text children")
|
bail!(*span, "HTML raw text element cannot have non-text children")
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
Ok(output)
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Finds a closing sequence for the given tag in the text, if it exists.
|
/// Finds a closing sequence for the given tag in the text, if it exists.
|
||||||
@ -302,11 +325,17 @@ fn write_escape(w: &mut Writer, c: char) -> StrResult<()> {
|
|||||||
c if charsets::is_w3c_text_char(c) && c != '\r' => {
|
c if charsets::is_w3c_text_char(c) && c != '\r' => {
|
||||||
write!(w.buf, "&#x{:x};", c as u32).unwrap()
|
write!(w.buf, "&#x{:x};", c as u32).unwrap()
|
||||||
}
|
}
|
||||||
_ => bail!("the character `{}` cannot be encoded in HTML", c.repr()),
|
_ => return Err(unencodable(c)),
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The error message for a character that cannot be encoded.
|
||||||
|
#[cold]
|
||||||
|
fn unencodable(c: char) -> EcoString {
|
||||||
|
eco_format!("the character `{}` cannot be encoded in HTML", c.repr())
|
||||||
|
}
|
||||||
|
|
||||||
/// Encode a laid out frame into the writer.
|
/// Encode a laid out frame into the writer.
|
||||||
fn write_frame(w: &mut Writer, frame: &HtmlFrame) {
|
fn write_frame(w: &mut Writer, frame: &HtmlFrame) {
|
||||||
let svg = typst_svg::svg_html_frame(
|
let svg = typst_svg::svg_html_frame(
|
||||||
|
@ -1,7 +1,3 @@
|
|||||||
--- html-non-char html ---
|
|
||||||
// Error: 1-9 the character `"\u{fdd0}"` cannot be encoded in HTML
|
|
||||||
\u{fdd0}
|
|
||||||
|
|
||||||
--- html-void-element-with-children html ---
|
--- html-void-element-with-children html ---
|
||||||
// Error: 2-27 HTML void elements must not have children
|
// Error: 2-27 HTML void elements must not have children
|
||||||
#html.elem("img", [Hello])
|
#html.elem("img", [Hello])
|
||||||
@ -61,6 +57,22 @@
|
|||||||
// Hint: 2-32 the sequence `</SCRiPT` appears in the raw text
|
// Hint: 2-32 the sequence `</SCRiPT` appears in the raw text
|
||||||
#html.script("hello </SCRiPT ")
|
#html.script("hello </SCRiPT ")
|
||||||
|
|
||||||
|
--- html-escapable-raw-text-contains-elem html ---
|
||||||
|
// Error: 16-34 HTML raw text element cannot have non-text children
|
||||||
|
#html.textarea(html.strong[Hello])
|
||||||
|
|
||||||
--- html-escapable-raw-text-contains-closing-tag html ---
|
--- html-escapable-raw-text-contains-closing-tag html ---
|
||||||
// This is okay because we escape it.
|
// This is okay because we escape it.
|
||||||
#html.textarea("hello </textarea>")
|
#html.textarea("hello </textarea>")
|
||||||
|
|
||||||
|
--- html-non-char html ---
|
||||||
|
// Error: 1-9 the character `"\u{fdd0}"` cannot be encoded in HTML
|
||||||
|
\u{fdd0}
|
||||||
|
|
||||||
|
--- html-raw-text-non-char html ---
|
||||||
|
// Error: 24-32 the character `"\u{fdd0}"` cannot be encoded in HTML
|
||||||
|
#html.script[const x = \u{fdd0}]
|
||||||
|
|
||||||
|
--- html-escapable-raw-text-non-char html ---
|
||||||
|
// Error: 23-31 the character `"\u{fdd0}"` cannot be encoded in HTML
|
||||||
|
#html.textarea[Typing \u{fdd0}]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user