mirror of
https://github.com/typst/typst
synced 2025-06-24 22:32:54 +08:00
Properly handle raw text elements
This commit is contained in:
parent
c2e2fd99f6
commit
bf8ef2a4a5
@ -2,7 +2,9 @@ use std::fmt::Write;
|
||||
|
||||
use typst_library::diag::{bail, At, SourceResult, StrResult};
|
||||
use typst_library::foundations::Repr;
|
||||
use typst_library::html::{charsets, tag, HtmlDocument, HtmlElement, HtmlNode, HtmlTag};
|
||||
use typst_library::html::{
|
||||
attr, charsets, tag, HtmlDocument, HtmlElement, HtmlNode, HtmlTag,
|
||||
};
|
||||
use typst_library::layout::Frame;
|
||||
use typst_syntax::Span;
|
||||
|
||||
@ -95,7 +97,9 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !element.children.is_empty() {
|
||||
if tag::is_raw(element.tag) {
|
||||
write_raw(w, element)?;
|
||||
} else if !element.children.is_empty() {
|
||||
write_children(w, element)?;
|
||||
}
|
||||
|
||||
@ -157,6 +161,108 @@ fn starts_with_newline(element: &HtmlElement) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Encodes the contents of a raw text element.
|
||||
fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||
let text = collect_raw_text(element)?;
|
||||
|
||||
if let Some(closing) = find_closing_tag(&text, element.tag) {
|
||||
bail!(
|
||||
element.span,
|
||||
"HTML raw text element cannot contain its own closing tag";
|
||||
hint: "the sequence `{closing}` appears in the raw text",
|
||||
)
|
||||
}
|
||||
|
||||
let mode = if w.pretty { RawMode::of(element, &text) } else { RawMode::Keep };
|
||||
match mode {
|
||||
RawMode::Keep => {
|
||||
w.buf.push_str(&text);
|
||||
}
|
||||
RawMode::Wrap => {
|
||||
w.buf.push('\n');
|
||||
w.buf.push_str(&text);
|
||||
write_indent(w);
|
||||
}
|
||||
RawMode::Indent => {
|
||||
w.level += 1;
|
||||
for line in text.lines() {
|
||||
write_indent(w);
|
||||
w.buf.push_str(line);
|
||||
}
|
||||
w.level -= 1;
|
||||
write_indent(w);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Collects the textual contents of a raw text element.
|
||||
fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> {
|
||||
let mut output = String::new();
|
||||
for c in &element.children {
|
||||
match c {
|
||||
HtmlNode::Tag(_) => continue,
|
||||
HtmlNode::Text(text, _) => output.push_str(text),
|
||||
HtmlNode::Element(_) | HtmlNode::Frame(_) => {
|
||||
let span = match c {
|
||||
HtmlNode::Element(child) => child.span,
|
||||
_ => element.span,
|
||||
};
|
||||
bail!(span, "HTML raw text element cannot have non-text children")
|
||||
}
|
||||
};
|
||||
}
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// Finds a closing sequence for the given tag in the text, if it exists.
|
||||
///
|
||||
/// See HTML spec § 13.1.2.6.
|
||||
fn find_closing_tag(text: &str, tag: HtmlTag) -> Option<&str> {
|
||||
let s = tag.resolve();
|
||||
let len = s.len();
|
||||
text.match_indices("</").find_map(|(i, _)| {
|
||||
let rest = &text[i + 2..];
|
||||
let disallowed = rest.len() >= len
|
||||
&& rest[..len].eq_ignore_ascii_case(&s)
|
||||
&& rest[len..].starts_with(['\t', '\n', '\u{c}', '\r', ' ', '>', '/']);
|
||||
disallowed.then(|| &text[i..i + 2 + len])
|
||||
})
|
||||
}
|
||||
|
||||
/// How to format the contents of a raw text element.
|
||||
enum RawMode {
|
||||
/// Just don't touch it.
|
||||
Keep,
|
||||
/// Newline after the opening and newline + indent before the closing tag.
|
||||
Wrap,
|
||||
/// Newlines after opening and before closing tag and each line indented.
|
||||
Indent,
|
||||
}
|
||||
|
||||
impl RawMode {
|
||||
fn of(element: &HtmlElement, text: &str) -> Self {
|
||||
match element.tag {
|
||||
tag::script
|
||||
if !element.attrs.0.iter().any(|(attr, value)| {
|
||||
*attr == attr::r#type && value != "text/javascript"
|
||||
}) =>
|
||||
{
|
||||
// Template literals can be multi-line, so indent may change
|
||||
// the semantics of the JavaScript.
|
||||
if text.contains('`') {
|
||||
Self::Wrap
|
||||
} else {
|
||||
Self::Indent
|
||||
}
|
||||
}
|
||||
tag::style => Self::Indent,
|
||||
_ => Self::Keep,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether we are allowed to add an extra newline at the start and end of the
|
||||
/// element's contents.
|
||||
///
|
||||
|
@ -0,0 +1,8 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body><textarea>hello &lt;/textarea&gt;</textarea></body>
|
||||
</html>
|
21
tests/ref/html/html-script.html
Normal file
21
tests/ref/html/html-script.html
Normal file
@ -0,0 +1,21 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body>
|
||||
<script>
|
||||
const x = 1
|
||||
const y = 2
|
||||
console.log(x < y, Math.max(1, 2))
|
||||
</script>
|
||||
<script>
|
||||
console.log(`Hello
|
||||
World`)
|
||||
</script>
|
||||
<script type="text/python">x = 1
|
||||
y = 2
|
||||
print(x < y, max(x, y))</script>
|
||||
</body>
|
||||
</html>
|
14
tests/ref/html/html-style.html
Normal file
14
tests/ref/html/html-style.html
Normal file
@ -0,0 +1,14 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body>
|
||||
<style>
|
||||
body {
|
||||
text: red;
|
||||
}
|
||||
</style>
|
||||
</body>
|
||||
</html>
|
@ -10,3 +10,54 @@
|
||||
#html.pre("hello")
|
||||
#html.pre("\nhello")
|
||||
#html.pre("\n\nhello")
|
||||
|
||||
--- html-script html ---
|
||||
// This should be pretty and indented.
|
||||
#html.script(
|
||||
```js
|
||||
const x = 1
|
||||
const y = 2
|
||||
console.log(x < y, Math.max(1, 2))
|
||||
```.text,
|
||||
)
|
||||
|
||||
// This should have extra newlines, but no indent because of the multiline
|
||||
// string literal.
|
||||
#html.script("console.log(`Hello\nWorld`)")
|
||||
|
||||
// This should be untouched.
|
||||
#html.script(
|
||||
type: "text/python",
|
||||
```py
|
||||
x = 1
|
||||
y = 2
|
||||
print(x < y, max(x, y))
|
||||
```.text,
|
||||
)
|
||||
|
||||
--- html-style html ---
|
||||
// This should be pretty and indented.
|
||||
#html.style(
|
||||
```css
|
||||
body {
|
||||
text: red;
|
||||
}
|
||||
```.text,
|
||||
)
|
||||
|
||||
--- html-raw-text-contains-elem html ---
|
||||
// Error: 14-32 HTML raw text element cannot have non-text children
|
||||
#html.script(html.strong[Hello])
|
||||
|
||||
--- html-raw-text-contains-frame html ---
|
||||
// Error: 2-29 HTML raw text element cannot have non-text children
|
||||
#html.script(html.frame[Ok])
|
||||
|
||||
--- html-raw-text-contains-closing-tag html ---
|
||||
// Error: 2-32 HTML raw text element cannot contain its own closing tag
|
||||
// Hint: 2-32 the sequence `</SCRiPT` appears in the raw text
|
||||
#html.script("hello </SCRiPT ")
|
||||
|
||||
--- html-escapable-raw-text-contains-closing-tag html ---
|
||||
// This is okay because we escape it.
|
||||
#html.textarea("hello </textarea>")
|
||||
|
Loading…
x
Reference in New Issue
Block a user