mirror of
https://github.com/typst/typst
synced 2025-06-24 22:32:54 +08:00
Properly handle raw text elements
This commit is contained in:
parent
c2e2fd99f6
commit
bf8ef2a4a5
@ -2,7 +2,9 @@ use std::fmt::Write;
|
|||||||
|
|
||||||
use typst_library::diag::{bail, At, SourceResult, StrResult};
|
use typst_library::diag::{bail, At, SourceResult, StrResult};
|
||||||
use typst_library::foundations::Repr;
|
use typst_library::foundations::Repr;
|
||||||
use typst_library::html::{charsets, tag, HtmlDocument, HtmlElement, HtmlNode, HtmlTag};
|
use typst_library::html::{
|
||||||
|
attr, charsets, tag, HtmlDocument, HtmlElement, HtmlNode, HtmlTag,
|
||||||
|
};
|
||||||
use typst_library::layout::Frame;
|
use typst_library::layout::Frame;
|
||||||
use typst_syntax::Span;
|
use typst_syntax::Span;
|
||||||
|
|
||||||
@ -95,7 +97,9 @@ fn write_element(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
if !element.children.is_empty() {
|
if tag::is_raw(element.tag) {
|
||||||
|
write_raw(w, element)?;
|
||||||
|
} else if !element.children.is_empty() {
|
||||||
write_children(w, element)?;
|
write_children(w, element)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -157,6 +161,108 @@ fn starts_with_newline(element: &HtmlElement) -> bool {
|
|||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Encodes the contents of a raw text element.
|
||||||
|
fn write_raw(w: &mut Writer, element: &HtmlElement) -> SourceResult<()> {
|
||||||
|
let text = collect_raw_text(element)?;
|
||||||
|
|
||||||
|
if let Some(closing) = find_closing_tag(&text, element.tag) {
|
||||||
|
bail!(
|
||||||
|
element.span,
|
||||||
|
"HTML raw text element cannot contain its own closing tag";
|
||||||
|
hint: "the sequence `{closing}` appears in the raw text",
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
let mode = if w.pretty { RawMode::of(element, &text) } else { RawMode::Keep };
|
||||||
|
match mode {
|
||||||
|
RawMode::Keep => {
|
||||||
|
w.buf.push_str(&text);
|
||||||
|
}
|
||||||
|
RawMode::Wrap => {
|
||||||
|
w.buf.push('\n');
|
||||||
|
w.buf.push_str(&text);
|
||||||
|
write_indent(w);
|
||||||
|
}
|
||||||
|
RawMode::Indent => {
|
||||||
|
w.level += 1;
|
||||||
|
for line in text.lines() {
|
||||||
|
write_indent(w);
|
||||||
|
w.buf.push_str(line);
|
||||||
|
}
|
||||||
|
w.level -= 1;
|
||||||
|
write_indent(w);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collects the textual contents of a raw text element.
|
||||||
|
fn collect_raw_text(element: &HtmlElement) -> SourceResult<String> {
|
||||||
|
let mut output = String::new();
|
||||||
|
for c in &element.children {
|
||||||
|
match c {
|
||||||
|
HtmlNode::Tag(_) => continue,
|
||||||
|
HtmlNode::Text(text, _) => output.push_str(text),
|
||||||
|
HtmlNode::Element(_) | HtmlNode::Frame(_) => {
|
||||||
|
let span = match c {
|
||||||
|
HtmlNode::Element(child) => child.span,
|
||||||
|
_ => element.span,
|
||||||
|
};
|
||||||
|
bail!(span, "HTML raw text element cannot have non-text children")
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Ok(output)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finds a closing sequence for the given tag in the text, if it exists.
|
||||||
|
///
|
||||||
|
/// See HTML spec § 13.1.2.6.
|
||||||
|
fn find_closing_tag(text: &str, tag: HtmlTag) -> Option<&str> {
|
||||||
|
let s = tag.resolve();
|
||||||
|
let len = s.len();
|
||||||
|
text.match_indices("</").find_map(|(i, _)| {
|
||||||
|
let rest = &text[i + 2..];
|
||||||
|
let disallowed = rest.len() >= len
|
||||||
|
&& rest[..len].eq_ignore_ascii_case(&s)
|
||||||
|
&& rest[len..].starts_with(['\t', '\n', '\u{c}', '\r', ' ', '>', '/']);
|
||||||
|
disallowed.then(|| &text[i..i + 2 + len])
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The layout strategy for the contents of a raw text element.
enum RawMode {
    /// Emit the text exactly as given.
    Keep,
    /// Put a newline after the opening tag and a newline plus indentation
    /// before the closing tag, leaving the text itself untouched.
    Wrap,
    /// Newlines after the opening and before the closing tag, with every
    /// line of the text indented as well.
    Indent,
}
|
||||||
|
|
||||||
|
impl RawMode {
|
||||||
|
fn of(element: &HtmlElement, text: &str) -> Self {
|
||||||
|
match element.tag {
|
||||||
|
tag::script
|
||||||
|
if !element.attrs.0.iter().any(|(attr, value)| {
|
||||||
|
*attr == attr::r#type && value != "text/javascript"
|
||||||
|
}) =>
|
||||||
|
{
|
||||||
|
// Template literals can be multi-line, so indent may change
|
||||||
|
// the semantics of the JavaScript.
|
||||||
|
if text.contains('`') {
|
||||||
|
Self::Wrap
|
||||||
|
} else {
|
||||||
|
Self::Indent
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tag::style => Self::Indent,
|
||||||
|
_ => Self::Keep,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Whether we are allowed to add an extra newline at the start and end of the
|
/// Whether we are allowed to add an extra newline at the start and end of the
|
||||||
/// element's contents.
|
/// element's contents.
|
||||||
///
|
///
|
||||||
|
@ -0,0 +1,8 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
</head>
|
||||||
|
<body><textarea>hello &lt;/textarea&gt;</textarea></body>
|
||||||
|
</html>
|
21
tests/ref/html/html-script.html
Normal file
21
tests/ref/html/html-script.html
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<script>
|
||||||
|
const x = 1
|
||||||
|
const y = 2
|
||||||
|
console.log(x < y, Math.max(1, 2))
|
||||||
|
</script>
|
||||||
|
<script>
|
||||||
|
console.log(`Hello
|
||||||
|
World`)
|
||||||
|
</script>
|
||||||
|
<script type="text/python">x = 1
|
||||||
|
y = 2
|
||||||
|
print(x < y, max(x, y))</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
14
tests/ref/html/html-style.html
Normal file
14
tests/ref/html/html-style.html
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
text: red;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -10,3 +10,54 @@
|
|||||||
#html.pre("hello")
|
#html.pre("hello")
|
||||||
#html.pre("\nhello")
|
#html.pre("\nhello")
|
||||||
#html.pre("\n\nhello")
|
#html.pre("\n\nhello")
|
||||||
|
|
||||||
|
--- html-script html ---
|
||||||
|
// This should be pretty and indented.
|
||||||
|
#html.script(
|
||||||
|
```js
|
||||||
|
const x = 1
|
||||||
|
const y = 2
|
||||||
|
console.log(x < y, Math.max(1, 2))
|
||||||
|
```.text,
|
||||||
|
)
|
||||||
|
|
||||||
|
// This should have extra newlines, but no indent because of the multiline
|
||||||
|
// string literal.
|
||||||
|
#html.script("console.log(`Hello\nWorld`)")
|
||||||
|
|
||||||
|
// This should be untouched.
|
||||||
|
#html.script(
|
||||||
|
type: "text/python",
|
||||||
|
```py
|
||||||
|
x = 1
|
||||||
|
y = 2
|
||||||
|
print(x < y, max(x, y))
|
||||||
|
```.text,
|
||||||
|
)
|
||||||
|
|
||||||
|
--- html-style html ---
|
||||||
|
// This should be pretty and indented.
|
||||||
|
#html.style(
|
||||||
|
```css
|
||||||
|
body {
|
||||||
|
text: red;
|
||||||
|
}
|
||||||
|
```.text,
|
||||||
|
)
|
||||||
|
|
||||||
|
--- html-raw-text-contains-elem html ---
|
||||||
|
// Error: 14-32 HTML raw text element cannot have non-text children
|
||||||
|
#html.script(html.strong[Hello])
|
||||||
|
|
||||||
|
--- html-raw-text-contains-frame html ---
|
||||||
|
// Error: 2-29 HTML raw text element cannot have non-text children
|
||||||
|
#html.script(html.frame[Ok])
|
||||||
|
|
||||||
|
--- html-raw-text-contains-closing-tag html ---
|
||||||
|
// Error: 2-32 HTML raw text element cannot contain its own closing tag
|
||||||
|
// Hint: 2-32 the sequence `</SCRiPT` appears in the raw text
|
||||||
|
#html.script("hello </SCRiPT ")
|
||||||
|
|
||||||
|
--- html-escapable-raw-text-contains-closing-tag html ---
|
||||||
|
// This is okay because we escape it.
|
||||||
|
#html.textarea("hello </textarea>")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user