// Mirror of https://github.com/typst/typst
// (synced 2025-05-13 20:46:23 +08:00; 238 lines, 7.0 KiB, Rust).
use unscanny::Scanner;
|
|
|
|
use super::{is_ident, is_newline, RawKind};
|
|
use crate::util::EcoString;
|
|
|
|
/// Resolve all escape sequences in a string.
|
|
pub fn resolve_string(string: &str) -> EcoString {
|
|
let mut out = EcoString::with_capacity(string.len());
|
|
let mut s = Scanner::new(string);
|
|
|
|
while let Some(c) = s.eat() {
|
|
if c != '\\' {
|
|
out.push(c);
|
|
continue;
|
|
}
|
|
|
|
let start = s.locate(-1);
|
|
match s.eat() {
|
|
Some('\\') => out.push('\\'),
|
|
Some('"') => out.push('"'),
|
|
Some('n') => out.push('\n'),
|
|
Some('r') => out.push('\r'),
|
|
Some('t') => out.push('\t'),
|
|
Some('u') if s.eat_if('{') => {
|
|
// TODO: Error if closing brace is missing.
|
|
let sequence = s.eat_while(char::is_ascii_hexdigit);
|
|
let _terminated = s.eat_if('}');
|
|
match resolve_hex(sequence) {
|
|
Some(c) => out.push(c),
|
|
None => out.push_str(s.from(start)),
|
|
}
|
|
}
|
|
|
|
_ => out.push_str(s.from(start)),
|
|
}
|
|
}
|
|
|
|
out
|
|
}
|
|
|
|
/// Resolve a hexadecimal escape sequence into a character
/// (only the inner hex letters without braces or `\u`).
///
/// Returns `None` if `sequence` is empty, contains anything other than ASCII
/// hex digits, does not fit into a `u32`, or does not denote a Unicode scalar
/// value (e.g. a surrogate or a value above `U+10FFFF`).
pub fn resolve_hex(sequence: &str) -> Option<char> {
    // `u32::from_str_radix` tolerates a leading `+` sign, which is not a hex
    // digit and must not be accepted as part of an escape sequence.
    if !sequence.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }

    u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
}
|
|
|
|
/// Resolve the language tag and trim the raw text.
|
|
pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawKind {
|
|
if backticks > 1 {
|
|
let (tag, inner) = split_at_lang_tag(text);
|
|
let (text, block) = trim_and_split_raw(column, inner);
|
|
RawKind {
|
|
lang: is_ident(tag).then(|| tag.into()),
|
|
text: text.into(),
|
|
block,
|
|
}
|
|
} else {
|
|
RawKind {
|
|
lang: None,
|
|
text: split_lines(text).join("\n").into(),
|
|
block: false,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Parse the lang tag and return it alongside the remaining inner raw text.
|
|
fn split_at_lang_tag(raw: &str) -> (&str, &str) {
|
|
let mut s = Scanner::new(raw);
|
|
(
|
|
s.eat_until(|c: char| c == '`' || c.is_whitespace() || is_newline(c)),
|
|
s.after(),
|
|
)
|
|
}
|
|
|
|
/// Trim raw text and splits it into lines.
|
|
///
|
|
/// Also returns whether at least one newline was contained in `raw`.
|
|
fn trim_and_split_raw(column: usize, mut raw: &str) -> (String, bool) {
|
|
// Trims one space at the start.
|
|
raw = raw.strip_prefix(' ').unwrap_or(raw);
|
|
|
|
// Trim one space at the end if the last non-whitespace char is a backtick.
|
|
if raw.trim_end().ends_with('`') {
|
|
raw = raw.strip_suffix(' ').unwrap_or(raw);
|
|
}
|
|
|
|
let mut lines = split_lines(raw);
|
|
|
|
// Dedent based on column, but not for the first line.
|
|
for line in lines.iter_mut().skip(1) {
|
|
let offset = line
|
|
.chars()
|
|
.take(column)
|
|
.take_while(|c| c.is_whitespace())
|
|
.map(char::len_utf8)
|
|
.sum();
|
|
*line = &line[offset ..];
|
|
}
|
|
|
|
let had_newline = lines.len() > 1;
|
|
let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace);
|
|
|
|
// Trims a sequence of whitespace followed by a newline at the start.
|
|
if lines.first().map_or(false, is_whitespace) {
|
|
lines.remove(0);
|
|
}
|
|
|
|
// Trims a newline followed by a sequence of whitespace at the end.
|
|
if lines.last().map_or(false, is_whitespace) {
|
|
lines.pop();
|
|
}
|
|
|
|
(lines.join("\n"), had_newline)
|
|
}
|
|
|
|
/// Split a string into a vector of lines
|
|
/// (respecting Unicode, Unix, Mac and Windows line breaks).
|
|
fn split_lines(text: &str) -> Vec<&str> {
|
|
let mut s = Scanner::new(text);
|
|
let mut lines = Vec::new();
|
|
let mut start = 0;
|
|
let mut end = 0;
|
|
|
|
while let Some(c) = s.eat() {
|
|
if is_newline(c) {
|
|
if c == '\r' {
|
|
s.eat_if('\n');
|
|
}
|
|
|
|
lines.push(&text[start .. end]);
|
|
start = s.cursor();
|
|
}
|
|
end = s.cursor();
|
|
}
|
|
|
|
lines.push(&text[start ..]);
|
|
lines
|
|
}
|
|
|
|
#[cfg(test)]
#[rustfmt::skip]
mod tests {
    use super::*;

    /// Escape sequences in strings, including unterminated `\u{...}` and
    /// backslashes that do not start a known escape.
    #[test]
    fn test_resolve_strings() {
        #[track_caller]
        fn test(string: &str, expected: &str) {
            assert_eq!(resolve_string(string), expected);
        }

        test(r#"hello world"#,  "hello world");
        test(r#"hello\nworld"#, "hello\nworld");
        test(r#"a\"bc"#,        "a\"bc");
        test(r#"a\u{2603}bc"#,  "a☃bc");
        test(r#"a\u{26c3bg"#,   "a𦰻g");
        test(r#"av\u{6797"#,    "av林");
        test(r#"a\\"#,          "a\\");
        test(r#"a\\\nbc"#,      "a\\\nbc");
        test(r#"a\t\r\nbc"#,    "a\t\r\nbc");
        test(r"🌎",             "🌎");
        test(r"🌎\",            r"🌎\");
        test(r"\🌎",            r"\🌎");
    }

    /// The language tag ends at the first backtick or whitespace character.
    #[test]
    fn test_split_at_lang_tag() {
        #[track_caller]
        fn test(text: &str, lang: &str, inner: &str) {
            assert_eq!(split_at_lang_tag(text), (lang, inner));
        }

        test("typst it!",      "typst", " it!");
        test("typst\n it!",    "typst", "\n it!");
        test("typst\r\n it!",  "typst", "\r\n it!");
        test("abc`",           "abc",   "`");
        test(" hi",            "",      " hi");
        test("`",              "",      "`");
    }

    /// End-to-end resolution of raw text: tag detection, trimming, dedenting.
    #[test]
    fn test_resolve_raw() {
        #[track_caller]
        fn test(
            column: usize,
            backticks: usize,
            raw: &str,
            lang: Option<&str>,
            text: &str,
            block: bool,
        ) {
            let node = resolve_raw(column, backticks, raw);
            assert_eq!(node.lang.as_deref(), lang);
            assert_eq!(node.text, text);
            assert_eq!(node.block, block);
        }

        // Just one backtick.
        test(0, 1, "py",     None, "py",   false);
        test(0, 1, "1\n2",   None, "1\n2", false);
        test(0, 1, "1\r\n2", None, "1\n2", false);

        // More than one backtick with lang tag.
        test(0, 2, "js alert()",     Some("js"), "alert()",    false);
        test(0, 3, "py quit(\n\n)",  Some("py"), "quit(\n\n)", true);
        test(0, 2, "♥",              None,       "",           false);

        // Trimming of whitespace (tested more thoroughly in separate test).
        // Only a single leading space is removed.
        test(0, 2, " a",   None, "a",  false);
        test(0, 2, "  a",  None, " a", false);
        test(0, 2, " \na", None, "a",  true);

        // Dedenting: at most `column` whitespace characters are stripped from
        // continuation lines.
        test(2, 3, " def foo():\n    bar()", None, "def foo():\n  bar()", true);
    }

    /// Trimming rules of `trim_and_split_raw` with no dedenting (column 0).
    #[test]
    fn test_trim_raw() {
        #[track_caller]
        fn test(text: &str, expected: &str) {
            assert_eq!(trim_and_split_raw(0, text).0, expected);
        }

        // Exactly one leading space is trimmed.
        test(" hi",          "hi");
        test("  hi",         " hi");
        // A whitespace-only first line is dropped.
        test("\nhi",         "hi");
        test("    \n hi",    " hi");
        // Exactly one trailing space is trimmed, and only after a backtick.
        test("hi` ",         "hi`");
        test("hi`  ",        "hi` ");
        test("hi`   ",       "hi`  ");
        test("hi ",          "hi ");
        test("hi  ",         "hi  ");
        // A whitespace-only last line is dropped.
        test("hi\n",         "hi");
        test("hi \n   ",     "hi ");
        test("  \n hi \n  ", " hi ");
    }
}
|