use unscanny::Scanner; use super::{is_ident, is_newline, RawKind}; use crate::util::EcoString; /// Resolve all escape sequences in a string. pub fn resolve_string(string: &str) -> EcoString { let mut out = EcoString::with_capacity(string.len()); let mut s = Scanner::new(string); while let Some(c) = s.eat() { if c != '\\' { out.push(c); continue; } let start = s.locate(-1); match s.eat() { Some('\\') => out.push('\\'), Some('"') => out.push('"'), Some('n') => out.push('\n'), Some('r') => out.push('\r'), Some('t') => out.push('\t'), Some('u') if s.eat_if('{') => { // TODO: Error if closing brace is missing. let sequence = s.eat_while(char::is_ascii_hexdigit); let _terminated = s.eat_if('}'); match resolve_hex(sequence) { Some(c) => out.push(c), None => out.push_str(s.from(start)), } } _ => out.push_str(s.from(start)), } } out } /// Resolve a hexadecimal escape sequence into a character /// (only the inner hex letters without braces or `\u`). pub fn resolve_hex(sequence: &str) -> Option { u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) } /// Resolve the language tag and trim the raw text. pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawKind { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); RawKind { lang: is_ident(tag).then(|| tag.into()), text: text.into(), block, } } else { RawKind { lang: None, text: split_lines(text).join("\n").into(), block: false, } } } /// Parse the lang tag and return it alongside the remaining inner raw text. fn split_at_lang_tag(raw: &str) -> (&str, &str) { let mut s = Scanner::new(raw); (s.eat_until(|c: char| c == '`' || c.is_whitespace() || is_newline(c)), s.after()) } /// Trim raw text and splits it into lines. /// /// Also returns whether at least one newline was contained in `raw`. fn trim_and_split_raw(column: usize, mut raw: &str) -> (String, bool) { // Trims one space at the start. raw = raw.strip_prefix(' ').unwrap_or(raw); // Trim one space at the end if the last non-whitespace char is a backtick. if raw.trim_end().ends_with('`') { raw = raw.strip_suffix(' ').unwrap_or(raw); } let mut lines = split_lines(raw); // Dedent based on column, but not for the first line. for line in lines.iter_mut().skip(1) { let offset = line .chars() .take(column) .take_while(|c| c.is_whitespace()) .map(char::len_utf8) .sum(); *line = &line[offset..]; } let had_newline = lines.len() > 1; let is_whitespace = |line: &&str| line.chars().all(char::is_whitespace); // Trims a sequence of whitespace followed by a newline at the start. if lines.first().map_or(false, is_whitespace) { lines.remove(0); } // Trims a newline followed by a sequence of whitespace at the end. if lines.last().map_or(false, is_whitespace) { lines.pop(); } (lines.join("\n"), had_newline) } /// Split a string into a vector of lines /// (respecting Unicode, Unix, Mac and Windows line breaks). fn split_lines(text: &str) -> Vec<&str> { let mut s = Scanner::new(text); let mut lines = Vec::new(); let mut start = 0; let mut end = 0; while let Some(c) = s.eat() { if is_newline(c) { if c == '\r' { s.eat_if('\n'); } lines.push(&text[start..end]); start = s.cursor(); } end = s.cursor(); } lines.push(&text[start..]); lines } #[cfg(test)] #[rustfmt::skip] mod tests { use super::*; #[test] fn test_resolve_strings() { #[track_caller] fn test(string: &str, expected: &str) { assert_eq!(resolve_string(string), expected); } test(r#"hello world"#, "hello world"); test(r#"hello\nworld"#, "hello\nworld"); test(r#"a\"bc"#, "a\"bc"); test(r#"a\u{2603}bc"#, "a☃bc"); test(r#"a\u{26c3bg"#, "a𦰻g"); test(r#"av\u{6797"#, "av林"); test(r#"a\\"#, "a\\"); test(r#"a\\\nbc"#, "a\\\nbc"); test(r#"a\t\r\nbc"#, "a\t\r\nbc"); test(r"🌎", "🌎"); test(r"🌎\", r"🌎\"); test(r"\🌎", r"\🌎"); } #[test] fn test_split_at_lang_tag() { #[track_caller] fn test(text: &str, lang: &str, inner: &str) { assert_eq!(split_at_lang_tag(text), (lang, inner)); } test("typst it!", "typst", " it!"); test("typst\n it!", "typst", "\n it!"); test("typst\n it!", "typst", "\n it!"); test("abc`", "abc", "`"); test(" hi", "", " hi"); test("`", "", "`"); } #[test] fn test_resolve_raw() { #[track_caller] fn test( column: usize, backticks: usize, raw: &str, lang: Option<&str>, text: &str, block: bool, ) { let node = resolve_raw(column, backticks, raw); assert_eq!(node.lang.as_deref(), lang); assert_eq!(node.text, text); assert_eq!(node.block, block); } // Just one backtick. test(0, 1, "py", None, "py", false); test(0, 1, "1\n2", None, "1\n2", false); test(0, 1, "1\r\n2", None, "1\n2", false); // More than one backtick with lang tag. test(0, 2, "js alert()", Some("js"), "alert()", false); test(0, 3, "py quit(\n\n)", Some("py"), "quit(\n\n)", true); test(0, 2, "♥", None, "", false); // Trimming of whitespace (tested more thoroughly in separate test). test(0, 2, " a", None, "a", false); test(0, 2, " a", None, " a", false); test(0, 2, " \na", None, "a", true); // Dedenting test(2, 3, " def foo():\n bar()", None, "def foo():\n bar()", true); } #[test] fn test_trim_raw() { #[track_caller] fn test(text: &str, expected: &str) { assert_eq!(trim_and_split_raw(0, text).0, expected); } test(" hi", "hi"); test(" hi", " hi"); test("\nhi", "hi"); test(" \n hi", " hi"); test("hi` ", "hi`"); test("hi` ", "hi` "); test("hi` ", "hi` "); test("hi ", "hi "); test("hi ", "hi "); test("hi\n", "hi"); test("hi \n ", "hi "); test(" \n hi \n ", " hi "); } }