use crate::syntax::tokens::is_newline_char;

/// Resolves all escape sequences in a string.
///
/// The recognized escapes are `\\`, `\"`, `\n`, `\t` and `\u{XXXX}`, where
/// `XXXX` is the hexadecimal value of a Unicode scalar value. A unicode escape
/// whose closing brace is missing is still resolved from the hex digits that
/// were read up to that point. Everything that cannot be resolved (unknown
/// escapes, invalid code points, a trailing backslash) is copied to the output
/// unchanged, backslash included.
pub fn unescape_string(string: &str) -> String {
    let mut iter = string.chars().peekable();
    let mut out = String::with_capacity(string.len());

    while let Some(c) = iter.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }

        match iter.next() {
            Some('\\') => out.push('\\'),
            Some('"') => out.push('"'),
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),

            Some('u') if iter.peek() == Some(&'{') => {
                // Skip the opening brace.
                iter.next();

                // Collect hex digits until the closing brace or the first
                // character that is neither a hex digit nor the brace.
                let mut sequence = String::new();
                let terminated = loop {
                    match iter.peek() {
                        Some('}') => {
                            iter.next();
                            break true;
                        }
                        Some(&c) if c.is_ascii_hexdigit() => {
                            iter.next();
                            sequence.push(c);
                        }
                        // TODO: Feedback that closing brace is missing.
                        _ => break false,
                    }
                };

                // TODO: Feedback that escape sequence is wrong.
                if let Some(c) = hex_to_char(&sequence) {
                    out.push(c);
                } else {
                    // Not a valid scalar value: reproduce exactly what was
                    // consumed so that no input is lost.
                    out.push_str("\\u{");
                    out.push_str(&sequence);
                    if terminated {
                        out.push('}');
                    }
                }
            }

            // Unknown escape: keep both the backslash and the character.
            Some(c) => {
                out.push('\\');
                out.push(c);
            }

            // Backslash at the very end of the input.
            None => out.push('\\'),
        }
    }

    out
}

/// Resolves all escape sequences in raw markup (between backticks) and splits
/// it into lines.
///
/// Only an escaped backslash and an escaped backtick are resolved (the
/// escaping backslash is dropped). Any other backslash, including one at the
/// very end of the input, is kept as is.
pub fn unescape_raw(raw: &str) -> Vec<String> {
    let mut iter = raw.chars();
    let mut text = String::new();

    while let Some(c) = iter.next() {
        if c == '\\' {
            if let Some(c) = iter.next() {
                // Keep the backslash unless it escaped `\` or a backtick.
                if c != '\\' && c != '`' {
                    text.push('\\');
                }
                text.push(c);
            } else {
                text.push('\\');
            }
        } else {
            text.push(c);
        }
    }

    split_lines(&text)
}

/// Resolves all escape sequences in code markup (between triple backticks) and
/// splits it into lines.
pub fn unescape_code(raw: &str) -> Vec { let mut iter = raw.chars().peekable(); let mut text = String::new(); let mut backticks = 0u32; let mut update_backtick_count; while let Some(c) = iter.next() { update_backtick_count = true; if c == '\\' && backticks > 0 { let mut tail = String::new(); let mut escape_success = false; let mut backticks_after_slash = 0u32; while let Some(&s) = iter.peek() { match s { '\\' => { if backticks_after_slash == 0 { tail.push('\\'); } else { // Pattern like `\`\` should fail // escape and just be printed verbantim. break; } } '`' => { tail.push(s); backticks_after_slash += 1; if backticks_after_slash == 2 { escape_success = true; iter.next(); break; } } _ => break, } iter.next(); } if !escape_success { text.push(c); backticks = backticks_after_slash; update_backtick_count = false; } else { backticks = 0; } text.push_str(&tail); } else { text.push(c); } if update_backtick_count { if c == '`' { backticks += 1; } else { backticks = 0; } } } split_lines(&text) } /// Converts a hexademical sequence (without braces or "\u") into a character. pub fn hex_to_char(sequence: &str) -> Option { u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) } /// Splits a string into a vector of lines (respecting Unicode & Windows line breaks). 
pub fn split_lines(text: &str) -> Vec { let mut iter = text.chars().peekable(); let mut line = String::new(); let mut lines = Vec::new(); while let Some(c) = iter.next() { if is_newline_char(c) { if c == '\r' && iter.peek() == Some(&'\n') { iter.next(); } lines.push(std::mem::take(&mut line)); } else { line.push(c); } } lines.push(line); lines } #[cfg(test)] mod tests { use super::*; #[test] #[rustfmt::skip] fn test_unescape_strings() { fn test(string: &str, expected: &str) { assert_eq!(unescape_string(string), expected.to_string()); } test(r#"hello world"#, "hello world"); test(r#"hello\nworld"#, "hello\nworld"); test(r#"a\"bc"#, "a\"bc"); test(r#"a\u{2603}bc"#, "a☃bc"); test(r#"a\u{26c3bg"#, "a𦰻g"); test(r#"av\u{6797"#, "av林"); test(r#"a\\"#, "a\\"); test(r#"a\\\nbc"#, "a\\\nbc"); test(r#"a\tbc"#, "a\tbc"); test(r"🌎", "🌎"); test(r"🌎\", r"🌎\"); test(r"\🌎", r"\🌎"); } #[test] #[rustfmt::skip] fn test_unescape_raws() { fn test(raw: &str, expected: Vec<&str>) { assert_eq!(unescape_raw(raw), expected); } test("raw\\`", vec!["raw`"]); test("raw\\\\`", vec!["raw\\`"]); test("raw\ntext", vec!["raw", "text"]); test("a\r\nb", vec!["a", "b"]); test("a\n\nb", vec!["a", "", "b"]); test("a\r\x0Bb", vec!["a", "", "b"]); test("a\r\n\r\nb", vec!["a", "", "b"]); test("raw\\a", vec!["raw\\a"]); test("raw\\", vec!["raw\\"]); } #[test] #[rustfmt::skip] fn test_unescape_code() { fn test(raw: &str, expected: Vec<&str>) { assert_eq!(unescape_code(raw), expected); } test("code\\`", vec!["code\\`"]); test("code`\\``", vec!["code```"]); test("code`\\`a", vec!["code`\\`a"]); test("code``hi`\\``", vec!["code``hi```"]); test("code`\\\\``", vec!["code`\\``"]); test("code`\\`\\`go", vec!["code`\\`\\`go"]); test("code`\\`\\``", vec!["code`\\```"]); test("code\ntext", vec!["code", "text"]); test("a\r\nb", vec!["a", "b"]); test("a\n\nb", vec!["a", "", "b"]); test("a\r\x0Bb", vec!["a", "", "b"]); test("a\r\n\r\nb", vec!["a", "", "b"]); test("code\\a", vec!["code\\a"]); test("code\\", 
vec!["code\\"]); } }