Move Nbsp logic to tokenizer 🚛

2025-07-02 10:12:52 +08:00 · 2020-08-31 14:47:52 +02:00 · 2020-08-31 14:47:52 +02:00 · 1942a25793
commit 1942a25793
parent 08433ab79f
2 changed files with 7 additions and 22 deletions
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@@ -104,26 +104,7 @@ impl Parser<'_> {
                    self.with_span(SyntaxNode::Code(Code { lang, lines, block }))
                }

-                Token::Text(text) => {
-                    let mut text_s = String::with_capacity(text.len());
-                    let mut iter = text.chars();
-                    while let Some(c) = iter.next() {
-                        match c {
-                            '~' => {
-                                // The escape sequence will separate
-                                // the ~ into its own text node, therefore
-                                // check the length here.
-                                if text.len() == 1 {
-                                    text_s.push('~');
-                                } else {
-                                    text_s.push('\u{00A0}');
-                                }
-                            },
-                            _ => text_s.push(c),
-                        }
-                    }
-                    self.with_span(SyntaxNode::Text(text_s.to_string()))
-                },
+                Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),

                Token::UnicodeEscape { sequence, terminated } => {
                    if !terminated {
@@ -1025,7 +1006,7 @@ mod tests {
        t!("*hi"          => B, T("hi"));
        t!("hi_"          => T("hi"), I);
        t!("hi you"       => T("hi"), S, T("you"));
-        t!("special~name" => T("special\u{00A0}name"));
+        t!("special~name" => T("special"), T("\u{00A0}"), T("name"));
        t!("special\\~name" => T("special"), T("~"), T("name"));
        t!("\\u{1f303}"   => T("🌃"));
        t!("\n\n\nhello"  => P, T("hello"));
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -265,6 +265,9 @@ impl<'s> Iterator for Tokens<'s> {
            '_' if self.mode == Body => Underscore,
            '`' if self.mode == Body => self.read_raw_or_code(),

+            // Non-breaking spaces.
+            '~' if self.mode == Body => Text("\u{00A0}"),
+
            // An escaped thing.
            '\\' if self.mode == Body => self.read_escaped(),

@@ -279,7 +282,7 @@ impl<'s> Iterator for Tokens<'s> {
                    let val = match n {
                        c if c.is_whitespace() => true,
                        '[' | ']' | '{' | '}' | '/' | '*' => true,
-                        '\\' | '_' | '`' if body => true,
+                        '\\' | '_' | '`' | '~' if body => true,
                        ':' | '=' | ',' | '"' | '(' | ')' if !body => true,
                        '+' | '-' if !body && !last_was_e => true,
                        _ => false,
@@ -646,6 +649,7 @@ mod tests {
        t!(Body, "  \n\t \n  "  => S(2));
        t!(Body, "\n\r"         => S(2));
        t!(Body, " \r\r\n \x0D" => S(3));
+        t!(Body, "a~b"          => T("a"), T("\u{00A0}"), T("b"));
    }

    #[test]