From d0e252d11603d0cb3351a03b53df68542e658329 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 31 Aug 2020 12:11:34 +0200 Subject: [PATCH 1/3] =?UTF-8?q?Add=20non-breaking=20space=20=F0=9F=94=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/syntax/parsing.rs | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs index ae9cfdb1b..e5873c7c7 100644 --- a/src/syntax/parsing.rs +++ b/src/syntax/parsing.rs @@ -104,7 +104,17 @@ impl Parser<'_> { self.with_span(SyntaxNode::Code(Code { lang, lines, block })) } - Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())), + Token::Text(text) => { + let mut text_s = String::with_capacity(text.len()); + let mut iter = text.chars(); + while let Some(c) = iter.next() { + match c { + '~' => text_s.push('\u{00A0}'), + _ => text_s.push(c), + } + } + self.with_span(SyntaxNode::Text(text_s.to_string())) + }, Token::UnicodeEscape { sequence, terminated } => { if !terminated { @@ -1001,18 +1011,19 @@ mod tests { #[test] fn test_parse_simple_nodes() { - t!("" => ); - t!("hi" => T("hi")); - t!("*hi" => B, T("hi")); - t!("hi_" => T("hi"), I); - t!("hi you" => T("hi"), S, T("you")); - t!("\\u{1f303}" => T("🌃")); - t!("\n\n\nhello" => P, T("hello")); - t!(r"a\ b" => T("a"), L, S, T("b")); - t!("`py`" => R!["py"]); - t!("`hi\nyou" => R!["hi", "you"]); - e!("`hi\nyou" => s(1,3, 1,3, "expected backtick")); - t!("`hi\\`du`" => R!["hi`du"]); + t!("" => ); + t!("hi" => T("hi")); + t!("*hi" => B, T("hi")); + t!("hi_" => T("hi"), I); + t!("hi you" => T("hi"), S, T("you")); + t!("special~name" => T("special\u{00A0}name")); + t!("\\u{1f303}" => T("🌃")); + t!("\n\n\nhello" => P, T("hello")); + t!(r"a\ b" => T("a"), L, S, T("b")); + t!("`py`" => R!["py"]); + t!("`hi\nyou" => R!["hi", "you"]); + e!("`hi\nyou" => s(1,3, 1,3, "expected backtick")); + t!("`hi\\`du`" => R!["hi`du"]); t!("```java System.out.print```" => C![Some("java"), "System.out.print"]); t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]); From 08433ab79fa8e775c6574b75e1e6222ecdca7ef1 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 31 Aug 2020 12:20:45 +0200 Subject: [PATCH 2/3] =?UTF-8?q?Capability=20to=20escape=20the=20tilde=20sy?= =?UTF-8?q?mbol=20=F0=9F=92=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/syntax/parsing.rs | 12 +++++++++++- src/syntax/tokens.rs | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs index e5873c7c7..d48b9ff60 100644 --- a/src/syntax/parsing.rs +++ b/src/syntax/parsing.rs @@ -109,7 +109,16 @@ impl Parser<'_> { let mut iter = text.chars(); while let Some(c) = iter.next() { match c { - '~' => text_s.push('\u{00A0}'), + '~' => { + // The escape sequence will separate + // the ~ into its own text node, therefore + // check the length here. + if text.len() == 1 { + text_s.push('~'); + } else { + text_s.push('\u{00A0}'); + } + }, _ => text_s.push(c), } } @@ -1017,6 +1026,7 @@ mod tests { t!("hi_" => T("hi"), I); t!("hi you" => T("hi"), S, T("you")); t!("special~name" => T("special\u{00A0}name")); + t!("special\\~name" => T("special"), T("~"), T("name")); t!("\\u{1f303}" => T("🌃")); t!("\n\n\nhello" => P, T("hello")); t!(r"a\ b" => T("a"), L, S, T("b")); diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index d566363cc..f41babbc1 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -439,7 +439,7 @@ impl<'s> Tokens<'s> { fn read_escaped(&mut self) -> Token<'s> { fn is_escapable(c: char) -> bool { match c { - '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' => true, + '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' | '~' => true, _ => false, } } From 1942a25793ce11c2854deed8d1dcd56ae851e1d6 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Mon, 31 Aug 2020 14:47:52 +0200 Subject: [PATCH 3/3] =?UTF-8?q?Move=20Nbsp=20logic=20to=20tokenizer=20?= =?UTF-8?q?=F0=9F=9A=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/syntax/parsing.rs | 23 ++--------------------- src/syntax/tokens.rs | 6 +++++- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs index d48b9ff60..6a8b8103a 100644 --- a/src/syntax/parsing.rs +++ b/src/syntax/parsing.rs @@ -104,26 +104,7 @@ impl Parser<'_> { self.with_span(SyntaxNode::Code(Code { lang, lines, block })) } - Token::Text(text) => { - let mut text_s = String::with_capacity(text.len()); - let mut iter = text.chars(); - while let Some(c) = iter.next() { - match c { - '~' => { - // The escape sequence will separate - // the ~ into its own text node, therefore - // check the length here. - if text.len() == 1 { - text_s.push('~'); - } else { - text_s.push('\u{00A0}'); - } - }, - _ => text_s.push(c), - } - } - self.with_span(SyntaxNode::Text(text_s.to_string())) - }, + Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())), Token::UnicodeEscape { sequence, terminated } => { if !terminated { @@ -1025,7 +1006,7 @@ mod tests { t!("*hi" => B, T("hi")); t!("hi_" => T("hi"), I); t!("hi you" => T("hi"), S, T("you")); - t!("special~name" => T("special\u{00A0}name")); + t!("special~name" => T("special"), T("\u{00A0}"), T("name")); t!("special\\~name" => T("special"), T("~"), T("name")); t!("\\u{1f303}" => T("🌃")); t!("\n\n\nhello" => P, T("hello")); diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index f41babbc1..e333968b4 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -265,6 +265,9 @@ impl<'s> Iterator for Tokens<'s> { '_' if self.mode == Body => Underscore, '`' if self.mode == Body => self.read_raw_or_code(), + // Non-breaking spaces. + '~' if self.mode == Body => Text("\u{00A0}"), + // An escaped thing. '\\' if self.mode == Body => self.read_escaped(), @@ -279,7 +282,7 @@ impl<'s> Iterator for Tokens<'s> { let val = match n { c if c.is_whitespace() => true, '[' | ']' | '{' | '}' | '/' | '*' => true, - '\\' | '_' | '`' if body => true, + '\\' | '_' | '`' | '~' if body => true, ':' | '=' | ',' | '"' | '(' | ')' if !body => true, '+' | '-' if !body && !last_was_e => true, _ => false, @@ -646,6 +649,7 @@ mod tests { t!(Body, " \n\t \n " => S(2)); t!(Body, "\n\r" => S(2)); t!(Body, " \r\r\n \x0D" => S(3)); + t!(Body, "a~b" => T("a"), T("\u{00A0}"), T("b")); } #[test]