Move Nbsp logic to tokenizer 🚛

This commit is contained in:
Martin Haug 2020-08-31 14:47:52 +02:00
parent 08433ab79f
commit 1942a25793
2 changed files with 7 additions and 22 deletions

View File

@ -104,26 +104,7 @@ impl Parser<'_> {
self.with_span(SyntaxNode::Code(Code { lang, lines, block }))
}
Token::Text(text) => {
let mut text_s = String::with_capacity(text.len());
let mut iter = text.chars();
while let Some(c) = iter.next() {
match c {
'~' => {
// The escape sequence will separate
// the ~ into its own text node, therefore
// check the length here.
if text.len() == 1 {
text_s.push('~');
} else {
text_s.push('\u{00A0}');
}
},
_ => text_s.push(c),
}
}
self.with_span(SyntaxNode::Text(text_s.to_string()))
},
Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),
Token::UnicodeEscape { sequence, terminated } => {
if !terminated {
@ -1025,7 +1006,7 @@ mod tests {
t!("*hi" => B, T("hi"));
t!("hi_" => T("hi"), I);
t!("hi you" => T("hi"), S, T("you"));
t!("special~name" => T("special\u{00A0}name"));
t!("special~name" => T("special"), T("\u{00A0}"), T("name"));
t!("special\\~name" => T("special"), T("~"), T("name"));
t!("\\u{1f303}" => T("🌃"));
t!("\n\n\nhello" => P, T("hello"));

View File

@ -265,6 +265,9 @@ impl<'s> Iterator for Tokens<'s> {
'_' if self.mode == Body => Underscore,
'`' if self.mode == Body => self.read_raw_or_code(),
// Non-breaking spaces.
'~' if self.mode == Body => Text("\u{00A0}"),
// An escaped thing.
'\\' if self.mode == Body => self.read_escaped(),
@ -279,7 +282,7 @@ impl<'s> Iterator for Tokens<'s> {
let val = match n {
c if c.is_whitespace() => true,
'[' | ']' | '{' | '}' | '/' | '*' => true,
'\\' | '_' | '`' if body => true,
'\\' | '_' | '`' | '~' if body => true,
':' | '=' | ',' | '"' | '(' | ')' if !body => true,
'+' | '-' if !body && !last_was_e => true,
_ => false,
@ -646,6 +649,7 @@ mod tests {
t!(Body, " \n\t \n " => S(2));
t!(Body, "\n\r" => S(2));
t!(Body, " \r\r\n \x0D" => S(3));
t!(Body, "a~b" => T("a"), T("\u{00A0}"), T("b"));
}
#[test]