Simplify tokenizer a bit 🔄

This commit is contained in:
Laurenz 2020-10-01 11:03:42 +02:00
parent 4b9bc66028
commit c0998b4802

View File

@ -74,6 +74,7 @@ impl<'s> Iterator for Tokens<'s> {
'}' => Token::RightBrace, '}' => Token::RightBrace,
// Syntactic elements in body text. // Syntactic elements in body text.
'*' if self.mode == Body => Token::Star,
'_' if self.mode == Body => Token::Underscore, '_' if self.mode == Body => Token::Underscore,
'`' if self.mode == Body => self.read_raw(), '`' if self.mode == Body => self.read_raw(),
'#' if self.mode == Body => Token::Hashtag, '#' if self.mode == Body => Token::Hashtag,
@ -88,17 +89,14 @@ impl<'s> Iterator for Tokens<'s> {
'=' if self.mode == Header => Token::Equals, '=' if self.mode == Header => Token::Equals,
'>' if self.mode == Header && self.p.eat_if('>') => Token::Chain, '>' if self.mode == Header && self.p.eat_if('>') => Token::Chain,
// Expressions. // Expressions in headers.
'+' if self.mode == Header => Token::Plus, '+' if self.mode == Header => Token::Plus,
'-' if self.mode == Header => Token::Hyphen, '-' if self.mode == Header => Token::Hyphen,
'*' if self.mode == Header => Token::Star,
'/' if self.mode == Header => Token::Slash, '/' if self.mode == Header => Token::Slash,
'#' if self.mode == Header => self.read_hex(), '#' if self.mode == Header => self.read_hex(),
'"' if self.mode == Header => self.read_string(), '"' if self.mode == Header => self.read_string(),
// Star serves a double purpose as a style modifier
// and an expression operator in the header.
'*' => Token::Star,
// Expressions or just plain text. // Expressions or just plain text.
_ => self.read_text_or_expr(start), _ => self.read_text_or_expr(start),
}; };
@ -115,7 +113,8 @@ impl<'s> Tokens<'s> {
return Token::Space(0); return Token::Space(0);
} }
// Uneat the first char if it's a newline, so it's counted in the loop. // Uneat the first char if it's a newline, so that it's counted in the
// loop.
if is_newline_char(first) { if is_newline_char(first) {
self.p.uneat(); self.p.uneat();
} }
@ -143,34 +142,31 @@ impl<'s> Tokens<'s> {
fn read_block_comment(&mut self) -> Token<'s> { fn read_block_comment(&mut self) -> Token<'s> {
let start = self.p.index(); let start = self.p.index();
let mut state = '_';
let mut depth = 1; let mut depth = 1;
let mut state = ' ';
// Find the first `*/` that does not correspond to a nested `/*`. // Find the first `*/` that does not correspond to a nested `/*`.
while let Some(c) = self.p.eat() { while let Some(c) = self.p.eat() {
state = match (state, c) { state = match (state, c) {
('*', '/') if depth == 1 => {
depth = 0;
break;
}
('*', '/') => { ('*', '/') => {
depth -= 1; depth -= 1;
' ' if depth == 0 {
break;
}
'_'
} }
('/', '*') => { ('/', '*') => {
depth += 1; depth += 1;
' ' '_'
} }
_ => c, _ => c,
} }
} }
let mut read = self.p.eaten_from(start); let terminated = depth == 0;
if depth == 0 { let end = self.p.index() - if terminated { 2 } else { 0 };
read = read.strip_suffix("*/").unwrap_or(read);
}
Token::BlockComment(read) Token::BlockComment(self.p.get(start .. end))
} }
fn read_hex(&mut self) -> Token<'s> { fn read_hex(&mut self) -> Token<'s> {
@ -201,6 +197,7 @@ impl<'s> Tokens<'s> {
} }
let start = self.p.index(); let start = self.p.index();
let mut found = 0; let mut found = 0;
while found < backticks { while found < backticks {
match self.p.eat() { match self.p.eat() {
@ -288,11 +285,7 @@ fn parse_expr(text: &str) -> Token<'_> {
} }
fn parse_percent(text: &str) -> Option<f64> { fn parse_percent(text: &str) -> Option<f64> {
if text.ends_with('%') { text.strip_suffix('%').and_then(|num| num.parse::<f64>().ok())
text[.. text.len() - 1].parse::<f64>().ok()
} else {
None
}
} }
#[cfg(test)] #[cfg(test)]