Refresh tokenizer 🌊

This commit is contained in:
Laurenz 2021-02-17 21:47:24 +01:00
parent 2f4dc99cec
commit e143fd36ef
2 changed files with 53 additions and 62 deletions

View File

@ -73,7 +73,7 @@ impl<'s> Iterator for Tokens<'s> {
'{' => Token::LeftBrace, '{' => Token::LeftBrace,
'}' => Token::RightBrace, '}' => Token::RightBrace,
// Keywords, bracket functions, colors. // Keywords, variables, functions, colors.
'#' => self.hash(start), '#' => self.hash(start),
// Whitespace. // Whitespace.
@ -154,24 +154,27 @@ impl<'s> Iterator for Tokens<'s> {
impl<'s> Tokens<'s> { impl<'s> Tokens<'s> {
fn hash(&mut self, start: usize) -> Token<'s> { fn hash(&mut self, start: usize) -> Token<'s> {
if self.s.eat_if('[') { let read = self.s.eat_while(is_id_continue);
return Token::HashBracket;
}
self.s.eat_while(is_id_continue); match self.mode {
let read = self.s.eaten_from(start); TokenMode::Markup => {
if let Some(token) = keyword(read) {
return token;
}
if let Some(keyword) = keyword(read) { if read.chars().next().map_or(false, is_id_start) {
return keyword; return Token::Ident(read);
} }
}
if self.mode == TokenMode::Code { TokenMode::Code => {
if let Ok(color) = RgbaColor::from_str(read) { if let Ok(color) = RgbaColor::from_str(read) {
return Token::Color(color); return Token::Color(color);
}
} }
} }
Token::Invalid(read) Token::Invalid(self.s.eaten_from(start))
} }
fn whitespace(&mut self, first: char) -> Token<'s> { fn whitespace(&mut self, first: char) -> Token<'s> {
@ -329,7 +332,7 @@ impl<'s> Tokens<'s> {
"none" => Token::None, "none" => Token::None,
"true" => Token::Bool(true), "true" => Token::Bool(true),
"false" => Token::Bool(false), "false" => Token::Bool(false),
id => Token::Ident(id), id => keyword(id).unwrap_or(Token::Ident(id)),
} }
} }
@ -444,15 +447,15 @@ impl Debug for Tokens<'_> {
fn keyword(id: &str) -> Option<Token<'static>> { fn keyword(id: &str) -> Option<Token<'static>> {
Some(match id { Some(match id {
"#let" => Token::Let, "let" => Token::Let,
"#if" => Token::If, "if" => Token::If,
"#else" => Token::Else, "else" => Token::Else,
"#for" => Token::For, "for" => Token::For,
"#in" => Token::In, "in" => Token::In,
"#while" => Token::While, "while" => Token::While,
"#break" => Token::Break, "break" => Token::Break,
"#continue" => Token::Continue, "continue" => Token::Continue,
"#return" => Token::Return, "return" => Token::Return,
_ => return None, _ => return None,
}) })
} }
@ -530,7 +533,6 @@ mod tests {
('/', Some(Markup), "$ $", Math(" ", false, true)), ('/', Some(Markup), "$ $", Math(" ", false, true)),
('/', Some(Markup), r"\\", Text(r"\")), ('/', Some(Markup), r"\\", Text(r"\")),
('/', Some(Markup), "#let", Let), ('/', Some(Markup), "#let", Let),
('/', Some(Code), "#if", If),
('/', Some(Code), "(", LeftParen), ('/', Some(Code), "(", LeftParen),
('/', Some(Code), ":", Colon), ('/', Some(Code), ":", Colon),
('/', Some(Code), "+=", PlusEq), ('/', Some(Code), "+=", PlusEq),
@ -649,7 +651,7 @@ mod tests {
#[test] #[test]
fn test_tokenize_keywords() { fn test_tokenize_keywords() {
let both = [ let keywords = [
("let", Let), ("let", Let),
("if", If), ("if", If),
("else", Else), ("else", Else),
@ -661,33 +663,23 @@ mod tests {
("return", Return), ("return", Return),
]; ];
for &(s, t) in &both { for &(s, t) in &keywords {
t!(Both[" "]: format!("#{}", s) => t); t!(Markup[" "]: format!("#{}", s) => t);
t!(Both[" "]: format!("#{0}#{0}", s) => t, t); t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
t!(Markup[" /"]: format!("# {}", s) => Token::Invalid("#"), Space(0), Text(s)); t!(Markup[" /"]: format!("# {}", s) => Token::Invalid("#"), Space(0), Text(s));
} }
let code = [ for &(s, t) in &keywords {
("not", Not),
("and", And),
("or", Or),
("none", Token::None),
("false", Bool(false)),
("true", Bool(true)),
];
for &(s, t) in &code {
t!(Code[" "]: s => t); t!(Code[" "]: s => t);
t!(Markup[" /"]: s => Text(s)); t!(Markup[" /"]: s => Text(s));
} }
// Test invalid case. // Test simple identifier.
t!(Markup[" "]: "#letter" => Ident("letter"));
t!(Markup[" "]: "#123" => Invalid("#123"));
t!(Code[" /"]: "falser" => Ident("falser"));
t!(Code[" /"]: "None" => Ident("None")); t!(Code[" /"]: "None" => Ident("None"));
t!(Code[" /"]: "True" => Ident("True")); t!(Code[" /"]: "True" => Ident("True"));
// Test word that contains keyword.
t!(Markup[" "]: "#letter" => Invalid("#letter"));
t!(Code[" /"]: "falser" => Ident("falser"));
} }
#[test] #[test]
@ -963,7 +955,6 @@ mod tests {
// Test invalid keyword. // Test invalid keyword.
t!(Markup[" /"]: "#-" => Invalid("#-")); t!(Markup[" /"]: "#-" => Invalid("#-"));
t!(Markup[" /"]: "#do" => Invalid("#do"));
t!(Code[" /"]: r"#letter" => Invalid(r"#letter")); t!(Code[" /"]: r"#letter" => Invalid(r"#letter"));
} }
} }

View File

@ -74,23 +74,23 @@ pub enum Token<'s> {
Or, Or,
/// The none literal: `none`. /// The none literal: `none`.
None, None,
/// The `#let` keyword. /// The `let` keyword.
Let, Let,
/// The `#if` keyword. /// The `if` keyword.
If, If,
/// The `#else` keyword. /// The `else` keyword.
Else, Else,
/// The `#for` keyword. /// The `for` keyword.
For, For,
/// The `#in` keyword. /// The `in` keyword.
In, In,
/// The `#while` keyword. /// The `while` keyword.
While, While,
/// The `#break` keyword. /// The `break` keyword.
Break, Break,
/// The `#continue` keyword. /// The `continue` keyword.
Continue, Continue,
/// The `#return` keyword. /// The `return` keyword.
Return, Return,
/// One or more whitespace characters. /// One or more whitespace characters.
/// ///
@ -225,15 +225,15 @@ impl<'s> Token<'s> {
Self::And => "operator `and`", Self::And => "operator `and`",
Self::Or => "operator `or`", Self::Or => "operator `or`",
Self::None => "`none`", Self::None => "`none`",
Self::Let => "keyword `#let`", Self::Let => "keyword `let`",
Self::If => "keyword `#if`", Self::If => "keyword `if`",
Self::Else => "keyword `#else`", Self::Else => "keyword `else`",
Self::For => "keyword `#for`", Self::For => "keyword `for`",
Self::In => "keyword `#in`", Self::In => "keyword `in`",
Self::While => "keyword `#while`", Self::While => "keyword `while`",
Self::Break => "keyword `#break`", Self::Break => "keyword `break`",
Self::Continue => "keyword `#continue`", Self::Continue => "keyword `continue`",
Self::Return => "keyword `#return`", Self::Return => "keyword `return`",
Self::Space(_) => "space", Self::Space(_) => "space",
Self::Text(_) => "text", Self::Text(_) => "text",
Self::Raw(_) => "raw block", Self::Raw(_) => "raw block",