diff --git a/src/syntax/expr.rs b/src/syntax/expr.rs index a1680861f..ed5e50df8 100644 --- a/src/syntax/expr.rs +++ b/src/syntax/expr.rs @@ -166,27 +166,6 @@ impl Display for Ident { debug_display!(Ident); -/// Whether this word is a valid identifier. -pub fn is_identifier(string: &str) -> bool { - let mut chars = string.chars(); - - match chars.next() { - Some('-') => {} - Some(c) if UnicodeXID::is_xid_start(c) => {} - _ => return false, - } - - while let Some(c) = chars.next() { - match c { - '.' | '-' => {} - c if UnicodeXID::is_xid_continue(c) => {} - _ => return false, - } - } - - true -} - /// Kinds of expressions. pub trait ExpressionKind: Sized { const NAME: &'static str; diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index d355b3ccf..ae5cfe480 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -72,9 +72,11 @@ pub fn tokenize(src: &str) -> Tokens { /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { src: &'s str, - chars: Characters<'s>, state: State, stack: Vec, + iter: Peekable>, + position: Position, + index: usize, } #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -88,9 +90,11 @@ impl<'s> Tokens<'s> { pub fn new(src: &'s str) -> Tokens<'s> { Tokens { src, - chars: Characters::new(src), state: State::Body, stack: vec![], + iter: src.chars().peekable(), + position: Position::ZERO, + index: 0, } } } @@ -100,26 +104,29 @@ impl<'s> Iterator for Tokens<'s> { /// Parse the next token in the source code. fn next(&mut self) -> Option>> { - let start = self.chars.position(); - let first = self.chars.next()?; - let second = self.chars.peek(); + let start = self.pos(); + let first = self.eat()?; let token = match first { // Comments. - '/' if second == Some('/') => self.parse_line_comment(), - '/' if second == Some('*') => self.parse_block_comment(), - '*' if second == Some('/') => { self.eat(); StarSlash } + '/' if self.peek() == Some('/') => self.parse_line_comment(), + '/' if self.peek() == Some('*') => self.parse_block_comment(), + '*' if self.peek() == Some('/') => { self.eat(); StarSlash } // Whitespace. c if c.is_whitespace() => self.parse_whitespace(start), // Functions. - '[' => { self.set_state(Header); LeftBracket } + '[' => { + self.stack.push(self.state); + self.state = Header; + LeftBracket + } ']' => { - if self.state == Header && second == Some('[') { + if self.state == Header && self.peek() == Some('[') { self.state = StartBody; } else { - self.pop_state(); + self.state = self.stack.pop().unwrap_or(Body); } RightBracket @@ -164,7 +171,7 @@ impl<'s> Iterator for Tokens<'s> { } }; - let end = self.chars.position(); + let end = self.pos(); let span = Span { start, end }; Some(Spanned { v: token, span }) @@ -206,7 +213,7 @@ impl<'s> Tokens<'s> { fn parse_whitespace(&mut self, start: Position) -> Token<'s> { self.read_string_until(|n| !n.is_whitespace(), false, 0, 0); - let end = self.chars.position(); + let end = self.pos(); Whitespace(end.line - start.line) } @@ -234,9 +241,9 @@ impl<'s> Tokens<'s> { } } - let c = self.chars.peek().unwrap_or('n'); + let c = self.peek().unwrap_or('n'); if self.state == Body && is_escapable(c) { - let index = self.chars.index(); + let index = self.index(); self.eat(); Text(&self.src[index .. index + c.len_utf8()]) } else { @@ -267,22 +274,22 @@ impl<'s> Tokens<'s> { offset_start: isize, offset_end: isize, ) -> &'s str where F: FnMut(char) -> bool { - let start = ((self.chars.index() as isize) + offset_start) as usize; + let start = ((self.index() as isize) + offset_start) as usize; let mut matched = false; - while let Some(c) = self.chars.peek() { + while let Some(c) = self.peek() { if f(c) { matched = true; if eat_match { - self.chars.next(); + self.eat(); } break; } - self.chars.next(); + self.eat(); } - let mut end = self.chars.index(); + let mut end = self.index(); if matched { end = ((end as isize) + offset_end) as usize; } @@ -290,55 +297,7 @@ impl<'s> Tokens<'s> { &self.src[start .. end] } - fn set_state(&mut self, state: State) { - self.stack.push(self.state); - self.state = state; - } - - fn pop_state(&mut self) { - self.state = self.stack.pop().unwrap_or(Body); - } - - fn eat(&mut self) { - self.chars.next(); - } -} - -fn parse_percentage(text: &str) -> Option { - if text.ends_with('%') { - text[.. text.len() - 1].parse::().ok() - } else { - None - } -} - -/// Whether this character denotes a newline. -fn is_newline_char(character: char) -> bool { - match character { - // Line Feed, Vertical Tab, Form Feed, Carriage Return. - '\x0A' ..= '\x0D' => true, - // Next Line, Line Separator, Paragraph Separator. - '\u{0085}' | '\u{2028}' | '\u{2029}' => true, - _ => false, - } -} - -struct Characters<'s> { - iter: Peekable>, - position: Position, - index: usize, -} - -impl<'s> Characters<'s> { - fn new(src: &'s str) -> Characters<'s> { - Characters { - iter: src.chars().peekable(), - position: Position::ZERO, - index: 0, - } - } - - fn next(&mut self) -> Option { + fn eat(&mut self) -> Option { let c = self.iter.next()?; let len = c.len_utf8(); @@ -362,7 +321,47 @@ impl<'s> Characters<'s> { self.index } - fn position(&self) -> Position { + fn pos(&self) -> Position { self.position } } + +fn parse_percentage(text: &str) -> Option { + if text.ends_with('%') { + text[.. text.len() - 1].parse::().ok() + } else { + None + } +} + +/// Whether this character denotes a newline. +pub fn is_newline_char(character: char) -> bool { + match character { + // Line Feed, Vertical Tab, Form Feed, Carriage Return. + '\x0A' ..= '\x0D' => true, + // Next Line, Line Separator, Paragraph Separator. + '\u{0085}' | '\u{2028}' | '\u{2029}' => true, + _ => false, + } +} + +/// Whether this word is a valid identifier. +pub fn is_identifier(string: &str) -> bool { + let mut chars = string.chars(); + + match chars.next() { + Some('-') => {} + Some(c) if UnicodeXID::is_xid_start(c) => {} + _ => return false, + } + + while let Some(c) = chars.next() { + match c { + '.' | '-' => {} + c if UnicodeXID::is_xid_continue(c) => {} + _ => return false, + } + } + + true +} diff --git a/tests/parse.rs b/tests/parse.rs index 14a5b22d3..02c4f9b7b 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -9,18 +9,12 @@ use Token::{ LeftBracket as LB, RightBracket as RB, LeftParen as LP, RightParen as RP, LeftBrace as LBR, RightBrace as RBR, - Colon as CL, Comma as CM, Equals as EQ, Expr as E, + Colon as CL, Comma as CM, Equals as EQ, + ExprIdent as ID, ExprString as STR, ExprSize as SIZE, + ExprNumber as NUM, ExprBool as BOOL, Star as ST, Underscore as U, Backtick as B, Text as T, }; -use Expression as Expr; -fn ID(ident: &str) -> Token { E(Expr::Ident(Ident::new(ident.to_string()).unwrap())) } -fn STR(ident: &str) -> Token { E(Expr::Str(ident.to_string())) } -fn SIZE(size: Size) -> Token<'static> { E(Expr::Size(size)) } -fn NUM(num: f64) -> Token<'static> { E(Expr::Num(num)) } -fn BOOL(b: bool) -> Token<'static> { E(Expr::Bool(b)) } - - /// Parses the test syntax. macro_rules! tokens { ($($task:ident $src:expr =>($line:expr)=> [$($target:tt)*])*) => ({