Rename CharParser to Scanner

Laurenz 2020-10-01 11:05:16 +02:00
parent c0998b4802
commit 16f0bd430e
5 changed files with 68 additions and 68 deletions

View File

@@ -1,11 +1,11 @@
//! Parsing and tokenization.
mod chars;
mod resolve;
mod scanner;
mod tokens;
pub use chars::*;
pub use resolve::*;
pub use scanner::*;
pub use tokens::*;
use std::str::FromStr;

View File

@@ -1,41 +1,41 @@
//! Resolve strings and raw blocks.
use super::{is_newline_char, CharParser};
use super::{is_newline_char, Scanner};
use crate::syntax::{Ident, Raw};
/// Resolves all escape sequences in a string.
pub fn resolve_string(string: &str) -> String {
let mut out = String::with_capacity(string.len());
let mut p = CharParser::new(string);
let mut s = Scanner::new(string);
while let Some(c) = p.eat() {
while let Some(c) = s.eat() {
if c != '\\' {
out.push(c);
continue;
}
let start = p.prev_index();
match p.eat() {
let start = s.prev_index();
match s.eat() {
Some('\\') => out.push('\\'),
Some('"') => out.push('"'),
Some('n') => out.push('\n'),
Some('t') => out.push('\t'),
Some('u') if p.eat_if('{') => {
Some('u') if s.eat_if('{') => {
// TODO: Feedback if closing brace is missing.
let sequence = p.eat_while(|c| c.is_ascii_hexdigit());
let _terminated = p.eat_if('}');
let sequence = s.eat_while(|c| c.is_ascii_hexdigit());
let _terminated = s.eat_if('}');
if let Some(c) = resolve_hex(sequence) {
out.push(c);
} else {
// TODO: Feedback that escape sequence is wrong.
out += p.eaten_from(start);
out += s.eaten_from(start);
}
}
// TODO: Feedback about invalid escape sequence.
_ => out += p.eaten_from(start),
_ => out += s.eaten_from(start),
}
}
@@ -69,10 +69,10 @@ pub fn resolve_raw(raw: &str, backticks: usize) -> Raw {
/// Parse the lang tag and return it alongside the remaining inner raw text.
fn split_at_lang_tag(raw: &str) -> (&str, &str) {
let mut p = CharParser::new(raw);
let mut s = Scanner::new(raw);
(
p.eat_until(|c| c == '`' || c.is_whitespace() || is_newline_char(c)),
p.rest(),
s.eat_until(|c| c == '`' || c.is_whitespace() || is_newline_char(c)),
s.rest(),
)
}
@@ -104,11 +104,11 @@ fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
/// Splits a string into a vector of lines (respecting Unicode & Windows line
/// breaks).
pub fn split_lines(text: &str) -> Vec<String> {
let mut p = CharParser::new(text);
let mut s = Scanner::new(text);
let mut line = String::new();
let mut lines = Vec::new();
while let Some(c) = p.eat_merging_crlf() {
while let Some(c) = s.eat_merging_crlf() {
if is_newline_char(c) {
lines.push(std::mem::take(&mut line));
} else {

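Aside: the hunks above only swap the scanner type and variable name, so the escape handling in `resolve_string` is unchanged. A minimal sketch of that behaviour, phrased as a hypothetical test against the match arms shown (not part of this commit):

#[test]
fn resolve_string_sketch() {
    // Simple escapes map directly to their characters.
    assert_eq!(resolve_string(r"a\nb"), "a\nb");
    assert_eq!(resolve_string(r#"say \"hi\""#), "say \"hi\"");
    // `\u{...}` escapes are read as hex digits up to the closing brace.
    assert_eq!(resolve_string(r"\u{1F30D}"), "\u{1F30D}");
    // Unrecognized escapes are copied through verbatim (feedback is still a TODO).
    assert_eq!(resolve_string(r"\z"), r"\z");
}
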
View File

@@ -1,18 +1,18 @@
//! Low-level char parser.
//! Low-level char-based scanner.
use std::fmt::{self, Debug, Formatter};
use std::slice::SliceIndex;
use std::str::Chars;
/// A low-level featureful char parser.
pub struct CharParser<'s> {
/// A low-level featureful char scanner.
pub struct Scanner<'s> {
src: &'s str,
iter: Chars<'s>,
index: usize,
}
impl<'s> CharParser<'s> {
/// Create a new char parser.
impl<'s> Scanner<'s> {
/// Create a new char scanner.
pub fn new(src: &'s str) -> Self {
Self { src, iter: src.chars(), index: 0 }
}
@@ -104,7 +104,7 @@ impl<'s> CharParser<'s> {
}
}
impl<'s> CharParser<'s> {
impl<'s> Scanner<'s> {
/// Slice a part out of the source string.
pub fn get<I>(&self, index: I) -> &'s str
where
@@ -153,9 +153,9 @@ impl<'s> CharParser<'s> {
}
}
impl Debug for CharParser<'_> {
impl Debug for Scanner<'_> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "CharParser({}|{})", self.eaten(), self.rest())
write!(f, "Scanner({}|{})", self.eaten(), self.rest())
}
}

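Aside: beyond the rename, the scanner keeps the interface the rest of this diff leans on (`eat`, `eat_if`, `eat_while`, `eat_until`, `eaten`, `rest`, the index accessors). An illustrative sketch of driving it directly, not taken from the commit:

// Scan a word, then inspect what was consumed and what remains.
let mut s = Scanner::new("hello world");
let word = s.eat_while(|c| c.is_alphanumeric());
assert_eq!(word, "hello");
assert!(s.eat_if(' '));          // consume the separating space
assert_eq!(s.eaten(), "hello "); // everything eaten so far
assert_eq!(s.rest(), "world");   // what is still left to scan
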
View File

@@ -1,6 +1,6 @@
//! Tokenization.
use super::{is_newline_char, CharParser};
use super::{is_newline_char, Scanner};
use crate::length::Length;
use crate::syntax::{Ident, Pos, Span, SpanWith, Spanned, Token};
@@ -9,7 +9,7 @@ use TokenMode::*;
/// An iterator over the tokens of a string of source code.
#[derive(Debug)]
pub struct Tokens<'s> {
p: CharParser<'s>,
s: Scanner<'s>,
mode: TokenMode,
stack: Vec<TokenMode>,
}
@@ -27,7 +27,7 @@ impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
pub fn new(src: &'s str, mode: TokenMode) -> Self {
Self {
p: CharParser::new(src),
s: Scanner::new(src),
mode,
stack: vec![],
}
@@ -48,7 +48,7 @@ impl<'s> Tokens<'s> {
/// The position in the string at which the last token ends and the next
/// token will start.
pub fn pos(&self) -> Pos {
self.p.index().into()
self.s.index().into()
}
}
@@ -57,15 +57,15 @@ impl<'s> Iterator for Tokens<'s> {
/// Parse the next token in the source code.
fn next(&mut self) -> Option<Self::Item> {
let start = self.p.index();
let token = match self.p.eat()? {
let start = self.s.index();
let token = match self.s.eat()? {
// Whitespace.
c if c.is_whitespace() => self.read_whitespace(c),
// Comments.
'/' if self.p.eat_if('/') => self.read_line_comment(),
'/' if self.p.eat_if('*') => self.read_block_comment(),
'*' if self.p.eat_if('/') => Token::Invalid("*/"),
'/' if self.s.eat_if('/') => self.read_line_comment(),
'/' if self.s.eat_if('*') => self.read_block_comment(),
'*' if self.s.eat_if('/') => Token::Invalid("*/"),
// Functions.
'[' => Token::LeftBracket,
@@ -87,7 +87,7 @@ impl<'s> Iterator for Tokens<'s> {
':' if self.mode == Header => Token::Colon,
',' if self.mode == Header => Token::Comma,
'=' if self.mode == Header => Token::Equals,
'>' if self.mode == Header && self.p.eat_if('>') => Token::Chain,
'>' if self.mode == Header && self.s.eat_if('>') => Token::Chain,
// Expressions in headers.
'+' if self.mode == Header => Token::Plus,
@@ -101,7 +101,7 @@ impl<'s> Iterator for Tokens<'s> {
_ => self.read_text_or_expr(start),
};
let end = self.p.index();
let end = self.s.index();
Some(token.span_with(Span::new(start, end)))
}
}
@@ -109,21 +109,21 @@ impl<'s> Iterator for Tokens<'s> {
impl<'s> Tokens<'s> {
fn read_whitespace(&mut self, first: char) -> Token<'s> {
// Shortcut for common case of exactly one space.
if first == ' ' && !self.p.check(|c| c.is_whitespace()) {
if first == ' ' && !self.s.check(|c| c.is_whitespace()) {
return Token::Space(0);
}
// Uneat the first char if it's a newline, so that it's counted in the
// loop.
if is_newline_char(first) {
self.p.uneat();
self.s.uneat();
}
// Count the number of newlines.
let mut newlines = 0;
while let Some(c) = self.p.eat_merging_crlf() {
while let Some(c) = self.s.eat_merging_crlf() {
if !c.is_whitespace() {
self.p.uneat();
self.s.uneat();
break;
}
@@ -136,17 +136,17 @@ impl<'s> Tokens<'s> {
}
fn read_line_comment(&mut self) -> Token<'s> {
Token::LineComment(self.p.eat_until(is_newline_char))
Token::LineComment(self.s.eat_until(is_newline_char))
}
fn read_block_comment(&mut self) -> Token<'s> {
let start = self.p.index();
let start = self.s.index();
let mut state = '_';
let mut depth = 1;
// Find the first `*/` that does not correspond to a nested `/*`.
while let Some(c) = self.p.eat() {
while let Some(c) = self.s.eat() {
state = match (state, c) {
('*', '/') => {
depth -= 1;
@@ -164,21 +164,21 @@ impl<'s> Tokens<'s> {
}
let terminated = depth == 0;
let end = self.p.index() - if terminated { 2 } else { 0 };
let end = self.s.index() - if terminated { 2 } else { 0 };
Token::BlockComment(self.p.get(start .. end))
Token::BlockComment(self.s.get(start .. end))
}
fn read_hex(&mut self) -> Token<'s> {
// This parses more than the permissible 0-9, a-f, A-F character ranges
// to provide nicer error messages later.
Token::Hex(self.p.eat_while(|c| c.is_ascii_alphanumeric()))
Token::Hex(self.s.eat_while(|c| c.is_ascii_alphanumeric()))
}
fn read_string(&mut self) -> Token<'s> {
let mut escaped = false;
Token::Str {
string: self.p.eat_until(|c| {
string: self.s.eat_until(|c| {
if c == '"' && !escaped {
true
} else {
@@ -186,21 +186,21 @@ impl<'s> Tokens<'s> {
false
}
}),
terminated: self.p.eat_if('"'),
terminated: self.s.eat_if('"'),
}
}
fn read_raw(&mut self) -> Token<'s> {
let mut backticks = 1;
while self.p.eat_if('`') {
while self.s.eat_if('`') {
backticks += 1;
}
let start = self.p.index();
let start = self.s.index();
let mut found = 0;
while found < backticks {
match self.p.eat() {
match self.s.eat() {
Some('`') => found += 1,
Some(_) => found = 0,
None => break,
@@ -208,29 +208,29 @@ impl<'s> Tokens<'s> {
}
let terminated = found == backticks;
let end = self.p.index() - if terminated { found } else { 0 };
let end = self.s.index() - if terminated { found } else { 0 };
Token::Raw {
raw: self.p.get(start .. end),
raw: self.s.get(start .. end),
backticks,
terminated,
}
}
fn read_escaped(&mut self) -> Token<'s> {
if let Some(c) = self.p.peek() {
if let Some(c) = self.s.peek() {
match c {
'[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' | '#' | '~' => {
let start = self.p.index();
self.p.eat_assert(c);
Token::Text(&self.p.eaten_from(start))
let start = self.s.index();
self.s.eat_assert(c);
Token::Text(&self.s.eaten_from(start))
}
'u' if self.p.peek_nth(1) == Some('{') => {
self.p.eat_assert('u');
self.p.eat_assert('{');
'u' if self.s.peek_nth(1) == Some('{') => {
self.s.eat_assert('u');
self.s.eat_assert('{');
Token::UnicodeEscape {
sequence: self.p.eat_while(|c| c.is_ascii_hexdigit()),
terminated: self.p.eat_if('}'),
sequence: self.s.eat_while(|c| c.is_ascii_hexdigit()),
terminated: self.s.eat_if('}'),
}
}
c if c.is_whitespace() => Token::Backslash,
@@ -246,7 +246,7 @@ impl<'s> Tokens<'s> {
let header = self.mode == Header;
let mut last_was_e = false;
self.p.eat_until(|c| {
self.s.eat_until(|c| {
let end = match c {
c if c.is_whitespace() => true,
'[' | ']' | '*' | '/' => true,
@@ -259,7 +259,7 @@ impl<'s> Tokens<'s> {
end
});
let read = self.p.eaten_from(start);
let read = self.s.eaten_from(start);
if self.mode == Header {
parse_expr(read)
} else {

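Aside: the tokenizer's public surface is untouched by the rename; `Tokens::new` still takes a source string plus a `TokenMode`, and iteration still yields tokens spanned with byte positions taken from the scanner. A hypothetical walk over a tiny header-mode source (it assumes the spanned token type implements `Debug`, which this diff does not show):

let mut tokens = Tokens::new("x = 1", TokenMode::Header);
while let Some(spanned) = tokens.next() {
    // Each item carries a `Span` built from `self.s.index()` before and
    // after the token was read.
    println!("{:?}", spanned);
}
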
View File

@@ -3,7 +3,7 @@
use std::fmt::{self, Debug, Display, Formatter};
use super::Pos;
use crate::parse::{is_newline_char, CharParser};
use crate::parse::{is_newline_char, Scanner};
/// Enables conversion of byte position to locations.
pub struct LineMap<'s> {
@@ -15,11 +15,11 @@ impl<'s> LineMap<'s> {
/// Create a new line map for a source string.
pub fn new(src: &'s str) -> Self {
let mut line_starts = vec![Pos::ZERO];
let mut p = CharParser::new(src);
let mut s = Scanner::new(src);
while let Some(c) = p.eat_merging_crlf() {
while let Some(c) = s.eat_merging_crlf() {
if is_newline_char(c) {
line_starts.push(p.index().into());
line_starts.push(s.index().into());
}
}
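
Aside: `LineMap::new` records one `Pos` per line start and, through `eat_merging_crlf`, counts a Windows `\r\n` as a single line break. A hedged illustration of the construction only, since the lookup methods fall outside the shown hunk:

// For this input the recorded line starts are the byte offsets 0, 3 and 7:
// "ab", then "cd" after the lone \n, then "ef" after the merged \r\n.
let _map = LineMap::new("ab\ncd\r\nef");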