Rename CharParser to Scanner

Laurenz 2020-10-01 11:05:16 +02:00
parent c0998b4802
commit 16f0bd430e
5 changed files with 68 additions and 68 deletions

View File

@@ -1,11 +1,11 @@
 //! Parsing and tokenization.
 
-mod chars;
 mod resolve;
+mod scanner;
 mod tokens;
 
-pub use chars::*;
 pub use resolve::*;
+pub use scanner::*;
 pub use tokens::*;
 
 use std::str::FromStr;

View File

@@ -1,41 +1,41 @@
 //! Resolve strings and raw blocks.
 
-use super::{is_newline_char, CharParser};
+use super::{is_newline_char, Scanner};
 use crate::syntax::{Ident, Raw};
 
 /// Resolves all escape sequences in a string.
 pub fn resolve_string(string: &str) -> String {
     let mut out = String::with_capacity(string.len());
-    let mut p = CharParser::new(string);
+    let mut s = Scanner::new(string);
 
-    while let Some(c) = p.eat() {
+    while let Some(c) = s.eat() {
         if c != '\\' {
             out.push(c);
             continue;
         }
 
-        let start = p.prev_index();
-        match p.eat() {
+        let start = s.prev_index();
+        match s.eat() {
             Some('\\') => out.push('\\'),
             Some('"') => out.push('"'),
             Some('n') => out.push('\n'),
             Some('t') => out.push('\t'),
-            Some('u') if p.eat_if('{') => {
+            Some('u') if s.eat_if('{') => {
                 // TODO: Feedback if closing brace is missing.
-                let sequence = p.eat_while(|c| c.is_ascii_hexdigit());
-                let _terminated = p.eat_if('}');
+                let sequence = s.eat_while(|c| c.is_ascii_hexdigit());
+                let _terminated = s.eat_if('}');
 
                 if let Some(c) = resolve_hex(sequence) {
                     out.push(c);
                 } else {
                     // TODO: Feedback that escape sequence is wrong.
-                    out += p.eaten_from(start);
+                    out += s.eaten_from(start);
                 }
             }
 
             // TODO: Feedback about invalid escape sequence.
-            _ => out += p.eaten_from(start),
+            _ => out += s.eaten_from(start),
         }
     }
@@ -69,10 +69,10 @@ pub fn resolve_raw(raw: &str, backticks: usize) -> Raw {
 
 /// Parse the lang tag and return it alongside the remaining inner raw text.
 fn split_at_lang_tag(raw: &str) -> (&str, &str) {
-    let mut p = CharParser::new(raw);
+    let mut s = Scanner::new(raw);
     (
-        p.eat_until(|c| c == '`' || c.is_whitespace() || is_newline_char(c)),
-        p.rest(),
+        s.eat_until(|c| c == '`' || c.is_whitespace() || is_newline_char(c)),
+        s.rest(),
     )
 }
@@ -104,11 +104,11 @@ fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
 /// Splits a string into a vector of lines (respecting Unicode & Windows line
 /// breaks).
 pub fn split_lines(text: &str) -> Vec<String> {
-    let mut p = CharParser::new(text);
+    let mut s = Scanner::new(text);
     let mut line = String::new();
     let mut lines = Vec::new();
 
-    while let Some(c) = p.eat_merging_crlf() {
+    while let Some(c) = s.eat_merging_crlf() {
         if is_newline_char(c) {
             lines.push(std::mem::take(&mut line));
         } else {

View File

@@ -1,18 +1,18 @@
-//! Low-level char parser.
+//! Low-level char-based scanner.
 
 use std::fmt::{self, Debug, Formatter};
 use std::slice::SliceIndex;
 use std::str::Chars;
 
-/// A low-level featureful char parser.
-pub struct CharParser<'s> {
+/// A low-level featureful char scanner.
+pub struct Scanner<'s> {
     src: &'s str,
     iter: Chars<'s>,
     index: usize,
 }
 
-impl<'s> CharParser<'s> {
-    /// Create a new char parser.
+impl<'s> Scanner<'s> {
+    /// Create a new char scanner.
     pub fn new(src: &'s str) -> Self {
         Self { src, iter: src.chars(), index: 0 }
     }
@@ -104,7 +104,7 @@ impl<'s> CharParser<'s> {
     }
 }
 
-impl<'s> CharParser<'s> {
+impl<'s> Scanner<'s> {
     /// Slice a part out of the source string.
     pub fn get<I>(&self, index: I) -> &'s str
     where
@@ -153,9 +153,9 @@ impl<'s> CharParser<'s> {
     }
 }
 
-impl Debug for CharParser<'_> {
+impl Debug for Scanner<'_> {
     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        write!(f, "CharParser({}|{})", self.eaten(), self.rest())
+        write!(f, "Scanner({}|{})", self.eaten(), self.rest())
     }
 }
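
To make the renamed API easier to follow, here is a small usage sketch that sticks to the Scanner methods visible in this diff (new, index, eat_if, eat_while, eaten_from); the scan_number helper itself is hypothetical and not part of the commit:

    use crate::parse::Scanner;

    /// Hypothetical example: scan an optionally negated run of ASCII digits,
    /// driving the scanner the same way the tokenizer and resolver do above.
    fn scan_number(src: &str) -> Option<&str> {
        let mut s = Scanner::new(src);
        let start = s.index();

        // Consume a leading minus sign only if one is present.
        s.eat_if('-');

        // `eat_while` returns the slice of characters it consumed.
        if s.eat_while(|c| c.is_ascii_digit()).is_empty() {
            return None;
        }

        // `eaten_from` slices everything consumed since `start`.
        Some(s.eaten_from(start))
    }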

View File

@@ -1,6 +1,6 @@
 //! Tokenization.
 
-use super::{is_newline_char, CharParser};
+use super::{is_newline_char, Scanner};
 use crate::length::Length;
 use crate::syntax::{Ident, Pos, Span, SpanWith, Spanned, Token};
@@ -9,7 +9,7 @@ use TokenMode::*;
 /// An iterator over the tokens of a string of source code.
 #[derive(Debug)]
 pub struct Tokens<'s> {
-    p: CharParser<'s>,
+    s: Scanner<'s>,
     mode: TokenMode,
     stack: Vec<TokenMode>,
 }
@@ -27,7 +27,7 @@ impl<'s> Tokens<'s> {
     /// Create a new token iterator with the given mode.
     pub fn new(src: &'s str, mode: TokenMode) -> Self {
         Self {
-            p: CharParser::new(src),
+            s: Scanner::new(src),
             mode,
             stack: vec![],
         }
@@ -48,7 +48,7 @@ impl<'s> Tokens<'s> {
     /// The position in the string at which the last token ends and next token
     /// will start.
     pub fn pos(&self) -> Pos {
-        self.p.index().into()
+        self.s.index().into()
     }
 }
@@ -57,15 +57,15 @@ impl<'s> Iterator for Tokens<'s> {
     /// Parse the next token in the source code.
     fn next(&mut self) -> Option<Self::Item> {
-        let start = self.p.index();
-        let token = match self.p.eat()? {
+        let start = self.s.index();
+        let token = match self.s.eat()? {
             // Whitespace.
             c if c.is_whitespace() => self.read_whitespace(c),
 
             // Comments.
-            '/' if self.p.eat_if('/') => self.read_line_comment(),
-            '/' if self.p.eat_if('*') => self.read_block_comment(),
-            '*' if self.p.eat_if('/') => Token::Invalid("*/"),
+            '/' if self.s.eat_if('/') => self.read_line_comment(),
+            '/' if self.s.eat_if('*') => self.read_block_comment(),
+            '*' if self.s.eat_if('/') => Token::Invalid("*/"),
 
             // Functions.
             '[' => Token::LeftBracket,
@@ -87,7 +87,7 @@ impl<'s> Iterator for Tokens<'s> {
             ':' if self.mode == Header => Token::Colon,
             ',' if self.mode == Header => Token::Comma,
             '=' if self.mode == Header => Token::Equals,
-            '>' if self.mode == Header && self.p.eat_if('>') => Token::Chain,
+            '>' if self.mode == Header && self.s.eat_if('>') => Token::Chain,
 
             // Expressions in headers.
             '+' if self.mode == Header => Token::Plus,
@@ -101,7 +101,7 @@ impl<'s> Iterator for Tokens<'s> {
             _ => self.read_text_or_expr(start),
         };
 
-        let end = self.p.index();
+        let end = self.s.index();
         Some(token.span_with(Span::new(start, end)))
     }
 }
@@ -109,21 +109,21 @@ impl<'s> Iterator for Tokens<'s> {
 impl<'s> Tokens<'s> {
     fn read_whitespace(&mut self, first: char) -> Token<'s> {
         // Shortcut for common case of exactly one space.
-        if first == ' ' && !self.p.check(|c| c.is_whitespace()) {
+        if first == ' ' && !self.s.check(|c| c.is_whitespace()) {
             return Token::Space(0);
         }
 
         // Uneat the first char if it's a newline, so that it's counted in the
         // loop.
         if is_newline_char(first) {
-            self.p.uneat();
+            self.s.uneat();
        }
 
         // Count the number of newlines.
         let mut newlines = 0;
-        while let Some(c) = self.p.eat_merging_crlf() {
+        while let Some(c) = self.s.eat_merging_crlf() {
             if !c.is_whitespace() {
-                self.p.uneat();
+                self.s.uneat();
                 break;
             }
@@ -136,17 +136,17 @@ impl<'s> Tokens<'s> {
     }
 
     fn read_line_comment(&mut self) -> Token<'s> {
-        Token::LineComment(self.p.eat_until(is_newline_char))
+        Token::LineComment(self.s.eat_until(is_newline_char))
     }
 
     fn read_block_comment(&mut self) -> Token<'s> {
-        let start = self.p.index();
+        let start = self.s.index();
         let mut state = '_';
         let mut depth = 1;
 
         // Find the first `*/` that does not correspond to a nested `/*`.
-        while let Some(c) = self.p.eat() {
+        while let Some(c) = self.s.eat() {
             state = match (state, c) {
                 ('*', '/') => {
                     depth -= 1;
@@ -164,21 +164,21 @@ impl<'s> Tokens<'s> {
         }
 
         let terminated = depth == 0;
-        let end = self.p.index() - if terminated { 2 } else { 0 };
+        let end = self.s.index() - if terminated { 2 } else { 0 };
 
-        Token::BlockComment(self.p.get(start .. end))
+        Token::BlockComment(self.s.get(start .. end))
     }
 
     fn read_hex(&mut self) -> Token<'s> {
         // This parses more than the permissable 0-9, a-f, A-F character ranges
         // to provide nicer error messages later.
-        Token::Hex(self.p.eat_while(|c| c.is_ascii_alphanumeric()))
+        Token::Hex(self.s.eat_while(|c| c.is_ascii_alphanumeric()))
     }
 
     fn read_string(&mut self) -> Token<'s> {
         let mut escaped = false;
         Token::Str {
-            string: self.p.eat_until(|c| {
+            string: self.s.eat_until(|c| {
                 if c == '"' && !escaped {
                     true
                 } else {
@@ -186,21 +186,21 @@ impl<'s> Tokens<'s> {
                     false
                 }
             }),
-            terminated: self.p.eat_if('"'),
+            terminated: self.s.eat_if('"'),
         }
     }
 
     fn read_raw(&mut self) -> Token<'s> {
         let mut backticks = 1;
-        while self.p.eat_if('`') {
+        while self.s.eat_if('`') {
             backticks += 1;
         }
 
-        let start = self.p.index();
+        let start = self.s.index();
         let mut found = 0;
         while found < backticks {
-            match self.p.eat() {
+            match self.s.eat() {
                 Some('`') => found += 1,
                 Some(_) => found = 0,
                 None => break,
@@ -208,29 +208,29 @@ impl<'s> Tokens<'s> {
         }
 
         let terminated = found == backticks;
-        let end = self.p.index() - if terminated { found } else { 0 };
+        let end = self.s.index() - if terminated { found } else { 0 };
 
         Token::Raw {
-            raw: self.p.get(start .. end),
+            raw: self.s.get(start .. end),
             backticks,
             terminated,
         }
     }
 
     fn read_escaped(&mut self) -> Token<'s> {
-        if let Some(c) = self.p.peek() {
+        if let Some(c) = self.s.peek() {
             match c {
                 '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' | '#' | '~' => {
-                    let start = self.p.index();
-                    self.p.eat_assert(c);
-                    Token::Text(&self.p.eaten_from(start))
+                    let start = self.s.index();
+                    self.s.eat_assert(c);
+                    Token::Text(&self.s.eaten_from(start))
                 }
-                'u' if self.p.peek_nth(1) == Some('{') => {
-                    self.p.eat_assert('u');
-                    self.p.eat_assert('{');
+                'u' if self.s.peek_nth(1) == Some('{') => {
+                    self.s.eat_assert('u');
+                    self.s.eat_assert('{');
                     Token::UnicodeEscape {
-                        sequence: self.p.eat_while(|c| c.is_ascii_hexdigit()),
-                        terminated: self.p.eat_if('}'),
+                        sequence: self.s.eat_while(|c| c.is_ascii_hexdigit()),
+                        terminated: self.s.eat_if('}'),
                     }
                 }
                 c if c.is_whitespace() => Token::Backslash,
@@ -246,7 +246,7 @@ impl<'s> Tokens<'s> {
         let header = self.mode == Header;
         let mut last_was_e = false;
 
-        self.p.eat_until(|c| {
+        self.s.eat_until(|c| {
             let end = match c {
                 c if c.is_whitespace() => true,
                 '[' | ']' | '*' | '/' => true,
@@ -259,7 +259,7 @@ impl<'s> Tokens<'s> {
             end
         });
 
-        let read = self.p.eaten_from(start);
+        let read = self.s.eaten_from(start);
         if self.mode == Header {
             parse_expr(read)
         } else {

View File

@@ -3,7 +3,7 @@
 use std::fmt::{self, Debug, Display, Formatter};
 
 use super::Pos;
-use crate::parse::{is_newline_char, CharParser};
+use crate::parse::{is_newline_char, Scanner};
 
 /// Enables conversion of byte position to locations.
 pub struct LineMap<'s> {
@@ -15,11 +15,11 @@ impl<'s> LineMap<'s> {
     /// Create a new line map for a source string.
     pub fn new(src: &'s str) -> Self {
         let mut line_starts = vec![Pos::ZERO];
-        let mut p = CharParser::new(src);
+        let mut s = Scanner::new(src);
 
-        while let Some(c) = p.eat_merging_crlf() {
+        while let Some(c) = s.eat_merging_crlf() {
             if is_newline_char(c) {
-                line_starts.push(p.index().into());
+                line_starts.push(s.index().into());
             }
         }