diff --git a/src/syntax/span.rs b/src/syntax/span.rs index bbb6a2061..9bf7cafbe 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -71,7 +71,7 @@ debug_display!(Span); /// A line-column position in source code. #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct Position { - /// The 1-indexed line (inclusive). + /// The 0-indexed line (inclusive). pub line: usize, /// The 0-indexed column (inclusive). pub column: usize, diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index efcd1fc0a..2e9dd35be 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -102,7 +102,7 @@ impl<'s> Iterator for Tokens<'s> { '*' if second == Some('/') => { self.eat(); StarSlash } // Whitespace. - c if c.is_whitespace() => self.parse_whitespace(c), + c if c.is_whitespace() => self.parse_whitespace(start), // Functions. '[' => { self.set_state(Header); LeftBracket } @@ -196,20 +196,11 @@ impl<'s> Tokens<'s> { }, true, 0, -2)) } - fn parse_whitespace(&mut self, c: char) -> Token<'s> { - let mut newlines = if is_newline_char(c) { 1 } else { 0 }; - let mut last = c; + fn parse_whitespace(&mut self, start: Position) -> Token<'s> { + self.read_string_until(|n| !n.is_whitespace(), false, 0, 0); + let end = self.chars.position(); - self.read_string_until(|n| { - if is_newline_char(n) && !(last == '\r' && n == '\n') { - newlines += 1; - } - - last = n; - !n.is_whitespace() - }, false, 0, 0); - - Whitespace(newlines) + Whitespace(end.line - start.line) } fn parse_string(&mut self) -> Token<'s> { diff --git a/tests/parse.rs b/tests/parse.rs index e00b05d83..14a5b22d3 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -23,12 +23,31 @@ fn BOOL(b: bool) -> Token<'static> { E(Expr::Bool(b)) } /// Parses the test syntax. macro_rules! 
tokens { - ($($src:expr =>($line:expr)=> $tokens:expr)*) => ({ + ($($task:ident $src:expr =>($line:expr)=> [$($target:tt)*])*) => ({ #[allow(unused_mut)] let mut cases = Vec::new(); - $(cases.push(($line, $src, $tokens.to_vec()));)* + $(cases.push(($line, $src, tokens!(@$task [$($target)*])));)* cases }); + + (@t $tokens:expr) => ({ + Target::Tokenized($tokens.to_vec()) + }); + + (@ts [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $t:expr)),* $(,)?]) => ({ + Target::TokenizedSpanned(vec![ + $(Spanned { v: $t, span: Span { + start: Position { line: $sl, column: $sc }, + end: Position { line: $el, column: $ec }, + }}),* + ]) + }); +} + +#[derive(Debug)] +enum Target { + Tokenized(Vec<Token<'static>>), + TokenizedSpanned(Vec<Spanned<Token<'static>>>), } fn main() { @@ -47,11 +66,11 @@ fn main() { let mut failed = 0; // Go through all tests in a test file. - for (line, src, expected) in cases.into_iter() { - let found: Vec<_> = tokenize(src).map(Spanned::value).collect(); + for (line, src, target) in cases.into_iter() { + let (correct, expected, found) = test_case(src, target); // Check whether the tokenization works correctly. - if found == expected { + if correct { okay += 1; } else { if failed == 0 { @@ -82,3 +101,17 @@ fn main() { std::process::exit(-1); } } + +fn test_case(src: &str, target: Target) -> (bool, String, String) { + match target { + Target::Tokenized(tokens) => { + let found: Vec<_> = tokenize(src).map(Spanned::value).collect(); + (found == tokens, format!("{:?}", tokens), format!("{:?}", found)) + } + + Target::TokenizedSpanned(tokens) => { + let found: Vec<_> = tokenize(src).collect(); + (found == tokens, format!("{:?}", tokens), format!("{:?}", found)) + } + } +} diff --git a/tests/parsing/tokens.rs b/tests/parsing/tokens.rs index 4f5474bb2..78d891f97 100644 --- a/tests/parsing/tokens.rs +++ b/tests/parsing/tokens.rs @@ -1,62 +1,74 @@ // Whitespace. 
-"" => [] -" " => [W(0)] -" " => [W(0)] -"\t" => [W(0)] -" \t" => [W(0)] -"\n" => [W(1)] -"\n " => [W(1)] -" \n" => [W(1)] -" \n " => [W(1)] -" \n\t \n " => [W(2)] -"\r\n" => [W(1)] -" \r\r\n \x0D" => [W(3)] -"\n\r" => [W(2)] +t "" => [] +t " " => [W(0)] +t " " => [W(0)] +t "\t" => [W(0)] +t " \t" => [W(0)] +t "\n" => [W(1)] +t "\n " => [W(1)] +t " \n" => [W(1)] +t " \n " => [W(1)] +t " \n\t \n " => [W(2)] +t "\r\n" => [W(1)] +t " \r\r\n \x0D" => [W(3)] +t "\n\r" => [W(2)] // Comments. -"a // bc\n " => [T("a"), W(0), LC(" bc"), W(1)] -"a //a//b\n " => [T("a"), W(0), LC("a//b"), W(1)] -"a //a//b\r\n" => [T("a"), W(0), LC("a//b"), W(1)] -"a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")] -"/**/" => [BC("")] -"_/*_/*a*/*/" => [U, BC("_/*a*/")] -"/*/*/" => [BC("/*/")] -"abc*/" => [T("abc"), SS] +t "a // bc\n " => [T("a"), W(0), LC(" bc"), W(1)] +t "a //a//b\n " => [T("a"), W(0), LC("a//b"), W(1)] +t "a //a//b\r\n" => [T("a"), W(0), LC("a//b"), W(1)] +t "a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")] +t "/**/" => [BC("")] +t "_/*_/*a*/*/" => [U, BC("_/*a*/")] +t "/*/*/" => [BC("/*/")] +t "abc*/" => [T("abc"), SS] // Header only tokens. 
-"[" => [LB] -"]" => [RB] -"[(){}:=,]" => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB] -"[a:b]" => [LB, ID("a"), CL, ID("b"), RB] -"[🌓, 🌍,]" => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB] -"[=]" => [LB, EQ, RB] -"[,]" => [LB, CM, RB] -"a: b" => [T("a"), T(":"), W(0), T("b")] -"c=d, " => [T("c"), T("=d"), T(","), W(0)] -r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB] -r#"["hi", 12pt]"# => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB] -"\"hi\"" => [T("\"hi"), T("\"")] -"[a: true, x=1]" => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0), +t "[" => [LB] +t "]" => [RB] +t "[(){}:=,]" => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB] +t "[a:b]" => [LB, ID("a"), CL, ID("b"), RB] +t "[🌓, 🌍,]" => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB] +t "[=]" => [LB, EQ, RB] +t "[,]" => [LB, CM, RB] +t "a: b" => [T("a"), T(":"), W(0), T("b")] +t "c=d, " => [T("c"), T("=d"), T(","), W(0)] +t r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB] +t r#"["hi", 12pt]"# => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB] +t "\"hi\"" => [T("\"hi"), T("\"")] +t "[a: true, x=1]" => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0), ID("x"), EQ, NUM(1.0), RB] -"[120%]" => [LB, NUM(1.2), RB] +t "[120%]" => [LB, NUM(1.2), RB] // Body only tokens. -"_*`" => [U, ST, B] -"[_*`]" => [LB, T("_"), T("*"), T("`"), RB] -"hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")] +t "_*`" => [U, ST, B] +t "[func]*bold*" => [LB, ID("func"), RB, ST, T("bold"), ST] +t "[_*`]" => [LB, T("_"), T("*"), T("`"), RB] +t "hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")] // Escapes. -r"\[" => [T("[")] -r"\]" => [T("]")] -r"\\" => [T(r"\")] -r"\/" => [T("/")] -r"\*" => [T("*")] -r"\_" => [T("_")] -r"\`" => [T("`")] +t r"\[" => [T("[")] +t r"\]" => [T("]")] +t r"\\" => [T(r"\")] +t r"\/" => [T("/")] +t r"\*" => [T("*")] +t r"\_" => [T("_")] +t r"\`" => [T("`")] // Unescapable special symbols. 
-r"\:" => [T(r"\"), T(":")] -r"\=" => [T(r"\"), T("=")] -r"[\:]" => [LB, T(r"\"), CL, RB] -r"[\=]" => [LB, T(r"\"), EQ, RB] -r"[\,]" => [LB, T(r"\"), CM, RB] +t r"\:" => [T(r"\"), T(":")] +t r"\=" => [T(r"\"), T("=")] +t r"[\:]" => [LB, T(r"\"), CL, RB] +t r"[\=]" => [LB, T(r"\"), EQ, RB] +t r"[\,]" => [LB, T(r"\"), CM, RB] + +// Spans +ts "hello" => [(0:0, 0:5, T("hello"))] +ts "ab\r\nc" => [(0:0, 0:2, T("ab")), (0:2, 1:0, W(1)), (1:0, 1:1, T("c"))] +ts "[a=10]" => [(0:0, 0:1, LB), (0:1, 0:2, ID("a")), (0:2, 0:3, EQ), + (0:3, 0:5, NUM(10.0)), (0:5, 0:6, RB)] +ts r#"[x = "(1)"]*"# => [(0:0, 0:1, LB), (0:1, 0:2, ID("x")), (0:2, 0:3, W(0)), + (0:3, 0:4, EQ), (0:4, 0:5, W(0)), (0:5, 0:10, STR("(1)")), + (0:10, 0:11, RB), (0:11, 0:12, ST)] +ts "// ab\r\n\nf" => [(0:0, 0:5, LC(" ab")), (0:5, 2:0, W(2)), (2:0, 2:1, T("f"))] +ts "/*b*/_" => [(0:0, 0:5, BC("b")), (0:5, 0:6, U)]