From 4e8359385f73e549a563fd356b6858050464991d Mon Sep 17 00:00:00 2001 From: Laurenz Date: Fri, 7 Feb 2020 22:29:16 +0100 Subject: [PATCH] =?UTF-8?q?Improve=20syntax=20testing=20framework=20?= =?UTF-8?q?=E2=99=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... and finally expand a few escape sequences in strings. --- src/syntax/mod.rs | 7 +- src/syntax/parsing.rs | 186 ++++++++++++++++++++---------------------- src/syntax/test.rs | 86 +++++++++++++++++-- src/syntax/tokens.rs | 116 ++++++++++++++------------ 4 files changed, 237 insertions(+), 158 deletions(-) diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 620a929eb..f640f84e2 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -9,6 +9,10 @@ use crate::{Pass, Feedback}; use crate::layout::{LayoutContext, Commands, Command}; use self::span::{Spanned, SpanVec}; +#[cfg(test)] +#[macro_use] +mod test; + pub mod expr; pub mod func; pub mod span; @@ -16,9 +20,6 @@ pub_use_mod!(scope); pub_use_mod!(parsing); pub_use_mod!(tokens); -#[cfg(test)] -mod test; - /// Represents a parsed piece of source that can be layouted and in the future /// also be queried for information used for refactorings, autocomplete, etc. diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs index d24985a6d..57a24e610 100644 --- a/src/syntax/parsing.rs +++ b/src/syntax/parsing.rs @@ -246,7 +246,7 @@ impl<'s> FuncParser<'s> { } self.eat(); - spanned(Expr::Str(string.to_string())) + spanned(Expr::Str(unescape(string))) } Token::ExprNumber(n) => { self.eat(); spanned(Expr::Number(n)) } Token::ExprSize(s) => { self.eat(); spanned(Expr::Size(s)) } @@ -363,130 +363,108 @@ impl<'s> FuncParser<'s> { } } +/// Unescape a string. +fn unescape(string: &str) -> String { + let mut s = String::with_capacity(string.len()); + let mut escaped = false; + + for c in string.chars() { + if c == '\\' { + if escaped { + s.push('\\'); + } + escaped = !escaped; + } else { + if escaped { + match c { + '"' => s.push('"'), + 'n' => s.push('\n'), + 't' => s.push('\t'), + c => { s.push('\\'); s.push(c); } + } + } else { + s.push(c); + } + + escaped = false; + } + } + + s +} + #[cfg(test)] #[allow(non_snake_case)] mod tests { use crate::size::Size; - use super::super::test::{DebugFn, SpanlessEq}; + use super::super::test::{DebugFn, check, zspan}; use super::*; + use Decoration::*; use Node::{ Space as S, Newline as N, ToggleItalic as Italic, ToggleBolder as Bold, ToggleMonospace as Mono, }; - use Decoration::*; - - pub use Expr::{Number as Num, Bool}; - pub fn Id(text: &str) -> Expr { Expr::Ident(Ident(text.to_string())) } - pub fn Str(text: &str) -> Expr { Expr::Str(text.to_string()) } + use Expr::{/*Number as Num,*/ Bool}; + fn Id(text: &str) -> Expr { Expr::Ident(Ident(text.to_string())) } + fn Str(text: &str) -> Expr { Expr::Str(text.to_string()) } fn T(text: &str) -> Node { Node::Text(text.to_string()) } + /// Test whether the given string parses into the given transform pass. + macro_rules! test { + ($source:expr => [$($model:tt)*], $transform:expr) => { + let (exp, cmp) = spanned![vec $($model)*]; + + let mut scope = Scope::new::(); + scope.add::("f"); + scope.add::("n"); + scope.add::("box"); + let ctx = ParseContext { scope: &scope }; + + let found = parse(Position::ZERO, $source, ctx); + let (exp, found) = $transform(exp, found); + + check($source, exp, found, cmp); + }; + } + /// Test whether the given string parses into the given node list. macro_rules! p { - ($s:expr => [$($b:tt)*]) => { - let ctx = ParseContext { scope: &scope() }; - let model = parse(Position::ZERO, $s, ctx).output; - let (expected, cmp) = model!([$($b)*]); - - if !cmp(&model, &expected) { - fail($s, model, expected); - } + ($($tts:tt)*) => { + test!($($tts)*, |exp, found: Pass| (exp, found.output.nodes)); }; } /// Test whether the given string yields the given parse errors. macro_rules! e { - ($s:expr => [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $e:expr)),* $(,)?]) => { - let ctx = ParseContext { scope: &scope() }; - let errors = parse(Position::ZERO, $s, ctx).feedback - .errors - .into_iter() - .map(|s| s.map(|e| e.message)) - .collect::>(); - - let expected = vec![ - $(Spanned { - v: $e.to_string(), - span: Span { - start: Position { line: $sl, column: $sc }, - end: Position { line: $el, column: $ec }, - }, - }),* - ]; - - if errors != expected { - fail($s, errors, expected); - } + ($($tts:tt)*) => { + test!($($tts)*, |exp: Vec>, found: Pass| ( + exp.into_iter().map(|s| s.map(|e| e.to_string())).collect::>(), + found.feedback.errors.into_iter().map(|s| s.map(|e| e.message)) + .collect::>() + )); }; } /// Test whether the given string yields the given decorations. macro_rules! d { - ($s:expr => [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $d:expr)),* $(,)?]) => { - let ctx = ParseContext { scope: &scope() }; - let decos = parse(Position::ZERO, $s, ctx).feedback.decos; - - let expected = vec![ - $(Spanned { - v: $d, - span: Span { - start: Position { line: $sl, column: $sc }, - end: Position { line: $el, column: $ec }, - }, - }),* - ]; - - if decos != expected { - fail($s, decos, expected); - } + ($($tts:tt)*) => { + test!($($tts)*, |exp, found: Pass| (exp, found.feedback.decos)); }; } - fn scope() -> Scope { - let mut scope = Scope::new::(); - scope.add::("f"); - scope.add::("n"); - scope.add::("box"); - scope - } - - fn fail(src: &str, found: impl Debug, expected: impl Debug) { - eprintln!("source: {:?}", src); - eprintln!("found: {:#?}", found); - eprintln!("expected: {:#?}", expected); - panic!("test failed"); - } - - /// Parse a list of optionally spanned nodes into a syntax model. - macro_rules! model { - ([$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $n:expr)),* $(,)?]) => ((SyntaxModel { - nodes: vec![ - $(Spanned { v: $n, span: Span { - start: Position { line: $sl, column: $sc }, - end: Position { line: $el, column: $ec }, - }}),* - ] - }, ::eq)); - - ([$($e:tt)*]) => ((SyntaxModel { - nodes: vec![$($e)*].into_iter().map(zspan).collect::>() - }, ::spanless_eq)); - } - - /// Build a `DebugFn` function model. + /// Write down a `DebugFn` function model compactly. macro_rules! func { ($name:expr $(,pos: [$($item:expr),* $(,)?])? $(,key: [$($key:expr => $value:expr),* $(,)?])?; $($b:tt)*) => ({ - #![allow(unused_mut, unused_assignments)] - - let mut pos = Tuple::new(); - let mut key = Object::new(); - $(pos = Tuple { items: vec![$(zspan($item)),*] };)? - $(key = Object { + #[allow(unused_mut)] + let mut args = FuncArgs::new(); + $(args.pos = Tuple { items: spanned![vec $($item),*].0 };)? + $(args.key = Object { pairs: vec![$(Pair { key: zspan(Ident($key.to_string())), value: zspan($value), @@ -496,22 +474,32 @@ mod tests { Node::Model(Box::new(DebugFn { header: FuncHeader { name: zspan(Ident($name.to_string())), - args: FuncArgs { - pos, - key, - }, + args, }, body: func!(@body $($b)*), })) }); - (@body Some([$($b:tt)*])) => (Some(model!([$($b)*]).0)); + (@body Some([$($body:tt)*])) => ({ + Some(SyntaxModel { nodes: spanned![vec $($body)*].0 }) + }); + (@body None) => (None); } - /// Span an element with a zero span. - fn zspan(v: T) -> Spanned { - Spanned { v, span: Span::ZERO } + #[test] + fn unescape_strings() { + fn test(string: &str, expected: &str) { + assert_eq!(unescape(string), expected.to_string()); + } + + test(r#"hello world"#, "hello world"); + test(r#"hello\nworld"#, "hello\nworld"); + test(r#"a\"bc"#, "a\"bc"); + test(r#"a\\"#, "a\\"); + test(r#"a\\\nbc"#, "a\\\nbc"); + test(r#"a\tbc"#, "a\tbc"); + test("🌎", "🌎"); } #[test] diff --git a/src/syntax/test.rs b/src/syntax/test.rs index df3547681..6c89b4f52 100644 --- a/src/syntax/test.rs +++ b/src/syntax/test.rs @@ -1,8 +1,62 @@ +use std::fmt::Debug; + use super::func::FuncHeader; use super::expr::{Expr, Tuple, Object}; +use super::span::{Span, Spanned}; +use super::tokens::Token; use super::*; +/// Check whether the expected and found results for the given source code +/// match by the comparison function, and print them out otherwise. +pub fn check(src: &str, exp: T, found: T, spans: bool) +where T: Debug + PartialEq + SpanlessEq { + let cmp = if spans { PartialEq::eq } else { SpanlessEq::spanless_eq }; + if !cmp(&exp, &found) { + println!("source: {:?}", src); + println!("expected: {:#?}", exp); + println!("found: {:#?}", found); + panic!("test failed"); + } +} + +/// Create a vector of optionally spanned expressions from a list description. +/// +/// # Examples +/// When you want to add span information to the items, the format is as +/// follows. +/// ``` +/// spanned![(0:0, 0:5, "hello"), (0:5, 0:3, "world")] +/// ``` +/// The span information can simply be omitted to create a vector with items +/// that are spanned with dummy zero spans. +macro_rules! spanned { + (item ($sl:tt:$sc:tt, $el:tt:$ec:tt, $v:expr)) => ({ + #[allow(unused_imports)] + use $crate::syntax::span::{Position, Span, Spanned}; + Spanned { + span: Span::new( + Position::new($sl, $sc), + Position::new($el, $ec) + ), + v: $v + } + }); + + (vec $(($sl:tt:$sc:tt, $el:tt:$ec:tt, $v:expr)),* $(,)?) => { + (vec![$(spanned![item ($sl:$sc, $el:$ec, $v)]),*], true) + }; + + (vec $($v:expr),* $(,)?) => { + (vec![$($crate::syntax::test::zspan($v)),*], false) + }; +} + +/// Span an element with a zero span. +pub fn zspan(v: T) -> Spanned { + Spanned { v, span: Span::ZERO } +} + function! { /// Most functions in the tests are parsed into the debug function for easy /// inspection of arguments and body. @@ -30,26 +84,31 @@ pub trait SpanlessEq { fn spanless_eq(&self, other: &Rhs) -> bool; } -impl SpanlessEq for Vec>> { - fn spanless_eq(&self, other: &Vec>) -> bool { +impl SpanlessEq for Vec> { + fn spanless_eq(&self, other: &Vec>) -> bool { self.len() == other.len() - && self.iter().zip(other).all(|(x, y)| x.v == y.v) + && self.iter().zip(other).all(|(x, y)| x.v.spanless_eq(&y.v)) } } impl SpanlessEq for SyntaxModel { fn spanless_eq(&self, other: &SyntaxModel) -> bool { + self.nodes.spanless_eq(&other.nodes) + } +} + +impl SpanlessEq for Node { + fn spanless_eq(&self, other: &Node) -> bool { fn downcast<'a>(func: &'a (dyn Model + 'static)) -> &'a DebugFn { func.downcast::().expect("not a debug fn") } - self.nodes.len() == other.nodes.len() - && self.nodes.iter().zip(&other.nodes).all(|(x, y)| match (&x.v, &y.v) { + match (self, other) { (Node::Model(a), Node::Model(b)) => { downcast(a.as_ref()).spanless_eq(downcast(b.as_ref())) } (a, b) => a == b, - }) + } } } @@ -86,3 +145,18 @@ impl SpanlessEq for Object { .all(|(x, y)| x.key.v == y.key.v && x.value.v.spanless_eq(&y.value.v)) } } + +/// Implement `SpanlessEq` by just forwarding to `PartialEq`. +macro_rules! forward { + ($type:ty) => { + impl SpanlessEq for $type { + fn spanless_eq(&self, other: &$type) -> bool { + self == other + } + } + }; +} + +forward!(String); +forward!(Token<'_>); +forward!(Decoration); diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index f4ea5daf5..41acb94fe 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -65,6 +65,10 @@ pub enum Token<'s> { /// A quoted string in a function header: `"..."`. ExprStr { /// The string inside the quotes. + /// + /// _Note_: If the string contains escape sequences these are not yet + /// applied to be able to just store a string slice here instead of + /// a String. The escaping is done later in the parser. string: &'s str, /// Whether the closing quote was present. terminated: bool @@ -210,11 +214,13 @@ impl<'s> Iterator for Tokens<'s> { // Expressions or just strings. c => { + let body = self.mode == Body; let text = self.read_string_until(|n| { match n { c if c.is_whitespace() => true, - '\\' | '[' | ']' | '*' | '_' | '`' | ':' | '=' | - ',' | '"' | '/' => true, + '\\' | '[' | ']' | '/' => true, + '*' | '_' | '`' if body => true, + ':' | '=' | ',' | '"' if !body => true, _ => false, } }, false, -(c.len_utf8() as isize), 0).0; @@ -441,18 +447,19 @@ pub fn is_newline_char(character: char) -> bool { /// Whether this word is a valid identifier. pub fn is_identifier(string: &str) -> bool { - let mut chars = string.chars(); + fn is_extra_allowed(c: char) -> bool { + c == '.' || c == '-' || c == '_' + } + let mut chars = string.chars(); match chars.next() { - Some('-') => {} - Some(c) if UnicodeXID::is_xid_start(c) => {} + Some(c) if UnicodeXID::is_xid_start(c) || is_extra_allowed(c) => {} _ => return false, } while let Some(c) = chars.next() { match c { - '.' | '-' => {} - c if UnicodeXID::is_xid_continue(c) => {} + c if UnicodeXID::is_xid_continue(c) || is_extra_allowed(c) => {} _ => return false, } } @@ -460,11 +467,10 @@ pub fn is_identifier(string: &str) -> bool { true } - #[cfg(test)] mod tests { + use super::super::test::check; use super::*; - use Token::{ Space as S, LineComment as LC, BlockComment as BC, @@ -481,32 +487,19 @@ mod tests { /// Test whether the given string tokenizes into the given list of tokens. macro_rules! t { - ($m:expr, $s:expr => [$(($sl:tt:$sc:tt, $el:tt:$ec:tt, $t:expr)),* $(,)?]) => { - let tokens = Tokens::new(Position::ZERO, $s, $m).collect::>(); - assert_eq!(tokens, vec![$(Spanned { - span: Span::new(Position::new($sl, $sc), Position::new($el, $ec)), - v: $t - }),*]); - - }; - - ($m:expr, $s:expr => [$($t:expr),* $(,)?]) => { - let tokens = Tokens::new(Position::ZERO, $s, $m) - .map(Spanned::value) - .collect::>(); - assert_eq!(tokens, vec![$($t),*]); - }; + ($mode:expr, $source:expr => [$($tokens:tt)*]) => { + let (exp, spans) = spanned![vec $($tokens)*]; + let found = Tokens::new(Position::ZERO, $source, $mode).collect::>(); + check($source, exp, found, spans); + } } - /// Parse a function token. + /// Write down a function token compactly. macro_rules! func { - ($header:expr, Some(($sl:tt:$sc:tt, $el:tt:$ec:tt, $body:expr)), $terminated:expr) => { + ($header:expr, Some($($tokens:tt)*), $terminated:expr) => { Function { header: $header, - body: Some(Spanned { - span: Span::new(Position::new($sl, $sc), Position::new($el, $ec)), - v: $body, - }), + body: Some(spanned![item $($tokens)*]), terminated: $terminated, } }; @@ -542,40 +535,63 @@ mod tests { t!(Body, "_/*_/*a*/*/" => [Underscore, BC("_/*a*/")]); t!(Body, "/*/*/" => [BC("/*/")]); t!(Body, "abc*/" => [T("abc"), Invalid("*/")]); - } - - #[test] - fn tokenize_header_only_tokens() { - t!(Body, "\"hi\"" => [T("\"hi"), T("\"")]); - t!(Body, "a: b" => [T("a"), T(":"), S(0), T("b")]); - t!(Body, "c=d, " => [T("c"), T("=d"), T(","), S(0)]); - t!(Header, "[" => [func!("", None, false)]); - t!(Header, "]" => [Invalid("]")]); - t!(Header, "(){}:=," => [LP, RP, LB, RB, Colon, Equals, Comma]); - t!(Header, "a:b" => [Id("a"), Colon, Id("b")]); - t!(Header, "=" => [Equals]); - t!(Header, "," => [Comma]); - t!(Header, r#""hello\"world""# => [Str(r#"hello\"world"#, true)]); - t!(Header, r#""hi", 12pt"# => [Str("hi", true), Comma, S(0), ExprSize(Size::pt(12.0))]); - t!(Header, "a: true, x=1" => [Id("a"), Colon, S(0), Bool(true), Comma, S(0), Id("x"), Equals, Num(1.0)]); - t!(Header, "120%" => [Num(1.2)]); - t!(Header, "🌓, 🌍," => [Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma]); + t!(Body, "/***/" => [BC("*")]); + t!(Body, "/**\\****/*/*/" => [BC("*\\***"), Invalid("*/"), Invalid("*/")]); + t!(Body, "/*abc" => [BC("abc")]); } #[test] fn tokenize_body_only_tokens() { t!(Body, "_*`" => [Underscore, Star, Backtick]); + t!(Body, "***" => [Star, Star, Star]); t!(Body, "[func]*bold*" => [func!("func", None, true), Star, T("bold"), Star]); t!(Body, "hi_you_ there" => [T("hi"), Underscore, T("you"), Underscore, S(0), T("there")]); - t!(Header, "_*`" => [Invalid("_"), Invalid("*"), Invalid("`")]); + t!(Header, "_*`" => [Invalid("_*`")]); } #[test] - fn tokenize_nested_functions() { + fn tokenize_header_only_tokens() { + t!(Body, "a: b" => [T("a:"), S(0), T("b")]); + t!(Body, "c=d, " => [T("c=d,"), S(0)]); + t!(Header, "(){}:=," => [LP, RP, LB, RB, Colon, Equals, Comma]); + t!(Header, "a:b" => [Id("a"), Colon, Id("b")]); + t!(Header, "a: true, x=1" => [Id("a"), Colon, S(0), Bool(true), Comma, S(0), Id("x"), Equals, Num(1.0)]); + t!(Header, "=3.14" => [Equals, Num(3.14)]); + t!(Header, "12.3e5" => [Num(12.3e5)]); + t!(Header, "120%" => [Num(1.2)]); + t!(Header, "12e4%" => [Num(1200.0)]); + t!(Header, "__main__" => [Id("__main__")]); + t!(Header, ".func.box" => [Id(".func.box")]); + t!(Header, "--arg, _b, _1" => [Id("--arg"), Comma, S(0), Id("_b"), Comma, S(0), Id("_1")]); + t!(Header, "12_pt, 12pt" => [Invalid("12_pt"), Comma, S(0), ExprSize(Size::pt(12.0))]); + t!(Header, "1e5in" => [ExprSize(Size::inches(100000.0))]); + t!(Header, "2.3cm" => [ExprSize(Size::cm(2.3))]); + t!(Header, "02.4mm" => [ExprSize(Size::mm(2.4))]); + t!(Header, "2.4.cm" => [Invalid("2.4.cm")]); + t!(Header, "🌓, 🌍," => [Invalid("🌓"), Comma, S(0), Invalid("🌍"), Comma]); + } + + #[test] + fn tokenize_strings() { + t!(Body, "a \"hi\" string" => [T("a"), S(0), T("\"hi\""), S(0), T("string")]); + t!(Header, "\"hello" => [Str("hello", false)]); + t!(Header, "\"hello world\"" => [Str("hello world", true)]); + t!(Header, "\"hello\nworld\"" => [Str("hello\nworld", true)]); + t!(Header, r#"1"hello\nworld"false"# => [Num(1.0), Str("hello\\nworld", true), Bool(false)]); + t!(Header, r#""a\"bc""# => [Str(r#"a\"bc"#, true)]); + t!(Header, r#""a\\"bc""# => [Str(r#"a\\"#, true), Id("bc"), Str("", false)]); + t!(Header, r#""a\tbc"# => [Str("a\\tbc", false)]); + t!(Header, "\"🌎\"" => [Str("🌎", true)]); + } + + #[test] + fn tokenize_functions() { t!(Body, "[f: [=][*]]" => [func!("f: [=][*]", None, true)]); t!(Body, "[_][[,],]," => [func!("_", Some((0:3, 0:9, "[,],")), true), T(",")]); t!(Body, "[=][=][=]" => [func!("=", Some((0:3, 0:6, "=")), true), func!("=", None, true)]); t!(Body, "[=][[=][=][=]]" => [func!("=", Some((0:3, 0:14, "[=][=][=]")), true)]); + t!(Header, "[" => [func!("", None, false)]); + t!(Header, "]" => [Invalid("]")]); } #[test]