From 7cc279f7ae122f4c40592004dde89792c636b3c8 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 30 Sep 2020 17:25:09 +0200 Subject: [PATCH] =?UTF-8?q?Replace=20line/column=20with=20byte=20positions?= =?UTF-8?q?=20=F0=9F=94=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/main.rs | 41 +++++++----- src/diagnostic.rs | 11 ++++ src/parse/tests.rs | 144 ++++++++++++++++++++--------------------- src/parse/tokenizer.rs | 94 +++++++++++++-------------- src/syntax/lines.rs | 114 ++++++++++++++++++++++++++++++++ src/syntax/mod.rs | 2 + src/syntax/span.rs | 86 +++++++----------------- tests/test_typeset.rs | 43 ++++++------ 8 files changed, 319 insertions(+), 216 deletions(-) create mode 100644 src/syntax/lines.rs diff --git a/main/main.rs b/main/main.rs index d3ad641bb..5b3538d59 100644 --- a/main/main.rs +++ b/main/main.rs @@ -9,7 +9,8 @@ use futures_executor::block_on; use typstc::export::pdf; use typstc::font::FontLoader; -use typstc::Typesetter; +use typstc::syntax::LineMap; +use typstc::{Feedback, Pass, Typesetter}; fn main() { let args: Vec<_> = std::env::args().collect(); @@ -41,23 +42,29 @@ fn main() { let loader = Rc::new(RefCell::new(loader)); let typesetter = Typesetter::new(loader.clone()); - let pass = block_on(typesetter.typeset(&src)); - let layouts = pass.output; + let Pass { + output: layouts, + feedback: Feedback { mut diagnostics, .. }, + } = block_on(typesetter.typeset(&src)); - let mut feedback = pass.feedback; - feedback.diagnostics.sort(); - for diagnostic in feedback.diagnostics { - let span = diagnostic.span; - println!( - "{}: {}:{}:{} - {}:{}: {}", - format!("{:?}", diagnostic.v.level).to_lowercase(), - src_path.display(), - span.start.line + 1, - span.start.column + 1, - span.end.line + 1, - span.end.column + 1, - diagnostic.v.message, - ); + if !diagnostics.is_empty() { + diagnostics.sort(); + + let map = LineMap::new(&src); + for diagnostic in diagnostics { + let span = diagnostic.span; + let start = map.location(span.start); + let end = map.location(span.end); + + println!( + " {}: {}:{}-{}: {}", + diagnostic.v.level, + src_path.display(), + start, + end, + diagnostic.v.message, + ); + } } let loader = loader.borrow(); diff --git a/src/diagnostic.rs b/src/diagnostic.rs index 1bf9f42f7..2548e1d5b 100644 --- a/src/diagnostic.rs +++ b/src/diagnostic.rs @@ -4,6 +4,8 @@ //! layout on a best effort process, generating diagnostics for incorrect //! things. +use std::fmt::{self, Display, Formatter}; + /// A diagnostic that arose in parsing or layouting. #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)] #[cfg_attr(feature = "serialize", derive(serde::Serialize))] @@ -30,6 +32,15 @@ impl Diagnostic { } } +impl Display for Level { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.pad(match self { + Self::Warning => "warning", + Self::Error => "error", + }) + } +} + /// Construct a diagnostic with `Error` level. /// /// ``` diff --git a/src/parse/tests.rs b/src/parse/tests.rs index 209717f9e..d663aa2a4 100644 --- a/src/parse/tests.rs +++ b/src/parse/tests.rs @@ -186,8 +186,8 @@ where } } -pub fn s(sl: usize, sc: usize, el: usize, ec: usize, v: T) -> Spanned { - v.span_with(Span::new(Pos::new(sl, sc), Pos::new(el, ec))) +pub fn s(start: u32, end: u32, v: T) -> Spanned { + v.span_with(Span::new(start, end)) } // Enables tests to optionally specify spans. @@ -201,11 +201,11 @@ impl From for Spanned { #[test] fn test_parse_groups() { - e!("[)" => s(0,1, 0,2, "expected function name, found closing paren"), - s(0,2, 0,2, "expected closing bracket")); + e!("[)" => s(1, 2, "expected function name, found closing paren"), + s(2, 2, "expected closing bracket")); - e!("[v:{]}" => s(0,4, 0,4, "expected closing brace"), - s(0,5, 0,6, "unexpected closing brace")); + e!("[v:{]}" => s(4, 4, "expected closing brace"), + s(5, 6, "unexpected closing brace")); } #[test] @@ -222,24 +222,24 @@ fn test_parse_simple_nodes() { t!(r"a\ b" => T("a"), L, S, T("b")); t!("`py`" => R!["py"]); t!("`hi\nyou" => R!["hi", "you"]); - e!("`hi\nyou" => s(1,3, 1,3, "expected backtick")); + e!("`hi\nyou" => s(7, 7, "expected backtick")); t!("`hi\\`du`" => R!["hi`du"]); - ts!("```java out```" => s(0,0, 0,14, C![Lang(s(0,3, 0,7, "java")), "out"])); + ts!("```java out```" => s(0, 14, C![Lang(s(3, 7, "java")), "out"])); t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]); t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![ Lang("typst"), " Typst uses ``` to indicate code blocks" ]); - e!("``` hi\nyou" => s(1,3, 1,3, "expected backticks")); - e!("```🌍 hi\nyou```" => s(0,3, 0,4, "invalid identifier")); - e!("\\u{d421c809}" => s(0,0, 0,12, "invalid unicode escape sequence")); - e!("\\u{abc" => s(0,6, 0,6, "expected closing brace")); + e!("``` hi\nyou" => s(10, 10, "expected backticks")); + e!("```🌍 hi\nyou```" => s(3, 7, "invalid identifier")); + e!("\\u{d421c809}" => s(0, 12, "invalid unicode escape sequence")); + e!("\\u{abc" => s(6, 6, "expected closing brace")); t!("πŸ’œ\n\n 🌍" => T("πŸ’œ"), P, T("🌍")); - ts!("hi" => s(0,0, 0,2, T("hi"))); - ts!("*Hi*" => s(0,0, 0,1, B), s(0,1, 0,3, T("Hi")), s(0,3, 0,4, B)); - ts!("πŸ’œ\n\n 🌍" => s(0,0, 0,1, T("πŸ’œ")), s(0,1, 2,1, P), s(2,1, 2,2, T("🌍"))); + ts!("hi" => s(0, 2, T("hi"))); + ts!("*Hi*" => s(0, 1, B), s(1, 3, T("Hi")), s(3, 4, B)); + ts!("πŸ’œ\n\n 🌍" => s(0, 4, T("πŸ’œ")), s(4, 7, P), s(7, 11, T("🌍"))); } #[test] @@ -249,7 +249,7 @@ fn test_parse_comments() { t!("first//\n//\nsecond" => T("first"), S, S, T("second")); t!("first//\n \nsecond" => T("first"), P, T("second")); t!("first/*\n \n*/second" => T("first"), T("second")); - e!("🌎\n*/n" => s(1,0, 1,2, "unexpected end of block comment")); + e!("🌎\n*/n" => s(5, 7, "unexpected end of block comment")); // In header. t!("[val:/*12pt*/]" => F!("val")); @@ -284,28 +284,28 @@ fn test_parse_headings() { t!("[box][\n] # hi" => F!("box"; Tree![S]), S, T("#"), S, T("hi")); // Depth warnings. - e!("########" => s(0,0, 0,8, "section depth larger than 6 has no effect")); + e!("########" => s(0, 8, "section depth larger than 6 has no effect")); } #[test] fn test_parse_function_names() { // No closing bracket. t!("[" => F!("")); - e!("[" => s(0,1, 0,1, "expected function name"), - s(0,1, 0,1, "expected closing bracket")); + e!("[" => s(1, 1, "expected function name"), + s(1, 1, "expected closing bracket")); // No name. - e!("[]" => s(0,1, 0,1, "expected function name")); - e!("[\"]" => s(0,1, 0,3, "expected function name, found string"), - s(0,3, 0,3, "expected closing bracket")); + e!("[]" => s(1, 1, "expected function name")); + e!("[\"]" => s(1, 3, "expected function name, found string"), + s(3, 3, "expected closing bracket")); // A valid name. t!("[hi]" => F!("hi")); t!("[ f]" => F!("f")); // An invalid name. - e!("[12]" => s(0,1, 0,3, "expected function name, found number")); - e!("[ 🌎]" => s(0,3, 0,4, "expected function name, found invalid token")); + e!("[12]" => s(1, 3, "expected function name, found number")); + e!("[ 🌎]" => s(3, 7, "expected function name, found invalid token")); } #[test] @@ -321,8 +321,8 @@ fn test_parse_chaining() { ])); // Errors for unclosed / empty predecessor groups - e!("[hi: (5.0, 2.1 >> you]" => s(0, 15, 0, 15, "expected closing paren")); - e!("[>> abc]" => s(0, 1, 0, 1, "expected function name")); + e!("[hi: (5.0, 2.1 >> you]" => s(15, 15, "expected closing paren")); + e!("[>> abc]" => s(1, 1, "expected function name")); } #[test] @@ -332,34 +332,34 @@ fn test_parse_colon_starting_func_args() { // Wrong token. t!("[val=]" => F!("val")); - e!("[val=]" => s(0,4, 0,4, "expected colon")); - e!("[val/🌎:$]" => s(0,4, 0,4, "expected colon")); + e!("[val=]" => s(4, 4, "expected colon")); + e!("[val/🌎:$]" => s(4, 4, "expected colon")); // String in invalid header without colon still parsed as string // Note: No "expected quote" error because not even the string was // expected. - e!("[val/\"]" => s(0,4, 0,4, "expected colon"), - s(0,7, 0,7, "expected closing bracket")); + e!("[val/\"]" => s(4, 4, "expected colon"), + s(7, 7, "expected closing bracket")); } #[test] fn test_parse_function_bodies() { t!("[val: 1][*Hi*]" => F!("val"; Num(1.0), Tree![B, T("Hi"), B])); - e!(" [val][ */ ]" => s(0,8, 0,10, "unexpected end of block comment")); + e!(" [val][ */]" => s(8, 10, "unexpected end of block comment")); // Raw in body. t!("[val][`Hi]`" => F!("val"; Tree![R!["Hi]"]])); - e!("[val][`Hi]`" => s(0,11, 0,11, "expected closing bracket")); + e!("[val][`Hi]`" => s(11, 11, "expected closing bracket")); // Crazy. t!("[v][[v][v][v]]" => F!("v"; Tree![F!("v"; Tree![T("v")]), F!("v")])); // Spanned. ts!(" [box][Oh my]" => - s(0,0, 0,1, S), - s(0,1, 0,13, F!(s(0,2, 0,5, "box"); - s(0,6, 0,13, Tree![ - s(0,7, 0,9, T("Oh")), s(0,9, 0,10, S), s(0,10, 0,12, T("my")) + s(0, 1, S), + s(1, 13, F!(s(2, 5, "box"); + s(6, 13, Tree![ + s(7, 9, T("Oh")), s(9, 10, S), s(10, 12, T("my")), ]) )) ); @@ -390,18 +390,18 @@ fn test_parse_values() { // Healed colors. v!("#12345" => Color(RgbaColor::new_healed(0, 0, 0, 0xff))); - e!("[val: #12345]" => s(0,6, 0,12, "invalid color")); - e!("[val: #a5]" => s(0,6, 0,9, "invalid color")); - e!("[val: #14b2ah]" => s(0,6, 0,13, "invalid color")); - e!("[val: #f075ff011]" => s(0,6, 0,16, "invalid color")); + e!("[val: #12345]" => s(6, 12, "invalid color")); + e!("[val: #a5]" => s(6, 9, "invalid color")); + e!("[val: #14b2ah]" => s(6, 13, "invalid color")); + e!("[val: #f075ff011]" => s(6, 16, "invalid color")); // Unclosed string. v!("\"hello" => Str("hello]")); - e!("[val: \"hello]" => s(0,13, 0,13, "expected quote"), - s(0,13, 0,13, "expected closing bracket")); + e!("[val: \"hello]" => s(13, 13, "expected quote"), + s(13, 13, "expected closing bracket")); // Spanned. - ts!("[val: 1.4]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.4))))); + ts!("[val: 1.4]" => s(0, 10, F!(s(1, 4, "val"); s(6, 9, Num(1.4))))); } #[test] @@ -431,23 +431,20 @@ fn test_parse_expressions() { v!("3/4*5" => Mul(Div(Num(3.0), Num(4.0)), Num(5.0))); // Spanned. - ts!("[val: 1 + 3]" => s(0,0, 0,12, F!( - s(0,1, 0,4, "val"); s(0,6, 0,11, Add( - s(0,6, 0,7, Num(1.0)), - s(0,10, 0,11, Num(3.0)), - )) + ts!("[val: 1 + 3]" => s(0, 12, F!( + s(1, 4, "val"); s(6, 11, Add(s(6, 7, Num(1.0)), s(10, 11, Num(3.0)))) ))); // Span of parenthesized expression contains parens. - ts!("[val: (1)]" => s(0,0, 0,10, F!(s(0,1, 0,4, "val"); s(0,6, 0,9, Num(1.0))))); + ts!("[val: (1)]" => s(0, 10, F!(s(1, 4, "val"); s(6, 9, Num(1.0))))); // Invalid expressions. v!("4pt--" => Len(Length::pt(4.0))); - e!("[val: 4pt--]" => s(0,10, 0,11, "dangling minus"), - s(0,6, 0,10, "missing right summand")); + e!("[val: 4pt--]" => s(10, 11, "dangling minus"), + s(6, 10, "missing right summand")); v!("3mm+4pt*" => Add(Len(Length::mm(3.0)), Len(Length::pt(4.0)))); - e!("[val: 3mm+4pt*]" => s(0,10, 0,14, "missing right factor")); + e!("[val: 3mm+4pt*]" => s(10, 14, "missing right factor")); } #[test] @@ -461,13 +458,14 @@ fn test_parse_tables() { v!("(1, key=\"value\")" => Table![Num(1.0), "key" => Str("value")]); // Decorations. - d!("[val: key=hi]" => s(0,6, 0,9, TableKey)); - d!("[val: (key=hi)]" => s(0,7, 0,10, TableKey)); - d!("[val: f(key=hi)]" => s(0,8, 0,11, TableKey)); + d!("[val: key=hi]" => s(6, 9, TableKey)); + d!("[val: (key=hi)]" => s(7, 10, TableKey)); + d!("[val: f(key=hi)]" => s(8, 11, TableKey)); // Spanned with spacing around keyword arguments. - ts!("[val: \n hi \n = /* //\n */ \"s\n\"]" => s(0,0, 4,2, F!( - s(0,1, 0,4, "val"); s(1,1, 1,3, "hi") => s(3,4, 4,1, Str("s\n")) + ts!("[val: \n hi \n = /* //\n */ \"s\n\"]" => s(0, 30, F!( + s(1, 4, "val"); + s(8, 10, "hi") => s(25, 29, Str("s\n")) ))); e!("[val: \n hi \n = /* //\n */ \"s\n\"]" => ); } @@ -490,11 +488,11 @@ fn test_parse_tables_compute_func_calls() { // Unclosed. v!("lang(δΈ­ζ–‡]" => Call!("lang"; Id("δΈ­ζ–‡"))); - e!("[val: lang(δΈ­ζ–‡]" => s(0,13, 0,13, "expected closing paren")); + e!("[val: lang(δΈ­ζ–‡]" => s(17, 17, "expected closing paren")); // Invalid name. v!("πŸ‘ (\"abc\", 13e-5)" => Table!(Str("abc"), Num(13.0e-5))); - e!("[val: πŸ‘ (\"abc\", 13e-5)]" => s(0,6, 0,7, "expected value, found invalid token")); + e!("[val: πŸ‘ (\"abc\", 13e-5)]" => s(6, 10, "expected value, found invalid token")); } #[test] @@ -514,32 +512,32 @@ fn test_parse_tables_nested() { #[test] fn test_parse_tables_errors() { // Expected value. - e!("[val: (=)]" => s(0,7, 0,8, "expected value, found equals sign")); - e!("[val: (,)]" => s(0,7, 0,8, "expected value, found comma")); + e!("[val: (=)]" => s(7, 8, "expected value, found equals sign")); + e!("[val: (,)]" => s(7, 8, "expected value, found comma")); v!("(\x07 abc,)" => Table![Id("abc")]); - e!("[val: (\x07 abc,)]" => s(0,7, 0,8, "expected value, found invalid token")); - e!("[val: (key=,)]" => s(0,11, 0,12, "expected value, found comma")); - e!("[val: hi,)]" => s(0,9, 0,10, "expected value, found closing paren")); + e!("[val: (\x07 abc,)]" => s(7, 8, "expected value, found invalid token")); + e!("[val: (key=,)]" => s(11, 12, "expected value, found comma")); + e!("[val: hi,)]" => s(9, 10, "expected value, found closing paren")); // Expected comma. v!("(true false)" => Table![Bool(true), Bool(false)]); - e!("[val: (true false)]" => s(0,11, 0,11, "expected comma")); + e!("[val: (true false)]" => s(11, 11, "expected comma")); // Expected closing paren. - e!("[val: (#000]" => s(0,11, 0,11, "expected closing paren")); - e!("[val: (key]" => s(0,10, 0,10, "expected closing paren")); - e!("[val: (key=]" => s(0,11, 0,11, "expected value"), - s(0,11, 0,11, "expected closing paren")); + e!("[val: (#000]" => s(11, 11, "expected closing paren")); + e!("[val: (key]" => s(10, 10, "expected closing paren")); + e!("[val: (key=]" => s(11, 11, "expected value"), + s(11, 11, "expected closing paren")); // Bad key. v!("true=you" => Bool(true), Id("you")); e!("[val: true=you]" => - s(0,10, 0,10, "expected comma"), - s(0,10, 0,11, "expected value, found equals sign")); + s(10, 10, "expected comma"), + s(10, 11, "expected value, found equals sign")); // Unexpected equals sign. v!("z=y=4" => Num(4.0), "z" => Id("y")); e!("[val: z=y=4]" => - s(0,9, 0,9, "expected comma"), - s(0,9, 0,10, "expected value, found equals sign")); + s(9, 9, "expected comma"), + s(9, 10, "expected value, found equals sign")); } diff --git a/src/parse/tokenizer.rs b/src/parse/tokenizer.rs index d36053091..92d15edca 100644 --- a/src/parse/tokenizer.rs +++ b/src/parse/tokenizer.rs @@ -17,7 +17,6 @@ pub struct Tokens<'s> { iter: Peekable>, mode: TokenMode, stack: Vec, - pos: Pos, index: usize, } @@ -38,7 +37,6 @@ impl<'s> Tokens<'s> { iter: src.chars().peekable(), mode, stack: vec![], - pos: Pos::ZERO, index: 0, } } @@ -55,16 +53,10 @@ impl<'s> Tokens<'s> { self.mode = self.stack.pop().expect("no pushed mode"); } - /// The index in the string at which the last token ends and next token will - /// start. - pub fn index(&self) -> usize { - self.index - } - - /// The line-colunn position in the source at which the last token ends and - /// next token will start. + /// The position in the string at which the last token ends and next token + /// will start. pub fn pos(&self) -> Pos { - self.pos + Pos(self.index as u32) } } @@ -86,7 +78,7 @@ impl<'s> Iterator for Tokens<'s> { } // Whitespace. - c if c.is_whitespace() => self.read_whitespace(start), + c if c.is_whitespace() => self.read_whitespace(c), // Functions and blocks. '[' => LeftBracket, @@ -160,9 +152,8 @@ impl<'s> Iterator for Tokens<'s> { }; let end = self.pos(); - let span = Span { start, end }; - Some(Spanned { v: token, span }) + Some(token.span_with(Span::new(start, end))) } } @@ -210,11 +201,28 @@ impl<'s> Tokens<'s> { Chain } - fn read_whitespace(&mut self, start: Pos) -> Token<'s> { - self.read_string_until(false, 0, 0, |n| !n.is_whitespace()); - let end = self.pos(); + fn read_whitespace(&mut self, mut c: char) -> Token<'s> { + let mut newlines = 0; - Space(end.line - start.line) + loop { + if is_newline_char(c) { + if c == '\r' && self.peek() == Some('\n') { + self.eat(); + } + + newlines += 1; + } + + match self.peek() { + Some(n) if n.is_whitespace() => { + self.eat(); + c = n; + } + _ => break, + } + } + + Space(newlines) } fn read_string(&mut self) -> Token<'s> { @@ -257,7 +265,7 @@ impl<'s> Tokens<'s> { } } - let start = self.index(); + let start = self.index; let mut backticks = 0u32; while backticks < 3 { @@ -273,7 +281,7 @@ impl<'s> Tokens<'s> { } let terminated = backticks == 3; - let end = self.index() - if terminated { 3 } else { 0 }; + let end = self.index - if terminated { 3 } else { 0 }; Code { lang, @@ -325,7 +333,7 @@ impl<'s> Tokens<'s> { } } Some(c) if is_escapable(c) => { - let index = self.index(); + let index = self.index; self.eat(); Text(&self.src[index .. index + c.len_utf8()]) } @@ -369,7 +377,7 @@ impl<'s> Tokens<'s> { offset_end: isize, mut f: impl FnMut(char) -> bool, ) -> (&'s str, bool) { - let start = ((self.index() as isize) + offset_start) as usize; + let start = ((self.index as isize) + offset_start) as usize; let mut matched = false; while let Some(c) = self.peek() { @@ -384,7 +392,7 @@ impl<'s> Tokens<'s> { self.eat(); } - let mut end = self.index(); + let mut end = self.index; if matched { end = ((end as isize) + offset_end) as usize; } @@ -395,14 +403,6 @@ impl<'s> Tokens<'s> { fn eat(&mut self) -> Option { let c = self.iter.next()?; self.index += c.len_utf8(); - - if is_newline_char(c) && !(c == '\r' && self.peek() == Some('\n')) { - self.pos.line += 1; - self.pos.column = 0; - } else { - self.pos.column += 1; - } - Some(c) } @@ -615,25 +615,25 @@ mod tests { #[test] fn tokenize_unescapable_symbols() { - t!(Body, r"\a" => T("\\"), T("a")); - t!(Body, r"\:" => T(r"\"), T(":")); - t!(Body, r"\=" => T(r"\"), T("=")); - t!(Body, r"\u{2GA4"=> UE("2", false), T("GA4")); - t!(Body, r"\u{ " => UE("", false), Space(0)); - t!(Body, r"\u" => T(r"\u")); - t!(Header, r"\\\\" => Invalid(r"\\\\")); - t!(Header, r"\a" => Invalid(r"\a")); - t!(Header, r"\:" => Invalid(r"\"), Colon); - t!(Header, r"\=" => Invalid(r"\"), Equals); - t!(Header, r"\," => Invalid(r"\"), Comma); + t!(Body, r"\a" => T("\\"), T("a")); + t!(Body, r"\:" => T(r"\"), T(":")); + t!(Body, r"\=" => T(r"\"), T("=")); + t!(Body, r"\u{2GA4" => UE("2", false), T("GA4")); + t!(Body, r"\u{ " => UE("", false), Space(0)); + t!(Body, r"\u" => T(r"\u")); + t!(Header, r"\\\\" => Invalid(r"\\\\")); + t!(Header, r"\a" => Invalid(r"\a")); + t!(Header, r"\:" => Invalid(r"\"), Colon); + t!(Header, r"\=" => Invalid(r"\"), Equals); + t!(Header, r"\," => Invalid(r"\"), Comma); } #[test] fn tokenize_with_spans() { - ts!(Body, "hello" => s(0,0, 0,5, T("hello"))); - ts!(Body, "ab\r\nc" => s(0,0, 0,2, T("ab")), s(0,2, 1,0, S(1)), s(1,0, 1,1, T("c"))); - ts!(Body, "// ab\r\n\nf" => s(0,0, 0,5, LC(" ab")), s(0,5, 2,0, S(2)), s(2,0, 2,1, T("f"))); - ts!(Body, "/*b*/_" => s(0,0, 0,5, BC("b")), s(0,5, 0,6, Underscore)); - ts!(Header, "a=10" => s(0,0, 0,1, Id("a")), s(0,1, 0,2, Equals), s(0,2, 0,4, Num(10.0))); + ts!(Body, "hello" => s(0, 5, T("hello"))); + ts!(Body, "ab\r\nc" => s(0, 2, T("ab")), s(2, 4, S(1)), s(4, 5, T("c"))); + ts!(Body, "// ab\r\n\nf" => s(0, 5, LC(" ab")), s(5, 8, S(2)), s(8, 9, T("f"))); + ts!(Body, "/*b*/_" => s(0, 5, BC("b")), s(5, 6, Underscore)); + ts!(Header, "a=10" => s(0, 1, Id("a")), s(1, 2, Equals), s(2, 4, Num(10.0))); } } diff --git a/src/syntax/lines.rs b/src/syntax/lines.rs new file mode 100644 index 000000000..86fc461bd --- /dev/null +++ b/src/syntax/lines.rs @@ -0,0 +1,114 @@ +//! Conversion of byte positions to line/column locations. + +use std::fmt::{self, Debug, Display, Formatter}; + +use super::Pos; +use crate::parse::is_newline_char; + +/// Enables conversion of byte position to locations. +pub struct LineMap<'s> { + src: &'s str, + line_starts: Vec, +} + +impl<'s> LineMap<'s> { + /// Create a new line map for a source string. + pub fn new(src: &'s str) -> Self { + let mut line_starts = vec![Pos::ZERO]; + let mut iter = src.char_indices().peekable(); + + while let Some((mut i, c)) = iter.next() { + if is_newline_char(c) { + i += c.len_utf8(); + if c == '\r' && matches!(iter.peek(), Some((_, '\n'))) { + i += '\n'.len_utf8(); + iter.next(); + } + + line_starts.push(Pos(i as u32)); + } + } + + Self { src, line_starts } + } + + /// Convert a byte position to a location. + /// + /// # Panics + /// This panics if the position is out of bounds. + pub fn location(&self, pos: Pos) -> Location { + let line_index = match self.line_starts.binary_search(&pos) { + Ok(i) => i, + Err(i) => i - 1, + }; + + let line_start = self.line_starts[line_index]; + let head = &self.src[line_start.to_usize() .. pos.to_usize()]; + let column_index = head.chars().count(); + + Location { + line: 1 + line_index as u32, + column: 1 + column_index as u32, + } + } +} + +/// One-indexed line-column position in source code. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[cfg_attr(feature = "serialize", derive(serde::Serialize))] +pub struct Location { + /// The one-indexed line. + pub line: u32, + /// The one-indexed column. + pub column: u32, +} + +impl Location { + /// Create a new location from line and column. + pub fn new(line: u32, column: u32) -> Self { + Self { line, column } + } +} + +impl Debug for Location { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + Display::fmt(self, f) + } +} + +impl Display for Location { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{}:{}", self.line, self.column) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const TEST: &str = "Γ€bcde\nfπŸ’›g\r\nhi\rjkl"; + + #[test] + fn test_line_map_new() { + let map = LineMap::new(TEST); + assert_eq!(map.line_starts, vec![Pos(0), Pos(7), Pos(15), Pos(18)]); + } + + #[test] + fn test_line_map_location() { + let map = LineMap::new(TEST); + assert_eq!(map.location(Pos(0)), Location::new(1, 1)); + assert_eq!(map.location(Pos(2)), Location::new(1, 2)); + assert_eq!(map.location(Pos(6)), Location::new(1, 6)); + assert_eq!(map.location(Pos(7)), Location::new(2, 1)); + assert_eq!(map.location(Pos(8)), Location::new(2, 2)); + assert_eq!(map.location(Pos(12)), Location::new(2, 3)); + assert_eq!(map.location(Pos(21)), Location::new(4, 4)); + } + + #[test] + #[should_panic] + fn test_line_map_panics_out_of_bounds() { + LineMap::new(TEST).location(Pos(22)); + } +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index fe887c2fe..f442ba9e0 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,9 +1,11 @@ //! Syntax types. +mod lines; mod span; mod token; mod tree; +pub use lines::*; pub use span::*; pub use token::*; pub use tree::*; diff --git a/src/syntax/span.rs b/src/syntax/span.rs index cda35ec0b..1bd14c654 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -1,7 +1,6 @@ //! Mapping of values to the locations they originate from in source code. use std::fmt::{self, Debug, Formatter}; -use std::ops::{Add, Sub}; #[cfg(test)] use std::cell::Cell; @@ -11,12 +10,6 @@ thread_local! { static CMP_SPANS: Cell = Cell::new(true); } -/// Span offsetting. -pub trait Offset { - /// Offset all spans contained in `Self` by the given position. - fn offset(self, by: Pos) -> Self; -} - /// Annotate a value with a span. pub trait SpanWith: Sized { /// Wraps `self` in a `Spanned` with the given span. @@ -27,6 +20,12 @@ pub trait SpanWith: Sized { impl SpanWith for T {} +/// Span offsetting. +pub trait Offset { + /// Offset all spans contained in `Self` by the given position. + fn offset(self, by: Pos) -> Self; +} + /// A vector of spanned values of type `T`. pub type SpanVec = Vec>; @@ -112,13 +111,13 @@ impl Span { pub const ZERO: Self = Self { start: Pos::ZERO, end: Pos::ZERO }; /// Create a new span from start and end positions. - pub fn new(start: Pos, end: Pos) -> Self { - Self { start, end } + pub fn new(start: impl Into, end: impl Into) -> Self { + Self { start: start.into(), end: end.into() } } /// Create a span including just a single position. - pub fn at(pos: Pos) -> Self { - Self { start: pos, end: pos } + pub fn at(pos: impl Into + Copy) -> Self { + Self::new(pos, pos) } /// Create a new span with the earlier start and later end position. @@ -169,70 +168,35 @@ impl Debug for Span { } } -/// Zero-indexed line-column position in source code. +/// A byte position. #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serialize", derive(serde::Serialize))] -pub struct Pos { - /// The zero-indexed line. - pub line: usize, - /// The zero-indexed column. - pub column: usize, -} +pub struct Pos(pub u32); impl Pos { - /// The line 0, column 0 position. - pub const ZERO: Self = Self { line: 0, column: 0 }; + /// The zero position. + pub const ZERO: Self = Self(0); - /// Create a new position from line and column. - pub fn new(line: usize, column: usize) -> Self { - Self { line, column } + /// Convert to a usize for indexing. + pub fn to_usize(self) -> usize { + self.0 as usize + } +} + +impl From for Pos { + fn from(index: u32) -> Self { + Self(index) } } impl Offset for Pos { fn offset(self, by: Self) -> Self { - by + self - } -} - -impl Add for Pos { - type Output = Self; - - fn add(self, rhs: Self) -> Self { - if rhs.line == 0 { - Self { - line: self.line, - column: self.column + rhs.column, - } - } else { - Self { - line: self.line + rhs.line, - column: rhs.column, - } - } - } -} - -impl Sub for Pos { - type Output = Self; - - fn sub(self, rhs: Self) -> Self { - if self.line == rhs.line { - Self { - line: 0, - column: self.column - rhs.column, - } - } else { - Self { - line: self.line - rhs.line, - column: self.column, - } - } + Pos(self.0 + by.0) } } impl Debug for Pos { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{}:{}", self.line, self.column) + self.0.fmt(f) } } diff --git a/tests/test_typeset.rs b/tests/test_typeset.rs index e9051d104..2c8a093d3 100644 --- a/tests/test_typeset.rs +++ b/tests/test_typeset.rs @@ -19,7 +19,8 @@ use typstc::layout::MultiLayout; use typstc::length::Length; use typstc::paper::PaperClass; use typstc::style::PageStyle; -use typstc::Typesetter; +use typstc::syntax::LineMap; +use typstc::{Feedback, Pass, Typesetter}; const TEST_DIR: &str = "tests"; const OUT_DIR: &str = "tests/out"; @@ -79,29 +80,35 @@ fn main() { fn test( name: &str, src: &str, - path: &Path, + src_path: &Path, typesetter: &mut Typesetter, loader: &SharedFontLoader, ) { println!("Testing {}.", name); - let typeset = block_on(typesetter.typeset(src)); - let layouts = typeset.output; - let mut feedback = typeset.feedback; + let Pass { + output: layouts, + feedback: Feedback { mut diagnostics, .. }, + } = block_on(typesetter.typeset(&src)); - feedback.diagnostics.sort(); - for diagnostic in feedback.diagnostics { - let span = diagnostic.span; - println!( - " {:?}: {}:{}:{} - {}:{}: {}", - diagnostic.v.level, - path.display(), - span.start.line + 1, - span.start.column + 1, - span.end.line + 1, - span.end.column + 1, - diagnostic.v.message, - ); + if !diagnostics.is_empty() { + diagnostics.sort(); + + let map = LineMap::new(&src); + for diagnostic in diagnostics { + let span = diagnostic.span; + let start = map.location(span.start); + let end = map.location(span.end); + + println!( + " {}: {}:{}-{}: {}", + diagnostic.v.level, + src_path.display(), + start, + end, + diagnostic.v.message, + ); + } } let loader = loader.borrow();