From ec884ec1d85f6e1d7868db3e82d572579cc5d345 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 5 Oct 2022 12:49:39 +0200 Subject: [PATCH] Refactor syntax module --- benches/oneshot.rs | 2 +- src/eval/mod.rs | 28 +- src/library/text/raw.rs | 33 ++- src/parse/incremental.rs | 37 ++- src/parse/mod.rs | 23 +- src/parse/parser.rs | 10 +- src/parse/tokens.rs | 49 ++-- src/source.rs | 4 +- src/syntax/ast.rs | 108 +++---- src/syntax/highlight.rs | 283 +++++++++--------- src/syntax/kind.rs | 548 ++++++++++++++++++++++++++++++++++ src/syntax/mod.rs | 614 +-------------------------------------- tests/typ/code/array.typ | 4 +- tests/typ/code/block.typ | 2 +- tests/typ/code/call.typ | 2 +- tests/typ/text/link.typ | 4 +- 16 files changed, 848 insertions(+), 903 deletions(-) create mode 100644 src/syntax/kind.rs diff --git a/benches/oneshot.rs b/benches/oneshot.rs index 23f829b3a..d47275129 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -66,7 +66,7 @@ fn bench_edit(iai: &mut Iai) { fn bench_highlight(iai: &mut Iai) { let source = Source::detached(TEXT); iai.run(|| { - typst::syntax::highlight_node( + typst::syntax::highlight::highlight_categories( source.root(), 0 .. source.len_bytes(), &mut |_, _| {}, diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 5bbac77e2..cc9d3422e 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -133,25 +133,25 @@ pub trait Eval { fn eval(&self, vm: &mut Vm) -> SourceResult; } -impl Eval for Markup { +impl Eval for MarkupNode { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { - eval_markup(vm, &mut self.nodes()) + eval_markup(vm, &mut self.items()) } } /// Evaluate a stream of markup nodes. fn eval_markup( vm: &mut Vm, - nodes: &mut impl Iterator, + nodes: &mut impl Iterator, ) -> SourceResult { let flow = vm.flow.take(); let mut seq = Vec::with_capacity(nodes.size_hint().1.unwrap_or_default()); while let Some(node) = nodes.next() { seq.push(match node { - MarkupNode::Expr(Expr::Set(set)) => { + MarkupItem::Expr(Expr::Set(set)) => { let styles = set.eval(vm)?; if vm.flow.is_some() { break; @@ -159,7 +159,7 @@ fn eval_markup( eval_markup(vm, nodes)?.styled_with_map(styles) } - MarkupNode::Expr(Expr::Show(show)) => { + MarkupItem::Expr(Expr::Show(show)) => { let recipe = show.eval(vm)?; if vm.flow.is_some() { break; @@ -168,7 +168,7 @@ fn eval_markup( eval_markup(vm, nodes)? .styled_with_entry(StyleEntry::Recipe(recipe).into()) } - MarkupNode::Expr(Expr::Wrap(wrap)) => { + MarkupItem::Expr(Expr::Wrap(wrap)) => { let tail = eval_markup(vm, nodes)?; vm.scopes.top.define(wrap.binding().take(), tail); wrap.body().eval(vm)?.display() @@ -189,7 +189,7 @@ fn eval_markup( Ok(Content::sequence(seq)) } -impl Eval for MarkupNode { +impl Eval for MarkupItem { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { @@ -252,12 +252,12 @@ impl Eval for RawNode { } } -impl Eval for Math { +impl Eval for MathNode { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { let nodes = - self.nodes().map(|node| node.eval(vm)).collect::>()?; + self.items().map(|node| node.eval(vm)).collect::>()?; Ok(Content::show(library::math::MathNode::Row( Arc::new(nodes), self.span(), @@ -265,7 +265,7 @@ impl Eval for Math { } } -impl Eval for MathNode { +impl Eval for MathItem { type Output = library::math::MathNode; fn eval(&self, vm: &mut Vm) -> SourceResult { @@ -278,7 +278,7 @@ impl Eval for MathNode { Self::Align(node) => node.eval(vm)?, Self::Group(node) => library::math::MathNode::Row( Arc::new( - node.nodes() + node.items() .map(|node| node.eval(vm)) .collect::>()?, ), @@ -346,7 +346,7 @@ impl Eval for HeadingNode { } } -impl Eval for ListNode { +impl Eval for ListItem { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { @@ -355,7 +355,7 @@ impl Eval for ListNode { } } -impl Eval for EnumNode { +impl Eval for EnumItem { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { @@ -367,7 +367,7 @@ impl Eval for EnumNode { } } -impl Eval for DescNode { +impl Eval for DescItem { type Output = Content; fn eval(&self, vm: &mut Vm) -> SourceResult { diff --git a/src/library/text/raw.rs b/src/library/text/raw.rs index a64b1a924..8b0874f82 100644 --- a/src/library/text/raw.rs +++ b/src/library/text/raw.rs @@ -8,8 +8,6 @@ use syntect::parsing::SyntaxSet; use super::{FontFamily, Hyphenate, TextNode}; use crate::library::layout::BlockSpacing; use crate::library::prelude::*; -use crate::parse::TokenMode; -use crate::syntax; /// Monospaced text with optional syntax highlighting. #[derive(Debug, Hash)] @@ -73,14 +71,14 @@ impl Show for RawNode { .into(); let mut realized = if matches!(lang.as_deref(), Some("typ" | "typst" | "typc")) { - let mode = match lang.as_deref() { - Some("typc") => TokenMode::Code, - _ => TokenMode::Markup, + let root = match lang.as_deref() { + Some("typc") => crate::parse::parse_code(&self.text), + _ => crate::parse::parse(&self.text), }; let mut seq = vec![]; - syntax::highlight_themed(&self.text, mode, &THEME, |piece, style| { - seq.push(styled(piece, foreground, style)); + crate::syntax::highlight::highlight_themed(&root, &THEME, |range, style| { + seq.push(styled(&self.text[range], foreground, style)); }); Content::sequence(seq) @@ -167,24 +165,29 @@ pub static THEME: Lazy = Lazy::new(|| Theme { author: Some("The Typst Project Developers".into()), settings: ThemeSettings::default(), scopes: vec![ + item("comment", Some("#8a8a8a"), None), + item("constant.character.escape", Some("#1d6c76"), None), + item("constant.character.shortcut", Some("#1d6c76"), None), item("markup.bold", None, Some(FontStyle::BOLD)), item("markup.italic", None, Some(FontStyle::ITALIC)), + item("markup.underline", None, Some(FontStyle::UNDERLINE)), + item("markup.raw", Some("#818181"), None), + item("string.other.math.typst", None, None), + item("punctuation.definition.math", Some("#298e0d"), None), + item("keyword.operator.math", Some("#1d6c76"), None), item("markup.heading, entity.name.section", None, Some(FontStyle::BOLD)), item("markup.heading.typst", None, Some(FontStyle::BOLD | FontStyle::UNDERLINE)), - item("markup.raw", Some("#818181"), None), - item("markup.list", Some("#8b41b1"), None), - item("comment", Some("#8a8a8a"), None), - item("punctuation.shortcut", Some("#1d6c76"), None), - item("constant.character.escape", Some("#1d6c76"), None), + item("punctuation.definition.list", Some("#8b41b1"), None), + item("markup.list.term", None, Some(FontStyle::BOLD)), item("entity.name.label, markup.other.reference", Some("#1d6c76"), None), item("keyword, constant.language, variable.language", Some("#d73a49"), None), item("storage.type, storage.modifier", Some("#d73a49"), None), - item("entity.other", Some("#8b41b1"), None), + item("constant", Some("#b60157"), None), + item("string", Some("#298e0d"), None), item("entity.name, variable.function, support", Some("#4b69c6"), None), item("support.macro", Some("#16718d"), None), item("meta.annotation", Some("#301414"), None), - item("constant", Some("#b60157"), None), - item("string", Some("#298e0d"), None), + item("entity.other, meta.interpolation", Some("#8b41b1"), None), item("invalid", Some("#ff0000"), None), ], }); diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 06096a752..e0be9b6d3 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -96,11 +96,10 @@ fn try_reparse( && (ahead.is_none() || change.replaced.start > child_span.end) && !ahead.map_or(false, Ahead::is_compulsory) { - ahead = - Some(Ahead::new(pos, at_start, child.kind().is_bounded())); + ahead = Some(Ahead::new(pos, at_start, is_bounded(child.kind()))); } - at_start = child.kind().is_at_start(at_start); + at_start = next_at_start(child.kind(), at_start); } } SearchState::Inside(start) => { @@ -137,7 +136,7 @@ fn try_reparse( if let SearchState::Contained(pos) = search { // Do not allow replacement of elements inside of constructs whose // opening and closing brackets look the same. - let safe_inside = node.kind().is_bounded(); + let safe_inside = is_bounded(node.kind()); let child = &mut node.children_mut()[pos.idx]; let prev_len = child.len(); let prev_descendants = child.descendants(); @@ -384,6 +383,36 @@ enum ReparseMode { MarkupElements { at_start: bool, min_indent: usize }, } +/// Whether changes _inside_ this node are safely encapsulated, so that only +/// this node must be reparsed. +fn is_bounded(kind: &NodeKind) -> bool { + match kind { + NodeKind::CodeBlock + | NodeKind::ContentBlock + | NodeKind::Backslash + | NodeKind::Tilde + | NodeKind::HyphQuest + | NodeKind::Hyph2 + | NodeKind::Hyph3 + | NodeKind::Dot3 + | NodeKind::Quote { .. } + | NodeKind::BlockComment + | NodeKind::Space { .. } + | NodeKind::Escape(_) => true, + _ => false, + } +} + +/// Whether `at_start` would still be true after this node given the +/// previous value of the property. +fn next_at_start(kind: &NodeKind, prev: bool) -> bool { + match kind { + NodeKind::Space { newlines: (1 ..) } => true, + NodeKind::Space { .. } | NodeKind::LineComment | NodeKind::BlockComment => prev, + _ => false, + } +} + #[cfg(test)] #[rustfmt::skip] mod tests { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 7eb7343b6..832c297e5 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -22,17 +22,6 @@ pub fn parse(text: &str) -> SyntaxNode { p.finish().into_iter().next().unwrap() } -/// Parse math directly, only used for syntax highlighting. -pub fn parse_math(text: &str) -> SyntaxNode { - let mut p = Parser::new(text, TokenMode::Math); - p.perform(NodeKind::Math, |p| { - while !p.eof() { - math_node(p); - } - }); - p.finish().into_iter().next().unwrap() -} - /// Parse code directly, only used for syntax highlighting. pub fn parse_code(text: &str) -> SyntaxNode { let mut p = Parser::new(text, TokenMode::Code); @@ -250,7 +239,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Text and markup. NodeKind::Text(_) - | NodeKind::Linebreak { .. } + | NodeKind::Backslash | NodeKind::Tilde | NodeKind::HyphQuest | NodeKind::Hyph2 @@ -353,7 +342,7 @@ fn list_node(p: &mut Parser, at_start: bool) { let min_indent = p.column(p.prev_end()); if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() { markup_indented(p, min_indent); - marker.end(p, NodeKind::List); + marker.end(p, NodeKind::ListItem); } else { marker.convert(p, NodeKind::Text(text)); } @@ -368,7 +357,7 @@ fn enum_node(p: &mut Parser, at_start: bool) { let min_indent = p.column(p.prev_end()); if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() { markup_indented(p, min_indent); - marker.end(p, NodeKind::Enum); + marker.end(p, NodeKind::EnumItem); } else { marker.convert(p, NodeKind::Text(text)); } @@ -385,7 +374,7 @@ fn desc_node(p: &mut Parser, at_start: bool) -> ParseResult { markup_line(p, |node| matches!(node, NodeKind::Colon)); p.expect(NodeKind::Colon)?; markup_indented(p, min_indent); - marker.end(p, NodeKind::Desc); + marker.end(p, NodeKind::DescItem); } else { marker.convert(p, NodeKind::Text(text)); } @@ -485,7 +474,7 @@ fn math_primary(p: &mut Parser) { match token { // Spaces, atoms and expressions. NodeKind::Space { .. } - | NodeKind::Linebreak + | NodeKind::Backslash | NodeKind::Escape(_) | NodeKind::Atom(_) | NodeKind::Ident(_) => p.eat(), @@ -820,7 +809,7 @@ fn item(p: &mut Parser, keyed: bool) -> ParseResult { } if let Some(kind) = kind { msg.push_str(", found "); - msg.push_str(kind.as_str()); + msg.push_str(kind.name()); } let error = NodeKind::Error(SpanPos::Full, msg); marker.end(p, error); diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 12dd324bd..4b73c2b98 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -159,7 +159,7 @@ impl<'s> Parser<'s> { self.eat(); Ok(()) } else { - self.expected(kind.as_str()); + self.expected(kind.name()); Err(ParseError) } } @@ -293,7 +293,7 @@ impl<'s> Parser<'s> { self.stray_terminator = s; rescan = false; } else if required { - self.expected(end.as_str()); + self.expected(end.name()); self.unterminated_group = true; } } @@ -397,7 +397,7 @@ impl Parser<'_> { /// Eat the current token and add an error that it is unexpected. pub fn unexpected(&mut self) { if let Some(found) = self.peek() { - let msg = format_eco!("unexpected {}", found); + let msg = format_eco!("unexpected {}", found.name()); let error = NodeKind::Error(SpanPos::Full, msg); self.perform(error, Self::eat); } @@ -421,7 +421,7 @@ impl Parser<'_> { pub fn expected_found(&mut self, thing: &str) { match self.peek() { Some(found) => { - let msg = format_eco!("expected {}, found {}", thing, found); + let msg = format_eco!("expected {}, found {}", thing, found.name()); let error = NodeKind::Error(SpanPos::Full, msg); self.perform(error, Self::eat); } @@ -492,7 +492,7 @@ impl Marker { let mut msg = EcoString::from(msg); if msg.starts_with("expected") { msg.push_str(", found "); - msg.push_str(child.kind().as_str()); + msg.push_str(child.kind().name()); } let error = NodeKind::Error(SpanPos::Full, msg); let inner = mem::take(child); diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index d495afa01..d3c497f34 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -108,7 +108,9 @@ impl<'s> Iterator for Tokens<'s> { // Trivia. '/' if self.s.eat_if('/') => self.line_comment(), '/' if self.s.eat_if('*') => self.block_comment(), - '*' if self.s.eat_if('/') => NodeKind::Unknown("*/".into()), + '*' if self.s.eat_if('/') => { + NodeKind::Error(SpanPos::Full, "unexpected end of block comment".into()) + } c if c.is_whitespace() => self.whitespace(c), // Other things. @@ -288,8 +290,8 @@ impl<'s> Tokens<'s> { } // Linebreaks. - Some(c) if c.is_whitespace() => NodeKind::Linebreak, - None => NodeKind::Linebreak, + Some(c) if c.is_whitespace() => NodeKind::Backslash, + None => NodeKind::Backslash, // Escapes. Some(c) => { @@ -517,7 +519,7 @@ impl<'s> Tokens<'s> { '"' => self.string(), // Invalid token. - _ => NodeKind::Unknown(self.s.from(start).into()), + _ => NodeKind::Error(SpanPos::Full, "not valid here".into()), } } @@ -556,7 +558,6 @@ impl<'s> Tokens<'s> { let number = self.s.get(start .. suffix_start); let suffix = self.s.from(suffix_start); - let all = self.s.from(start); // Find out whether it is a simple number. if suffix.is_empty() { @@ -577,10 +578,10 @@ impl<'s> Tokens<'s> { "em" => NodeKind::Numeric(f, Unit::Em), "fr" => NodeKind::Numeric(f, Unit::Fr), "%" => NodeKind::Numeric(f, Unit::Percent), - _ => NodeKind::Unknown(all.into()), + _ => NodeKind::Error(SpanPos::Full, "invalid number suffix".into()), } } else { - NodeKind::Unknown(all.into()) + NodeKind::Error(SpanPos::Full, "invalid number".into()) } } @@ -745,10 +746,6 @@ mod tests { NodeKind::Error(pos, message.into()) } - fn Invalid(invalid: &str) -> NodeKind { - NodeKind::Unknown(invalid.into()) - } - /// Building blocks for suffix testing. /// /// We extend each test case with a collection of different suffixes to make @@ -926,7 +923,7 @@ mod tests { t!(Markup: "_" => Underscore); t!(Markup[""]: "===" => Eq, Eq, Eq); t!(Markup["a1/"]: "= " => Eq, Space(0)); - t!(Markup[" "]: r"\" => Linebreak); + t!(Markup[" "]: r"\" => Backslash); t!(Markup: "~" => Tilde); t!(Markup["a1/"]: "-?" => HyphQuest); t!(Markup["a "]: r"a--" => Text("a"), Hyph2); @@ -972,6 +969,9 @@ mod tests { t!(Code[" /"]: "--1" => Minus, Minus, Int(1)); t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a")); t!(Code[" /"]: "a-b" => Ident("a-b")); + + // Test invalid. + t!(Code: r"\" => Error(Full, "not valid here")); } #[test] @@ -1107,6 +1107,9 @@ mod tests { t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2)); t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3)); t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3)); + + // Test invalid. + t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix")); } #[test] @@ -1161,25 +1164,9 @@ mod tests { t!(Both[""]: "/*/*" => BlockComment); t!(Both[""]: "/**/" => BlockComment); t!(Both[""]: "/***" => BlockComment); - } - #[test] - fn test_tokenize_invalid() { - // Test invalidly closed block comments. - t!(Both: "*/" => Invalid("*/")); - t!(Both: "/**/*/" => BlockComment, Invalid("*/")); - - // Test invalid expressions. - t!(Code: r"\" => Invalid(r"\")); - t!(Code: "πŸŒ“" => Invalid("πŸŒ“")); - t!(Code: r"\:" => Invalid(r"\"), Colon); - t!(Code: "meal⌚" => Ident("meal"), Invalid("⌚")); - t!(Code[" /"]: r"\a" => Invalid(r"\"), Ident("a")); - t!(Code[" /"]: "#" => Invalid("#")); - - // Test invalid number suffixes. - t!(Code[" /"]: "1foo" => Invalid("1foo")); - t!(Code: "1p%" => Invalid("1p"), Invalid("%")); - t!(Code: "1%%" => Numeric(1.0, Unit::Percent), Invalid("%")); + // Test unexpected terminator. + t!(Both: "/*Hi*/*/" => BlockComment, + Error(Full, "unexpected end of block comment")); } } diff --git a/src/source.rs b/src/source.rs index 0ada1b041..978b99867 100644 --- a/src/source.rs +++ b/src/source.rs @@ -10,7 +10,7 @@ use unscanny::Scanner; use crate::diag::SourceResult; use crate::parse::{is_newline, parse, reparse}; -use crate::syntax::ast::Markup; +use crate::syntax::ast::MarkupNode; use crate::syntax::{Span, SyntaxNode}; use crate::util::{PathExt, StrExt}; @@ -64,7 +64,7 @@ impl Source { } /// The root node of the file's typed abstract syntax tree. - pub fn ast(&self) -> SourceResult { + pub fn ast(&self) -> SourceResult { let errors = self.root.errors(); if errors.is_empty() { Ok(self.root.cast().expect("root node must be markup")) diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 6a016e795..aa590da29 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,6 +1,6 @@ //! A typed layer over the untyped syntax tree. //! -//! The AST is rooted in the [`Markup`] node. +//! The AST is rooted in the [`MarkupNode`]. use std::num::NonZeroUsize; use std::ops::Deref; @@ -54,19 +54,19 @@ macro_rules! node { node! { /// The syntactical root capable of representing a full parsed document. - Markup: NodeKind::Markup { .. } + MarkupNode: NodeKind::Markup { .. } } -impl Markup { - /// The markup nodes. - pub fn nodes(&self) -> impl Iterator + '_ { +impl MarkupNode { + /// The children. + pub fn items(&self) -> impl Iterator + '_ { self.0.children().filter_map(SyntaxNode::cast) } } /// A single piece of markup. #[derive(Debug, Clone, PartialEq)] -pub enum MarkupNode { +pub enum MarkupItem { /// Whitespace containing less than two newlines. Space, /// A forced line break. @@ -81,34 +81,34 @@ pub enum MarkupNode { Strong(StrongNode), /// Emphasized content: `_Emphasized_`. Emph(EmphNode), - /// A hyperlink. + /// A hyperlink: `https://typst.org`. Link(EcoString), /// A raw block with optional syntax highlighting: `` `...` ``. Raw(RawNode), - /// A math formula: `$a^2 = b^2 + c^2$`. - Math(Math), + /// A math formula: `$x$`, `$ x^2 $`. + Math(MathNode), /// A section heading: `= Introduction`. Heading(HeadingNode), /// An item in an unordered list: `- ...`. - List(ListNode), + List(ListItem), /// An item in an enumeration (ordered list): `+ ...` or `1. ...`. - Enum(EnumNode), - /// An item in a description list: `/ Term: Details. - Desc(DescNode), - /// A label. + Enum(EnumItem), + /// An item in a description list: `/ Term: Details`. + Desc(DescItem), + /// A label: `