From 65fac0e57c9852eb2131aa06c0bac43b70bfbfbc Mon Sep 17 00:00:00 2001 From: Laurenz Date: Tue, 2 Nov 2021 12:13:45 +0100 Subject: [PATCH] Refactoring Co-Authored-By: Martin --- src/diag.rs | 2 +- src/eval/capture.rs | 3 +- src/eval/mod.rs | 5 +- src/eval/walk.rs | 2 +- src/geom/relative.rs | 2 +- src/lib.rs | 2 +- src/parse/mod.rs | 3 +- src/parse/parser.rs | 8 +- src/parse/scanner.rs | 49 ++++++++--- src/parse/tokens.rs | 108 ++++++++++++------------ src/source.rs | 3 +- src/syntax/ast.rs | 141 +++++++++++++++++++++++-------- src/syntax/ident.rs | 94 --------------------- src/syntax/mod.rs | 197 ++++++++++++++++++++----------------------- src/syntax/pretty.rs | 2 +- src/syntax/span.rs | 130 ++++++---------------------- tests/typeset.rs | 14 +-- 17 files changed, 338 insertions(+), 427 deletions(-) delete mode 100644 src/syntax/ident.rs diff --git a/src/diag.rs b/src/diag.rs index f04553107..d284687ed 100644 --- a/src/diag.rs +++ b/src/diag.rs @@ -100,7 +100,7 @@ impl Trace for TypResult { { self.map_err(|mut errors| { for error in errors.iter_mut() { - if !span.contains(error.span) { + if !span.surrounds(error.span) { error.trace.push(Spanned::new(make_point(), span)); } } diff --git a/src/eval/capture.rs b/src/eval/capture.rs index b71e1ac18..e46103c8b 100644 --- a/src/eval/capture.rs +++ b/src/eval/capture.rs @@ -1,7 +1,8 @@ use std::rc::Rc; use super::{Scope, Scopes, Value}; -use crate::syntax::{ClosureParam, Expr, Imports, RedRef}; +use crate::syntax::ast::{ClosureParam, Expr, Imports}; +use crate::syntax::RedRef; /// A visitor that captures variable slots. pub struct CapturesVisitor<'a> { diff --git a/src/eval/mod.rs b/src/eval/mod.rs index ba266ea58..809209f46 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -36,7 +36,8 @@ use crate::geom::{Angle, Fractional, Length, Relative}; use crate::image::ImageStore; use crate::loading::Loader; use crate::source::{SourceId, SourceStore}; -use crate::syntax::*; +use crate::syntax::ast::*; +use crate::syntax::{Span, Spanned}; use crate::util::RefMutExt; use crate::Context; @@ -238,7 +239,7 @@ impl Eval for DictExpr { fn eval(&self, ctx: &mut EvalContext) -> TypResult { self.items() - .map(|x| Ok(((&x.name().string).into(), x.expr().eval(ctx)?))) + .map(|x| Ok((x.name().string.into(), x.expr().eval(ctx)?))) .collect() } } diff --git a/src/eval/walk.rs b/src/eval/walk.rs index e4f8ac7b8..ff73f9f90 100644 --- a/src/eval/walk.rs +++ b/src/eval/walk.rs @@ -5,7 +5,7 @@ use crate::diag::TypResult; use crate::geom::Spec; use crate::layout::BlockLevel; use crate::library::{GridNode, ParChild, ParNode, TrackSizing}; -use crate::syntax::*; +use crate::syntax::ast::*; use crate::util::BoolExt; /// Walk markup, filling the currently built template. diff --git a/src/geom/relative.rs b/src/geom/relative.rs index c2d0a0cb0..754aa6c85 100644 --- a/src/geom/relative.rs +++ b/src/geom/relative.rs @@ -3,7 +3,7 @@ use super::*; /// A relative length. /// /// _Note_: `50%` is represented as `0.5` here, but stored as `50.0` in the -/// corresponding [literal](crate::syntax::Lit::Percent). +/// corresponding [literal](crate::syntax::ast::Lit::Percent). #[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct Relative(N64); diff --git a/src/lib.rs b/src/lib.rs index 468c06d8c..033230f0b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ //! //! [tokens]: parse::Tokens //! [parsed]: parse::parse -//! [markup]: syntax::Markup +//! [markup]: syntax::ast::Markup //! [evaluate]: eval::eval //! [module]: eval::Module //! [layout tree]: layout::LayoutTree diff --git a/src/parse/mod.rs b/src/parse/mod.rs index c6def4dcc..bfe938960 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -12,7 +12,8 @@ pub use tokens::*; use std::rc::Rc; -use crate::syntax::*; +use crate::syntax::ast::{Associativity, BinOp, UnOp}; +use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; use crate::util::EcoString; /// Parse a source file. diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 8c68d6308..5833c724a 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,7 +1,7 @@ use std::ops::Range; use std::rc::Rc; -use super::{is_newline, TokenMode, Tokens}; +use super::{TokenMode, Tokens}; use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind}; use crate::util::EcoString; @@ -375,11 +375,7 @@ impl<'s> Parser<'s> { /// Determine the column index for the given byte index. pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() - .rev() - .take_while(|&c| !is_newline(c)) - .count() + self.tokens.column(index) } /// Slice out part of the source string. diff --git a/src/parse/scanner.rs b/src/parse/scanner.rs index edf28e179..92a2333d4 100644 --- a/src/parse/scanner.rs +++ b/src/parse/scanner.rs @@ -1,5 +1,7 @@ use std::slice::SliceIndex; +use unicode_xid::UnicodeXID; + /// A featureful char-based scanner. #[derive(Copy, Clone)] pub struct Scanner<'s> { @@ -106,16 +108,6 @@ impl<'s> Scanner<'s> { self.index } - /// The column index of a given index in the source string. - #[inline] - pub fn column(&self, index: usize) -> usize { - self.src[.. index] - .chars() - .rev() - .take_while(|&c| !is_newline(c)) - .count() - } - /// Jump to an index in the source string. #[inline] pub fn jump(&mut self, index: usize) { @@ -124,6 +116,12 @@ impl<'s> Scanner<'s> { self.index = index; } + /// The full source string. + #[inline] + pub fn src(&self) -> &'s str { + &self.src + } + /// Slice out part of the source string. #[inline] pub fn get(&self, index: I) -> &'s str @@ -160,6 +158,16 @@ impl<'s> Scanner<'s> { // optimized away in some cases. self.src.get(start .. self.index).unwrap_or_default() } + + /// The column index of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + self.src[.. index] + .chars() + .rev() + .take_while(|&c| !is_newline(c)) + .count() + } } /// Whether this character denotes a newline. @@ -173,3 +181,24 @@ pub fn is_newline(character: char) -> bool { '\u{0085}' | '\u{2028}' | '\u{2029}' ) } + +/// Whether a string is a valid identifier. +#[inline] +pub fn is_ident(string: &str) -> bool { + let mut chars = string.chars(); + chars + .next() + .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue)) +} + +/// Whether a character can start an identifier. +#[inline] +pub fn is_id_start(c: char) -> bool { + c.is_xid_start() || c == '_' +} + +/// Whether a character can continue an identifier. +#[inline] +pub fn is_id_continue(c: char) -> bool { + c.is_xid_continue() || c == '_' || c == '-' +} diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index ef2678d4c..aa28e1f50 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,11 +1,13 @@ -use super::{is_newline, resolve_raw, Scanner}; +use std::rc::Rc; + +use super::{ + is_id_continue, is_id_start, is_newline, resolve_hex, resolve_raw, resolve_string, + Scanner, +}; use crate::geom::{AngularUnit, LengthUnit}; -use crate::parse::resolve::{resolve_hex, resolve_string}; use crate::syntax::*; use crate::util::EcoString; -use std::rc::Rc; - /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { s: Scanner<'s>, @@ -55,6 +57,12 @@ impl<'s> Tokens<'s> { self.s.jump(index); } + /// The column of a given index in the source string. + #[inline] + pub fn column(&self, index: usize) -> usize { + self.s.column(index) + } + /// The underlying scanner. #[inline] pub fn scanner(&self) -> Scanner<'s> { @@ -237,10 +245,8 @@ impl<'s> Tokens<'s> { let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); if self.s.eat_if('}') { - if let Some(character) = resolve_hex(&sequence) { - NodeKind::UnicodeEscape(UnicodeEscapeData { - character, - }) + if let Some(c) = resolve_hex(&sequence) { + NodeKind::UnicodeEscape(c) } else { NodeKind::Error( ErrorPosition::Full, @@ -308,7 +314,8 @@ impl<'s> Tokens<'s> { } fn raw(&mut self) -> NodeKind { - let column = self.s.column(self.s.index() - 1); + let column = self.column(self.s.index() - 1); + let mut backticks = 1; while self.s.eat_if('`') && backticks < u8::MAX { backticks += 1; @@ -486,7 +493,7 @@ impl<'s> Tokens<'s> { } })); if self.s.eat_if('"') { - NodeKind::Str(StrData { string }) + NodeKind::Str(string) } else { NodeKind::Error(ErrorPosition::End, "expected quote".into()) } @@ -556,12 +563,13 @@ mod tests { use super::*; + use ErrorPosition::*; use NodeKind::*; use Option::None; use TokenMode::{Code, Markup}; - fn UnicodeEscape(character: char) -> NodeKind { - NodeKind::UnicodeEscape(UnicodeEscapeData { character }) + fn UnicodeEscape(c: char) -> NodeKind { + NodeKind::UnicodeEscape(c) } fn Error(pos: ErrorPosition, message: &str) -> NodeKind { @@ -577,24 +585,12 @@ mod tests { })) } - fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind { - match err_msg { - None => { - NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) - } - Some(msg) => NodeKind::Error( - ErrorPosition::End, - format!("expected closing {}", msg).into(), - ), - } + fn Math(formula: &str, display: bool) -> NodeKind { + NodeKind::Math(Rc::new(MathData { formula: formula.into(), display })) } - fn Str(string: &str, terminated: bool) -> NodeKind { - if terminated { - NodeKind::Str(StrData { string: string.into() }) - } else { - NodeKind::Error(ErrorPosition::End, "expected quote".into()) - } + fn Str(string: &str) -> NodeKind { + NodeKind::Str(string.into()) } fn Text(string: &str) -> NodeKind { @@ -659,7 +655,7 @@ mod tests { ('/', None, "//", LineComment), ('/', None, "/**/", BlockComment), ('/', Some(Markup), "*", Strong), - ('/', Some(Markup), "$ $", Math(" ", false, None)), + ('/', Some(Markup), "$ $", Math(" ", false)), ('/', Some(Markup), r"\\", Text("\\")), ('/', Some(Markup), "#let", Let), ('/', Some(Code), "(", LeftParen), @@ -781,16 +777,16 @@ mod tests { t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\"")); // Test basic unicode escapes. - t!(Markup: r"\u{}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence")); t!(Markup: r"\u{2603}" => UnicodeEscape('☃')); - t!(Markup: r"\u{P}" => Error(ErrorPosition::Full, "invalid unicode escape sequence")); + t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence")); // Test unclosed unicode escapes. - t!(Markup[" /"]: r"\u{" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{1" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{26A4" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup[" /"]: r"\u{1Q3P" => Error(ErrorPosition::End, "expected closing brace")); - t!(Markup: r"\u{1🏕}" => Error(ErrorPosition::End, "expected closing brace"), Text("🏕"), RightBrace); + t!(Markup[" /"]: r"\u{" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{26A4" => Error(End, "expected closing brace")); + t!(Markup[" /"]: r"\u{1Q3P" => Error(End, "expected closing brace")); + t!(Markup: r"\u{1🏕}" => Error(End, "expected closing brace"), Text("🏕"), RightBrace); } #[test] @@ -882,11 +878,11 @@ mod tests { // Test basic raw block. t!(Markup: "``" => Raw("", None, 1, false)); t!(Markup: "`raw`" => Raw("raw", None, 1, false)); - t!(Markup[""]: "`]" => Error(ErrorPosition::End, "expected 1 backtick")); + t!(Markup[""]: "`]" => Error(End, "expected 1 backtick")); // Test special symbols in raw block. t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(ErrorPosition::End, "expected 1 backtick")); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(End, "expected 1 backtick")); // Test separated closing backticks. t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false)); @@ -894,28 +890,28 @@ mod tests { // Test more backticks. t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false)); t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false)); - t!(Markup[""]: "`````👩‍🚀````noend" => Error(ErrorPosition::End, "expected 5 backticks")); + t!(Markup[""]: "`````👩‍🚀````noend" => Error(End, "expected 5 backticks")); t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false)); } #[test] fn test_tokenize_math_formulas() { // Test basic formula. - t!(Markup: "$$" => Math("", false, None)); - t!(Markup: "$x$" => Math("x", false, None)); - t!(Markup: r"$\\$" => Math(r"\\", false, None)); - t!(Markup: "$[x + y]$" => Math("x + y", true, None)); - t!(Markup: r"$[\\]$" => Math(r"\\", true, None)); + t!(Markup: "$$" => Math("", false)); + t!(Markup: "$x$" => Math("x", false)); + t!(Markup: r"$\\$" => Math(r"\\", false)); + t!(Markup: "$[x + y]$" => Math("x + y", true)); + t!(Markup: r"$[\\]$" => Math(r"\\", true)); // Test unterminated. - t!(Markup[""]: "$x" => Math("x", false, Some("dollar sign"))); - t!(Markup[""]: "$[x" => Math("x", true, Some("bracket and dollar sign"))); - t!(Markup[""]: "$[x]\n$" => Math("x]\n$", true, Some("bracket and dollar sign"))); + t!(Markup[""]: "$x" => Error(End, "expected closing dollar sign")); + t!(Markup[""]: "$[x" => Error(End, "expected closing bracket and dollar sign")); + t!(Markup[""]: "$[x]\n$" => Error(End, "expected closing bracket and dollar sign")); // Test escape sequences. - t!(Markup: r"$\$x$" => Math(r"\$x", false, None)); - t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true, None)); - t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, Some("bracket and dollar sign"))); + t!(Markup: r"$\$x$" => Math(r"\$x", false)); + t!(Markup: r"$[\\\]$]$" => Math(r"\\\]$", true)); + t!(Markup[""]: r"$[ ]\\$" => Error(End, "expected closing bracket and dollar sign")); } #[test] @@ -1003,16 +999,16 @@ mod tests { #[test] fn test_tokenize_strings() { // Test basic strings. - t!(Code: "\"hi\"" => Str("hi", true)); - t!(Code: "\"hi\nthere\"" => Str("hi\nthere", true)); - t!(Code: "\"🌎\"" => Str("🌎", true)); + t!(Code: "\"hi\"" => Str("hi")); + t!(Code: "\"hi\nthere\"" => Str("hi\nthere")); + t!(Code: "\"🌎\"" => Str("🌎")); // Test unterminated. - t!(Code[""]: "\"hi" => Str("hi", false)); + t!(Code[""]: "\"hi" => Error(End, "expected quote")); // Test escaped quote. - t!(Code: r#""a\"bc""# => Str("a\"bc", true)); - t!(Code[""]: r#""\""# => Str("\"", false)); + t!(Code: r#""a\"bc""# => Str("a\"bc")); + t!(Code[""]: r#""\""# => Error(End, "expected quote")); } #[test] diff --git a/src/source.rs b/src/source.rs index 3b7212514..46d6b84bb 100644 --- a/src/source.rs +++ b/src/source.rs @@ -11,7 +11,8 @@ use serde::{Deserialize, Serialize}; use crate::diag::TypResult; use crate::loading::{FileHash, Loader}; use crate::parse::{is_newline, parse, Scanner}; -use crate::syntax::{GreenNode, Markup, RedNode}; +use crate::syntax::ast::Markup; +use crate::syntax::{GreenNode, RedNode}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 6ca271a96..9ad04be58 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,7 +1,18 @@ -use super::{Ident, NodeKind, RedNode, RedRef, Span, TypedNode}; +//! A typed layer over the red-green tree. + +use std::ops::Deref; + +use super::{NodeKind, RedNode, RedRef, Span}; use crate::geom::{AngularUnit, LengthUnit}; +use crate::parse::is_ident; use crate::util::EcoString; +/// A typed AST node. +pub trait TypedNode: Sized { + /// Convert from a red node to a typed node. + fn from_red(value: RedRef) -> Option; +} + macro_rules! node { ($(#[$attr:meta])* $name:ident) => { node!{$(#[$attr])* $name => $name} @@ -13,7 +24,7 @@ macro_rules! node { pub struct $name(RedNode); impl TypedNode for $name { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { if node.kind() != &NodeKind::$variant { return None; } @@ -23,10 +34,12 @@ macro_rules! node { } impl $name { + /// The source code location. pub fn span(&self) -> Span { self.0.span() } + /// The underlying red node. pub fn underlying(&self) -> RedRef { self.0.as_ref() } @@ -40,7 +53,8 @@ node! { } impl Markup { - pub fn nodes<'a>(&'a self) -> impl Iterator + 'a { + /// The markup nodes. + pub fn nodes(&self) -> impl Iterator + '_ { self.0.children().filter_map(RedRef::cast) } } @@ -73,7 +87,7 @@ pub enum MarkupNode { } impl TypedNode for MarkupNode { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::Space(_) => Some(MarkupNode::Space), NodeKind::Linebreak => Some(MarkupNode::Linebreak), @@ -81,17 +95,14 @@ impl TypedNode for MarkupNode { NodeKind::Strong => Some(MarkupNode::Strong), NodeKind::Emph => Some(MarkupNode::Emph), NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), - NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())), - NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))), - NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))), - NodeKind::NonBreakingSpace => { - Some(MarkupNode::Text(EcoString::from("\u{00A0}"))) - } + NodeKind::UnicodeEscape(c) => Some(MarkupNode::Text((*c).into())), + NodeKind::EnDash => Some(MarkupNode::Text("\u{2013}".into())), + NodeKind::EmDash => Some(MarkupNode::Text("\u{2014}".into())), + NodeKind::NonBreakingSpace => Some(MarkupNode::Text("\u{00A0}".into())), NodeKind::Raw(_) => node.cast().map(MarkupNode::Raw), NodeKind::Heading => node.cast().map(MarkupNode::Heading), NodeKind::List => node.cast().map(MarkupNode::List), NodeKind::Enum => node.cast().map(MarkupNode::Enum), - NodeKind::Error(_, _) => None, _ => node.cast().map(MarkupNode::Expr), } } @@ -111,16 +122,16 @@ pub struct RawNode { } impl TypedNode for RawNode { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::Raw(raw) => { - let span = node.span(); - let start = span.start + raw.backticks as usize; + let full = node.span(); + let start = full.start + raw.backticks as usize; Some(Self { block: raw.block, - lang: raw.lang.as_ref().and_then(|x| { - let span = Span::new(span.source, start, start + x.len()); - Ident::new(x, span) + lang: raw.lang.as_ref().and_then(|lang| { + let span = Span::new(full.source, start, start + lang.len()); + Ident::new(lang, span) }), text: raw.text.clone(), }) @@ -272,7 +283,7 @@ impl Expr { } impl TypedNode for Expr { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::Ident(_) => node.cast().map(Self::Ident), NodeKind::Array => node.cast().map(Self::Array), @@ -325,7 +336,7 @@ pub enum Lit { } impl TypedNode for Lit { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::None => Some(Self::None(node.span())), NodeKind::Auto => Some(Self::Auto(node.span())), @@ -336,13 +347,14 @@ impl TypedNode for Lit { NodeKind::Angle(f, unit) => Some(Self::Angle(node.span(), *f, *unit)), NodeKind::Percentage(f) => Some(Self::Percent(node.span(), *f)), NodeKind::Fraction(f) => Some(Self::Fractional(node.span(), *f)), - NodeKind::Str(s) => Some(Self::Str(node.span(), s.string.clone())), + NodeKind::Str(s) => Some(Self::Str(node.span(), s.clone())), _ => None, } } } impl Lit { + /// The source code location. pub fn span(&self) -> Span { match self { Self::None(span) => *span, @@ -366,7 +378,7 @@ node! { impl ArrayExpr { /// The array items. - pub fn items<'a>(&'a self) -> impl Iterator + 'a { + pub fn items(&self) -> impl Iterator + '_ { self.0.children().filter_map(RedRef::cast) } } @@ -378,7 +390,7 @@ node! { impl DictExpr { /// The named dictionary items. - pub fn items<'a>(&'a self) -> impl Iterator + 'a { + pub fn items(&self) -> impl Iterator + '_ { self.0.children().filter_map(RedRef::cast) } } @@ -439,7 +451,7 @@ node! { impl BlockExpr { /// The list of expressions contained in the block. - pub fn exprs<'a>(&'a self) -> impl Iterator + 'a { + pub fn exprs(&self) -> impl Iterator + '_ { self.0.children().filter_map(RedRef::cast) } } @@ -477,7 +489,7 @@ pub enum UnOp { } impl TypedNode for UnOp { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { Self::from_token(node.kind()) } } @@ -581,7 +593,7 @@ pub enum BinOp { } impl TypedNode for BinOp { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { Self::from_token(node.kind()) } } @@ -709,7 +721,7 @@ node! { impl CallArgs { /// The positional and named arguments. - pub fn items<'a>(&'a self) -> impl Iterator + 'a { + pub fn items(&self) -> impl Iterator + '_ { self.0.children().filter_map(RedRef::cast) } } @@ -726,7 +738,7 @@ pub enum CallArg { } impl TypedNode for CallArg { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::Named => Some(CallArg::Named( node.cast().expect("named call argument is missing name"), @@ -767,7 +779,7 @@ impl ClosureExpr { } /// The parameter bindings. - pub fn params<'a>(&'a self) -> impl Iterator + 'a { + pub fn params(&self) -> impl Iterator + '_ { self.0 .children() .find(|x| x.kind() == &NodeKind::ClosureParams) @@ -805,10 +817,10 @@ pub enum ClosureParam { } impl TypedNode for ClosureParam { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { - NodeKind::Ident(i) => { - Some(ClosureParam::Pos(Ident::new(i, node.span()).unwrap())) + NodeKind::Ident(id) => { + Some(ClosureParam::Pos(Ident::new_unchecked(id, node.span()))) } NodeKind::Named => Some(ClosureParam::Named( node.cast().expect("named closure parameter is missing name"), @@ -921,7 +933,7 @@ pub enum Imports { } impl TypedNode for Imports { - fn cast_from(node: RedRef) -> Option { + fn from_red(node: RedRef) -> Option { match node.kind() { NodeKind::Star => Some(Imports::Wildcard), NodeKind::ImportItems => { @@ -1043,14 +1055,75 @@ node! { } impl ForPattern { + /// The key part of the pattern: index for arrays, name for dictionaries. pub fn key(&self) -> Option { - let mut items: Vec<_> = self.0.children().filter_map(RedRef::cast).collect(); - if items.len() > 1 { Some(items.remove(0)) } else { None } + let mut children = self.0.children().filter_map(RedRef::cast); + let key = children.next(); + if children.next().is_some() { key } else { None } } + /// The value part of the pattern. pub fn value(&self) -> Ident { self.0 .cast_last_child() .expect("for-in loop pattern is missing value") } } + +/// An unicode identifier with a few extra permissible characters. +/// +/// In addition to what is specified in the [Unicode Standard][uax31], we allow: +/// - `_` as a starting character, +/// - `_` and `-` as continuing characters. +/// +/// [uax31]: http://www.unicode.org/reports/tr31/ +#[derive(Debug, Clone, PartialEq)] +pub struct Ident { + /// The source code location. + pub span: Span, + /// The identifier string. + pub string: EcoString, +} + +impl Ident { + /// Create a new identifier from a string checking that it is a valid. + pub fn new( + string: impl AsRef + Into, + span: impl Into, + ) -> Option { + is_ident(string.as_ref()) + .then(|| Self { span: span.into(), string: string.into() }) + } + + /// Create a new identifier from a string and a span. + /// + /// The `string` must be a valid identifier. + #[track_caller] + pub fn new_unchecked(string: impl Into, span: Span) -> Self { + let string = string.into(); + debug_assert!(is_ident(&string), "`{}` is not a valid identifier", string); + Self { span, string } + } + + /// Return a reference to the underlying string. + pub fn as_str(&self) -> &str { + &self.string + } +} + +impl Deref for Ident { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +impl TypedNode for Ident { + fn from_red(node: RedRef) -> Option { + match node.kind() { + NodeKind::Ident(string) => Some(Ident::new_unchecked(string, node.span())), + _ => None, + } + } +} diff --git a/src/syntax/ident.rs b/src/syntax/ident.rs deleted file mode 100644 index f5cc63300..000000000 --- a/src/syntax/ident.rs +++ /dev/null @@ -1,94 +0,0 @@ -use std::borrow::Borrow; -use std::ops::Deref; - -use unicode_xid::UnicodeXID; - -use super::{NodeKind, RedRef, Span, TypedNode}; -use crate::util::EcoString; - -/// An unicode identifier with a few extra permissible characters. -/// -/// In addition to what is specified in the [Unicode Standard][uax31], we allow: -/// - `_` as a starting character, -/// - `_` and `-` as continuing characters. -/// -/// [uax31]: http://www.unicode.org/reports/tr31/ -#[derive(Debug, Clone, PartialEq)] -pub struct Ident { - /// The source code location. - pub span: Span, - /// The identifier string. - pub string: EcoString, -} - -impl Ident { - /// Create a new identifier from a string checking that it is a valid. - pub fn new( - string: impl AsRef + Into, - span: impl Into, - ) -> Option { - if is_ident(string.as_ref()) { - Some(Self { span: span.into(), string: string.into() }) - } else { - None - } - } - - /// Return a reference to the underlying string. - pub fn as_str(&self) -> &str { - self - } -} - -impl Deref for Ident { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.string.as_str() - } -} - -impl AsRef for Ident { - fn as_ref(&self) -> &str { - self - } -} - -impl Borrow for Ident { - fn borrow(&self) -> &str { - self - } -} - -impl From<&Ident> for EcoString { - fn from(ident: &Ident) -> Self { - ident.string.clone() - } -} - -impl TypedNode for Ident { - fn cast_from(node: RedRef) -> Option { - match node.kind() { - NodeKind::Ident(i) => Some(Ident::new(i, node.span()).unwrap()), - _ => None, - } - } -} - -/// Whether a string is a valid identifier. -pub fn is_ident(string: &str) -> bool { - let mut chars = string.chars(); - chars - .next() - .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue)) -} - -/// Whether a character can start an identifier. -pub fn is_id_start(c: char) -> bool { - c.is_xid_start() || c == '_' -} - -/// Whether a character can continue an identifier. -pub fn is_id_continue(c: char) -> bool { - c.is_xid_continue() || c == '_' || c == '-' -} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 61e0bb7e3..d26c64849 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,31 +1,28 @@ //! Syntax types. -mod ast; -mod ident; +pub mod ast; mod pretty; mod span; -use std::fmt; -use std::fmt::{Debug, Display, Formatter}; +use std::fmt::{self, Debug, Display, Formatter}; use std::mem; use std::rc::Rc; -pub use ast::*; -pub use ident::*; pub use pretty::*; pub use span::*; +use self::ast::TypedNode; use crate::diag::Error; use crate::geom::{AngularUnit, LengthUnit}; use crate::source::SourceId; use crate::util::EcoString; -/// Children of a [`GreenNode`]. +/// An inner of leaf node in the untyped green tree. #[derive(Clone, PartialEq)] pub enum Green { - /// A non-terminal node in an Rc. + /// A reference-counted inner node. Node(Rc), - /// A terminal owned token. + /// A terminal, owned token. Token(GreenData), } @@ -77,13 +74,12 @@ impl Debug for Green { f.debug_list().entries(&n.children).finish()?; } } - Ok(()) } } -/// A syntactical node. -#[derive(Clone, PartialEq)] +/// An inner node in the untyped green tree. +#[derive(Debug, Clone, PartialEq)] pub struct GreenNode { /// Node metadata. data: GreenData, @@ -122,15 +118,15 @@ impl From> for Green { } } -/// Data shared between [`GreenNode`]s and leaf nodes. -#[derive(Clone, PartialEq)] +/// Data shared between inner and leaf nodes. +#[derive(Debug, Clone, PartialEq)] pub struct GreenData { /// What kind of node this is (each kind would have its own struct in a /// strongly typed AST). kind: NodeKind, /// The byte length of the node in the source. len: usize, - /// Whether this node or any of its children are erroneous. + /// Whether this node or any of its children contain an error. erroneous: bool, } @@ -162,8 +158,9 @@ impl From for Green { } } -/// A borrowed wrapper for the [`GreenNode`] type that allows to access spans, -/// error lists and cast to an AST. +/// A borrowed wrapper for a [`GreenNode`] with span information. +/// +/// Borrowed variant of [`RedNode`]. Can be [cast](Self::cast) to an AST node. #[derive(Copy, Clone, PartialEq)] pub struct RedRef<'a> { id: SourceId, @@ -182,50 +179,27 @@ impl<'a> RedRef<'a> { } /// The type of the node. - pub fn kind(&self) -> &NodeKind { + pub fn kind(self) -> &'a NodeKind { self.green.kind() } - /// The span of the node. - pub fn span(&self) -> Span { - Span::new(self.id, self.offset, self.offset + self.green.len()) - } - /// The length of the node. - pub fn len(&self) -> usize { + pub fn len(self) -> usize { self.green.len() } - /// Convert the node to a typed AST node. - pub fn cast(self) -> Option - where - T: TypedNode, - { - T::cast_from(self) + /// The span of the node. + pub fn span(self) -> Span { + Span::new(self.id, self.offset, self.offset + self.green.len()) } /// Whether the node or its children contain an error. - pub fn erroneous(&self) -> bool { + pub fn erroneous(self) -> bool { self.green.erroneous() } - /// The node's children. - pub fn children(self) -> impl Iterator> + Clone { - let children = match &self.green { - Green::Node(node) => node.children(), - Green::Token(_) => &[], - }; - - let mut offset = self.offset; - children.iter().map(move |green| { - let child_offset = offset; - offset += green.len(); - RedRef { id: self.id, offset: child_offset, green } - }) - } - /// The error messages for this node and its descendants. - pub fn errors(&self) -> Vec { + pub fn errors(self) -> Vec { if !self.green.erroneous() { return vec![]; } @@ -248,19 +222,42 @@ impl<'a> RedRef<'a> { } } + /// Convert the node to a typed AST node. + pub fn cast(self) -> Option + where + T: TypedNode, + { + T::from_red(self) + } + + /// The node's children. + pub fn children(self) -> impl Iterator> { + let children = match &self.green { + Green::Node(node) => node.children(), + Green::Token(_) => &[], + }; + + let mut offset = self.offset; + children.iter().map(move |green| { + let child_offset = offset; + offset += green.len(); + RedRef { id: self.id, offset: child_offset, green } + }) + } + /// Get the first child of some type. - pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { + pub(crate) fn typed_child(self, kind: &NodeKind) -> Option> { self.children() .find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind)) } /// Get the first child that can cast to some AST type. - pub(crate) fn cast_first_child(&self) -> Option { + pub(crate) fn cast_first_child(self) -> Option { self.children().find_map(RedRef::cast) } /// Get the last child that can cast to some AST type. - pub(crate) fn cast_last_child(&self) -> Option { + pub(crate) fn cast_last_child(self) -> Option { self.children().filter_map(RedRef::cast).last() } } @@ -277,8 +274,9 @@ impl Debug for RedRef<'_> { } } -/// An owned wrapper for the [`GreenNode`] type that allows to access spans, -/// error lists and cast to an AST. +/// A owned wrapper for a [`GreenNode`] with span information. +/// +/// Owned variant of [`RedRef`]. Can be [cast](Self::cast) to an AST nodes. #[derive(Clone, PartialEq)] pub struct RedNode { id: SourceId, @@ -293,7 +291,7 @@ impl RedNode { } /// Convert to a borrowed representation. - pub fn as_ref<'a>(&'a self) -> RedRef<'a> { + pub fn as_ref(&self) -> RedRef<'_> { RedRef { id: self.id, offset: self.offset, @@ -301,9 +299,9 @@ impl RedNode { } } - /// The span of the node. - pub fn span(&self) -> Span { - self.as_ref().span() + /// The type of the node. + pub fn kind(&self) -> &NodeKind { + self.as_ref().kind() } /// The length of the node. @@ -311,29 +309,29 @@ impl RedNode { self.as_ref().len() } + /// The span of the node. + pub fn span(&self) -> Span { + self.as_ref().span() + } + + /// The error messages for this node and its descendants. + pub fn errors(&self) -> Vec { + self.as_ref().errors() + } + /// Convert the node to a typed AST node. pub fn cast(self) -> Option where T: TypedNode, { - T::cast_from(self.as_ref()) - } - - /// The type of the node. - pub fn kind(&self) -> &NodeKind { - self.green.kind() + self.as_ref().cast() } /// The children of the node. - pub fn children<'a>(&'a self) -> impl Iterator> + Clone { + pub fn children(&self) -> impl Iterator> { self.as_ref().children() } - /// The error messages for this node and its descendants. - pub fn errors<'a>(&'a self) -> Vec { - self.as_ref().errors() - } - /// Get the first child of some type. pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option { self.as_ref().typed_child(kind).map(RedRef::own) @@ -356,11 +354,10 @@ impl Debug for RedNode { } } -pub trait TypedNode: Sized { - /// Performs the conversion. - fn cast_from(value: RedRef) -> Option; -} - +/// All syntactical building blocks that can be part of a Typst document. +/// +/// Can be emitted as a token by the tokenizer or as part of a green node by +/// the parser. #[derive(Debug, Clone, PartialEq)] pub enum NodeKind { /// A left square bracket: `[`. @@ -469,7 +466,7 @@ pub enum NodeKind { EmDash, /// A slash and the letter "u" followed by a hexadecimal unicode entity /// enclosed in curly braces: `\u{1F5FA}`. - UnicodeEscape(UnicodeEscapeData), + UnicodeEscape(char), /// Strong text was enabled / disabled: `*`. Strong, /// Emphasized text was enabled / disabled: `_`. @@ -508,12 +505,12 @@ pub enum NodeKind { /// A percentage: `50%`. /// /// _Note_: `50%` is stored as `50.0` here, as in the corresponding - /// [literal](Lit::Percent). + /// [literal](ast::Lit::Percent). Percentage(f64), /// A fraction unit: `3fr`. Fraction(f64), /// A quoted string: `"..."`. - Str(StrData), + Str(EcoString), /// An array expression: `(1, "hi", 12cm)`. Array, /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`. @@ -572,24 +569,7 @@ pub enum NodeKind { Unknown(EcoString), } -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum ErrorPosition { - /// At the start of the node. - Start, - /// Over the full width of the node. - Full, - /// At the end of the node. - End, -} - -/// A quoted string token: `"..."`. -#[derive(Debug, Clone, PartialEq)] -pub struct StrData { - /// The string inside the quotes. - pub string: EcoString, -} - -/// A raw block token: `` `...` ``. +/// Payload of a raw block: `` `...` ``. #[derive(Debug, Clone, PartialEq)] pub struct RawData { /// The raw text in the block. @@ -602,7 +582,7 @@ pub struct RawData { pub block: bool, } -/// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`. +/// Payload of a math formula: `$2pi + x$` or `$[f'(x) = x^2]$`. #[derive(Debug, Clone, PartialEq)] pub struct MathData { /// The formula between the dollars. @@ -612,17 +592,15 @@ pub struct MathData { pub display: bool, } -/// A unicode escape sequence token: `\u{1F5FA}`. -#[derive(Debug, Clone, PartialEq)] -pub struct UnicodeEscapeData { - /// The resulting unicode character. - pub character: char, -} - -impl Display for NodeKind { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.pad(self.as_str()) - } +/// Where in a node an error should be annotated. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ErrorPosition { + /// At the start of the node. + Start, + /// Over the full width of the node. + Full, + /// At the end of the node. + End, } impl NodeKind { @@ -658,6 +636,7 @@ impl NodeKind { matches!(self, NodeKind::Error(_, _) | NodeKind::Unknown(_)) } + /// A human-readable name for the kind. pub fn as_str(&self) -> &'static str { match self { Self::LeftBracket => "opening bracket", @@ -764,3 +743,9 @@ impl NodeKind { } } } + +impl Display for NodeKind { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.pad(self.as_str()) + } +} diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index da0bdd443..b396a39c7 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -2,7 +2,7 @@ use std::fmt::{self, Arguments, Write}; -use super::*; +use super::ast::*; /// Pretty print an item and return the resulting string. pub fn pretty(item: &T) -> String diff --git a/src/syntax/span.rs b/src/syntax/span.rs index ee7cba4c2..c26011bdb 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; use std::fmt::{self, Debug, Formatter}; -use std::ops::{Add, Range}; +use std::ops::Range; use serde::{Deserialize, Serialize}; @@ -53,23 +53,19 @@ pub struct Span { /// The id of the source file. pub source: SourceId, /// The inclusive start position. - pub start: Pos, + pub start: usize, /// The inclusive end position. - pub end: Pos, + pub end: usize, } impl Span { /// Create a new span from start and end positions. - pub fn new(source: SourceId, start: impl Into, end: impl Into) -> Self { - Self { - source, - start: start.into(), - end: end.into(), - } + pub fn new(source: SourceId, start: usize, end: usize) -> Self { + Self { source, start, end } } /// Create a span including just a single position. - pub fn at(source: SourceId, pos: impl Into + Copy) -> Self { + pub fn at(source: SourceId, pos: usize) -> Self { Self::new(source, pos, pos) } @@ -77,19 +73,29 @@ impl Span { pub fn detached() -> Self { Self { source: SourceId::from_raw(0), - start: Pos::ZERO, - end: Pos::ZERO, + start: 0, + end: 0, } } /// Create a span with a different start position. - pub fn with_start(self, start: impl Into) -> Self { - Self { start: start.into(), ..self } + pub fn with_start(self, start: usize) -> Self { + Self { start, ..self } } /// Create a span with a different end position. - pub fn with_end(self, end: impl Into) -> Self { - Self { end: end.into(), ..self } + pub fn with_end(self, end: usize) -> Self { + Self { end, ..self } + } + + /// A new span at the position of this span's start. + pub fn at_start(&self) -> Span { + Self::at(self.source, self.start) + } + + /// A new span at the position of this span's end. + pub fn at_end(&self) -> Span { + Self::at(self.source, self.end) } /// Create a new span with the earlier start and later end position. @@ -110,28 +116,18 @@ impl Span { } /// Test whether a position is within the span. - pub fn contains_pos(&self, pos: Pos) -> bool { + pub fn contains(&self, pos: usize) -> bool { self.start <= pos && self.end >= pos } /// Test whether one span complete contains the other span. - pub fn contains(self, other: Self) -> bool { + pub fn surrounds(self, other: Self) -> bool { self.source == other.source && self.start <= other.start && self.end >= other.end } - /// Convert to a `Range` for indexing. + /// Convert to a `Range` for indexing. pub fn to_range(self) -> Range { - self.start.to_usize() .. self.end.to_usize() - } - - /// A new span at the position of this span's start. - pub fn at_start(&self) -> Span { - Self::at(self.source, self.start) - } - - /// A new span at the position of this span's end. - pub fn at_end(&self) -> Span { - Self::at(self.source, self.end) + self.start .. self.end } } @@ -150,77 +146,3 @@ impl PartialOrd for Span { } } } - -/// A byte position in source code. -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)] -pub struct Pos(pub u32); - -impl Pos { - /// The zero position. - pub const ZERO: Self = Self(0); - - /// Convert to a usize for indexing. - pub fn to_usize(self) -> usize { - self.0 as usize - } -} - -impl Debug for Pos { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - Debug::fmt(&self.0, f) - } -} - -impl From for Pos { - fn from(index: u32) -> Self { - Self(index) - } -} - -impl From for Pos { - fn from(index: usize) -> Self { - Self(index as u32) - } -} - -impl Add for Pos -where - T: Into, -{ - type Output = Self; - - fn add(self, rhs: T) -> Self { - Pos(self.0 + rhs.into().0) - } -} - -/// Convert a position or range into a span. -pub trait IntoSpan { - /// Convert into a span by providing the source id. - fn into_span(self, source: SourceId) -> Span; -} - -impl IntoSpan for Span { - fn into_span(self, source: SourceId) -> Span { - debug_assert_eq!(self.source, source); - self - } -} - -impl IntoSpan for Pos { - fn into_span(self, source: SourceId) -> Span { - Span::new(source, self, self) - } -} - -impl IntoSpan for usize { - fn into_span(self, source: SourceId) -> Span { - Span::new(source, self, self) - } -} - -impl IntoSpan for Range { - fn into_span(self, source: SourceId) -> Span { - Span::new(source, self.start, self.end) - } -} diff --git a/tests/typeset.rs b/tests/typeset.rs index bde383c40..68e56343c 100644 --- a/tests/typeset.rs +++ b/tests/typeset.rs @@ -24,7 +24,7 @@ use typst::loading::FsLoader; use typst::parse::Scanner; use typst::source::SourceFile; use typst::style::Style; -use typst::syntax::{Pos, Span}; +use typst::syntax::Span; use typst::Context; const TYP_DIR: &str = "./typ"; @@ -355,12 +355,12 @@ fn parse_metadata(source: &SourceFile) -> (Option, Vec) { let comments = lines[i ..].iter().take_while(|line| line.starts_with("//")).count(); - let pos = |s: &mut Scanner| -> Pos { + let pos = |s: &mut Scanner| -> usize { let first = num(s) - 1; let (delta, column) = if s.eat_if(':') { (first, num(s) - 1) } else { (0, first) }; let line = (i + comments) + delta; - source.line_column_to_byte(line, column).unwrap().into() + source.line_column_to_byte(line, column).unwrap() }; let mut s = Scanner::new(rest); @@ -375,10 +375,10 @@ fn parse_metadata(source: &SourceFile) -> (Option, Vec) { } fn print_error(source: &SourceFile, line: usize, error: &Error) { - let start_line = 1 + line + source.byte_to_line(error.span.start.to_usize()).unwrap(); - let start_col = 1 + source.byte_to_column(error.span.start.to_usize()).unwrap(); - let end_line = 1 + line + source.byte_to_line(error.span.end.to_usize()).unwrap(); - let end_col = 1 + source.byte_to_column(error.span.end.to_usize()).unwrap(); + let start_line = 1 + line + source.byte_to_line(error.span.start).unwrap(); + let start_col = 1 + source.byte_to_column(error.span.start).unwrap(); + let end_line = 1 + line + source.byte_to_line(error.span.end).unwrap(); + let end_col = 1 + source.byte_to_column(error.span.end).unwrap(); println!( "Error: {}:{}-{}:{}: {}", start_line, start_col, end_line, end_col, error.message