//! Syntax types. pub mod ast; mod highlight; mod span; use std::fmt::{self, Debug, Display, Formatter}; use std::hash::{Hash, Hasher}; use std::ops::Range; use std::sync::Arc; pub use highlight::*; pub use span::*; use self::ast::{MathNode, RawNode, TypedNode, Unit}; use crate::diag::Error; use crate::source::SourceId; use crate::util::EcoString; /// An inner or leaf node in the untyped syntax tree. #[derive(Clone, PartialEq, Hash)] pub enum SyntaxNode { /// A reference-counted inner node. Inner(Arc), /// A leaf token. Leaf(NodeData), } impl SyntaxNode { /// The metadata of the node. pub fn data(&self) -> &NodeData { match self { Self::Inner(inner) => &inner.data, Self::Leaf(leaf) => leaf, } } /// The type of the node. pub fn kind(&self) -> &NodeKind { self.data().kind() } /// The length of the node. pub fn len(&self) -> usize { self.data().len() } /// The number of descendants, including the node itself. pub fn descendants(&self) -> usize { match self { Self::Inner(inner) => inner.descendants(), Self::Leaf(_) => 1, } } /// The span of the node. pub fn span(&self) -> Span { self.data().span() } /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { match self { Self::Inner(node) => node.erroneous, Self::Leaf(data) => data.kind.is_error(), } } /// The error messages for this node and its descendants. pub fn errors(&self) -> Vec { if !self.erroneous() { return vec![]; } match self.kind() { &NodeKind::Error(pos, ref message) => { vec![Error::new(self.span().with_pos(pos), message)] } _ => self .children() .filter(|node| node.erroneous()) .flat_map(|node| node.errors()) .collect(), } } /// The node's children. pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { match self { Self::Inner(inner) => inner.children(), Self::Leaf(_) => [].iter(), } } /// Convert the node to a typed AST node. pub fn cast(&self) -> Option where T: TypedNode, { T::from_untyped(self) } /// Get the first child that can cast to the AST type `T`. pub fn cast_first_child(&self) -> Option { self.children().find_map(Self::cast) } /// Get the last child that can cast to the AST type `T`. pub fn cast_last_child(&self) -> Option { self.children().rev().find_map(Self::cast) } /// Change the type of the node. pub fn convert(&mut self, kind: NodeKind) { match self { Self::Inner(inner) => { let node = Arc::make_mut(inner); node.erroneous |= kind.is_error(); node.data.kind = kind; } Self::Leaf(leaf) => leaf.kind = kind, } } /// Set a synthetic span for the node and all its descendants. pub fn synthesize(&mut self, span: Span) { match self { Self::Inner(inner) => Arc::make_mut(inner).synthesize(span), Self::Leaf(leaf) => leaf.synthesize(span), } } /// Assign spans to each node. pub fn numberize(&mut self, id: SourceId, within: Range) -> NumberingResult { match self { Self::Inner(inner) => Arc::make_mut(inner).numberize(id, None, within), Self::Leaf(leaf) => leaf.numberize(id, within), } } /// The upper bound of assigned numbers in this subtree. pub fn upper(&self) -> u64 { match self { Self::Inner(inner) => inner.upper(), Self::Leaf(leaf) => leaf.span().number() + 1, } } /// If the span points into this node, convert it to a byte range. pub fn range(&self, span: Span, offset: usize) -> Option> { match self { Self::Inner(inner) => inner.range(span, offset), Self::Leaf(leaf) => leaf.range(span, offset), } } /// Returns all leaf descendants of this node (may include itself). /// /// This method is slow and only intended for testing. pub fn leafs(&self) -> Vec { if match self { Self::Inner(inner) => inner.children.is_empty(), Self::Leaf(_) => true, } { vec![self.clone()] } else { self.children().flat_map(Self::leafs).collect() } } } impl Default for SyntaxNode { fn default() -> Self { Self::Leaf(NodeData::new(NodeKind::None, 0)) } } impl Debug for SyntaxNode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Self::Inner(node) => node.fmt(f), Self::Leaf(token) => token.fmt(f), } } } /// An inner node in the untyped syntax tree. #[derive(Clone, Hash)] pub struct InnerNode { /// Node metadata. data: NodeData, /// The number of nodes in the whole subtree, including this node. descendants: usize, /// Whether this node or any of its children are erroneous. erroneous: bool, /// The upper bound of this node's numbering range. upper: u64, /// This node's children, losslessly make up this node. children: Vec, } impl InnerNode { /// Creates a new node with the given kind and a single child. pub fn with_child(kind: NodeKind, child: impl Into) -> Self { Self::with_children(kind, vec![child.into()]) } /// Creates a new node with the given kind and children. pub fn with_children(kind: NodeKind, children: Vec) -> Self { let mut len = 0; let mut descendants = 1; let mut erroneous = kind.is_error(); for child in &children { len += child.len(); descendants += child.descendants(); erroneous |= child.erroneous(); } Self { data: NodeData::new(kind, len), descendants, erroneous, upper: 0, children, } } /// The node's metadata. pub fn data(&self) -> &NodeData { &self.data } /// The node's type. pub fn kind(&self) -> &NodeKind { self.data().kind() } /// The node's length. pub fn len(&self) -> usize { self.data().len() } /// The node's span. pub fn span(&self) -> Span { self.data().span() } /// The number of descendants, including the node itself. pub fn descendants(&self) -> usize { self.descendants } /// The node's children. pub fn children(&self) -> std::slice::Iter<'_, SyntaxNode> { self.children.iter() } /// Set a synthetic span for the node and all its descendants. pub fn synthesize(&mut self, span: Span) { self.data.synthesize(span); for child in &mut self.children { child.synthesize(span); } } /// Assign span numbers `within` an interval to this node's subtree or just /// a `range` of its children. pub fn numberize( &mut self, id: SourceId, range: Option>, within: Range, ) -> NumberingResult { // Determine how many nodes we will number. let descendants = match &range { Some(range) if range.is_empty() => return Ok(()), Some(range) => self.children[range.clone()] .iter() .map(SyntaxNode::descendants) .sum::(), None => self.descendants, }; // Determine the distance between two neighbouring assigned numbers. If // possible, we try to fit all numbers into the left half of `within` // so that there is space for future insertions. let space = within.end - within.start; let mut stride = space / (2 * descendants as u64); if stride == 0 { stride = space / self.descendants as u64; if stride == 0 { return Err(Unnumberable); } } // Number this node itself. let mut start = within.start; if range.is_none() { let end = start + stride; self.data.numberize(id, start .. end)?; self.upper = within.end; start = end; } // Number the children. let len = self.children.len(); for child in &mut self.children[range.unwrap_or(0 .. len)] { let end = start + child.descendants() as u64 * stride; child.numberize(id, start .. end)?; start = end; } Ok(()) } /// The upper bound of assigned numbers in this subtree. pub fn upper(&self) -> u64 { self.upper } /// If the span points into this node, convert it to a byte range. pub fn range(&self, span: Span, mut offset: usize) -> Option> { // Check whether we found it. if let Some(range) = self.data.range(span, offset) { return Some(range); } // The parent of a subtree has a smaller span number than all of its // descendants. Therefore, we can bail out early if the target span's // number is smaller than our number. if span.number() < self.span().number() { return None; } let mut children = self.children.iter().peekable(); while let Some(child) = children.next() { // Every node in this child's subtree has a smaller span number than // the next sibling. Therefore we only need to recurse if the next // sibling's span number is larger than the target span's number. if children .peek() .map_or(true, |next| next.span().number() > span.number()) { if let Some(range) = child.range(span, offset) { return Some(range); } } offset += child.len(); } None } /// The node's children, mutably. pub(crate) fn children_mut(&mut self) -> &mut [SyntaxNode] { &mut self.children } /// Replaces a range of children with a replacement. /// /// May have mutated the children if it returns `Err(_)`. pub(crate) fn replace_children( &mut self, mut range: Range, replacement: Vec, ) -> NumberingResult { let superseded = &self.children[range.clone()]; // Compute the new byte length. self.data.len = self.data.len + replacement.iter().map(SyntaxNode::len).sum::() - superseded.iter().map(SyntaxNode::len).sum::(); // Compute the new number of descendants. self.descendants = self.descendants + replacement.iter().map(SyntaxNode::descendants).sum::() - superseded.iter().map(SyntaxNode::descendants).sum::(); // Determine whether we're still erroneous after the replacement. That's // the case if // - any of the new nodes is erroneous, // - or if we were erroneous before due to a non-superseded node. self.erroneous = replacement.iter().any(SyntaxNode::erroneous) || (self.erroneous && (self.children[.. range.start].iter().any(SyntaxNode::erroneous)) || self.children[range.end ..].iter().any(SyntaxNode::erroneous)); // Perform the replacement. let replacement_count = replacement.len(); self.children.splice(range.clone(), replacement); range.end = range.start + replacement_count; // Renumber the new children. Retries until it works, taking // exponentially more children into account. let mut left = 0; let mut right = 0; let max_left = range.start; let max_right = self.children.len() - range.end; loop { let renumber = range.start - left .. range.end + right; // The minimum assignable number is either // - the upper bound of the node right before the to-be-renumbered // children, // - or this inner node's span number plus one if renumbering starts // at the first child. let start_number = renumber .start .checked_sub(1) .and_then(|i| self.children.get(i)) .map_or(self.span().number() + 1, |child| child.upper()); // The upper bound for renumbering is either // - the span number of the first child after the to-be-renumbered // children, // - or this node's upper bound if renumbering ends behind the last // child. let end_number = self .children .get(renumber.end) .map_or(self.upper(), |next| next.span().number()); // Try to renumber. let within = start_number .. end_number; let id = self.span().source(); if self.numberize(id, Some(renumber), within).is_ok() { return Ok(()); } // If it didn't even work with all children, we give up. if left == max_left && right == max_right { return Err(Unnumberable); } // Exponential expansion to both sides. left = (left + 1).next_power_of_two().min(max_left); right = (right + 1).next_power_of_two().min(max_right); } } /// Update this node after changes were made to one of its children. pub(crate) fn update_parent( &mut self, prev_len: usize, new_len: usize, prev_descendants: usize, new_descendants: usize, ) { self.data.len = self.data.len + new_len - prev_len; self.descendants = self.descendants + new_descendants - prev_descendants; self.erroneous = self.children.iter().any(SyntaxNode::erroneous); } } impl From for SyntaxNode { fn from(node: InnerNode) -> Self { Arc::new(node).into() } } impl From> for SyntaxNode { fn from(node: Arc) -> Self { Self::Inner(node) } } impl Debug for InnerNode { fn fmt(&self, f: &mut Formatter) -> fmt::Result { self.data.fmt(f)?; if !self.children.is_empty() { f.write_str(" ")?; f.debug_list().entries(&self.children).finish()?; } Ok(()) } } impl PartialEq for InnerNode { fn eq(&self, other: &Self) -> bool { self.data == other.data && self.descendants == other.descendants && self.erroneous == other.erroneous && self.children == other.children } } /// Data shared between inner and leaf nodes. #[derive(Clone, Hash)] pub struct NodeData { /// What kind of node this is (each kind would have its own struct in a /// strongly typed AST). kind: NodeKind, /// The byte length of the node in the source. len: usize, /// The node's span. span: Span, } impl NodeData { /// Create new node metadata. pub fn new(kind: NodeKind, len: usize) -> Self { Self { len, kind, span: Span::detached() } } /// The node's type. pub fn kind(&self) -> &NodeKind { &self.kind } /// The node's length. pub fn len(&self) -> usize { self.len } /// The node's span. pub fn span(&self) -> Span { self.span } /// Set a synthetic span for the node. pub fn synthesize(&mut self, span: Span) { self.span = span; } /// Assign a span to the node. pub fn numberize(&mut self, id: SourceId, within: Range) -> NumberingResult { if within.start < within.end { self.span = Span::new(id, (within.start + within.end) / 2); Ok(()) } else { Err(Unnumberable) } } /// If the span points into this node, convert it to a byte range. pub fn range(&self, span: Span, offset: usize) -> Option> { (span.with_pos(SpanPos::Full) == self.span).then(|| { let end = offset + self.len(); match span.pos() { SpanPos::Full => offset .. end, SpanPos::Start => offset .. offset, SpanPos::End => end .. end, } }) } } impl From for SyntaxNode { fn from(token: NodeData) -> Self { Self::Leaf(token) } } impl Debug for NodeData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{:?}: {}", self.kind, self.len) } } impl PartialEq for NodeData { fn eq(&self, other: &Self) -> bool { self.kind == other.kind && self.len == other.len } } /// All syntactical building blocks that can be part of a Typst document. /// /// Can be emitted as a token by the tokenizer or as part of a syntax node by /// the parser. #[derive(Debug, Clone, PartialEq)] pub enum NodeKind { /// A left curly brace, starting a code block: `{`. LeftBrace, /// A right curly brace, terminating a code block: `}`. RightBrace, /// A left square bracket, starting a content block: `[`. LeftBracket, /// A right square bracket, terminating a content block: `]`. RightBracket, /// A left round parenthesis, starting a grouped expression, collection, /// argument or parameter list: `(`. LeftParen, /// A right round parenthesis, terminating a grouped expression, collection, /// argument or parameter list: `)`. RightParen, /// The strong text toggle, multiplication operator, and wildcard import /// symbol: `*`. Star, /// Toggles emphasized text: `_`. Underscore, /// A comma separator in a sequence: `,`. Comma, /// A semicolon terminating an expression: `;`. Semicolon, /// A colon between name / key and value in a dictionary, argument or /// parameter list: `:`. Colon, /// The unary plus and addition operator: `+`. Plus, /// The unary negation and subtraction operator: `-`. Minus, /// The division operator: `/`. Slash, /// A field access and method call operator: `.`. Dot, /// The assignment operator: `=`. Eq, /// The equality operator: `==`. EqEq, /// The inequality operator: `!=`. ExclEq, /// The less-than operator: `<`. Lt, /// The less-than or equal operator: `<=`. LtEq, /// The greater-than operator: `>`. Gt, /// The greater-than or equal operator: `>=`. GtEq, /// The add-assign operator: `+=`. PlusEq, /// The subtract-assign operator: `-=`. HyphEq, /// The multiply-assign operator: `*=`. StarEq, /// The divide-assign operator: `/=`. SlashEq, /// The `not` operator. Not, /// The `and` operator. And, /// The `or` operator. Or, /// The spread operator: `..`. Dots, /// An arrow between a closure's parameters and body: `=>`. Arrow, /// The none literal: `none`. None, /// The auto literal: `auto`. Auto, /// The `let` keyword. Let, /// The `set` keyword. Set, /// The `show` keyword. Show, /// The `wrap` keyword. Wrap, /// The `if` keyword. If, /// The `else` keyword. Else, /// The `for` keyword. For, /// The `in` keyword. In, /// The `while` keyword. While, /// The `break` keyword. Break, /// The `continue` keyword. Continue, /// The `return` keyword. Return, /// The `import` keyword. Import, /// The `include` keyword. Include, /// The `from` keyword. From, /// The `as` keyword. As, /// Markup of which all lines must have a minimal indentation. /// /// Notably, the number does not determine in which column the markup /// started, but to the right of which column all markup elements must be, /// so it is zero except for headings and lists. Markup { min_indent: usize }, /// One or more whitespace characters. Single spaces are collapsed into text /// nodes if they would otherwise be surrounded by text nodes. /// /// Also stores how many newlines are contained. Space { newlines: usize }, /// Consecutive text without markup. While basic text with just single /// spaces is collapsed into a single node, certain symbols that could /// possibly be markup force text into multiple nodes. Text(EcoString), /// A forced line break: `\` or `\+` if justified. Linebreak { justified: bool }, /// A non-breaking space: `~`. NonBreakingSpace, /// A soft hyphen: `-?`. Shy, /// An en-dash: `--`. EnDash, /// An em-dash: `---`. EmDash, /// An ellipsis: `...`. Ellipsis, /// A smart quote: `'` or `"`. Quote { double: bool }, /// A slash and the letter "u" followed by a hexadecimal unicode entity /// enclosed in curly braces: `\u{1F5FA}`. Escape(char), /// Strong content: `*Strong*`. Strong, /// Emphasized content: `_Emphasized_`. Emph, /// A raw block with optional syntax highlighting: `` `...` ``. Raw(Arc), /// A math formula: `$x$`, `$[x^2]$`. Math(Arc), /// A section heading: `= Introduction`. Heading, /// An item in an unordered list: `- ...`. List, /// An item in an enumeration (ordered list): `1. ...`. Enum, /// A numbering: `23.`. /// /// Can also exist without the number: `.`. EnumNumbering(Option), /// A label: `