Reorganize syntax module

This commit is contained in:
Martin Haug 2021-11-01 10:57:45 +01:00
parent 2e7d359e59
commit 7d34a548cc
5 changed files with 626 additions and 634 deletions

View File

@ -520,7 +520,7 @@ fn dict(p: &mut Parser, items: usize) {
p.child_count() - items, p.child_count() - items,
|x| { |x| {
x.kind() == &NodeKind::Named x.kind() == &NodeKind::Named
|| x.kind().is_parenthesis() || x.kind().is_paren()
|| x.kind() == &NodeKind::Comma || x.kind() == &NodeKind::Comma
|| x.kind() == &NodeKind::Colon || x.kind() == &NodeKind::Colon
}, },
@ -550,7 +550,7 @@ fn params(p: &mut Parser, count: usize, allow_parens: bool) {
), ),
_ => false, _ => false,
} }
|| (allow_parens && x.kind().is_parenthesis()), || (allow_parens && x.kind().is_paren()),
|_| (ErrorPosition::Full, "expected identifier".into()), |_| (ErrorPosition::Full, "expected identifier".into()),
); );
} }

View File

@ -1,8 +1,164 @@
use super::{Ident, Markup, NodeKind, RedNode, RedRef, Span, TypedNode}; use super::{Ident, NodeKind, RedNode, RedRef, Span, TypedNode};
use crate::geom::{AngularUnit, LengthUnit}; use crate::geom::{AngularUnit, LengthUnit};
use crate::node; use crate::node;
use crate::util::EcoString; use crate::util::EcoString;
node! {
/// The syntactical root capable of representing a full parsed document.
Markup
}
impl Markup {
pub fn nodes<'a>(&'a self) -> impl Iterator<Item = MarkupNode> + 'a {
self.0.children().filter_map(RedRef::cast)
}
}
/// A single piece of markup.
#[derive(Debug, Clone, PartialEq)]
pub enum MarkupNode {
/// Whitespace containing less than two newlines.
Space,
/// A forced line break: `\`.
Linebreak,
/// A paragraph break: Two or more newlines.
Parbreak,
/// Strong text was enabled / disabled: `*`.
Strong,
/// Emphasized text was enabled / disabled: `_`.
Emph,
/// Plain text.
Text(EcoString),
/// A raw block with optional syntax highlighting: `` `...` ``.
Raw(RawNode),
/// A section heading: `= Introduction`.
Heading(HeadingNode),
/// An item in an unordered list: `- ...`.
List(ListNode),
/// An item in an enumeration (ordered list): `1. ...`.
Enum(EnumNode),
/// An expression.
Expr(Expr),
}
impl TypedNode for MarkupNode {
fn cast_from(node: RedRef) -> Option<Self> {
match node.kind() {
NodeKind::Space(_) => Some(MarkupNode::Space),
NodeKind::Linebreak => Some(MarkupNode::Linebreak),
NodeKind::Parbreak => Some(MarkupNode::Parbreak),
NodeKind::Strong => Some(MarkupNode::Strong),
NodeKind::Emph => Some(MarkupNode::Emph),
NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())),
NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())),
NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))),
NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))),
NodeKind::NonBreakingSpace => {
Some(MarkupNode::Text(EcoString::from("\u{00A0}")))
}
NodeKind::Raw(_) => node.cast().map(MarkupNode::Raw),
NodeKind::Heading => node.cast().map(MarkupNode::Heading),
NodeKind::List => node.cast().map(MarkupNode::List),
NodeKind::Enum => node.cast().map(MarkupNode::Enum),
NodeKind::Error(_, _) => None,
_ => node.cast().map(MarkupNode::Expr),
}
}
}
/// A raw block with optional syntax highlighting: `` `...` ``.
#[derive(Debug, Clone, PartialEq)]
pub struct RawNode {
    /// The identifier naming the language to syntax-highlight in, if one was
    /// given and it forms a valid [`Ident`].
    pub lang: Option<Ident>,
    /// The raw text between the backticks, with trimming applied; it is copied
    /// unchanged from the underlying token.
    /// NOTE(review): the trimming rules are not spelled out next to this field.
    pub text: EcoString,
    /// Whether the element is block-level, that is, it has 3+ backticks
    /// and contains at least one newline.
    pub block: bool,
}

impl TypedNode for RawNode {
    /// Builds a `RawNode` from a node of kind [`NodeKind::Raw`]; any other
    /// kind yields `None`.
    fn cast_from(node: RedRef) -> Option<Self> {
        let raw = match node.kind() {
            NodeKind::Raw(raw) => raw,
            _ => return None,
        };

        // The language tag's span starts right after the opening backticks
        // and covers the tag's byte length.
        let outer = node.span();
        let lang_start = outer.start + raw.backticks as usize;
        let lang = raw.lang.as_ref().and_then(|tag| {
            let tag_span = Span::new(outer.source, lang_start, lang_start + tag.len());
            Ident::new(tag, tag_span)
        });

        Some(Self {
            lang,
            text: raw.text.clone(),
            block: raw.block,
        })
    }
}
node! {
    /// A section heading: `= Introduction`.
    Heading => HeadingNode
}

impl HeadingNode {
    /// The markup that forms the heading's contents.
    ///
    /// # Panics
    /// Panics with "heading node is missing markup body" if
    /// `cast_first_child` yields `None`.
    pub fn body(&self) -> Markup {
        let body: Option<Markup> = self.0.cast_first_child();
        body.expect("heading node is missing markup body")
    }

    /// The section depth (number of equals signs).
    ///
    /// # Panics
    /// Panics with "heading node is missing heading level" if no child has
    /// the kind [`NodeKind::HeadingLevel`].
    pub fn level(&self) -> u8 {
        let depth = self.0.children().find_map(|child| {
            if let NodeKind::HeadingLevel(depth) = child.kind() {
                Some(*depth)
            } else {
                None
            }
        });
        depth.expect("heading node is missing heading level")
    }
}
node! {
    /// A single item of an unordered list: `- ...`.
    List => ListNode
}

impl ListNode {
    /// The markup that forms the item's contents.
    ///
    /// # Panics
    /// Panics with "list node is missing body" if `cast_first_child` yields
    /// `None`.
    pub fn body(&self) -> Markup {
        let body: Option<Markup> = self.0.cast_first_child();
        body.expect("list node is missing body")
    }
}
node! {
    /// An item in an enumeration (ordered list): `1. ...`.
    Enum => EnumNode
}

impl EnumNode {
    /// The contents of the list item.
    ///
    /// # Panics
    /// Panics with "enumeration node is missing body" if `cast_first_child`
    /// yields `None`.
    pub fn body(&self) -> Markup {
        self.0.cast_first_child().expect("enumeration node is missing body")
    }

    /// The number, if any.
    ///
    /// Returns the payload of the first child of kind
    /// [`NodeKind::EnumNumbering`]; that payload is `None` when the numbering
    /// was written without a number.
    ///
    /// # Panics
    /// Panics with "enumeration node is missing number" if no child has the
    /// kind [`NodeKind::EnumNumbering`].
    pub fn number(&self) -> Option<usize> {
        self.0
            .children()
            .find_map(|node| match node.kind() {
                // `Option<usize>` is `Copy`: dereference rather than clone,
                // matching how `HeadingNode::level` reads its payload.
                NodeKind::EnumNumbering(num) => Some(*num),
                _ => None,
            })
            .expect("enumeration node is missing number")
    }
}
/// An expression. /// An expression.
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Expr { pub enum Expr {

View File

@ -1,159 +0,0 @@
use super::{Expr, Ident, NodeKind, RedNode, RedRef, Span, TypedNode};
use crate::node;
use crate::util::EcoString;
node! {
/// The syntactical root capable of representing a full parsed document.
Markup
}
impl Markup {
pub fn nodes<'a>(&'a self) -> impl Iterator<Item = MarkupNode> + 'a {
self.0.children().filter_map(RedRef::cast)
}
}
/// A single piece of markup.
#[derive(Debug, Clone, PartialEq)]
pub enum MarkupNode {
/// Whitespace containing less than two newlines.
Space,
/// A forced line break: `\`.
Linebreak,
/// A paragraph break: Two or more newlines.
Parbreak,
/// Strong text was enabled / disabled: `*`.
Strong,
/// Emphasized text was enabled / disabled: `_`.
Emph,
/// Plain text.
Text(EcoString),
/// A raw block with optional syntax highlighting: `` `...` ``.
Raw(RawNode),
/// A section heading: `= Introduction`.
Heading(HeadingNode),
/// An item in an unordered list: `- ...`.
List(ListNode),
/// An item in an enumeration (ordered list): `1. ...`.
Enum(EnumNode),
/// An expression.
Expr(Expr),
}
impl TypedNode for MarkupNode {
fn cast_from(node: RedRef) -> Option<Self> {
match node.kind() {
NodeKind::Space(_) => Some(MarkupNode::Space),
NodeKind::Linebreak => Some(MarkupNode::Linebreak),
NodeKind::Parbreak => Some(MarkupNode::Parbreak),
NodeKind::Strong => Some(MarkupNode::Strong),
NodeKind::Emph => Some(MarkupNode::Emph),
NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())),
NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())),
NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))),
NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))),
NodeKind::NonBreakingSpace => {
Some(MarkupNode::Text(EcoString::from("\u{00A0}")))
}
NodeKind::Raw(_) => node.cast().map(MarkupNode::Raw),
NodeKind::Heading => node.cast().map(MarkupNode::Heading),
NodeKind::List => node.cast().map(MarkupNode::List),
NodeKind::Enum => node.cast().map(MarkupNode::Enum),
NodeKind::Error(_, _) => None,
_ => node.cast().map(MarkupNode::Expr),
}
}
}
/// A raw block with optional syntax highlighting: `` `...` ``.
#[derive(Debug, Clone, PartialEq)]
pub struct RawNode {
/// An optional identifier specifying the language to syntax-highlight in.
pub lang: Option<Ident>,
/// The raw text, determined as the raw string between the backticks trimmed
/// according to the above rules.
pub text: EcoString,
/// Whether the element is block-level, that is, it has 3+ backticks
/// and contains at least one newline.
pub block: bool,
}
impl TypedNode for RawNode {
fn cast_from(node: RedRef) -> Option<Self> {
match node.kind() {
NodeKind::Raw(raw) => {
let span = node.span();
let start = span.start + raw.backticks as usize;
Some(Self {
block: raw.block,
lang: raw.lang.as_ref().and_then(|x| {
let span = Span::new(span.source, start, start + x.len());
Ident::new(x, span)
}),
text: raw.text.clone(),
})
}
_ => None,
}
}
}
node! {
/// A section heading: `= Introduction`.
Heading => HeadingNode
}
impl HeadingNode {
/// The contents of the heading.
pub fn body(&self) -> Markup {
self.0
.cast_first_child()
.expect("heading node is missing markup body")
}
/// The section depth (numer of equals signs).
pub fn level(&self) -> u8 {
self.0
.children()
.find_map(|node| match node.kind() {
NodeKind::HeadingLevel(heading) => Some(*heading),
_ => None,
})
.expect("heading node is missing heading level")
}
}
node! {
/// An item in an unordered list: `- ...`.
List => ListNode
}
impl ListNode {
/// The contents of the list item.
pub fn body(&self) -> Markup {
self.0.cast_first_child().expect("list node is missing body")
}
}
node! {
/// An item in an enumeration (ordered list): `1. ...`.
Enum => EnumNode
}
impl EnumNode {
/// The contents of the list item.
pub fn body(&self) -> Markup {
self.0.cast_first_child().expect("enumeration node is missing body")
}
/// The number, if any.
pub fn number(&self) -> Option<usize> {
self.0
.children()
.find_map(|node| match node.kind() {
NodeKind::EnumNumbering(num) => Some(num.clone()),
_ => None,
})
.expect("enumeration node is missing number")
}
}

View File

@ -1,450 +1,38 @@
//! Syntax types. //! Syntax types.
mod expr; mod ast;
mod ident; mod ident;
mod markup;
mod pretty; mod pretty;
mod span; mod span;
mod token;
use std::fmt; use std::fmt;
use std::fmt::{Debug, Display, Formatter}; use std::fmt::{Debug, Display, Formatter};
use std::mem; use std::mem;
use std::rc::Rc; use std::rc::Rc;
pub use expr::*; pub use ast::*;
pub use ident::*; pub use ident::*;
pub use markup::*;
pub use pretty::*; pub use pretty::*;
pub use span::*; pub use span::*;
pub use token::*;
use crate::geom::{AngularUnit, LengthUnit}; use crate::geom::{AngularUnit, LengthUnit};
use crate::source::SourceId; use crate::source::SourceId;
use crate::util::EcoString; use crate::util::EcoString;
#[derive(Debug, Clone, PartialEq)]
pub enum NodeKind {
/// A left square bracket: `[`.
LeftBracket,
/// A right square bracket: `]`.
RightBracket,
/// A left curly brace: `{`.
LeftBrace,
/// A right curly brace: `}`.
RightBrace,
/// A left round parenthesis: `(`.
LeftParen,
/// A right round parenthesis: `)`.
RightParen,
/// An asterisk: `*`.
Star,
/// A comma: `,`.
Comma,
/// A semicolon: `;`.
Semicolon,
/// A colon: `:`.
Colon,
/// A plus: `+`.
Plus,
/// A hyphen: `-`.
Minus,
/// A slash: `/`.
Slash,
/// A single equals sign: `=`.
Eq,
/// Two equals signs: `==`.
EqEq,
/// An exclamation mark followed by an equals sign: `!=`.
ExclEq,
/// A less-than sign: `<`.
Lt,
/// A less-than sign followed by an equals sign: `<=`.
LtEq,
/// A greater-than sign: `>`.
Gt,
/// A greater-than sign followed by an equals sign: `>=`.
GtEq,
/// A plus followed by an equals sign: `+=`.
PlusEq,
/// A hyphen followed by an equals sign: `-=`.
HyphEq,
/// An asterisk followed by an equals sign: `*=`.
StarEq,
/// A slash followed by an equals sign: `/=`.
SlashEq,
/// Two dots: `..`.
Dots,
/// An equals sign followed by a greater-than sign: `=>`.
Arrow,
/// The `not` operator.
Not,
/// The `and` operator.
And,
/// The `or` operator.
Or,
/// The `with` operator.
With,
/// The `with` expression: `with (1)`.
WithExpr,
/// The none literal: `none`.
None,
/// The auto literal: `auto`.
Auto,
/// The `let` keyword.
Let,
/// The `if` keyword.
If,
/// The `else` keyword.
Else,
/// The `for` keyword.
For,
/// The `in` keyword.
In,
/// The `while` keyword.
While,
/// The `break` keyword.
Break,
/// The `continue` keyword.
Continue,
/// The `return` keyword.
Return,
/// The `import` keyword.
Import,
/// The `include` keyword.
Include,
/// The `from` keyword.
From,
/// One or more whitespace characters.
Space(usize),
/// A consecutive non-markup string.
Text(EcoString),
/// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(UnicodeEscapeToken),
/// An arbitrary number of backticks followed by inner contents, terminated
/// with the same number of backticks: `` `...` ``.
Raw(Rc<RawToken>),
/// Dollar signs surrounding inner contents.
Math(Rc<MathToken>),
/// A numbering: `23.`.
///
/// Can also exist without the number: `.`.
EnumNumbering(Option<usize>),
/// An identifier: `center`.
Ident(EcoString),
/// A boolean: `true`, `false`.
Bool(bool),
/// An integer: `120`.
Int(i64),
/// A floating-point number: `1.2`, `10e-4`.
Float(f64),
/// A length: `12pt`, `3cm`.
Length(f64, LengthUnit),
/// An angle: `90deg`.
Angle(f64, AngularUnit),
/// A percentage: `50%`.
///
/// _Note_: `50%` is stored as `50.0` here, as in the corresponding
/// [literal](super::Lit::Percent).
Percentage(f64),
/// A fraction unit: `3fr`.
Fraction(f64),
/// A quoted string: `"..."`.
Str(StrToken),
/// Two slashes followed by inner contents, terminated with a newline:
/// `//<str>\n`.
LineComment,
/// A slash and a star followed by inner contents, terminated with a star
/// and a slash: `/*<str>*/`.
///
/// The comment can contain nested block comments.
BlockComment,
/// Tokens that appear in the wrong place.
Error(ErrorPosition, EcoString),
/// Unknown character sequences.
Unknown(EcoString),
/// Template markup.
Markup,
/// A forced line break: `\`.
Linebreak,
/// A paragraph break: Two or more newlines.
Parbreak,
/// Strong text was enabled / disabled: `*`.
Strong,
/// Emphasized text was enabled / disabled: `_`.
Emph,
/// A non-breaking space: `~`.
NonBreakingSpace,
/// An en-dash: `--`.
EnDash,
/// An em-dash: `---`.
EmDash,
/// A section heading: `= Introduction`.
Heading,
/// A heading's level: `=`, `==`, `===`, etc.
HeadingLevel(u8),
/// An item in an unordered list: `- ...`.
List,
/// The bullet character of an item in an unordered list: `-`.
ListBullet,
/// An item in an enumeration (ordered list): `1. ...`.
Enum,
/// An array expression: `(1, "hi", 12cm)`.
Array,
/// A dictionary expression: `(thickness: 3pt, pattern: dashed)`.
Dict,
/// A named argument: `thickness: 3pt`.
Named,
/// A template expression: `[*Hi* there!]`.
Template,
/// A grouped expression: `(1 + 2)`.
Group,
/// A block expression: `{ let x = 1; x + 2 }`.
Block,
/// A unary operation: `-x`.
Unary,
/// A binary operation: `a + b`.
Binary,
/// An invocation of a function: `f(x, y)`.
Call,
/// A function call's argument list: `(x, y)`.
CallArgs,
/// A closure expression: `(x, y) => z`.
Closure,
/// A closure's parameters: `(x, y)`.
ClosureParams,
/// A parameter sink: `..x`.
ParameterSink,
/// A for loop expression: `for x in y { ... }`.
ForExpr,
/// A while loop expression: `while x { ... }`.
WhileExpr,
/// An if expression: `if x { ... }`.
IfExpr,
/// A let expression: `let x = 1`.
LetExpr,
/// A for loop's destructuring pattern: `x` or `x, y`.
ForPattern,
/// The import expression: `import x from "foo.typ"`.
ImportExpr,
/// Items to import: `a, b, c`.
ImportItems,
/// The include expression: `include "foo.typ"`.
IncludeExpr,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ErrorPosition {
/// At the start of the node.
Start,
/// Over the full width of the node.
Full,
/// At the end of the node.
End,
}
impl Display for NodeKind {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.pad(self.as_str())
}
}
impl NodeKind {
pub fn is_parenthesis(&self) -> bool {
match self {
Self::LeftParen => true,
Self::RightParen => true,
_ => false,
}
}
pub fn is_bracket(&self) -> bool {
match self {
Self::LeftBracket => true,
Self::RightBracket => true,
_ => false,
}
}
pub fn is_brace(&self) -> bool {
match self {
Self::LeftBrace => true,
Self::RightBrace => true,
_ => false,
}
}
pub fn is_error(&self) -> bool {
matches!(self, NodeKind::Error(_, _))
}
pub fn as_str(&self) -> &'static str {
match self {
Self::LeftBracket => "opening bracket",
Self::RightBracket => "closing bracket",
Self::LeftBrace => "opening brace",
Self::RightBrace => "closing brace",
Self::LeftParen => "opening paren",
Self::RightParen => "closing paren",
Self::Star => "star",
Self::Comma => "comma",
Self::Semicolon => "semicolon",
Self::Colon => "colon",
Self::Plus => "plus",
Self::Minus => "minus",
Self::Slash => "slash",
Self::Eq => "assignment operator",
Self::EqEq => "equality operator",
Self::ExclEq => "inequality operator",
Self::Lt => "less-than operator",
Self::LtEq => "less-than or equal operator",
Self::Gt => "greater-than operator",
Self::GtEq => "greater-than or equal operator",
Self::PlusEq => "add-assign operator",
Self::HyphEq => "subtract-assign operator",
Self::StarEq => "multiply-assign operator",
Self::SlashEq => "divide-assign operator",
Self::Dots => "dots",
Self::Arrow => "arrow",
Self::Not => "operator `not`",
Self::And => "operator `and`",
Self::Or => "operator `or`",
Self::With => "operator `with`",
Self::WithExpr => "`with` expression",
Self::None => "`none`",
Self::Auto => "`auto`",
Self::Let => "keyword `let`",
Self::If => "keyword `if`",
Self::Else => "keyword `else`",
Self::For => "keyword `for`",
Self::In => "keyword `in`",
Self::While => "keyword `while`",
Self::Break => "keyword `break`",
Self::Continue => "keyword `continue`",
Self::Return => "keyword `return`",
Self::Import => "keyword `import`",
Self::Include => "keyword `include`",
Self::From => "keyword `from`",
Self::Space(_) => "space",
Self::Math(_) => "math formula",
Self::EnumNumbering(_) => "numbering",
Self::Str(_) => "string",
Self::LineComment => "line comment",
Self::BlockComment => "block comment",
Self::Markup => "markup",
Self::Linebreak => "forced linebreak",
Self::Parbreak => "paragraph break",
Self::Strong => "strong",
Self::Emph => "emphasis",
Self::Text(_) => "text",
Self::NonBreakingSpace => "non-breaking space",
Self::EnDash => "en dash",
Self::EmDash => "em dash",
Self::UnicodeEscape(_) => "unicode escape sequence",
Self::Raw(_) => "raw block",
Self::Heading => "heading",
Self::HeadingLevel(_) => "heading level",
Self::List => "list",
Self::ListBullet => "list bullet",
Self::Enum => "enum",
Self::Ident(_) => "identifier",
Self::Bool(_) => "boolean",
Self::Int(_) => "integer",
Self::Float(_) => "float",
Self::Length(_, _) => "length",
Self::Angle(_, _) => "angle",
Self::Percentage(_) => "percentage",
Self::Fraction(_) => "`fr` value",
Self::Array => "array",
Self::Dict => "dictionary",
Self::Named => "named argument",
Self::Template => "template",
Self::Group => "group",
Self::Block => "block",
Self::Unary => "unary expression",
Self::Binary => "binary expression",
Self::Call => "call",
Self::CallArgs => "call arguments",
Self::Closure => "closure",
Self::ClosureParams => "closure parameters",
Self::ParameterSink => "parameter sink",
Self::ForExpr => "for-loop expression",
Self::WhileExpr => "while-loop expression",
Self::IfExpr => "if expression",
Self::LetExpr => "let expression",
Self::ForPattern => "for-loop destructuring pattern",
Self::ImportExpr => "import expression",
Self::ImportItems => "import items",
Self::IncludeExpr => "include expression",
Self::Unknown(src) => match src.as_str() {
"*/" => "end of block comment",
_ => "invalid token",
},
Self::Error(_, _) => "parse error",
}
}
}
/// A syntactical node.
#[derive(Clone, PartialEq)]
pub struct GreenNode {
/// Node metadata.
data: GreenData,
/// This node's children, losslessly make up this node.
children: Vec<Green>,
}
/// Data shared between [`GreenNode`]s and [`GreenToken`]s.
#[derive(Clone, PartialEq)]
pub struct GreenData {
/// What kind of node this is (each kind would have its own struct in a
/// strongly typed AST).
kind: NodeKind,
/// The byte length of the node in the source.
len: usize,
/// Whether this node or any of its children are erroneous.
erroneous: bool,
}
impl GreenData {
pub fn new(kind: NodeKind, len: usize) -> Self {
Self { len, erroneous: kind.is_error(), kind }
}
pub fn kind(&self) -> &NodeKind {
&self.kind
}
pub fn len(&self) -> usize {
self.len
}
pub fn erroneous(&self) -> bool {
self.erroneous
}
}
impl From<GreenData> for Green {
fn from(token: GreenData) -> Self {
Self::Token(token)
}
}
/// Children of a [`GreenNode`]. /// Children of a [`GreenNode`].
#[derive(Clone, PartialEq)] #[derive(Clone, PartialEq)]
pub enum Green { pub enum Green {
/// A terminal owned token.
Token(GreenData),
/// A non-terminal node in an Rc. /// A non-terminal node in an Rc.
Node(Rc<GreenNode>), Node(Rc<GreenNode>),
/// A terminal owned token.
Token(GreenData),
} }
impl Green { impl Green {
fn data(&self) -> &GreenData { fn data(&self) -> &GreenData {
match self { match self {
Green::Token(t) => &t,
Green::Node(n) => &n.data, Green::Node(n) => &n.data,
Green::Token(t) => &t,
} }
} }
@ -462,12 +50,41 @@ impl Green {
pub fn children(&self) -> &[Green] { pub fn children(&self) -> &[Green] {
match self { match self {
Green::Token(_) => &[],
Green::Node(n) => &n.children(), Green::Node(n) => &n.children(),
Green::Token(_) => &[],
} }
} }
} }
impl Default for Green {
    /// The default element is a zero-length token of kind [`NodeKind::None`].
    fn default() -> Self {
        let empty = GreenData::new(NodeKind::None, 0);
        Self::Token(empty)
    }
}
impl Debug for Green {
    /// Formats as `<kind>: <len>`, followed by a space and a debug list of
    /// the children when this is a node that has any.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "{:?}: {}", self.kind(), self.len())?;
        match self {
            Self::Node(inner) if !inner.children.is_empty() => {
                f.write_str(" ")?;
                f.debug_list().entries(inner.children.iter()).finish()
            }
            // Tokens and childless nodes print nothing further.
            _ => Ok(()),
        }
    }
}
/// A syntactical node: the non-terminal element that [`Green::Node`] stores
/// behind an `Rc`.
#[derive(Clone, PartialEq)]
pub struct GreenNode {
/// Node metadata (kind, byte length and error flag).
data: GreenData,
/// This node's children, which together losslessly make up this node.
children: Vec<Green>,
}
impl GreenNode { impl GreenNode {
pub fn new(kind: NodeKind, len: usize) -> Self { pub fn new(kind: NodeKind, len: usize) -> Self {
Self { Self {
@ -503,23 +120,39 @@ impl From<Rc<GreenNode>> for Green {
} }
} }
impl Default for Green { /// Data shared between [`GreenNode`]s and [`GreenToken`]s.
fn default() -> Self { #[derive(Clone, PartialEq)]
Self::Token(GreenData::new(NodeKind::None, 0)) pub struct GreenData {
/// What kind of node this is (each kind would have its own struct in a
/// strongly typed AST).
kind: NodeKind,
/// The byte length of the node in the source.
len: usize,
/// Whether this node or any of its children are erroneous.
erroneous: bool,
}
impl GreenData {
pub fn new(kind: NodeKind, len: usize) -> Self {
Self { len, erroneous: kind.is_error(), kind }
}
pub fn kind(&self) -> &NodeKind {
&self.kind
}
pub fn len(&self) -> usize {
self.len
}
pub fn erroneous(&self) -> bool {
self.erroneous
} }
} }
impl Debug for Green { impl From<GreenData> for Green {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn from(token: GreenData) -> Self {
write!(f, "{:?}: {}", self.kind(), self.len())?; Self::Token(token)
if let Self::Node(n) = self {
if !n.children.is_empty() {
f.write_str(" ")?;
f.debug_list().entries(&n.children).finish()?;
}
}
Ok(())
} }
} }
@ -678,6 +311,408 @@ pub trait TypedNode: Sized {
fn cast_from(value: RedRef) -> Option<Self>; fn cast_from(value: RedRef) -> Option<Self>;
} }
/// The kind of a syntax element.
///
/// A single enum covers punctuation, operators and keywords, markup
/// elements, literal tokens (some carrying their parsed value), composite
/// expression nodes, comments, and error / unknown tokens. It is what
/// [`GreenData`] stores as its `kind`.
#[derive(Debug, Clone, PartialEq)]
pub enum NodeKind {
/// A left square bracket: `[`.
LeftBracket,
/// A right square bracket: `]`.
RightBracket,
/// A left curly brace: `{`.
LeftBrace,
/// A right curly brace: `}`.
RightBrace,
/// A left round parenthesis: `(`.
LeftParen,
/// A right round parenthesis: `)`.
RightParen,
/// An asterisk: `*`.
Star,
/// A comma: `,`.
Comma,
/// A semicolon: `;`.
Semicolon,
/// A colon: `:`.
Colon,
/// A plus: `+`.
Plus,
/// A hyphen: `-`.
Minus,
/// A slash: `/`.
Slash,
/// A single equals sign: `=`.
Eq,
/// Two equals signs: `==`.
EqEq,
/// An exclamation mark followed by an equals sign: `!=`.
ExclEq,
/// A less-than sign: `<`.
Lt,
/// A less-than sign followed by an equals sign: `<=`.
LtEq,
/// A greater-than sign: `>`.
Gt,
/// A greater-than sign followed by an equals sign: `>=`.
GtEq,
/// A plus followed by an equals sign: `+=`.
PlusEq,
/// A hyphen followed by an equals sign: `-=`.
HyphEq,
/// An asterisk followed by an equals sign: `*=`.
StarEq,
/// A slash followed by an equals sign: `/=`.
SlashEq,
/// The `not` operator.
Not,
/// The `and` operator.
And,
/// The `or` operator.
Or,
/// The `with` operator.
With,
/// Two dots: `..`.
Dots,
/// An equals sign followed by a greater-than sign: `=>`.
Arrow,
/// The none literal: `none`.
None,
/// The auto literal: `auto`.
Auto,
/// The `let` keyword.
Let,
/// The `if` keyword.
If,
/// The `else` keyword.
Else,
/// The `for` keyword.
For,
/// The `in` keyword.
In,
/// The `while` keyword.
While,
/// The `break` keyword.
Break,
/// The `continue` keyword.
Continue,
/// The `return` keyword.
Return,
/// The `import` keyword.
Import,
/// The `include` keyword.
Include,
/// The `from` keyword.
From,
/// Template markup.
Markup,
/// One or more whitespace characters.
Space(usize),
/// A forced line break: `\`.
Linebreak,
/// A paragraph break: Two or more newlines.
Parbreak,
/// A consecutive non-markup string.
Text(EcoString),
/// A non-breaking space: `~`.
NonBreakingSpace,
/// An en-dash: `--`.
EnDash,
/// An em-dash: `---`.
EmDash,
/// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(UnicodeEscapeToken),
/// Strong text was enabled / disabled: `*`.
Strong,
/// Emphasized text was enabled / disabled: `_`.
Emph,
/// A section heading: `= Introduction`.
Heading,
/// A heading's level: `=`, `==`, `===`, etc.
HeadingLevel(u8),
/// An item in an enumeration (ordered list): `1. ...`.
Enum,
/// A numbering: `23.`.
///
/// Can also exist without the number: `.`.
EnumNumbering(Option<usize>),
/// An item in an unordered list: `- ...`.
List,
/// The bullet character of an item in an unordered list: `-`.
ListBullet,
/// An arbitrary number of backticks followed by inner contents, terminated
/// with the same number of backticks: `` `...` ``.
Raw(Rc<RawToken>),
/// Dollar signs surrounding inner contents.
Math(Rc<MathToken>),
/// An identifier: `center`.
Ident(EcoString),
/// A boolean: `true`, `false`.
Bool(bool),
/// An integer: `120`.
Int(i64),
/// A floating-point number: `1.2`, `10e-4`.
Float(f64),
/// A length: `12pt`, `3cm`.
Length(f64, LengthUnit),
/// An angle: `90deg`.
Angle(f64, AngularUnit),
/// A percentage: `50%`.
///
/// _Note_: `50%` is stored as `50.0` here, as in the corresponding
/// [literal](super::Lit::Percent).
Percentage(f64),
/// A fraction unit: `3fr`.
Fraction(f64),
/// A quoted string: `"..."`.
Str(StrToken),
/// An array expression: `(1, "hi", 12cm)`.
Array,
/// A dictionary expression: `(thickness: 3pt, pattern: dashed)`.
Dict,
/// A named argument: `thickness: 3pt`.
Named,
/// A grouped expression: `(1 + 2)`.
Group,
/// A unary operation: `-x`.
Unary,
/// A binary operation: `a + b`.
Binary,
/// An invocation of a function: `f(x, y)`.
Call,
/// A function call's argument list: `(x, y)`.
CallArgs,
/// A closure expression: `(x, y) => z`.
Closure,
/// A closure's parameters: `(x, y)`.
ClosureParams,
/// A parameter sink: `..x`.
ParameterSink,
/// A template expression: `[*Hi* there!]`.
Template,
/// A block expression: `{ let x = 1; x + 2 }`.
Block,
/// A for loop expression: `for x in y { ... }`.
ForExpr,
/// A while loop expression: `while x { ... }`.
WhileExpr,
/// An if expression: `if x { ... }`.
IfExpr,
/// A let expression: `let x = 1`.
LetExpr,
/// The `with` expression: `with (1)`.
WithExpr,
/// A for loop's destructuring pattern: `x` or `x, y`.
ForPattern,
/// The import expression: `import x from "foo.typ"`.
ImportExpr,
/// Items to import: `a, b, c`.
ImportItems,
/// The include expression: `include "foo.typ"`.
IncludeExpr,
/// Two slashes followed by inner contents, terminated with a newline:
/// `//<str>\n`.
LineComment,
/// A slash and a star followed by inner contents, terminated with a star
/// and a slash: `/*<str>*/`.
///
/// The comment can contain nested block comments.
BlockComment,
/// Tokens that appear in the wrong place.
Error(ErrorPosition, EcoString),
/// Unknown character sequences.
Unknown(EcoString),
}
/// A position relative to a node, carried as the first field of
/// [`NodeKind::Error`].
// NOTE(review): presumably selects where on the node the error is reported;
// confirm against the code that consumes `NodeKind::Error`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ErrorPosition {
/// At the start of the node.
Start,
/// Over the full width of the node.
Full,
/// At the end of the node.
End,
}
/// A quoted string token: `"..."`.
///
/// This is the payload of [`NodeKind::Str`].
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct StrToken {
/// The string inside the quotes.
pub string: EcoString,
}
/// A raw block token: `` `...` ``.
///
/// This is the payload of [`NodeKind::Raw`], where it is held in an `Rc`.
#[derive(Debug, Clone, PartialEq)]
pub struct RawToken {
/// The raw text in the block.
pub text: EcoString,
/// The programming language of the raw text, if one was given.
pub lang: Option<EcoString>,
/// The number of opening backticks.
pub backticks: u8,
/// Whether to display this as a block.
pub block: bool,
}
/// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`.
///
/// This is the payload of [`NodeKind::Math`], where it is held in an `Rc`.
#[derive(Debug, Clone, PartialEq)]
pub struct MathToken {
/// The formula between the dollars.
pub formula: EcoString,
/// Whether the formula is display-level, that is, it is surrounded by
/// `$[..]`.
pub display: bool,
}
/// A unicode escape sequence token: `\u{1F5FA}`.
///
/// This is the payload of [`NodeKind::UnicodeEscape`].
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct UnicodeEscapeToken {
/// The resulting unicode character.
pub character: char,
}
impl Display for NodeKind {
    /// Writes the kind's human-readable name (see [`NodeKind::as_str`])
    /// through [`Formatter::pad`].
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let name = self.as_str();
        f.pad(name)
    }
}
impl NodeKind {
    /// Whether this is a left or right round parenthesis.
    pub fn is_paren(&self) -> bool {
        matches!(self, Self::LeftParen | Self::RightParen)
    }

    /// Whether this is a left or right square bracket.
    pub fn is_bracket(&self) -> bool {
        matches!(self, Self::LeftBracket | Self::RightBracket)
    }

    /// Whether this is a left or right curly brace.
    pub fn is_brace(&self) -> bool {
        matches!(self, Self::LeftBrace | Self::RightBrace)
    }

    /// Whether this is an error node.
    pub fn is_error(&self) -> bool {
        matches!(self, NodeKind::Error(_, _))
    }

    /// A human-readable name for this kind.
    ///
    /// The name depends only on the variant, not on any payload. The one
    /// exception is [`NodeKind::Unknown`], where a stray `*/` is named
    /// specifically.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::LeftBracket => "opening bracket",
            Self::RightBracket => "closing bracket",
            Self::LeftBrace => "opening brace",
            Self::RightBrace => "closing brace",
            Self::LeftParen => "opening paren",
            Self::RightParen => "closing paren",
            Self::Star => "star",
            Self::Comma => "comma",
            Self::Semicolon => "semicolon",
            Self::Colon => "colon",
            Self::Plus => "plus",
            Self::Minus => "minus",
            Self::Slash => "slash",
            Self::Eq => "assignment operator",
            Self::EqEq => "equality operator",
            Self::ExclEq => "inequality operator",
            Self::Lt => "less-than operator",
            Self::LtEq => "less-than or equal operator",
            Self::Gt => "greater-than operator",
            Self::GtEq => "greater-than or equal operator",
            Self::PlusEq => "add-assign operator",
            Self::HyphEq => "subtract-assign operator",
            Self::StarEq => "multiply-assign operator",
            Self::SlashEq => "divide-assign operator",
            Self::Not => "operator `not`",
            Self::And => "operator `and`",
            Self::Or => "operator `or`",
            Self::With => "operator `with`",
            Self::Dots => "dots",
            Self::Arrow => "arrow",
            Self::None => "`none`",
            Self::Auto => "`auto`",
            Self::Let => "keyword `let`",
            Self::If => "keyword `if`",
            Self::Else => "keyword `else`",
            Self::For => "keyword `for`",
            Self::In => "keyword `in`",
            Self::While => "keyword `while`",
            Self::Break => "keyword `break`",
            Self::Continue => "keyword `continue`",
            Self::Return => "keyword `return`",
            Self::Import => "keyword `import`",
            Self::Include => "keyword `include`",
            Self::From => "keyword `from`",
            Self::Markup => "markup",
            Self::Space(_) => "space",
            Self::Linebreak => "forced linebreak",
            Self::Parbreak => "paragraph break",
            Self::Text(_) => "text",
            Self::NonBreakingSpace => "non-breaking space",
            Self::EnDash => "en dash",
            Self::EmDash => "em dash",
            Self::UnicodeEscape(_) => "unicode escape sequence",
            Self::Strong => "strong",
            Self::Emph => "emphasis",
            Self::Heading => "heading",
            Self::HeadingLevel(_) => "heading level",
            Self::Enum => "enumeration item",
            Self::EnumNumbering(_) => "enumeration item numbering",
            Self::List => "list item",
            Self::ListBullet => "list bullet",
            Self::Raw(_) => "raw block",
            Self::Math(_) => "math formula",
            Self::Ident(_) => "identifier",
            Self::Bool(_) => "boolean",
            Self::Int(_) => "integer",
            Self::Float(_) => "float",
            Self::Length(_, _) => "length",
            Self::Angle(_, _) => "angle",
            Self::Percentage(_) => "percentage",
            Self::Fraction(_) => "`fr` value",
            Self::Str(_) => "string",
            Self::Array => "array",
            Self::Dict => "dictionary",
            Self::Named => "named argument",
            Self::Group => "group",
            Self::Unary => "unary expression",
            Self::Binary => "binary expression",
            Self::Call => "call",
            Self::CallArgs => "call arguments",
            Self::Closure => "closure",
            Self::ClosureParams => "closure parameters",
            Self::ParameterSink => "parameter sink",
            Self::Template => "template",
            Self::Block => "block",
            Self::ForExpr => "for-loop expression",
            Self::WhileExpr => "while-loop expression",
            Self::IfExpr => "`if` expression",
            Self::LetExpr => "`let` expression",
            Self::WithExpr => "`with` expression",
            Self::ForPattern => "for-loop destructuring pattern",
            Self::ImportExpr => "`import` expression",
            Self::ImportItems => "import items",
            Self::IncludeExpr => "`include` expression",
            Self::LineComment => "line comment",
            Self::BlockComment => "block comment",
            Self::Error(_, _) => "parse error",
            // A block-comment terminator with no matching opener gets a more
            // specific name than other unknown input.
            Self::Unknown(src) => match src.as_str() {
                "*/" => "end of block comment",
                _ => "invalid token",
            },
        }
    }
}
#[macro_export] #[macro_export]
macro_rules! node { macro_rules! node {
($(#[$attr:meta])* $name:ident) => { ($(#[$attr:meta])* $name:ident) => {

View File

@ -1,40 +0,0 @@
use crate::util::EcoString;
/// A quoted string token: `"..."`.
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct StrToken {
/// The string inside the quotes.
pub string: EcoString,
}
/// A raw block token: `` `...` ``.
#[derive(Debug, Clone, PartialEq)]
pub struct RawToken {
/// The raw text in the block.
pub text: EcoString,
/// The programming language of the raw text.
pub lang: Option<EcoString>,
/// The number of opening backticks.
pub backticks: u8,
/// Whether to display this as a block.
pub block: bool,
}
/// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`.
#[derive(Debug, Clone, PartialEq)]
pub struct MathToken {
/// The formula between the dollars.
pub formula: EcoString,
/// Whether the formula is display-level, that is, it is surrounded by
/// `$[..]`.
pub display: bool,
}
/// A unicode escape sequence token: `\u{1F5FA}`.
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct UnicodeEscapeToken {
/// The resulting unicode character.
pub character: char,
}