Refactor syntax module

This commit is contained in:
Laurenz 2022-10-05 12:49:39 +02:00
parent 5a8534a395
commit ec884ec1d8
16 changed files with 848 additions and 903 deletions

View File

@ -66,7 +66,7 @@ fn bench_edit(iai: &mut Iai) {
fn bench_highlight(iai: &mut Iai) { fn bench_highlight(iai: &mut Iai) {
let source = Source::detached(TEXT); let source = Source::detached(TEXT);
iai.run(|| { iai.run(|| {
typst::syntax::highlight_node( typst::syntax::highlight::highlight_categories(
source.root(), source.root(),
0 .. source.len_bytes(), 0 .. source.len_bytes(),
&mut |_, _| {}, &mut |_, _| {},

View File

@ -133,25 +133,25 @@ pub trait Eval {
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output>; fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output>;
} }
impl Eval for Markup { impl Eval for MarkupNode {
type Output = Content; type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> { fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
eval_markup(vm, &mut self.nodes()) eval_markup(vm, &mut self.items())
} }
} }
/// Evaluate a stream of markup nodes. /// Evaluate a stream of markup nodes.
fn eval_markup( fn eval_markup(
vm: &mut Vm, vm: &mut Vm,
nodes: &mut impl Iterator<Item = MarkupNode>, nodes: &mut impl Iterator<Item = MarkupItem>,
) -> SourceResult<Content> { ) -> SourceResult<Content> {
let flow = vm.flow.take(); let flow = vm.flow.take();
let mut seq = Vec::with_capacity(nodes.size_hint().1.unwrap_or_default()); let mut seq = Vec::with_capacity(nodes.size_hint().1.unwrap_or_default());
while let Some(node) = nodes.next() { while let Some(node) = nodes.next() {
seq.push(match node { seq.push(match node {
MarkupNode::Expr(Expr::Set(set)) => { MarkupItem::Expr(Expr::Set(set)) => {
let styles = set.eval(vm)?; let styles = set.eval(vm)?;
if vm.flow.is_some() { if vm.flow.is_some() {
break; break;
@ -159,7 +159,7 @@ fn eval_markup(
eval_markup(vm, nodes)?.styled_with_map(styles) eval_markup(vm, nodes)?.styled_with_map(styles)
} }
MarkupNode::Expr(Expr::Show(show)) => { MarkupItem::Expr(Expr::Show(show)) => {
let recipe = show.eval(vm)?; let recipe = show.eval(vm)?;
if vm.flow.is_some() { if vm.flow.is_some() {
break; break;
@ -168,7 +168,7 @@ fn eval_markup(
eval_markup(vm, nodes)? eval_markup(vm, nodes)?
.styled_with_entry(StyleEntry::Recipe(recipe).into()) .styled_with_entry(StyleEntry::Recipe(recipe).into())
} }
MarkupNode::Expr(Expr::Wrap(wrap)) => { MarkupItem::Expr(Expr::Wrap(wrap)) => {
let tail = eval_markup(vm, nodes)?; let tail = eval_markup(vm, nodes)?;
vm.scopes.top.define(wrap.binding().take(), tail); vm.scopes.top.define(wrap.binding().take(), tail);
wrap.body().eval(vm)?.display() wrap.body().eval(vm)?.display()
@ -189,7 +189,7 @@ fn eval_markup(
Ok(Content::sequence(seq)) Ok(Content::sequence(seq))
} }
impl Eval for MarkupNode { impl Eval for MarkupItem {
type Output = Content; type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> { fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
@ -252,12 +252,12 @@ impl Eval for RawNode {
} }
} }
impl Eval for Math { impl Eval for MathNode {
type Output = Content; type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> { fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
let nodes = let nodes =
self.nodes().map(|node| node.eval(vm)).collect::<SourceResult<_>>()?; self.items().map(|node| node.eval(vm)).collect::<SourceResult<_>>()?;
Ok(Content::show(library::math::MathNode::Row( Ok(Content::show(library::math::MathNode::Row(
Arc::new(nodes), Arc::new(nodes),
self.span(), self.span(),
@ -265,7 +265,7 @@ impl Eval for Math {
} }
} }
impl Eval for MathNode { impl Eval for MathItem {
type Output = library::math::MathNode; type Output = library::math::MathNode;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> { fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
@ -278,7 +278,7 @@ impl Eval for MathNode {
Self::Align(node) => node.eval(vm)?, Self::Align(node) => node.eval(vm)?,
Self::Group(node) => library::math::MathNode::Row( Self::Group(node) => library::math::MathNode::Row(
Arc::new( Arc::new(
node.nodes() node.items()
.map(|node| node.eval(vm)) .map(|node| node.eval(vm))
.collect::<SourceResult<_>>()?, .collect::<SourceResult<_>>()?,
), ),
@ -346,7 +346,7 @@ impl Eval for HeadingNode {
} }
} }
impl Eval for ListNode { impl Eval for ListItem {
type Output = Content; type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> { fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
@ -355,7 +355,7 @@ impl Eval for ListNode {
} }
} }
impl Eval for EnumNode { impl Eval for EnumItem {
type Output = Content; type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> { fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {
@ -367,7 +367,7 @@ impl Eval for EnumNode {
} }
} }
impl Eval for DescNode { impl Eval for DescItem {
type Output = Content; type Output = Content;
fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> { fn eval(&self, vm: &mut Vm) -> SourceResult<Self::Output> {

View File

@ -8,8 +8,6 @@ use syntect::parsing::SyntaxSet;
use super::{FontFamily, Hyphenate, TextNode}; use super::{FontFamily, Hyphenate, TextNode};
use crate::library::layout::BlockSpacing; use crate::library::layout::BlockSpacing;
use crate::library::prelude::*; use crate::library::prelude::*;
use crate::parse::TokenMode;
use crate::syntax;
/// Monospaced text with optional syntax highlighting. /// Monospaced text with optional syntax highlighting.
#[derive(Debug, Hash)] #[derive(Debug, Hash)]
@ -73,14 +71,14 @@ impl Show for RawNode {
.into(); .into();
let mut realized = if matches!(lang.as_deref(), Some("typ" | "typst" | "typc")) { let mut realized = if matches!(lang.as_deref(), Some("typ" | "typst" | "typc")) {
let mode = match lang.as_deref() { let root = match lang.as_deref() {
Some("typc") => TokenMode::Code, Some("typc") => crate::parse::parse_code(&self.text),
_ => TokenMode::Markup, _ => crate::parse::parse(&self.text),
}; };
let mut seq = vec![]; let mut seq = vec![];
syntax::highlight_themed(&self.text, mode, &THEME, |piece, style| { crate::syntax::highlight::highlight_themed(&root, &THEME, |range, style| {
seq.push(styled(piece, foreground, style)); seq.push(styled(&self.text[range], foreground, style));
}); });
Content::sequence(seq) Content::sequence(seq)
@ -167,24 +165,29 @@ pub static THEME: Lazy<Theme> = Lazy::new(|| Theme {
author: Some("The Typst Project Developers".into()), author: Some("The Typst Project Developers".into()),
settings: ThemeSettings::default(), settings: ThemeSettings::default(),
scopes: vec![ scopes: vec![
item("comment", Some("#8a8a8a"), None),
item("constant.character.escape", Some("#1d6c76"), None),
item("constant.character.shortcut", Some("#1d6c76"), None),
item("markup.bold", None, Some(FontStyle::BOLD)), item("markup.bold", None, Some(FontStyle::BOLD)),
item("markup.italic", None, Some(FontStyle::ITALIC)), item("markup.italic", None, Some(FontStyle::ITALIC)),
item("markup.underline", None, Some(FontStyle::UNDERLINE)),
item("markup.raw", Some("#818181"), None),
item("string.other.math.typst", None, None),
item("punctuation.definition.math", Some("#298e0d"), None),
item("keyword.operator.math", Some("#1d6c76"), None),
item("markup.heading, entity.name.section", None, Some(FontStyle::BOLD)), item("markup.heading, entity.name.section", None, Some(FontStyle::BOLD)),
item("markup.heading.typst", None, Some(FontStyle::BOLD | FontStyle::UNDERLINE)), item("markup.heading.typst", None, Some(FontStyle::BOLD | FontStyle::UNDERLINE)),
item("markup.raw", Some("#818181"), None), item("punctuation.definition.list", Some("#8b41b1"), None),
item("markup.list", Some("#8b41b1"), None), item("markup.list.term", None, Some(FontStyle::BOLD)),
item("comment", Some("#8a8a8a"), None),
item("punctuation.shortcut", Some("#1d6c76"), None),
item("constant.character.escape", Some("#1d6c76"), None),
item("entity.name.label, markup.other.reference", Some("#1d6c76"), None), item("entity.name.label, markup.other.reference", Some("#1d6c76"), None),
item("keyword, constant.language, variable.language", Some("#d73a49"), None), item("keyword, constant.language, variable.language", Some("#d73a49"), None),
item("storage.type, storage.modifier", Some("#d73a49"), None), item("storage.type, storage.modifier", Some("#d73a49"), None),
item("entity.other", Some("#8b41b1"), None), item("constant", Some("#b60157"), None),
item("string", Some("#298e0d"), None),
item("entity.name, variable.function, support", Some("#4b69c6"), None), item("entity.name, variable.function, support", Some("#4b69c6"), None),
item("support.macro", Some("#16718d"), None), item("support.macro", Some("#16718d"), None),
item("meta.annotation", Some("#301414"), None), item("meta.annotation", Some("#301414"), None),
item("constant", Some("#b60157"), None), item("entity.other, meta.interpolation", Some("#8b41b1"), None),
item("string", Some("#298e0d"), None),
item("invalid", Some("#ff0000"), None), item("invalid", Some("#ff0000"), None),
], ],
}); });

View File

@ -96,11 +96,10 @@ fn try_reparse(
&& (ahead.is_none() || change.replaced.start > child_span.end) && (ahead.is_none() || change.replaced.start > child_span.end)
&& !ahead.map_or(false, Ahead::is_compulsory) && !ahead.map_or(false, Ahead::is_compulsory)
{ {
ahead = ahead = Some(Ahead::new(pos, at_start, is_bounded(child.kind())));
Some(Ahead::new(pos, at_start, child.kind().is_bounded()));
} }
at_start = child.kind().is_at_start(at_start); at_start = next_at_start(child.kind(), at_start);
} }
} }
SearchState::Inside(start) => { SearchState::Inside(start) => {
@ -137,7 +136,7 @@ fn try_reparse(
if let SearchState::Contained(pos) = search { if let SearchState::Contained(pos) = search {
// Do not allow replacement of elements inside of constructs whose // Do not allow replacement of elements inside of constructs whose
// opening and closing brackets look the same. // opening and closing brackets look the same.
let safe_inside = node.kind().is_bounded(); let safe_inside = is_bounded(node.kind());
let child = &mut node.children_mut()[pos.idx]; let child = &mut node.children_mut()[pos.idx];
let prev_len = child.len(); let prev_len = child.len();
let prev_descendants = child.descendants(); let prev_descendants = child.descendants();
@ -384,6 +383,36 @@ enum ReparseMode {
MarkupElements { at_start: bool, min_indent: usize }, MarkupElements { at_start: bool, min_indent: usize },
} }
/// Reports whether an edit that happens _inside_ a node of the given kind
/// stays encapsulated in that node, so that reparsing just this node is
/// sufficient.
///
/// Returns `true` for code and content blocks, the listed shorthand and
/// escape tokens, quotes, block comments and spaces; `false` for every other
/// kind.
fn is_bounded(kind: &NodeKind) -> bool {
    matches!(
        kind,
        NodeKind::CodeBlock
            | NodeKind::ContentBlock
            | NodeKind::Backslash
            | NodeKind::Tilde
            | NodeKind::HyphQuest
            | NodeKind::Hyph2
            | NodeKind::Hyph3
            | NodeKind::Dot3
            | NodeKind::Quote { .. }
            | NodeKind::BlockComment
            | NodeKind::Space { .. }
            | NodeKind::Escape(_)
    )
}
/// Computes the value of the `at_start` property after a node of the given
/// kind, based on the value `prev` it had before the node.
///
/// - A space containing at least one newline always yields `true`.
/// - Any other space, a line comment or a block comment passes `prev`
///   through unchanged.
/// - Every other kind yields `false`.
fn next_at_start(kind: &NodeKind, prev: bool) -> bool {
    // A space with one or more newlines wins regardless of `prev`.
    if matches!(kind, NodeKind::Space { newlines: (1 ..) }) {
        return true;
    }

    // Remaining spaces and comments keep the previous value; anything else
    // resets the property to `false`.
    let keeps_previous = matches!(
        kind,
        NodeKind::Space { .. } | NodeKind::LineComment | NodeKind::BlockComment
    );
    keeps_previous && prev
}
#[cfg(test)] #[cfg(test)]
#[rustfmt::skip] #[rustfmt::skip]
mod tests { mod tests {

View File

@ -22,17 +22,6 @@ pub fn parse(text: &str) -> SyntaxNode {
p.finish().into_iter().next().unwrap() p.finish().into_iter().next().unwrap()
} }
/// Parse math directly, only used for syntax highlighting.
///
/// Tokenizes `text` in `TokenMode::Math` and returns the first node the
/// parser produces.
///
/// # Panics
///
/// Panics if the finished parser yields no node at all (the `unwrap` on the
/// first element). NOTE(review): presumably cannot happen because the
/// `perform` call below always emits a `Math` node, even for empty input —
/// confirm against `Parser::perform`.
pub fn parse_math(text: &str) -> SyntaxNode {
let mut p = Parser::new(text, TokenMode::Math);
// Run the loop under `NodeKind::Math`; presumably `perform` wraps whatever
// the closure parses into a single node of that kind.
p.perform(NodeKind::Math, |p| {
// Keep parsing math nodes until the input is exhausted.
while !p.eof() {
math_node(p);
}
});
// Only the first produced node is returned; any further ones are dropped.
p.finish().into_iter().next().unwrap()
}
/// Parse code directly, only used for syntax highlighting. /// Parse code directly, only used for syntax highlighting.
pub fn parse_code(text: &str) -> SyntaxNode { pub fn parse_code(text: &str) -> SyntaxNode {
let mut p = Parser::new(text, TokenMode::Code); let mut p = Parser::new(text, TokenMode::Code);
@ -250,7 +239,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
// Text and markup. // Text and markup.
NodeKind::Text(_) NodeKind::Text(_)
| NodeKind::Linebreak { .. } | NodeKind::Backslash
| NodeKind::Tilde | NodeKind::Tilde
| NodeKind::HyphQuest | NodeKind::HyphQuest
| NodeKind::Hyph2 | NodeKind::Hyph2
@ -353,7 +342,7 @@ fn list_node(p: &mut Parser, at_start: bool) {
let min_indent = p.column(p.prev_end()); let min_indent = p.column(p.prev_end());
if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() { if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() {
markup_indented(p, min_indent); markup_indented(p, min_indent);
marker.end(p, NodeKind::List); marker.end(p, NodeKind::ListItem);
} else { } else {
marker.convert(p, NodeKind::Text(text)); marker.convert(p, NodeKind::Text(text));
} }
@ -368,7 +357,7 @@ fn enum_node(p: &mut Parser, at_start: bool) {
let min_indent = p.column(p.prev_end()); let min_indent = p.column(p.prev_end());
if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() { if at_start && p.eat_if(NodeKind::Space { newlines: 0 }) && !p.eof() {
markup_indented(p, min_indent); markup_indented(p, min_indent);
marker.end(p, NodeKind::Enum); marker.end(p, NodeKind::EnumItem);
} else { } else {
marker.convert(p, NodeKind::Text(text)); marker.convert(p, NodeKind::Text(text));
} }
@ -385,7 +374,7 @@ fn desc_node(p: &mut Parser, at_start: bool) -> ParseResult {
markup_line(p, |node| matches!(node, NodeKind::Colon)); markup_line(p, |node| matches!(node, NodeKind::Colon));
p.expect(NodeKind::Colon)?; p.expect(NodeKind::Colon)?;
markup_indented(p, min_indent); markup_indented(p, min_indent);
marker.end(p, NodeKind::Desc); marker.end(p, NodeKind::DescItem);
} else { } else {
marker.convert(p, NodeKind::Text(text)); marker.convert(p, NodeKind::Text(text));
} }
@ -485,7 +474,7 @@ fn math_primary(p: &mut Parser) {
match token { match token {
// Spaces, atoms and expressions. // Spaces, atoms and expressions.
NodeKind::Space { .. } NodeKind::Space { .. }
| NodeKind::Linebreak | NodeKind::Backslash
| NodeKind::Escape(_) | NodeKind::Escape(_)
| NodeKind::Atom(_) | NodeKind::Atom(_)
| NodeKind::Ident(_) => p.eat(), | NodeKind::Ident(_) => p.eat(),
@ -820,7 +809,7 @@ fn item(p: &mut Parser, keyed: bool) -> ParseResult<NodeKind> {
} }
if let Some(kind) = kind { if let Some(kind) = kind {
msg.push_str(", found "); msg.push_str(", found ");
msg.push_str(kind.as_str()); msg.push_str(kind.name());
} }
let error = NodeKind::Error(SpanPos::Full, msg); let error = NodeKind::Error(SpanPos::Full, msg);
marker.end(p, error); marker.end(p, error);

View File

@ -159,7 +159,7 @@ impl<'s> Parser<'s> {
self.eat(); self.eat();
Ok(()) Ok(())
} else { } else {
self.expected(kind.as_str()); self.expected(kind.name());
Err(ParseError) Err(ParseError)
} }
} }
@ -293,7 +293,7 @@ impl<'s> Parser<'s> {
self.stray_terminator = s; self.stray_terminator = s;
rescan = false; rescan = false;
} else if required { } else if required {
self.expected(end.as_str()); self.expected(end.name());
self.unterminated_group = true; self.unterminated_group = true;
} }
} }
@ -397,7 +397,7 @@ impl Parser<'_> {
/// Eat the current token and add an error that it is unexpected. /// Eat the current token and add an error that it is unexpected.
pub fn unexpected(&mut self) { pub fn unexpected(&mut self) {
if let Some(found) = self.peek() { if let Some(found) = self.peek() {
let msg = format_eco!("unexpected {}", found); let msg = format_eco!("unexpected {}", found.name());
let error = NodeKind::Error(SpanPos::Full, msg); let error = NodeKind::Error(SpanPos::Full, msg);
self.perform(error, Self::eat); self.perform(error, Self::eat);
} }
@ -421,7 +421,7 @@ impl Parser<'_> {
pub fn expected_found(&mut self, thing: &str) { pub fn expected_found(&mut self, thing: &str) {
match self.peek() { match self.peek() {
Some(found) => { Some(found) => {
let msg = format_eco!("expected {}, found {}", thing, found); let msg = format_eco!("expected {}, found {}", thing, found.name());
let error = NodeKind::Error(SpanPos::Full, msg); let error = NodeKind::Error(SpanPos::Full, msg);
self.perform(error, Self::eat); self.perform(error, Self::eat);
} }
@ -492,7 +492,7 @@ impl Marker {
let mut msg = EcoString::from(msg); let mut msg = EcoString::from(msg);
if msg.starts_with("expected") { if msg.starts_with("expected") {
msg.push_str(", found "); msg.push_str(", found ");
msg.push_str(child.kind().as_str()); msg.push_str(child.kind().name());
} }
let error = NodeKind::Error(SpanPos::Full, msg); let error = NodeKind::Error(SpanPos::Full, msg);
let inner = mem::take(child); let inner = mem::take(child);

View File

@ -108,7 +108,9 @@ impl<'s> Iterator for Tokens<'s> {
// Trivia. // Trivia.
'/' if self.s.eat_if('/') => self.line_comment(), '/' if self.s.eat_if('/') => self.line_comment(),
'/' if self.s.eat_if('*') => self.block_comment(), '/' if self.s.eat_if('*') => self.block_comment(),
'*' if self.s.eat_if('/') => NodeKind::Unknown("*/".into()), '*' if self.s.eat_if('/') => {
NodeKind::Error(SpanPos::Full, "unexpected end of block comment".into())
}
c if c.is_whitespace() => self.whitespace(c), c if c.is_whitespace() => self.whitespace(c),
// Other things. // Other things.
@ -288,8 +290,8 @@ impl<'s> Tokens<'s> {
} }
// Linebreaks. // Linebreaks.
Some(c) if c.is_whitespace() => NodeKind::Linebreak, Some(c) if c.is_whitespace() => NodeKind::Backslash,
None => NodeKind::Linebreak, None => NodeKind::Backslash,
// Escapes. // Escapes.
Some(c) => { Some(c) => {
@ -517,7 +519,7 @@ impl<'s> Tokens<'s> {
'"' => self.string(), '"' => self.string(),
// Invalid token. // Invalid token.
_ => NodeKind::Unknown(self.s.from(start).into()), _ => NodeKind::Error(SpanPos::Full, "not valid here".into()),
} }
} }
@ -556,7 +558,6 @@ impl<'s> Tokens<'s> {
let number = self.s.get(start .. suffix_start); let number = self.s.get(start .. suffix_start);
let suffix = self.s.from(suffix_start); let suffix = self.s.from(suffix_start);
let all = self.s.from(start);
// Find out whether it is a simple number. // Find out whether it is a simple number.
if suffix.is_empty() { if suffix.is_empty() {
@ -577,10 +578,10 @@ impl<'s> Tokens<'s> {
"em" => NodeKind::Numeric(f, Unit::Em), "em" => NodeKind::Numeric(f, Unit::Em),
"fr" => NodeKind::Numeric(f, Unit::Fr), "fr" => NodeKind::Numeric(f, Unit::Fr),
"%" => NodeKind::Numeric(f, Unit::Percent), "%" => NodeKind::Numeric(f, Unit::Percent),
_ => NodeKind::Unknown(all.into()), _ => NodeKind::Error(SpanPos::Full, "invalid number suffix".into()),
} }
} else { } else {
NodeKind::Unknown(all.into()) NodeKind::Error(SpanPos::Full, "invalid number".into())
} }
} }
@ -745,10 +746,6 @@ mod tests {
NodeKind::Error(pos, message.into()) NodeKind::Error(pos, message.into())
} }
/// Test helper that builds a `NodeKind::Unknown` token holding the given
/// text, converted with `into()`.
fn Invalid(invalid: &str) -> NodeKind {
NodeKind::Unknown(invalid.into())
}
/// Building blocks for suffix testing. /// Building blocks for suffix testing.
/// ///
/// We extend each test case with a collection of different suffixes to make /// We extend each test case with a collection of different suffixes to make
@ -926,7 +923,7 @@ mod tests {
t!(Markup: "_" => Underscore); t!(Markup: "_" => Underscore);
t!(Markup[""]: "===" => Eq, Eq, Eq); t!(Markup[""]: "===" => Eq, Eq, Eq);
t!(Markup["a1/"]: "= " => Eq, Space(0)); t!(Markup["a1/"]: "= " => Eq, Space(0));
t!(Markup[" "]: r"\" => Linebreak); t!(Markup[" "]: r"\" => Backslash);
t!(Markup: "~" => Tilde); t!(Markup: "~" => Tilde);
t!(Markup["a1/"]: "-?" => HyphQuest); t!(Markup["a1/"]: "-?" => HyphQuest);
t!(Markup["a "]: r"a--" => Text("a"), Hyph2); t!(Markup["a "]: r"a--" => Text("a"), Hyph2);
@ -972,6 +969,9 @@ mod tests {
t!(Code[" /"]: "--1" => Minus, Minus, Int(1)); t!(Code[" /"]: "--1" => Minus, Minus, Int(1));
t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a")); t!(Code[" /"]: "--_a" => Minus, Minus, Ident("_a"));
t!(Code[" /"]: "a-b" => Ident("a-b")); t!(Code[" /"]: "a-b" => Ident("a-b"));
// Test invalid.
t!(Code: r"\" => Error(Full, "not valid here"));
} }
#[test] #[test]
@ -1107,6 +1107,9 @@ mod tests {
t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2)); t!(Code[" /"]: "1..2" => Int(1), Dots, Int(2));
t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3)); t!(Code[" /"]: "1..2.3" => Int(1), Dots, Float(2.3));
t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3)); t!(Code[" /"]: "1.2..3" => Float(1.2), Dots, Int(3));
// Test invalid.
t!(Code[" /"]: "1foo" => Error(Full, "invalid number suffix"));
} }
#[test] #[test]
@ -1161,25 +1164,9 @@ mod tests {
t!(Both[""]: "/*/*" => BlockComment); t!(Both[""]: "/*/*" => BlockComment);
t!(Both[""]: "/**/" => BlockComment); t!(Both[""]: "/**/" => BlockComment);
t!(Both[""]: "/***" => BlockComment); t!(Both[""]: "/***" => BlockComment);
}
#[test] // Test unexpected terminator.
fn test_tokenize_invalid() { t!(Both: "/*Hi*/*/" => BlockComment,
// Test invalidly closed block comments. Error(Full, "unexpected end of block comment"));
t!(Both: "*/" => Invalid("*/"));
t!(Both: "/**/*/" => BlockComment, Invalid("*/"));
// Test invalid expressions.
t!(Code: r"\" => Invalid(r"\"));
t!(Code: "🌓" => Invalid("🌓"));
t!(Code: r"\:" => Invalid(r"\"), Colon);
t!(Code: "meal⌚" => Ident("meal"), Invalid(""));
t!(Code[" /"]: r"\a" => Invalid(r"\"), Ident("a"));
t!(Code[" /"]: "#" => Invalid("#"));
// Test invalid number suffixes.
t!(Code[" /"]: "1foo" => Invalid("1foo"));
t!(Code: "1p%" => Invalid("1p"), Invalid("%"));
t!(Code: "1%%" => Numeric(1.0, Unit::Percent), Invalid("%"));
} }
} }

View File

@ -10,7 +10,7 @@ use unscanny::Scanner;
use crate::diag::SourceResult; use crate::diag::SourceResult;
use crate::parse::{is_newline, parse, reparse}; use crate::parse::{is_newline, parse, reparse};
use crate::syntax::ast::Markup; use crate::syntax::ast::MarkupNode;
use crate::syntax::{Span, SyntaxNode}; use crate::syntax::{Span, SyntaxNode};
use crate::util::{PathExt, StrExt}; use crate::util::{PathExt, StrExt};
@ -64,7 +64,7 @@ impl Source {
} }
/// The root node of the file's typed abstract syntax tree. /// The root node of the file's typed abstract syntax tree.
pub fn ast(&self) -> SourceResult<Markup> { pub fn ast(&self) -> SourceResult<MarkupNode> {
let errors = self.root.errors(); let errors = self.root.errors();
if errors.is_empty() { if errors.is_empty() {
Ok(self.root.cast().expect("root node must be markup")) Ok(self.root.cast().expect("root node must be markup"))

View File

@ -1,6 +1,6 @@
//! A typed layer over the untyped syntax tree. //! A typed layer over the untyped syntax tree.
//! //!
//! The AST is rooted in the [`Markup`] node. //! The AST is rooted in the [`MarkupNode`].
use std::num::NonZeroUsize; use std::num::NonZeroUsize;
use std::ops::Deref; use std::ops::Deref;
@ -54,19 +54,19 @@ macro_rules! node {
node! { node! {
/// The syntactical root capable of representing a full parsed document. /// The syntactical root capable of representing a full parsed document.
Markup: NodeKind::Markup { .. } MarkupNode: NodeKind::Markup { .. }
} }
impl Markup { impl MarkupNode {
/// The markup nodes. /// The children.
pub fn nodes(&self) -> impl Iterator<Item = MarkupNode> + '_ { pub fn items(&self) -> impl Iterator<Item = MarkupItem> + '_ {
self.0.children().filter_map(SyntaxNode::cast) self.0.children().filter_map(SyntaxNode::cast)
} }
} }
/// A single piece of markup. /// A single piece of markup.
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum MarkupNode { pub enum MarkupItem {
/// Whitespace containing less than two newlines. /// Whitespace containing less than two newlines.
Space, Space,
/// A forced line break. /// A forced line break.
@ -81,34 +81,34 @@ pub enum MarkupNode {
Strong(StrongNode), Strong(StrongNode),
/// Emphasized content: `_Emphasized_`. /// Emphasized content: `_Emphasized_`.
Emph(EmphNode), Emph(EmphNode),
/// A hyperlink. /// A hyperlink: `https://typst.org`.
Link(EcoString), Link(EcoString),
/// A raw block with optional syntax highlighting: `` `...` ``. /// A raw block with optional syntax highlighting: `` `...` ``.
Raw(RawNode), Raw(RawNode),
/// A math formula: `$a^2 = b^2 + c^2$`. /// A math formula: `$x$`, `$ x^2 $`.
Math(Math), Math(MathNode),
/// A section heading: `= Introduction`. /// A section heading: `= Introduction`.
Heading(HeadingNode), Heading(HeadingNode),
/// An item in an unordered list: `- ...`. /// An item in an unordered list: `- ...`.
List(ListNode), List(ListItem),
/// An item in an enumeration (ordered list): `+ ...` or `1. ...`. /// An item in an enumeration (ordered list): `+ ...` or `1. ...`.
Enum(EnumNode), Enum(EnumItem),
/// An item in a description list: `/ Term: Details. /// An item in a description list: `/ Term: Details`.
Desc(DescNode), Desc(DescItem),
/// A label. /// A label: `<label>`.
Label(EcoString), Label(EcoString),
/// A reference. /// A reference: `@label`.
Ref(EcoString), Ref(EcoString),
/// An expression. /// An expression.
Expr(Expr), Expr(Expr),
} }
impl TypedNode for MarkupNode { impl TypedNode for MarkupItem {
fn from_untyped(node: &SyntaxNode) -> Option<Self> { fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() { match node.kind() {
NodeKind::Space { newlines: (2 ..) } => Some(Self::Parbreak), NodeKind::Space { newlines: (2 ..) } => Some(Self::Parbreak),
NodeKind::Space { .. } => Some(Self::Space), NodeKind::Space { .. } => Some(Self::Space),
NodeKind::Linebreak => Some(Self::Linebreak), NodeKind::Backslash => Some(Self::Linebreak),
NodeKind::Text(s) => Some(Self::Text(s.clone())), NodeKind::Text(s) => Some(Self::Text(s.clone())),
NodeKind::Escape(c) => Some(Self::Text((*c).into())), NodeKind::Escape(c) => Some(Self::Text((*c).into())),
NodeKind::Tilde => Some(Self::Text('\u{00A0}'.into())), NodeKind::Tilde => Some(Self::Text('\u{00A0}'.into())),
@ -123,9 +123,9 @@ impl TypedNode for MarkupNode {
NodeKind::Raw(raw) => Some(Self::Raw(raw.as_ref().clone())), NodeKind::Raw(raw) => Some(Self::Raw(raw.as_ref().clone())),
NodeKind::Math => node.cast().map(Self::Math), NodeKind::Math => node.cast().map(Self::Math),
NodeKind::Heading => node.cast().map(Self::Heading), NodeKind::Heading => node.cast().map(Self::Heading),
NodeKind::List => node.cast().map(Self::List), NodeKind::ListItem => node.cast().map(Self::List),
NodeKind::Enum => node.cast().map(Self::Enum), NodeKind::EnumItem => node.cast().map(Self::Enum),
NodeKind::Desc => node.cast().map(Self::Desc), NodeKind::DescItem => node.cast().map(Self::Desc),
NodeKind::Label(v) => Some(Self::Label(v.clone())), NodeKind::Label(v) => Some(Self::Label(v.clone())),
NodeKind::Ref(v) => Some(Self::Ref(v.clone())), NodeKind::Ref(v) => Some(Self::Ref(v.clone())),
_ => node.cast().map(Self::Expr), _ => node.cast().map(Self::Expr),
@ -144,7 +144,7 @@ node! {
impl StrongNode { impl StrongNode {
/// The contents of the strong node. /// The contents of the strong node.
pub fn body(&self) -> Markup { pub fn body(&self) -> MarkupNode {
self.0.cast_first_child().expect("strong node is missing markup body") self.0.cast_first_child().expect("strong node is missing markup body")
} }
} }
@ -156,7 +156,7 @@ node! {
impl EmphNode { impl EmphNode {
/// The contents of the emphasis node. /// The contents of the emphasis node.
pub fn body(&self) -> Markup { pub fn body(&self) -> MarkupNode {
self.0 self.0
.cast_first_child() .cast_first_child()
.expect("emphasis node is missing markup body") .expect("emphasis node is missing markup body")
@ -178,19 +178,19 @@ pub struct RawNode {
node! { node! {
/// A math formula: `$x$`, `$ x^2 $`. /// A math formula: `$x$`, `$ x^2 $`.
Math: NodeKind::Math { .. } MathNode: NodeKind::Math { .. }
} }
impl Math { impl MathNode {
/// The math nodes. /// The children.
pub fn nodes(&self) -> impl Iterator<Item = MathNode> + '_ { pub fn items(&self) -> impl Iterator<Item = MathItem> + '_ {
self.0.children().filter_map(SyntaxNode::cast) self.0.children().filter_map(SyntaxNode::cast)
} }
} }
/// A single piece of a math formula. /// A single piece of a math formula.
#[derive(Debug, Clone, PartialEq, Hash)] #[derive(Debug, Clone, PartialEq, Hash)]
pub enum MathNode { pub enum MathItem {
/// Whitespace. /// Whitespace.
Space, Space,
/// A forced line break. /// A forced line break.
@ -201,15 +201,15 @@ pub enum MathNode {
Script(ScriptNode), Script(ScriptNode),
/// A fraction: `x/2`. /// A fraction: `x/2`.
Frac(FracNode), Frac(FracNode),
/// A math alignment indicator: `&`, `&&`. /// An alignment indicator: `&`, `&&`.
Align(AlignNode), Align(AlignNode),
/// Grouped mathematical material. /// Grouped mathematical material.
Group(Math), Group(MathNode),
/// An expression. /// An expression.
Expr(Expr), Expr(Expr),
} }
impl TypedNode for MathNode { impl TypedNode for MathItem {
fn from_untyped(node: &SyntaxNode) -> Option<Self> { fn from_untyped(node: &SyntaxNode) -> Option<Self> {
match node.kind() { match node.kind() {
NodeKind::Space { .. } => Some(Self::Space), NodeKind::Space { .. } => Some(Self::Space),
@ -219,7 +219,7 @@ impl TypedNode for MathNode {
NodeKind::RightBracket => Some(Self::Atom(']'.into())), NodeKind::RightBracket => Some(Self::Atom(']'.into())),
NodeKind::LeftParen => Some(Self::Atom('('.into())), NodeKind::LeftParen => Some(Self::Atom('('.into())),
NodeKind::RightParen => Some(Self::Atom(')'.into())), NodeKind::RightParen => Some(Self::Atom(')'.into())),
NodeKind::Linebreak => Some(Self::Linebreak), NodeKind::Backslash => Some(Self::Linebreak),
NodeKind::Escape(c) => Some(Self::Atom((*c).into())), NodeKind::Escape(c) => Some(Self::Atom((*c).into())),
NodeKind::Atom(atom) => Some(Self::Atom(atom.clone())), NodeKind::Atom(atom) => Some(Self::Atom(atom.clone())),
NodeKind::Script => node.cast().map(Self::Script), NodeKind::Script => node.cast().map(Self::Script),
@ -242,12 +242,12 @@ node! {
impl ScriptNode { impl ScriptNode {
/// The base of the script. /// The base of the script.
pub fn base(&self) -> MathNode { pub fn base(&self) -> MathItem {
self.0.cast_first_child().expect("subscript is missing base") self.0.cast_first_child().expect("subscript is missing base")
} }
/// The subscript. /// The subscript.
pub fn sub(&self) -> Option<MathNode> { pub fn sub(&self) -> Option<MathItem> {
self.0 self.0
.children() .children()
.skip_while(|node| !matches!(node.kind(), NodeKind::Underscore)) .skip_while(|node| !matches!(node.kind(), NodeKind::Underscore))
@ -256,7 +256,7 @@ impl ScriptNode {
} }
/// The superscript. /// The superscript.
pub fn sup(&self) -> Option<MathNode> { pub fn sup(&self) -> Option<MathItem> {
self.0 self.0
.children() .children()
.skip_while(|node| !matches!(node.kind(), NodeKind::Hat)) .skip_while(|node| !matches!(node.kind(), NodeKind::Hat))
@ -272,12 +272,12 @@ node! {
impl FracNode { impl FracNode {
/// The numerator. /// The numerator.
pub fn num(&self) -> MathNode { pub fn num(&self) -> MathItem {
self.0.cast_first_child().expect("fraction is missing numerator") self.0.cast_first_child().expect("fraction is missing numerator")
} }
/// The denominator. /// The denominator.
pub fn denom(&self) -> MathNode { pub fn denom(&self) -> MathItem {
self.0.cast_last_child().expect("fraction is missing denominator") self.0.cast_last_child().expect("fraction is missing denominator")
} }
} }
@ -301,7 +301,7 @@ node! {
impl HeadingNode { impl HeadingNode {
/// The contents of the heading. /// The contents of the heading.
pub fn body(&self) -> Markup { pub fn body(&self) -> MarkupNode {
self.0.cast_first_child().expect("heading is missing markup body") self.0.cast_first_child().expect("heading is missing markup body")
} }
@ -318,27 +318,22 @@ impl HeadingNode {
node! { node! {
/// An item in an unordered list: `- ...`. /// An item in an unordered list: `- ...`.
ListNode: List ListItem: ListItem
} }
impl ListNode { impl ListItem {
/// The contents of the list item. /// The contents of the list item.
pub fn body(&self) -> Markup { pub fn body(&self) -> MarkupNode {
self.0.cast_first_child().expect("list item is missing body") self.0.cast_first_child().expect("list item is missing body")
} }
} }
node! { node! {
/// An item in an enumeration (ordered list): `1. ...`. /// An item in an enumeration (ordered list): `1. ...`.
EnumNode: Enum EnumItem: EnumItem
}
impl EnumNode {
/// The contents of the list item.
pub fn body(&self) -> Markup {
self.0.cast_first_child().expect("enum item is missing body")
} }
impl EnumItem {
/// The number, if any. /// The number, if any.
pub fn number(&self) -> Option<usize> { pub fn number(&self) -> Option<usize> {
self.0.children().find_map(|node| match node.kind() { self.0.children().find_map(|node| match node.kind() {
@ -346,23 +341,28 @@ impl EnumNode {
_ => None, _ => None,
}) })
} }
/// The contents of the list item.
pub fn body(&self) -> MarkupNode {
self.0.cast_first_child().expect("enum item is missing body")
}
} }
node! { node! {
/// An item in a description list: `/ Term: Details. /// An item in a description list: `/ Term: Details`.
DescNode: Desc DescItem: DescItem
} }
impl DescNode { impl DescItem {
/// The term described by the list item. /// The term described by the item.
pub fn term(&self) -> Markup { pub fn term(&self) -> MarkupNode {
self.0 self.0
.cast_first_child() .cast_first_child()
.expect("description list item is missing term") .expect("description list item is missing term")
} }
/// The description of the term. /// The description of the term.
pub fn body(&self) -> Markup { pub fn body(&self) -> MarkupNode {
self.0 self.0
.cast_last_child() .cast_last_child()
.expect("description list item is missing body") .expect("description list item is missing body")
@ -586,7 +586,7 @@ node! {
impl ContentBlock { impl ContentBlock {
/// The contained markup. /// The contained markup.
pub fn body(&self) -> Markup { pub fn body(&self) -> MarkupNode {
self.0.cast_first_child().expect("content is missing body") self.0.cast_first_child().expect("content is missing body")
} }
} }

View File

@ -1,3 +1,5 @@
//! Syntax highlighting for Typst source code.
use std::fmt::Write; use std::fmt::Write;
use std::ops::Range; use std::ops::Range;
@ -5,85 +7,9 @@ use syntect::highlighting::{Color, FontStyle, Highlighter, Style, Theme};
use syntect::parsing::Scope; use syntect::parsing::Scope;
use super::{NodeKind, SyntaxNode}; use super::{NodeKind, SyntaxNode};
use crate::parse::TokenMode;
/// Provide highlighting categories for the descendants of a node that fall into
/// a range.
pub fn highlight_node<F>(root: &SyntaxNode, range: Range<usize>, mut f: F)
where
F: FnMut(Range<usize>, Category),
{
highlight_node_impl(0, root, range, &mut f)
}
/// Provide highlighting categories for the descendants of a node that fall into
/// a range.
pub fn highlight_node_impl<F>(
mut offset: usize,
node: &SyntaxNode,
range: Range<usize>,
f: &mut F,
) where
F: FnMut(Range<usize>, Category),
{
for (i, child) in node.children().enumerate() {
let span = offset .. offset + child.len();
if range.start <= span.end && range.end >= span.start {
if let Some(category) = Category::determine(child, node, i) {
f(span, category);
}
highlight_node_impl(offset, child, range.clone(), f);
}
offset += child.len();
}
}
/// Highlight source text in a theme by calling `f` with each consecutive piece
/// and its style.
pub fn highlight_themed<F>(text: &str, mode: TokenMode, theme: &Theme, mut f: F)
where
F: FnMut(&str, Style),
{
let root = match mode {
TokenMode::Markup => crate::parse::parse(text),
TokenMode::Math => crate::parse::parse_math(text),
TokenMode::Code => crate::parse::parse_code(text),
};
let highlighter = Highlighter::new(&theme);
highlight_themed_impl(text, 0, &root, vec![], &highlighter, &mut f);
}
/// Recursive implementation for highlighting with a syntect theme.
fn highlight_themed_impl<F>(
text: &str,
mut offset: usize,
node: &SyntaxNode,
scopes: Vec<Scope>,
highlighter: &Highlighter,
f: &mut F,
) where
F: FnMut(&str, Style),
{
if node.children().len() == 0 {
let piece = &text[offset .. offset + node.len()];
let style = highlighter.style_for_stack(&scopes);
f(piece, style);
return;
}
for (i, child) in node.children().enumerate() {
let mut scopes = scopes.clone();
if let Some(category) = Category::determine(child, node, i) {
scopes.push(Scope::new(category.tm_scope()).unwrap())
}
highlight_themed_impl(text, offset, child, scopes, highlighter, f);
offset += child.len();
}
}
/// Highlight source text into a standalone HTML document. /// Highlight source text into a standalone HTML document.
pub fn highlight_html(text: &str, mode: TokenMode, theme: &Theme) -> String { pub fn highlight_html(text: &str, theme: &Theme) -> String {
let mut buf = String::new(); let mut buf = String::new();
buf.push_str("<!DOCTYPE html>\n"); buf.push_str("<!DOCTYPE html>\n");
buf.push_str("<html>\n"); buf.push_str("<html>\n");
@ -91,18 +17,19 @@ pub fn highlight_html(text: &str, mode: TokenMode, theme: &Theme) -> String {
buf.push_str(" <meta charset=\"utf-8\">\n"); buf.push_str(" <meta charset=\"utf-8\">\n");
buf.push_str("</head>\n"); buf.push_str("</head>\n");
buf.push_str("<body>\n"); buf.push_str("<body>\n");
buf.push_str(&highlight_pre(text, mode, theme)); buf.push_str(&highlight_pre(text, theme));
buf.push_str("\n</body>\n"); buf.push_str("\n</body>\n");
buf.push_str("</html>\n"); buf.push_str("</html>\n");
buf buf
} }
/// Highlight source text into an HTML pre element. /// Highlight source text into an HTML pre element.
pub fn highlight_pre(text: &str, mode: TokenMode, theme: &Theme) -> String { pub fn highlight_pre(text: &str, theme: &Theme) -> String {
let mut buf = String::new(); let mut buf = String::new();
buf.push_str("<pre>\n"); buf.push_str("<pre>\n");
highlight_themed(text, mode, theme, |piece, style| { let root = crate::parse::parse(text);
highlight_themed(&root, theme, |range, style| {
let styled = style != Style::default(); let styled = style != Style::default();
if styled { if styled {
buf.push_str("<span style=\""); buf.push_str("<span style=\"");
@ -127,7 +54,7 @@ pub fn highlight_pre(text: &str, mode: TokenMode, theme: &Theme) -> String {
buf.push_str("\">"); buf.push_str("\">");
} }
buf.push_str(piece); buf.push_str(&text[range]);
if styled { if styled {
buf.push_str("</span>"); buf.push_str("</span>");
@ -138,19 +65,82 @@ pub fn highlight_pre(text: &str, mode: TokenMode, theme: &Theme) -> String {
buf buf
} }
/// Highlight a syntax node in a theme by calling `f` with ranges and their
/// styles.
pub fn highlight_themed<F>(root: &SyntaxNode, theme: &Theme, mut f: F)
where
F: FnMut(Range<usize>, Style),
{
fn process<F>(
mut offset: usize,
node: &SyntaxNode,
scopes: Vec<Scope>,
highlighter: &Highlighter,
f: &mut F,
) where
F: FnMut(Range<usize>, Style),
{
if node.children().len() == 0 {
let range = offset .. offset + node.len();
let style = highlighter.style_for_stack(&scopes);
f(range, style);
return;
}
for (i, child) in node.children().enumerate() {
let mut scopes = scopes.clone();
if let Some(category) = Category::determine(child, node, i) {
scopes.push(Scope::new(category.tm_scope()).unwrap())
}
process(offset, child, scopes, highlighter, f);
offset += child.len();
}
}
let highlighter = Highlighter::new(&theme);
process(0, root, vec![], &highlighter, &mut f);
}
/// Highlight a syntax node by calling `f` with ranges overlapping `within` and
/// their categories.
pub fn highlight_categories<F>(root: &SyntaxNode, within: Range<usize>, mut f: F)
where
F: FnMut(Range<usize>, Category),
{
fn process<F>(mut offset: usize, node: &SyntaxNode, range: Range<usize>, f: &mut F)
where
F: FnMut(Range<usize>, Category),
{
for (i, child) in node.children().enumerate() {
let span = offset .. offset + child.len();
if range.start <= span.end && range.end >= span.start {
if let Some(category) = Category::determine(child, node, i) {
f(span, category);
}
process(offset, child, range.clone(), f);
}
offset += child.len();
}
}
process(0, root, within, &mut f)
}
/// The syntax highlighting category of a node. /// The syntax highlighting category of a node.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum Category { pub enum Category {
/// A line or block comment. /// A line or block comment.
Comment, Comment,
/// Any kind of bracket, parenthesis or brace. /// A square bracket, parenthesis or brace.
Bracket, Bracket,
/// Punctuation in code. /// Punctuation in code.
Punctuation, Punctuation,
/// An easily typable shortcut to a unicode codepoint.
Shortcut,
/// An escape sequence. /// An escape sequence.
Escape, Escape,
/// An easily typable shortcut to a unicode codepoint.
Shortcut,
/// A smart quote.
Quote,
/// Strong text. /// Strong text.
Strong, Strong,
/// Emphasized text. /// Emphasized text.
@ -159,38 +149,40 @@ pub enum Category {
Link, Link,
/// Raw text or code. /// Raw text or code.
Raw, Raw,
/// A math formula. /// A full math formula.
Math, Math,
/// The delimiters of a math formula.
MathDelimiter,
/// A symbol with special meaning in a math formula.
MathSymbol,
/// A section heading. /// A section heading.
Heading, Heading,
/// A full item of a list, enumeration or description list.
ListItem,
/// A marker of a list, enumeration, or description list. /// A marker of a list, enumeration, or description list.
ListMarker, ListMarker,
/// A term in a description list. /// A term in a description list.
Term, ListTerm,
/// A label. /// A label.
Label, Label,
/// A reference. /// A reference.
Ref, Ref,
/// A keyword. /// A keyword.
Keyword, Keyword,
/// A literal defined by a keyword like `none`, `auto` or a boolean.
KeywordLiteral,
/// An operator symbol. /// An operator symbol.
Operator, Operator,
/// The none literal.
None,
/// The auto literal.
Auto,
/// A boolean literal.
Bool,
/// A numeric literal. /// A numeric literal.
Number, Number,
/// A string literal. /// A string literal.
String, String,
/// A function. /// A function or method name.
Function, Function,
/// An interpolated variable in markup. /// An interpolated variable in markup or math.
Interpolated, Interpolated,
/// An invalid node. /// A syntax error.
Invalid, Error,
} }
impl Category { impl Category {
@ -214,40 +206,38 @@ impl Category {
NodeKind::RightParen => Some(Category::Bracket), NodeKind::RightParen => Some(Category::Bracket),
NodeKind::Comma => Some(Category::Punctuation), NodeKind::Comma => Some(Category::Punctuation),
NodeKind::Semicolon => Some(Category::Punctuation), NodeKind::Semicolon => Some(Category::Punctuation),
NodeKind::Colon => match parent.kind() { NodeKind::Colon => Some(Category::Punctuation),
NodeKind::Desc => Some(Category::Term),
_ => Some(Category::Punctuation),
},
NodeKind::Star => match parent.kind() { NodeKind::Star => match parent.kind() {
NodeKind::Strong => None, NodeKind::Strong => None,
_ => Some(Category::Operator), _ => Some(Category::Operator),
}, },
NodeKind::Underscore => match parent.kind() { NodeKind::Underscore => match parent.kind() {
NodeKind::Script => Some(Category::Shortcut), NodeKind::Script => Some(Category::MathSymbol),
_ => None, _ => None,
}, },
NodeKind::Dollar => Some(Category::Math), NodeKind::Dollar => Some(Category::MathDelimiter),
NodeKind::Backslash => Some(Category::Shortcut),
NodeKind::Tilde => Some(Category::Shortcut), NodeKind::Tilde => Some(Category::Shortcut),
NodeKind::HyphQuest => Some(Category::Shortcut), NodeKind::HyphQuest => Some(Category::Shortcut),
NodeKind::Hyph2 => Some(Category::Shortcut), NodeKind::Hyph2 => Some(Category::Shortcut),
NodeKind::Hyph3 => Some(Category::Shortcut), NodeKind::Hyph3 => Some(Category::Shortcut),
NodeKind::Dot3 => Some(Category::Shortcut), NodeKind::Dot3 => Some(Category::Shortcut),
NodeKind::Quote { .. } => None, NodeKind::Quote { .. } => Some(Category::Quote),
NodeKind::Plus => match parent.kind() { NodeKind::Plus => Some(match parent.kind() {
NodeKind::Enum => Some(Category::ListMarker), NodeKind::EnumItem => Category::ListMarker,
_ => Some(Category::Operator), _ => Category::Operator,
}, }),
NodeKind::Minus => match parent.kind() { NodeKind::Minus => Some(match parent.kind() {
NodeKind::List => Some(Category::ListMarker), NodeKind::ListItem => Category::ListMarker,
_ => Some(Category::Operator), _ => Category::Operator,
}, }),
NodeKind::Slash => match parent.kind() { NodeKind::Slash => Some(match parent.kind() {
NodeKind::Desc => Some(Category::ListMarker), NodeKind::DescItem => Category::ListMarker,
NodeKind::Frac => Some(Category::Shortcut), NodeKind::Frac => Category::MathSymbol,
_ => Some(Category::Operator), _ => Category::Operator,
}, }),
NodeKind::Hat => Some(Category::Shortcut), NodeKind::Hat => Some(Category::MathSymbol),
NodeKind::Amp => Some(Category::Shortcut), NodeKind::Amp => Some(Category::MathSymbol),
NodeKind::Dot => Some(Category::Punctuation), NodeKind::Dot => Some(Category::Punctuation),
NodeKind::Eq => match parent.kind() { NodeKind::Eq => match parent.kind() {
NodeKind::Heading => None, NodeKind::Heading => None,
@ -269,8 +259,8 @@ impl Category {
NodeKind::Not => Some(Category::Keyword), NodeKind::Not => Some(Category::Keyword),
NodeKind::And => Some(Category::Keyword), NodeKind::And => Some(Category::Keyword),
NodeKind::Or => Some(Category::Keyword), NodeKind::Or => Some(Category::Keyword),
NodeKind::None => Some(Category::None), NodeKind::None => Some(Category::KeywordLiteral),
NodeKind::Auto => Some(Category::Auto), NodeKind::Auto => Some(Category::KeywordLiteral),
NodeKind::Let => Some(Category::Keyword), NodeKind::Let => Some(Category::Keyword),
NodeKind::Set => Some(Category::Keyword), NodeKind::Set => Some(Category::Keyword),
NodeKind::Show => Some(Category::Keyword), NodeKind::Show => Some(Category::Keyword),
@ -289,37 +279,35 @@ impl Category {
NodeKind::As => Some(Category::Keyword), NodeKind::As => Some(Category::Keyword),
NodeKind::Markup { .. } => match parent.kind() { NodeKind::Markup { .. } => match parent.kind() {
NodeKind::Desc NodeKind::DescItem
if parent if parent
.children() .children()
.take_while(|child| child.kind() != &NodeKind::Colon) .take_while(|child| child.kind() != &NodeKind::Colon)
.find(|c| matches!(c.kind(), NodeKind::Markup { .. })) .find(|c| matches!(c.kind(), NodeKind::Markup { .. }))
.map_or(false, |ident| std::ptr::eq(ident, child)) => .map_or(false, |ident| std::ptr::eq(ident, child)) =>
{ {
Some(Category::Term) Some(Category::ListTerm)
} }
_ => None, _ => None,
}, },
NodeKind::Linebreak { .. } => Some(Category::Shortcut),
NodeKind::Text(_) => None, NodeKind::Text(_) => None,
NodeKind::Escape(_) => Some(Category::Escape), NodeKind::Escape(_) => Some(Category::Escape),
NodeKind::Strong => Some(Category::Strong), NodeKind::Strong => Some(Category::Strong),
NodeKind::Emph => Some(Category::Emph), NodeKind::Emph => Some(Category::Emph),
NodeKind::Link(_) => Some(Category::Link), NodeKind::Link(_) => Some(Category::Link),
NodeKind::Raw(_) => Some(Category::Raw), NodeKind::Raw(_) => Some(Category::Raw),
NodeKind::Math => None, NodeKind::Math => Some(Category::Math),
NodeKind::Heading => Some(Category::Heading),
NodeKind::List => None,
NodeKind::Enum => None,
NodeKind::EnumNumbering(_) => Some(Category::ListMarker),
NodeKind::Desc => None,
NodeKind::Label(_) => Some(Category::Label),
NodeKind::Ref(_) => Some(Category::Ref),
NodeKind::Atom(_) => None, NodeKind::Atom(_) => None,
NodeKind::Script => None, NodeKind::Script => None,
NodeKind::Frac => None, NodeKind::Frac => None,
NodeKind::Align => None, NodeKind::Align => None,
NodeKind::Heading => Some(Category::Heading),
NodeKind::ListItem => Some(Category::ListItem),
NodeKind::EnumItem => Some(Category::ListItem),
NodeKind::EnumNumbering(_) => Some(Category::ListMarker),
NodeKind::DescItem => Some(Category::ListItem),
NodeKind::Label(_) => Some(Category::Label),
NodeKind::Ref(_) => Some(Category::Ref),
NodeKind::Ident(_) => match parent.kind() { NodeKind::Ident(_) => match parent.kind() {
NodeKind::Markup { .. } => Some(Category::Interpolated), NodeKind::Markup { .. } => Some(Category::Interpolated),
@ -341,7 +329,7 @@ impl Category {
} }
_ => None, _ => None,
}, },
NodeKind::Bool(_) => Some(Category::Bool), NodeKind::Bool(_) => Some(Category::KeywordLiteral),
NodeKind::Int(_) => Some(Category::Number), NodeKind::Int(_) => Some(Category::Number),
NodeKind::Float(_) => Some(Category::Number), NodeKind::Float(_) => Some(Category::Number),
NodeKind::Numeric(_, _) => Some(Category::Number), NodeKind::Numeric(_, _) => Some(Category::Number),
@ -377,39 +365,40 @@ impl Category {
NodeKind::ContinueExpr => None, NodeKind::ContinueExpr => None,
NodeKind::ReturnExpr => None, NodeKind::ReturnExpr => None,
NodeKind::Error(_, _) => Some(Category::Invalid), NodeKind::Error(_, _) => Some(Category::Error),
NodeKind::Unknown(_) => Some(Category::Invalid),
} }
} }
/// Return the TextMate grammar scope for the given highlighting category. /// Return the TextMate grammar scope for the given highlighting category.
pub fn tm_scope(&self) -> &'static str { pub fn tm_scope(&self) -> &'static str {
match self { match self {
Self::Bracket => "punctuation.definition.typst",
Self::Punctuation => "punctuation.typst",
Self::Comment => "comment.typst", Self::Comment => "comment.typst",
Self::Shortcut => "punctuation.shortcut.typst", Self::Bracket => "punctuation.definition.bracket.typst",
Self::Escape => "constant.character.escape.content.typst", Self::Punctuation => "punctuation.typst",
Self::Escape => "constant.character.escape.typst",
Self::Shortcut => "constant.character.shortcut.typst",
Self::Quote => "constant.character.quote.typst",
Self::Strong => "markup.bold.typst", Self::Strong => "markup.bold.typst",
Self::Emph => "markup.italic.typst", Self::Emph => "markup.italic.typst",
Self::Link => "markup.underline.link.typst", Self::Link => "markup.underline.link.typst",
Self::Raw => "markup.raw.typst", Self::Raw => "markup.raw.typst",
Self::Math => "string.other.math.typst", Self::Math => "string.other.math.typst",
Self::MathDelimiter => "punctuation.definition.math.typst",
Self::MathSymbol => "keyword.operator.math.typst",
Self::Heading => "markup.heading.typst", Self::Heading => "markup.heading.typst",
Self::ListMarker => "markup.list.typst", Self::ListItem => "markup.list.typst",
Self::Term => "markup.list.term.typst", Self::ListMarker => "punctuation.definition.list.typst",
Self::ListTerm => "markup.list.term.typst",
Self::Label => "entity.name.label.typst", Self::Label => "entity.name.label.typst",
Self::Ref => "markup.other.reference.typst", Self::Ref => "markup.other.reference.typst",
Self::Keyword => "keyword.typst", Self::Keyword => "keyword.typst",
Self::Operator => "keyword.operator.typst", Self::Operator => "keyword.operator.typst",
Self::None => "constant.language.none.typst", Self::KeywordLiteral => "constant.language.typst",
Self::Auto => "constant.language.auto.typst",
Self::Bool => "constant.language.boolean.typst",
Self::Number => "constant.numeric.typst", Self::Number => "constant.numeric.typst",
Self::String => "string.quoted.double.typst", Self::String => "string.quoted.double.typst",
Self::Function => "entity.name.function.typst", Self::Function => "entity.name.function.typst",
Self::Interpolated => "entity.other.interpolated.typst", Self::Interpolated => "meta.interpolation.typst",
Self::Invalid => "invalid.typst", Self::Error => "invalid.typst",
} }
} }
} }
@ -428,7 +417,7 @@ mod tests {
let mut vec = vec![]; let mut vec = vec![];
let source = Source::detached(text); let source = Source::detached(text);
let full = 0 .. text.len(); let full = 0 .. text.len();
highlight_node(source.root(), full, &mut |range, category| { highlight_categories(source.root(), full, &mut |range, category| {
vec.push((range, category)); vec.push((range, category));
}); });
assert_eq!(vec, goal); assert_eq!(vec, goal);

548
src/syntax/kind.rs Normal file
View File

@ -0,0 +1,548 @@
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use super::ast::{RawNode, Unit};
use super::SpanPos;
use crate::util::EcoString;
/// All syntactical building blocks that can be part of a Typst document.
///
/// Can be emitted as a token by the tokenizer or as part of a syntax node by
/// the parser.
///
/// Note that `Hash` is implemented by hand for this type rather than derived,
/// since some variants carry `f64` payloads, which do not implement `Hash`.
#[derive(Debug, Clone, PartialEq)]
pub enum NodeKind {
    /// A line comment, two slashes followed by inner contents, terminated with
    /// a newline: `//<str>\n`.
    LineComment,
    /// A block comment, a slash and a star followed by inner contents,
    /// terminated with a star and a slash: `/*<str>*/`.
    ///
    /// The comment can contain nested block comments.
    BlockComment,
    /// One or more whitespace characters. Single spaces are collapsed into text
    /// nodes if they would otherwise be surrounded by text nodes.
    ///
    /// Also stores how many newlines are contained.
    Space { newlines: usize },
    /// A left curly brace, starting a code block: `{`.
    LeftBrace,
    /// A right curly brace, terminating a code block: `}`.
    RightBrace,
    /// A left square bracket, starting a content block: `[`.
    LeftBracket,
    /// A right square bracket, terminating a content block: `]`.
    RightBracket,
    /// A left round parenthesis, starting a grouped expression, collection,
    /// argument or parameter list: `(`.
    LeftParen,
    /// A right round parenthesis, terminating a grouped expression, collection,
    /// argument or parameter list: `)`.
    RightParen,
    /// A comma separator in a sequence: `,`.
    Comma,
    /// A semicolon terminating an expression: `;`.
    Semicolon,
    /// A colon between name / key and value in a dictionary, argument or
    /// parameter list, or between the term and body of a description list
    /// item: `:`.
    Colon,
    /// The strong text toggle, multiplication operator, and wildcard import
    /// symbol: `*`.
    Star,
    /// Toggles emphasized text and indicates a subscript in a formula: `_`.
    Underscore,
    /// Starts and ends a math formula: `$`.
    Dollar,
    /// A forced line break: `\`.
    Backslash,
    /// The non-breaking space: `~`.
    Tilde,
    /// The soft hyphen: `-?`.
    HyphQuest,
    /// The en-dash: `--`.
    Hyph2,
    /// The em-dash: `---`.
    Hyph3,
    /// The ellipsis: `...`.
    Dot3,
    /// A smart quote: `'` or `"`.
    ///
    /// The `double` flag distinguishes the double quote from the single quote.
    Quote { double: bool },
    /// The unary plus, binary addition operator, and start of enum items: `+`.
    Plus,
    /// The unary negation, binary subtraction operator, and start of list
    /// items: `-`.
    Minus,
    /// The division operator, start of description list items, and fraction
    /// operator in a formula: `/`.
    Slash,
    /// The superscript operator in a formula: `^`.
    Hat,
    /// The alignment operator in a formula: `&`.
    Amp,
    /// The field access and method call operator: `.`.
    Dot,
    /// The assignment operator: `=`.
    Eq,
    /// The equality operator: `==`.
    EqEq,
    /// The inequality operator: `!=`.
    ExclEq,
    /// The less-than operator: `<`.
    Lt,
    /// The less-than or equal operator: `<=`.
    LtEq,
    /// The greater-than operator: `>`.
    Gt,
    /// The greater-than or equal operator: `>=`.
    GtEq,
    /// The add-assign operator: `+=`.
    PlusEq,
    /// The subtract-assign operator: `-=`.
    HyphEq,
    /// The multiply-assign operator: `*=`.
    StarEq,
    /// The divide-assign operator: `/=`.
    SlashEq,
    /// The spread operator: `..`.
    Dots,
    /// An arrow between a closure's parameters and body: `=>`.
    Arrow,
    /// The `not` operator.
    Not,
    /// The `and` operator.
    And,
    /// The `or` operator.
    Or,
    /// The `none` literal.
    None,
    /// The `auto` literal.
    Auto,
    /// The `let` keyword.
    Let,
    /// The `set` keyword.
    Set,
    /// The `show` keyword.
    Show,
    /// The `wrap` keyword.
    Wrap,
    /// The `if` keyword.
    If,
    /// The `else` keyword.
    Else,
    /// The `for` keyword.
    For,
    /// The `in` keyword.
    In,
    /// The `while` keyword.
    While,
    /// The `break` keyword.
    Break,
    /// The `continue` keyword.
    Continue,
    /// The `return` keyword.
    Return,
    /// The `import` keyword.
    Import,
    /// The `include` keyword.
    Include,
    /// The `from` keyword.
    From,
    /// The `as` keyword.
    As,
    /// Markup of which all lines must have a minimal indentation.
    ///
    /// Notably, the number does not determine in which column the markup
    /// started, but to the right of which column all markup elements must be,
    /// so it is zero except inside indent-aware constructs like lists.
    Markup { min_indent: usize },
    /// Consecutive text without markup.
    Text(EcoString),
    /// A unicode escape sequence, written as a backslash and the letter "u"
    /// followed by a hexadecimal unicode entity enclosed in curly braces:
    /// `\u{1F5FA}`.
    Escape(char),
    /// Strong content: `*Strong*`.
    Strong,
    /// Emphasized content: `_Emphasized_`.
    Emph,
    /// A hyperlink: `https://typst.org`.
    Link(EcoString),
    /// A raw block with optional syntax highlighting: `` `...` ``.
    Raw(Arc<RawNode>),
    /// A math formula: `$x$`, `$ x^2 $`.
    Math,
    /// An atom in a math formula: `x`, `+`, `12`.
    Atom(EcoString),
    /// A base with optional sub- and superscript in a math formula: `a_1^2`.
    Script,
    /// A fraction in a math formula: `x/2`.
    Frac,
    /// An alignment indicator in a math formula: `&`, `&&`.
    Align,
    /// A section heading: `= Introduction`.
    Heading,
    /// An item in an unordered list: `- ...`.
    ListItem,
    /// An item in an enumeration (ordered list): `+ ...` or `1. ...`.
    EnumItem,
    /// An explicit enumeration numbering: `23.`.
    EnumNumbering(usize),
    /// An item in a description list: `/ Term: Details`.
    DescItem,
    /// A label: `<label>`.
    Label(EcoString),
    /// A reference: `@label`.
    Ref(EcoString),
    /// An identifier: `center`.
    Ident(EcoString),
    /// A boolean: `true`, `false`.
    Bool(bool),
    /// An integer: `120`.
    Int(i64),
    /// A floating-point number: `1.2`, `10e-4`.
    Float(f64),
    /// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`.
    Numeric(f64, Unit),
    /// A quoted string: `"..."`.
    Str(EcoString),
    /// A code block: `{ let x = 1; x + 2 }`.
    CodeBlock,
    /// A content block: `[*Hi* there!]`.
    ContentBlock,
    /// A grouped expression: `(1 + 2)`.
    GroupExpr,
    /// An array expression: `(1, "hi", 12cm)`.
    ArrayExpr,
    /// A dictionary expression: `(thickness: 3pt, pattern: dashed)`.
    DictExpr,
    /// A named pair: `thickness: 3pt`.
    Named,
    /// A keyed pair: `"spacy key": true`.
    Keyed,
    /// A unary operation: `-x`.
    UnaryExpr,
    /// A binary operation: `a + b`.
    BinaryExpr,
    /// A field access: `properties.age`.
    FieldAccess,
    /// An invocation of a function: `f(x, y)`.
    FuncCall,
    /// An invocation of a method: `array.push(v)`.
    MethodCall,
    /// A function call's argument list: `(x, y)`.
    CallArgs,
    /// Spread arguments or an argument sink: `..x`.
    Spread,
    /// A closure expression: `(x, y) => z`.
    ClosureExpr,
    /// A closure's parameters: `(x, y)`.
    ClosureParams,
    /// A let expression: `let x = 1`.
    LetExpr,
    /// A set expression: `set text(...)`.
    SetExpr,
    /// A show expression: `show node: heading as [*{node.body}*]`.
    ShowExpr,
    /// A wrap expression: `wrap body in columns(2, body)`.
    WrapExpr,
    /// An if-else expression: `if x { y } else { z }`.
    IfExpr,
    /// A while loop expression: `while x { ... }`.
    WhileExpr,
    /// A for loop expression: `for x in y { ... }`.
    ForExpr,
    /// A for loop's destructuring pattern: `x` or `x, y`.
    ForPattern,
    /// An import expression: `import a, b, c from "utils.typ"`.
    ImportExpr,
    /// Items to import: `a, b, c`.
    ImportItems,
    /// An include expression: `include "chapter1.typ"`.
    IncludeExpr,
    /// A break expression: `break`.
    BreakExpr,
    /// A continue expression: `continue`.
    ContinueExpr,
    /// A return expression: `return x + 1`.
    ReturnExpr,
    /// An invalid sequence of characters.
    Error(SpanPos, EcoString),
}
impl NodeKind {
/// Whether this is a kind of parenthesis.
pub fn is_paren(&self) -> bool {
matches!(self, Self::LeftParen | Self::RightParen)
}
/// Whether this is a space.
pub fn is_space(&self) -> bool {
matches!(self, Self::Space { .. })
}
/// Whether this is trivia.
pub fn is_trivia(&self) -> bool {
self.is_space() || matches!(self, Self::LineComment | Self::BlockComment)
}
/// Whether this is a kind of error.
pub fn is_error(&self) -> bool {
matches!(self, NodeKind::Error(_, _))
}
/// A human-readable name for the kind.
pub fn name(&self) -> &'static str {
match self {
Self::LineComment => "line comment",
Self::BlockComment => "block comment",
Self::Space { .. } => "space",
Self::LeftBrace => "opening brace",
Self::RightBrace => "closing brace",
Self::LeftBracket => "opening bracket",
Self::RightBracket => "closing bracket",
Self::LeftParen => "opening paren",
Self::RightParen => "closing paren",
Self::Comma => "comma",
Self::Semicolon => "semicolon",
Self::Colon => "colon",
Self::Star => "star",
Self::Underscore => "underscore",
Self::Dollar => "dollar sign",
Self::Backslash => "linebreak",
Self::Tilde => "non-breaking space",
Self::HyphQuest => "soft hyphen",
Self::Hyph2 => "en dash",
Self::Hyph3 => "em dash",
Self::Dot3 => "ellipsis",
Self::Quote { double: false } => "single quote",
Self::Quote { double: true } => "double quote",
Self::Plus => "plus",
Self::Minus => "minus",
Self::Slash => "slash",
Self::Hat => "hat",
Self::Amp => "ampersand",
Self::Dot => "dot",
Self::Eq => "assignment operator",
Self::EqEq => "equality operator",
Self::ExclEq => "inequality operator",
Self::Lt => "less-than operator",
Self::LtEq => "less-than or equal operator",
Self::Gt => "greater-than operator",
Self::GtEq => "greater-than or equal operator",
Self::PlusEq => "add-assign operator",
Self::HyphEq => "subtract-assign operator",
Self::StarEq => "multiply-assign operator",
Self::SlashEq => "divide-assign operator",
Self::Dots => "dots",
Self::Arrow => "arrow",
Self::Not => "operator `not`",
Self::And => "operator `and`",
Self::Or => "operator `or`",
Self::None => "`none`",
Self::Auto => "`auto`",
Self::Let => "keyword `let`",
Self::Set => "keyword `set`",
Self::Show => "keyword `show`",
Self::Wrap => "keyword `wrap`",
Self::If => "keyword `if`",
Self::Else => "keyword `else`",
Self::For => "keyword `for`",
Self::In => "keyword `in`",
Self::While => "keyword `while`",
Self::Break => "keyword `break`",
Self::Continue => "keyword `continue`",
Self::Return => "keyword `return`",
Self::Import => "keyword `import`",
Self::Include => "keyword `include`",
Self::From => "keyword `from`",
Self::As => "keyword `as`",
Self::Markup { .. } => "markup",
Self::Text(_) => "text",
Self::Escape(_) => "escape sequence",
Self::Strong => "strong content",
Self::Emph => "emphasized content",
Self::Link(_) => "link",
Self::Raw(_) => "raw block",
Self::Math => "math formula",
Self::Atom(_) => "math atom",
Self::Script => "script",
Self::Frac => "fraction",
Self::Align => "alignment indicator",
Self::Heading => "heading",
Self::ListItem => "list item",
Self::EnumItem => "enumeration item",
Self::EnumNumbering(_) => "enumeration item numbering",
Self::DescItem => "description list item",
Self::Label(_) => "label",
Self::Ref(_) => "reference",
Self::Ident(_) => "identifier",
Self::Bool(_) => "boolean",
Self::Int(_) => "integer",
Self::Float(_) => "float",
Self::Numeric(_, _) => "numeric value",
Self::Str(_) => "string",
Self::CodeBlock => "code block",
Self::ContentBlock => "content block",
Self::GroupExpr => "group",
Self::ArrayExpr => "array",
Self::DictExpr => "dictionary",
Self::Named => "named pair",
Self::Keyed => "keyed pair",
Self::UnaryExpr => "unary expression",
Self::BinaryExpr => "binary expression",
Self::FieldAccess => "field access",
Self::FuncCall => "function call",
Self::MethodCall => "method call",
Self::CallArgs => "call arguments",
Self::Spread => "spread",
Self::ClosureExpr => "closure",
Self::ClosureParams => "closure parameters",
Self::LetExpr => "`let` expression",
Self::SetExpr => "`set` expression",
Self::ShowExpr => "`show` expression",
Self::WrapExpr => "`wrap` expression",
Self::IfExpr => "`if` expression",
Self::WhileExpr => "while-loop expression",
Self::ForExpr => "for-loop expression",
Self::ForPattern => "for-loop destructuring pattern",
Self::ImportExpr => "`import` expression",
Self::ImportItems => "import items",
Self::IncludeExpr => "`include` expression",
Self::BreakExpr => "`break` expression",
Self::ContinueExpr => "`continue` expression",
Self::ReturnExpr => "`return` expression",
Self::Error(_, _) => "syntax error",
}
}
}
impl Hash for NodeKind {
    /// Feeds this kind into `state`.
    ///
    /// The variant's discriminant is always hashed first. Variants that carry
    /// a payload additionally hash that payload; floating-point payloads are
    /// hashed through their raw bit patterns.
    fn hash<H: Hasher>(&self, state: &mut H) {
        std::mem::discriminant(self).hash(state);
        match self {
            // Trivia and markup tokens with a payload.
            Self::Space { newlines } => newlines.hash(state),
            Self::Quote { double } => double.hash(state),
            Self::Markup { min_indent } => min_indent.hash(state),
            Self::Text(text) => text.hash(state),
            Self::Escape(c) => c.hash(state),
            Self::Link(url) => url.hash(state),
            Self::Raw(raw) => raw.hash(state),
            Self::Atom(atom) => atom.hash(state),
            Self::EnumNumbering(number) => number.hash(state),
            Self::Label(label) => label.hash(state),
            Self::Ref(target) => target.hash(state),

            // Literals and identifiers.
            Self::Ident(ident) => ident.hash(state),
            Self::Bool(value) => value.hash(state),
            Self::Int(value) => value.hash(state),
            Self::Float(value) => value.to_bits().hash(state),
            Self::Numeric(value, unit) => {
                value.to_bits().hash(state);
                unit.hash(state);
            }
            Self::Str(string) => string.hash(state),

            // Errors.
            Self::Error(pos, message) => {
                pos.hash(state);
                message.hash(state);
            }

            // Everything else carries no data: the discriminant is enough.
            Self::LineComment
            | Self::BlockComment
            | Self::LeftBrace
            | Self::RightBrace
            | Self::LeftBracket
            | Self::RightBracket
            | Self::LeftParen
            | Self::RightParen
            | Self::Comma
            | Self::Semicolon
            | Self::Colon
            | Self::Star
            | Self::Underscore
            | Self::Dollar
            | Self::Backslash
            | Self::Tilde
            | Self::HyphQuest
            | Self::Hyph2
            | Self::Hyph3
            | Self::Dot3
            | Self::Plus
            | Self::Minus
            | Self::Slash
            | Self::Hat
            | Self::Amp
            | Self::Dot
            | Self::Eq
            | Self::EqEq
            | Self::ExclEq
            | Self::Lt
            | Self::LtEq
            | Self::Gt
            | Self::GtEq
            | Self::PlusEq
            | Self::HyphEq
            | Self::StarEq
            | Self::SlashEq
            | Self::Dots
            | Self::Arrow
            | Self::Not
            | Self::And
            | Self::Or
            | Self::None
            | Self::Auto
            | Self::Let
            | Self::Set
            | Self::Show
            | Self::Wrap
            | Self::If
            | Self::Else
            | Self::For
            | Self::In
            | Self::While
            | Self::Break
            | Self::Continue
            | Self::Return
            | Self::Import
            | Self::Include
            | Self::From
            | Self::As
            | Self::Strong
            | Self::Emph
            | Self::Math
            | Self::Script
            | Self::Frac
            | Self::Align
            | Self::Heading
            | Self::ListItem
            | Self::EnumItem
            | Self::DescItem
            | Self::CodeBlock
            | Self::ContentBlock
            | Self::GroupExpr
            | Self::ArrayExpr
            | Self::DictExpr
            | Self::Named
            | Self::Keyed
            | Self::UnaryExpr
            | Self::BinaryExpr
            | Self::FieldAccess
            | Self::FuncCall
            | Self::MethodCall
            | Self::CallArgs
            | Self::Spread
            | Self::ClosureExpr
            | Self::ClosureParams
            | Self::LetExpr
            | Self::SetExpr
            | Self::ShowExpr
            | Self::WrapExpr
            | Self::IfExpr
            | Self::WhileExpr
            | Self::ForExpr
            | Self::ForPattern
            | Self::ImportExpr
            | Self::ImportItems
            | Self::IncludeExpr
            | Self::BreakExpr
            | Self::ContinueExpr
            | Self::ReturnExpr => {}
        }
    }
}

View File

@ -1,21 +1,20 @@
//! Syntax types. //! Syntax types.
pub mod ast; pub mod ast;
mod highlight; pub mod highlight;
mod kind;
mod span; mod span;
use std::fmt::{self, Debug, Display, Formatter}; use std::fmt::{self, Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::Range; use std::ops::Range;
use std::sync::Arc; use std::sync::Arc;
pub use highlight::*; pub use kind::*;
pub use span::*; pub use span::*;
use self::ast::{RawNode, TypedNode, Unit}; use self::ast::TypedNode;
use crate::diag::SourceError; use crate::diag::SourceError;
use crate::source::SourceId; use crate::source::SourceId;
use crate::util::EcoString;
/// An inner or leaf node in the untyped syntax tree. /// An inner or leaf node in the untyped syntax tree.
#[derive(Clone, PartialEq, Hash)] #[derive(Clone, PartialEq, Hash)]
@ -73,8 +72,8 @@ impl SyntaxNode {
} }
match self.kind() { match self.kind() {
&NodeKind::Error(pos, ref message) => { NodeKind::Error(pos, message) => {
vec![SourceError::new(self.span().with_pos(pos), message)] vec![SourceError::new(self.span().with_pos(*pos), message)]
} }
_ => self _ => self
.children() .children()
@ -564,602 +563,3 @@ impl PartialEq for NodeData {
self.kind == other.kind && self.len == other.len self.kind == other.kind && self.len == other.len
} }
} }
/// All syntactical building blocks that can be part of a Typst document.
///
/// Can be emitted as a token by the tokenizer or as part of a syntax node by
/// the parser.
#[derive(Debug, Clone, PartialEq)]
pub enum NodeKind {
/// A line comment, two slashes followed by inner contents, terminated with
/// a newline: `//<str>\n`.
LineComment,
/// A block comment, a slash and a star followed by inner contents,
/// terminated with a star and a slash: `/*<str>*/`.
///
/// The comment can contain nested block comments.
BlockComment,
/// One or more whitespace characters. Single spaces are collapsed into text
/// nodes if they would otherwise be surrounded by text nodes.
///
/// Also stores how many newlines are contained.
Space { newlines: usize },
/// A left curly brace, starting a code block: `{`.
LeftBrace,
/// A right curly brace, terminating a code block: `}`.
RightBrace,
/// A left square bracket, starting a content block: `[`.
LeftBracket,
/// A right square bracket, terminating a content block: `]`.
RightBracket,
/// A left round parenthesis, starting a grouped expression, collection,
/// argument or parameter list: `(`.
LeftParen,
/// A right round parenthesis, terminating a grouped expression, collection,
/// argument or parameter list: `)`.
RightParen,
/// A comma separator in a sequence: `,`.
Comma,
/// A semicolon terminating an expression: `;`.
Semicolon,
/// A colon between name / key and value in a dictionary, argument or
/// parameter list, or between the term and body of a description list
/// term: `:`.
Colon,
/// The strong text toggle, multiplication operator, and wildcard import
/// symbol: `*`.
Star,
/// Toggles emphasized text and indicates a subscript in a formula: `_`.
Underscore,
/// Starts and ends a math formula: `$`.
Dollar,
/// The non-breaking space: `~`.
Tilde,
/// The soft hyphen: `-?`.
HyphQuest,
/// The en-dash: `--`.
Hyph2,
/// The em-dash: `---`.
Hyph3,
/// The ellipsis: `...`.
Dot3,
/// A smart quote: `'` or `"`.
Quote { double: bool },
/// The unary plus and addition operator, and start of enum items: `+`.
Plus,
/// The unary negation and subtraction operator, and start of list
/// items: `-`.
Minus,
/// The division operator, start of description list items, and fraction
/// operator in a formula: `/`.
Slash,
/// The superscript operator: `^`.
Hat,
/// The math alignment operator: `&`.
Amp,
/// The field access and method call operator: `.`.
Dot,
/// The assignment operator: `=`.
Eq,
/// The equality operator: `==`.
EqEq,
/// The inequality operator: `!=`.
ExclEq,
/// The less-than operator: `<`.
Lt,
/// The less-than or equal operator: `<=`.
LtEq,
/// The greater-than operator: `>`.
Gt,
/// The greater-than or equal operator: `>=`.
GtEq,
/// The add-assign operator: `+=`.
PlusEq,
/// The subtract-assign operator: `-=`.
HyphEq,
/// The multiply-assign operator: `*=`.
StarEq,
/// The divide-assign operator: `/=`.
SlashEq,
/// The spread operator: `..`.
Dots,
/// An arrow between a closure's parameters and body: `=>`.
Arrow,
/// The `not` operator.
Not,
/// The `and` operator.
And,
/// The `or` operator.
Or,
/// The `none` literal.
None,
/// The `auto` literal.
Auto,
/// The `let` keyword.
Let,
/// The `set` keyword.
Set,
/// The `show` keyword.
Show,
/// The `wrap` keyword.
Wrap,
/// The `if` keyword.
If,
/// The `else` keyword.
Else,
/// The `for` keyword.
For,
/// The `in` keyword.
In,
/// The `while` keyword.
While,
/// The `break` keyword.
Break,
/// The `continue` keyword.
Continue,
/// The `return` keyword.
Return,
/// The `import` keyword.
Import,
/// The `include` keyword.
Include,
/// The `from` keyword.
From,
/// The `as` keyword.
As,
/// Markup of which all lines must have a minimal indentation.
///
/// Notably, the number does not determine in which column the markup
/// started, but to the right of which column all markup elements must be,
/// so it is zero except for headings and lists.
Markup { min_indent: usize },
/// A forced line break in markup or math.
Linebreak,
/// Consecutive text without markup. While basic text with just single
/// spaces is collapsed into a single node, certain symbols that could
/// possibly be markup force text into multiple nodes.
Text(EcoString),
/// A backslash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
Escape(char),
/// Strong content: `*Strong*`.
Strong,
/// Emphasized content: `_Emphasized_`.
Emph,
/// A hyperlink.
Link(EcoString),
/// A raw block with optional syntax highlighting: `` `...` ``.
Raw(Arc<RawNode>),
/// A math formula: `$x$`, `$ x^2 $`.
Math,
/// A section heading: `= Introduction`.
Heading,
/// An item in an unordered list: `- ...`.
List,
/// An item in an enumeration (ordered list): `+ ...` or `1. ...`.
Enum,
/// An explicit enumeration numbering: `23.`.
EnumNumbering(usize),
/// An item in a description list: `/ Term: Details`.
Desc,
/// A label: `<label>`.
Label(EcoString),
/// A reference: `@label`.
Ref(EcoString),
/// An atom in a math formula: `x`, `+`, `12`.
Atom(EcoString),
/// A base with an optional sub- and superscript in a formula: `a_1^2`.
Script,
/// A fraction: `x/2`.
Frac,
/// A math alignment indicator: `&`, `&&`.
Align,
/// An identifier: `center`.
Ident(EcoString),
/// A boolean: `true`, `false`.
Bool(bool),
/// An integer: `120`.
Int(i64),
/// A floating-point number: `1.2`, `10e-4`.
Float(f64),
/// A numeric value with a unit: `12pt`, `3cm`, `2em`, `90deg`, `50%`.
Numeric(f64, Unit),
/// A quoted string: `"..."`.
Str(EcoString),
/// A code block: `{ let x = 1; x + 2 }`.
CodeBlock,
/// A content block: `[*Hi* there!]`.
ContentBlock,
/// A grouped expression: `(1 + 2)`.
GroupExpr,
/// An array expression: `(1, "hi", 12cm)`.
ArrayExpr,
/// A dictionary expression: `(thickness: 3pt, pattern: dashed)`.
DictExpr,
/// A named pair: `thickness: 3pt`.
Named,
/// A keyed pair: `"spacy key": true`.
Keyed,
/// A unary operation: `-x`.
UnaryExpr,
/// A binary operation: `a + b`.
BinaryExpr,
/// A field access: `properties.age`.
FieldAccess,
/// An invocation of a function: `f(x, y)`.
FuncCall,
/// An invocation of a method: `array.push(v)`.
MethodCall,
/// A function call's argument list: `(x, y)`.
CallArgs,
/// Spread arguments or an argument sink: `..x`.
Spread,
/// A closure expression: `(x, y) => z`.
ClosureExpr,
/// A closure's parameters: `(x, y)`.
ClosureParams,
/// A let expression: `let x = 1`.
LetExpr,
/// A set expression: `set text(...)`.
SetExpr,
/// A show expression: `show node: heading as [*{node.body}*]`.
ShowExpr,
/// A wrap expression: `wrap body in columns(2, body)`.
WrapExpr,
/// An if-else expression: `if x { y } else { z }`.
IfExpr,
/// A while loop expression: `while x { ... }`.
WhileExpr,
/// A for loop expression: `for x in y { ... }`.
ForExpr,
/// A for loop's destructuring pattern: `x` or `x, y`.
ForPattern,
/// An import expression: `import a, b, c from "utils.typ"`.
ImportExpr,
/// Items to import: `a, b, c`.
ImportItems,
/// An include expression: `include "chapter1.typ"`.
IncludeExpr,
/// A break expression: `break`.
BreakExpr,
/// A continue expression: `continue`.
ContinueExpr,
/// A return expression: `return x + 1`.
ReturnExpr,
/// Tokens that appear in the wrong place.
Error(SpanPos, EcoString),
/// Unknown character sequences.
Unknown(EcoString),
}
impl NodeKind {
/// Whether this is a kind of parenthesis.
pub fn is_paren(&self) -> bool {
matches!(self, Self::LeftParen | Self::RightParen)
}
/// Whether this is a space.
pub fn is_space(&self) -> bool {
matches!(self, Self::Space { .. })
}
/// Whether this is trivia.
pub fn is_trivia(&self) -> bool {
self.is_space() || matches!(self, Self::LineComment | Self::BlockComment)
}
/// Whether this is a kind of error.
pub fn is_error(&self) -> bool {
matches!(self, NodeKind::Error(_, _) | NodeKind::Unknown(_))
}
/// Whether `at_start` would still be true after this node given the
/// previous value of the property.
pub fn is_at_start(&self, prev: bool) -> bool {
match self {
Self::Space { newlines: (1 ..) } => true,
Self::Space { .. } | Self::LineComment | Self::BlockComment => prev,
_ => false,
}
}
/// Whether changes _inside_ this node are safely encapsulated, so that only
/// this node must be reparsed.
pub fn is_bounded(&self) -> bool {
match self {
Self::CodeBlock
| Self::ContentBlock
| Self::Linebreak { .. }
| Self::Tilde
| Self::HyphQuest
| Self::Hyph2
| Self::Hyph3
| Self::Dot3
| Self::Quote { .. }
| Self::BlockComment
| Self::Space { .. }
| Self::Escape(_) => true,
_ => false,
}
}
/// A human-readable name for the kind.
pub fn as_str(&self) -> &'static str {
match self {
Self::LineComment => "line comment",
Self::BlockComment => "block comment",
Self::Space { .. } => "space",
Self::LeftBrace => "opening brace",
Self::RightBrace => "closing brace",
Self::LeftBracket => "opening bracket",
Self::RightBracket => "closing bracket",
Self::LeftParen => "opening paren",
Self::RightParen => "closing paren",
Self::Comma => "comma",
Self::Semicolon => "semicolon",
Self::Colon => "colon",
Self::Star => "star",
Self::Underscore => "underscore",
Self::Dollar => "dollar sign",
Self::Tilde => "non-breaking space",
Self::HyphQuest => "soft hyphen",
Self::Hyph2 => "en dash",
Self::Hyph3 => "em dash",
Self::Dot3 => "ellipsis",
Self::Quote { double: false } => "single quote",
Self::Quote { double: true } => "double quote",
Self::Plus => "plus",
Self::Minus => "minus",
Self::Slash => "slash",
Self::Hat => "hat",
Self::Amp => "ampersand",
Self::Dot => "dot",
Self::Eq => "assignment operator",
Self::EqEq => "equality operator",
Self::ExclEq => "inequality operator",
Self::Lt => "less-than operator",
Self::LtEq => "less-than or equal operator",
Self::Gt => "greater-than operator",
Self::GtEq => "greater-than or equal operator",
Self::PlusEq => "add-assign operator",
Self::HyphEq => "subtract-assign operator",
Self::StarEq => "multiply-assign operator",
Self::SlashEq => "divide-assign operator",
Self::Dots => "dots",
Self::Arrow => "arrow",
Self::Not => "operator `not`",
Self::And => "operator `and`",
Self::Or => "operator `or`",
Self::None => "`none`",
Self::Auto => "`auto`",
Self::Let => "keyword `let`",
Self::Set => "keyword `set`",
Self::Show => "keyword `show`",
Self::Wrap => "keyword `wrap`",
Self::If => "keyword `if`",
Self::Else => "keyword `else`",
Self::For => "keyword `for`",
Self::In => "keyword `in`",
Self::While => "keyword `while`",
Self::Break => "keyword `break`",
Self::Continue => "keyword `continue`",
Self::Return => "keyword `return`",
Self::Import => "keyword `import`",
Self::Include => "keyword `include`",
Self::From => "keyword `from`",
Self::As => "keyword `as`",
Self::Markup { .. } => "markup",
Self::Linebreak => "linebreak",
Self::Text(_) => "text",
Self::Escape(_) => "escape sequence",
Self::Strong => "strong content",
Self::Emph => "emphasized content",
Self::Link(_) => "link",
Self::Raw(_) => "raw block",
Self::Math => "math formula",
Self::Heading => "heading",
Self::List => "list item",
Self::Enum => "enumeration item",
Self::EnumNumbering(_) => "enumeration item numbering",
Self::Desc => "description list item",
Self::Label(_) => "label",
Self::Ref(_) => "reference",
Self::Atom(_) => "math atom",
Self::Script => "script",
Self::Frac => "fraction",
Self::Align => "alignment indicator",
Self::Ident(_) => "identifier",
Self::Bool(_) => "boolean",
Self::Int(_) => "integer",
Self::Float(_) => "float",
Self::Numeric(_, _) => "numeric value",
Self::Str(_) => "string",
Self::CodeBlock => "code block",
Self::ContentBlock => "content block",
Self::GroupExpr => "group",
Self::ArrayExpr => "array",
Self::DictExpr => "dictionary",
Self::Named => "named pair",
Self::Keyed => "keyed pair",
Self::UnaryExpr => "unary expression",
Self::BinaryExpr => "binary expression",
Self::FieldAccess => "field access",
Self::FuncCall => "function call",
Self::MethodCall => "method call",
Self::CallArgs => "call arguments",
Self::Spread => "spread",
Self::ClosureExpr => "closure",
Self::ClosureParams => "closure parameters",
Self::LetExpr => "`let` expression",
Self::SetExpr => "`set` expression",
Self::ShowExpr => "`show` expression",
Self::WrapExpr => "`wrap` expression",
Self::IfExpr => "`if` expression",
Self::WhileExpr => "while-loop expression",
Self::ForExpr => "for-loop expression",
Self::ForPattern => "for-loop destructuring pattern",
Self::ImportExpr => "`import` expression",
Self::ImportItems => "import items",
Self::IncludeExpr => "`include` expression",
Self::BreakExpr => "`break` expression",
Self::ContinueExpr => "`continue` expression",
Self::ReturnExpr => "`return` expression",
Self::Error(_, _) => "parse error",
Self::Unknown(text) => match text.as_str() {
"*/" => "end of block comment",
_ => "invalid token",
},
}
}
}
impl Display for NodeKind {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.pad(self.as_str())
}
}
impl Hash for NodeKind {
    /// Feeds this kind into `state`.
    ///
    /// The variant's discriminant is always hashed first. Variants that carry
    /// a payload additionally hash that payload; floating-point payloads are
    /// hashed through their raw bit patterns.
    fn hash<H: Hasher>(&self, state: &mut H) {
        std::mem::discriminant(self).hash(state);
        match self {
            // Trivia and markup tokens with a payload.
            Self::Space { newlines } => newlines.hash(state),
            Self::Quote { double } => double.hash(state),
            Self::Markup { min_indent } => min_indent.hash(state),
            Self::Text(text) => text.hash(state),
            Self::Escape(c) => c.hash(state),
            Self::Link(url) => url.hash(state),
            Self::Raw(raw) => raw.hash(state),
            Self::EnumNumbering(number) => number.hash(state),
            Self::Label(label) => label.hash(state),
            Self::Ref(target) => target.hash(state),
            Self::Atom(atom) => atom.hash(state),

            // Literals and identifiers.
            Self::Ident(ident) => ident.hash(state),
            Self::Bool(value) => value.hash(state),
            Self::Int(value) => value.hash(state),
            Self::Float(value) => value.to_bits().hash(state),
            Self::Numeric(value, unit) => {
                value.to_bits().hash(state);
                unit.hash(state);
            }
            Self::Str(string) => string.hash(state),

            // Errors.
            Self::Error(pos, message) => {
                pos.hash(state);
                message.hash(state);
            }
            Self::Unknown(text) => text.hash(state),

            // Everything else carries no data: the discriminant is enough.
            Self::LineComment
            | Self::BlockComment
            | Self::LeftBrace
            | Self::RightBrace
            | Self::LeftBracket
            | Self::RightBracket
            | Self::LeftParen
            | Self::RightParen
            | Self::Comma
            | Self::Semicolon
            | Self::Colon
            | Self::Star
            | Self::Underscore
            | Self::Dollar
            | Self::Tilde
            | Self::HyphQuest
            | Self::Hyph2
            | Self::Hyph3
            | Self::Dot3
            | Self::Plus
            | Self::Minus
            | Self::Slash
            | Self::Hat
            | Self::Amp
            | Self::Dot
            | Self::Eq
            | Self::EqEq
            | Self::ExclEq
            | Self::Lt
            | Self::LtEq
            | Self::Gt
            | Self::GtEq
            | Self::PlusEq
            | Self::HyphEq
            | Self::StarEq
            | Self::SlashEq
            | Self::Dots
            | Self::Arrow
            | Self::Not
            | Self::And
            | Self::Or
            | Self::None
            | Self::Auto
            | Self::Let
            | Self::Set
            | Self::Show
            | Self::Wrap
            | Self::If
            | Self::Else
            | Self::For
            | Self::In
            | Self::While
            | Self::Break
            | Self::Continue
            | Self::Return
            | Self::Import
            | Self::Include
            | Self::From
            | Self::As
            | Self::Linebreak
            | Self::Strong
            | Self::Emph
            | Self::Math
            | Self::Heading
            | Self::List
            | Self::Enum
            | Self::Desc
            | Self::Script
            | Self::Frac
            | Self::Align
            | Self::CodeBlock
            | Self::ContentBlock
            | Self::GroupExpr
            | Self::ArrayExpr
            | Self::DictExpr
            | Self::Named
            | Self::Keyed
            | Self::UnaryExpr
            | Self::BinaryExpr
            | Self::FieldAccess
            | Self::FuncCall
            | Self::MethodCall
            | Self::CallArgs
            | Self::Spread
            | Self::ClosureExpr
            | Self::ClosureParams
            | Self::LetExpr
            | Self::SetExpr
            | Self::ShowExpr
            | Self::WrapExpr
            | Self::IfExpr
            | Self::WhileExpr
            | Self::ForExpr
            | Self::ForPattern
            | Self::ImportExpr
            | Self::ImportItems
            | Self::IncludeExpr
            | Self::BreakExpr
            | Self::ContinueExpr
            | Self::ReturnExpr => {}
        }
    }
}

View File

@ -76,10 +76,10 @@
{)} {)}
// Error: 4 expected comma // Error: 4 expected comma
// Error: 4-6 expected expression, found end of block comment // Error: 4-6 unexpected end of block comment
{(1*/2)} {(1*/2)}
// Error: 6-8 expected expression, found invalid token // Error: 6-8 invalid number suffix
{(1, 1u 2)} {(1, 1u 2)}
// Error: 3-4 expected expression, found comma // Error: 3-4 expected expression, found comma

View File

@ -112,7 +112,7 @@
--- ---
// Multiple unseparated expressions in one line. // Multiple unseparated expressions in one line.
// Error: 2-4 expected expression, found invalid token // Error: 2-4 invalid number suffix
{1u} {1u}
// Should output `1`. // Should output `1`.

View File

@ -73,7 +73,7 @@
// Error: 7-8 expected expression, found colon // Error: 7-8 expected expression, found colon
#func(:) #func(:)
// Error: 10-12 expected expression, found end of block comment // Error: 10-12 unexpected end of block comment
#func(a:1*/) #func(a:1*/)
// Error: 8 expected comma // Error: 8 expected comma

View File

@ -5,7 +5,7 @@
https://example.com/ https://example.com/
// Link with body. // Link with body.
#link("https://typst.app/")[Some text text text] #link("https://typst.org/")[Some text text text]
// With line break. // With line break.
This link appears #link("https://google.com/")[in the middle of] a paragraph. This link appears #link("https://google.com/")[in the middle of] a paragraph.
@ -31,7 +31,7 @@ You could also make the
// Transformed link. // Transformed link.
#set page(height: 60pt) #set page(height: 60pt)
#set link(underline: false) #set link(underline: false)
#let mylink = link("https://typst.app/")[LINK] #let mylink = link("https://typst.org/")[LINK]
My cool #move(dx: 0.7cm, dy: 0.7cm, rotate(10deg, scale(200%, mylink))) My cool #move(dx: 0.7cm, dy: 0.7cm, rotate(10deg, scale(200%, mylink)))
--- ---